/*
* timer.c - Unified Timer Manager for vmx SMP (FIXED VERSION)
*
* FIXES APPLIED:
 * 1. Backstop changed from 1ms to 10ms (10x fewer timer-driven VM exits)
 * 2. Proper locking in timer_rearm()
* 3. Removed duplicate function definitions
* 4. Added timer state debugging
*
* ARCHITECTURE:
* - Single timer coordinator that tracks all deadline sources
* - Only programs the nearest deadline to VMX preemption timer
* - All timer wakeups come through VMX preemption timer exit
*
* Timer sources:
* TIMER_LAPIC - Local APIC timer (per-CPU, counter mode)
* TIMER_DEADLINE - TSC deadline mode (per-CPU)
*
* Global timers (CPU0 only, computed from shared state):
* PIT, RTC, HPET0, HPET1, HPET2
*/
#include <u.h>
#include <libc.h>
#include <thread.h>
#include <tos.h>
#include "dat.h"
#include "fns.h"
#ifndef VLONG_MAX
#define VLONG_MAX 0x7FFFFFFFFFFFFFFFLL
#endif
#ifndef TIMER_MODE
/* LVT timer bits 17:18 select the timer mode; bit 16 is the mask bit */
#define TIMER_MODE(x) (((x) >> 17) & 3)
#define MODE_ONESHOT 0
#define MODE_PERIODIC 1
#define MODE_DEADLINE 2
#endif
/* TSC offset from pvclock.c */
extern vlong cached_tscoff;
/*
* FIX #1: Backstop interval - 10ms, not 1ms!
* 10ms is responsive enough for most workloads while reducing
* unnecessary VM exits by 10x compared to 1ms.
*/
#define BACKSTOP_NS_FIXED 10000000LL /* 10ms */
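/* i.e. at most ~100 backstop-driven exits/s on an idle guest, vs ~1000 at 1ms */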
/*
* Per-CPU timer state.
* Only two timer sources are truly per-CPU: LAPIC and TSC deadline.
* Global device timers (PIT/RTC/HPET) are handled separately by CPU0.
*/
enum {
LTIMER_LAPIC = 0,
LTIMER_DEADLINE,
LTIMER_MAX
};
typedef struct LocalTimerQueue LocalTimerQueue;
struct LocalTimerQueue {
Lock lock;
vlong deadline[LTIMER_MAX];
vlong armed;
};
static LocalTimerQueue ltq;
static int timer_initialized;
/* External dependencies */
extern u32int preempt_shift;
extern int debug;
extern int state;
extern u32int curcpuid;
extern int ctl(char *fmt, ...);
/* TSC conversion */
static uvlong tsc_freq;
static uvlong tsc_to_ns_mult;
static int tsc_to_ns_shift = 32;
/* Shared device state - defined elsewhere */
extern Hpet *hpet;
extern PitShared *pitshared;
extern RtcShared *rtcshared;
/* ============================================================
* HPET time conversion
* ============================================================ */
#define HPET_TO_NS_MULT 1171873536ULL
#define HPET_TO_NS_SHIFT 24
#define NS_TO_HPET_MULT 61496551ULL
#define NS_TO_HPET_SHIFT 32
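/*
 * Fixed-point mult/shift pairs for the usual ~14.318MHz HPET clock:
 * 1171873536/2^24 ≈ 69.85ns per HPET tick, and the inverse
 * 61496551/2^32 ≈ 0.014318 ticks per ns.
 */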
static uvlong
hpet_to_ns(u64int hpet_ticks)
{
return (hpet_ticks * HPET_TO_NS_MULT) >> HPET_TO_NS_SHIFT;
}
static u64int
ns_to_hpet(uvlong ns)
{
return (ns * NS_TO_HPET_MULT) >> NS_TO_HPET_SHIFT;
}
/* ============================================================
* Global Timer Deadline Computation (CPU0 only)
* ============================================================ */
/*
* Compute next HPET deadline from shared state.
* Returns VLONG_MAX if no HPET timer is pending.
*
* FIX #2: Re-enable locking for thread safety
*/
static vlong
hpet_next_deadline(void)
{
vlong nearest = VLONG_MAX;
u64int counter;
uvlong now_ns, elapsed_ns;
int i;
if(hpet == nil || !hpet->enabled)
return VLONG_MAX;
lock(&hpet->lock);
/* Compute current counter value */
now_ns = nanosec();
elapsed_ns = now_ns - hpet->time_at_enable;
counter = hpet->counter_offset + ns_to_hpet(elapsed_ns);
for(i = 0; i < 3; i++){
HpetTimer *t = &hpet->timer[i];
if(!t->active)
continue;
if(!(t->cfg & (1 << 2))) /* TN_INT_ENB */
continue;
if(t->cmp <= counter){
/* Already expired - fire ASAP */
nearest = now_ns;
break;
} else {
uvlong delta_ticks = t->cmp - counter;
vlong deadline = now_ns + hpet_to_ns(delta_ticks);
if(deadline < nearest)
nearest = deadline;
}
}
unlock(&hpet->lock);
return nearest;
}
/*
* Compute next PIT deadline from shared state.
* Returns VLONG_MAX if PIT is not active.
*/
#define PIT_PERIOD_NS 838 /* ~1.193182 MHz */
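/* e.g. the classic 100Hz divisor, count=11932, gives 11932*838ns ≈ 10ms */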
static vlong
pit_next_deadline(void)
{
vlong nearest = VLONG_MAX;
PITChannel *p;
if(pitshared == nil)
return VLONG_MAX;
lock(&pitshared->lock);
/* Only channel 0 generates IRQ 0 */
p = &pitshared->ch[0];
if(p->state != 0 && p->count > 0){
vlong deadline = p->lastnsec + (vlong)p->count * PIT_PERIOD_NS;
if(deadline < nearest)
nearest = deadline;
}
unlock(&pitshared->lock);
return nearest;
}
/*
* Compute next RTC deadline from shared state.
* Returns VLONG_MAX if no RTC periodic interrupt is pending.
*/
static vlong
rtc_next_deadline(void)
{
vlong nearest = VLONG_MAX;
if(rtcshared == nil)
return VLONG_MAX;
lock(&rtcshared->lock);
if(rtcshared->rtcnext != -1)
nearest = rtcshared->rtcnext;
unlock(&rtcshared->lock);
return nearest;
}
/*
* Compute the nearest global timer deadline.
* Only called by CPU0.
*/
static vlong
global_timer_deadline(void)
{
vlong nearest = VLONG_MAX;
vlong d;
d = hpet_next_deadline();
if(d < nearest) nearest = d;
d = pit_next_deadline();
if(d < nearest) nearest = d;
d = rtc_next_deadline();
if(d < nearest) nearest = d;
return nearest;
}
/* ============================================================
* Core Timer Infrastructure
* ============================================================ */
/*
 * Arm the VMX preemption timer for the given deadline.
 * The preemption timer counts down at the TSC rate divided by
 * 2^preempt_shift (the shift comes from IA32_VMX_MISC bits 4:0),
 * so convert the nanosecond delta to TSC ticks, then shift.
 */
static void
preempt_arm(vlong deadline_ns)
{
vlong now, delta_ns;
uvlong delta_tsc, ticks;
if(tsc_freq == 0)
return;
now = nanosec();
if(deadline_ns <= now){
/* Already expired - fire immediately (minimum 1 tick) */
ctl("preempt 1");
return;
}
delta_ns = deadline_ns - now;
/* Convert ns to TSC ticks, then to preemption timer ticks */
delta_tsc = (delta_ns * tsc_freq) / 1000000000ULL;
ticks = delta_tsc >> preempt_shift;
/* Clamp to 32-bit, minimum 1 */
if(ticks > 0xFFFFFFFFULL)
ticks = 0xFFFFFFFF;
if(ticks == 0)
ticks = 1;
ctl("preempt %ud", (u32int)ticks);
}
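/*
 * Worked example: a 1ms delta on a 2.9GHz TSC is 2900000 TSC ticks;
 * with preempt_shift=5 (a common IA32_VMX_MISC value) that programs
 * 2900000>>5 = 90625 preemption-timer ticks.
 */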
/*
* Recalculate and arm the nearest deadline.
*
* FIX #3: This function now properly manages locking.
* Caller should NOT hold ltq.lock - we acquire it here.
*/
static void
timer_rearm(void)
{
vlong nearest;
int i;
	lock(&ltq.lock);
nearest = nanosec() + BACKSTOP_NS_FIXED;
/* Global timers - ONLY CPU0 should check these!
* Non-CPU0 cannot process them anyway, so checking just
* creates race conditions where they see expired timers
* before CPU0 has advanced them. */
if(curcpuid == 0){
vlong global = global_timer_deadline();
if(global < nearest)
nearest = global;
}
/* Per-CPU deadlines */
for(i = 0; i < LTIMER_MAX; i++){
if(ltq.deadline[i] > 0 && ltq.deadline[i] < nearest)
nearest = ltq.deadline[i];
}
ltq.armed = nearest;
	unlock(&ltq.lock);
preempt_arm(nearest);
}
/*
* Initialize timer subsystem - called once per CPU
*/
void
timer_init(void)
{
if(timer_initialized)
return;
	memset(&ltq, 0, sizeof(ltq));
ltq.armed = -1;
	tsc_freq = _tos->cyclefreq;
	if(tsc_freq == 0)
		tsc_freq = 2900000000ULL;	/* fallback guess when cyclefreq is unset */
	/* fixed point: ns = (ticks * tsc_to_ns_mult) >> tsc_to_ns_shift */
	tsc_to_ns_mult = (1000000000ULL << 32) / tsc_freq;
timer_initialized = 1;
dprint("CPU%d: timer_init tsc_freq=%llud preempt_shift=%d backstop=%lldms\n", curcpuid, tsc_freq, preempt_shift, BACKSTOP_NS_FIXED / 1000000);
}
/*
* Set a per-CPU timer deadline (LAPIC or TSC deadline only).
*/
void
timer_set(int source, vlong deadline_ns)
{
int local_source;
/* Map to local timer index */
switch(source){
case TIMER_LAPIC:
local_source = LTIMER_LAPIC;
break;
case TIMER_DEADLINE:
local_source = LTIMER_DEADLINE;
break;
default:
/* Global timer - just trigger rearm */
timer_rearm();
return;
}
	lock(&ltq.lock);
	ltq.deadline[local_source] = deadline_ns;
	unlock(&ltq.lock);
timer_rearm();
}
/*
* Cancel a timer
*/
void
timer_cancel(int source)
{
timer_set(source, 0);
}
/*
* Get nearest deadline (for HLT handler)
*/
vlong
timer_nearest(void)
{
vlong nearest;
int i;
	lock(&ltq.lock);
nearest = VLONG_MAX;
for(i = 0; i < LTIMER_MAX; i++){
if(ltq.deadline[i] > 0 && ltq.deadline[i] < nearest)
nearest = ltq.deadline[i];
}
	unlock(&ltq.lock);
/* Only CPU0 should check global timers */
if(curcpuid == 0){
vlong global = global_timer_deadline();
if(global < nearest)
nearest = global;
}
return nearest;
}
/*
* Force rearm - call after modifying shared timer state
*/
void
timer_kick(void)
{
timer_rearm();
}
/* ============================================================
* LAPIC Timer
* ============================================================ */
#define LAPIC_BUS_FREQ_HZ 100000000LL
#define NS_PER_LAPIC_TICK (1000000000LL / LAPIC_BUS_FREQ_HZ)
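/* the virtual APIC timer bus clock is modeled at a flat 100MHz, i.e. 10ns/tick */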
extern LApic lapic;
extern void ipi_queue(int, int);
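/*
 * Decode the Divide Configuration Register: the divisor index is
 * bits 0:1 plus bit 3 (bit 2 is reserved), per the Intel SDM.
 * E.g. 0xB (1011b) selects divtab[7], divide-by-1.
 */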
u32int
lapic_divide_value(u32int divide_reg)
{
static u32int divtab[] = {2, 4, 8, 16, 32, 64, 128, 1};
u32int idx = (divide_reg & 3) | ((divide_reg >> 1) & 4);
return divtab[idx];
}
/*
* Start the LAPIC timer
*/
void
lapic_timer_start(void)
{
LApic *la = &lapic;
u32int divide, initial;
vlong period_ns;
vlong now;
if(la->timer_initial == 0){
la->timer_active = 0;
la->timer_deadline = -1;
timer_cancel(TIMER_LAPIC);
return;
}
divide = lapic_divide_value(la->timer_divide);
initial = la->timer_initial;
period_ns = (vlong)initial * (vlong)divide * NS_PER_LAPIC_TICK;
now = nanosec();
la->timer_deadline = now + period_ns;
la->timer_active = 1;
la->timer_current = initial;
dprint("LAPIC%d: timer started, initial=%ud divide=%ud period=%lldns deadline=%lld\n", curcpuid, initial, divide, period_ns, la->timer_deadline);
/* Only arm if not masked */
if(!(la->lvt_timer & (1<<16)))
timer_set(TIMER_LAPIC, la->timer_deadline);
}
/*
* Advance LAPIC timer - called on VM exit
*/
void
lapic_timer_advance(void)
{
LApic *la = &lapic;
u32int vector;
int periodic;
vlong now;
if(!la->timer_active || la->timer_deadline < 0)
return;
now = nanosec();
if(now < la->timer_deadline)
return;
vector = la->lvt_timer & 0xFF;
periodic = (la->lvt_timer >> 17) & 1;
dprint("LAPIC%d: FIRING timer! now=%lld deadline=%lld vec=%d\n", curcpuid, now, la->timer_deadline, vector);
/* Fire the interrupt if not masked and vector is valid */
if(vector != 0 && (la->lvt_timer & (1<<16)) == 0)
ipi_queue(curcpuid, vector);
la->timer_current = 0;
if(periodic){
/* Restart for next period */
lapic_timer_start();
} else {
/* One-shot: disable */
la->timer_active = 0;
la->timer_deadline = -1;
timer_cancel(TIMER_LAPIC);
}
}
u32int
lapic_read_current_count(void)
{
LApic *la = &lapic;
if(la->timer_deadline > 0){
vlong now = nanosec();
vlong remain = la->timer_deadline - now;
if(remain > 0){
u32int div = lapic_divide_value(la->timer_divide);
vlong ticks = remain / ((vlong)div * NS_PER_LAPIC_TICK);
if(ticks > 0xFFFFFFFFLL)
ticks = 0xFFFFFFFF;
return (u32int)ticks;
}
}
return 0;
}
void
lapic_write_initial_count(u32int val)
{
LApic *la = &lapic;
dprint("LAPIC%d: TIMER_INIT = %ud\n", curcpuid, val);
la->timer_initial = val;
lapic_timer_start();
}
void
lapic_write_divide_config(u32int val)
{
	lapic.timer_divide = val & 0xB;	/* only bits 0, 1 and 3 are defined */
}
void
lapic_write_lvt_timer(u32int val)
{
LApic *la = &lapic;
int old_mode = TIMER_MODE(la->lvt_timer);
int new_mode = TIMER_MODE(val);
la->lvt_timer = val;
/* Mode change disarms deadline */
if(old_mode != new_mode){
la->tsc_deadline = 0;
la->tsc_deadline_armed = 0;
timer_cancel(TIMER_DEADLINE);
}
dprint("LAPIC%d: LVT_TIMER = %#x (vec=%d mode=%d mask=%d)\n", curcpuid, val, val & 0xFF, new_mode, (val >> 16) & 1);
/* Only start counter-based timer if not in deadline mode */
if(new_mode != MODE_DEADLINE && !(val & (1<<16)) && la->timer_initial > 0)
lapic_timer_start();
}
void
lapic_init_cpu(void)
{
lapic.id = curcpuid << 24;
lapic.lvt_timer = 1 << 16; /* Masked initially */
lapic.timer_initial = 0;
lapic.timer_current = 0;
lapic.timer_divide = 0;
lapic.timer_deadline = -1;
lapic.timer_active = 0;
}
void
lapic_timer_init(void)
{
	extern u32int isr_bitmap[8];

	lapic_init_cpu();
	timer_init();
	memset(isr_bitmap, 0, sizeof(isr_bitmap));
}
/* ============================================================
* TSC Deadline Mode
* ============================================================ */
void
lapic_write_tsc_deadline(uvlong val)
{
	LApic *la = &lapic;
	uvlong tsc_now, guest_tsc;

	dprint("CPU%d: TSC_DEADLINE=%llud\n", curcpuid, val);
	la->tsc_deadline = val;
	if(val == 0){
		la->tsc_deadline_armed = 0;
		timer_cancel(TIMER_DEADLINE);
		return;
	}
	if(TIMER_MODE(la->lvt_timer) == MODE_DEADLINE && !(la->lvt_timer & (1<<16))){
		la->tsc_deadline_armed = 1;
		cycles(&tsc_now);
		guest_tsc = tsc_now + cached_tscoff;
		if(val > guest_tsc){
			uvlong delta_tsc = val - guest_tsc;
			uvlong delta_ns = (delta_tsc * tsc_to_ns_mult) >> tsc_to_ns_shift;

			timer_set(TIMER_DEADLINE, nanosec() + delta_ns);
		} else {
			/* Already expired - fire as soon as possible */
			timer_set(TIMER_DEADLINE, nanosec());
		}
	}
}
uvlong
lapic_read_tsc_deadline(void)
{
LApic *la = &lapic;
if(TIMER_MODE(la->lvt_timer) != MODE_DEADLINE)
return 0;
return la->tsc_deadline;
}
void
lapic_tsc_deadline_check(void)
{
LApic *la = &lapic;
uvlong host_tsc, guest_tsc;
u32int vector;
if(!la->tsc_deadline_armed)
return;
if(TIMER_MODE(la->lvt_timer) != MODE_DEADLINE)
return;
cycles(&host_tsc);
guest_tsc = host_tsc + cached_tscoff;
if(guest_tsc >= la->tsc_deadline){
vector = la->lvt_timer & 0xFF;
dprint("LAPIC%d: TSC deadline fired! vec=%d\n", curcpuid, vector);
la->tsc_deadline = 0;
la->tsc_deadline_armed = 0;
timer_cancel(TIMER_DEADLINE);
if(!(la->lvt_timer & (1<<16)) && vector != 0)
ipi_queue(curcpuid, vector);
} else {
/*
* TSC hasn't caught up yet - recalculate nanosecond deadline
* based on actual remaining TSC ticks to prevent tight loop
*/
uvlong delta_tsc = la->tsc_deadline - guest_tsc;
uvlong delta_ns = (delta_tsc * tsc_to_ns_mult) >> tsc_to_ns_shift;
if(delta_ns < 100000)
delta_ns = 100000; /* Minimum 100µs to prevent spin */
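		/*
		 * Unlocked write is fine here: ltq is per-CPU state
		 * (each CPU proc has its own copy after fork) and the
		 * caller, timers_advance(), rearms right after this.
		 */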
ltq.deadline[LTIMER_DEADLINE] = nanosec() + delta_ns;
}
}
/* ============================================================
* Unified Timer Advance - Called on preempt exit and HLT
* ============================================================ */
void
timers_advance(void)
{
extern void hpet_advance(void);
extern void pitadvance(void);
extern void rtcadvance(void);
	if(curcpuid == 0){
rtcadvance();
pitadvance();
hpet_advance();
}
/* Per-CPU timers (each CPU has its own copy due to fork) */
lapic_timer_advance();
lapic_tsc_deadline_check();
/* Rearm for next deadline */
timer_rearm();
}
/* ============================================================
* vmtime - Unified time base
* ============================================================ */
static int vmtime_initialized;
void
vmtime_init(void)
{
vmtime_initialized = 1;
dprint("vmtime_init: using nanosec() directly\n");
}
uvlong
vmtime_ns(void)
{
if(!vmtime_initialized)
return 0;
return nanosec();
}