shithub: vmxsmp

ref: e0e889fb69a6d4d3f2332244ef4f79f900a66d84
dir: /timer.c/

/*
 * timer.c - Unified Timer Manager for vmx SMP (FIXED VERSION)
 *
 * FIXES APPLIED:
 * 1. Backstop changed from 1ms to 10ms (reduces CPU usage 10x)
 * 2. Proper locking in timer_rearm()
 * 3. Removed duplicate function definitions
 * 4. Added timer state debugging
 *
 * ARCHITECTURE:
 * - Single timer coordinator that tracks all deadline sources
 * - Only programs the nearest deadline to VMX preemption timer
 * - All timer wakeups come through VMX preemption timer exit
 *
 * Timer sources:
 *   TIMER_LAPIC    - Local APIC timer (per-CPU, counter mode)
 *   TIMER_DEADLINE - TSC deadline mode (per-CPU)
 *
 * Global timers (CPU0 only, computed from shared state):
 *   PIT, RTC, HPET0, HPET1, HPET2
 */
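
/*
 * Control flow, in outline: device emulation calls timer_set() or
 * timer_kick(); timer_rearm() then recomputes the nearest pending
 * deadline and programs it via preempt_arm(). When the VMX preemption
 * timer expires (or the guest halts), timers_advance() fires whatever
 * is due and rearms for the next deadline.
 */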

#include <u.h>
#include <libc.h>
#include <thread.h>
#include <tos.h>
#include "dat.h"
#include "fns.h"

#ifndef VLONG_MAX
#define VLONG_MAX 0x7FFFFFFFFFFFFFFFLL
#endif

#ifndef TIMER_MODE
#define TIMER_MODE(x)  (((x) >> 17) & 3)
#define MODE_ONESHOT   0
#define MODE_PERIODIC  1
#define MODE_DEADLINE  2
#endif
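
/*
 * The LVT timer mode field occupies bits 18:17 of the LVT timer
 * register: 00 = one-shot, 01 = periodic, 10 = TSC-deadline.
 */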

/* TSC offset from pvclock.c */
extern vlong cached_tscoff;

/*
 * FIX #1: Backstop interval - 10ms, not 1ms!
 * 10ms is responsive enough for most workloads while reducing
 * unnecessary VM exits by 10x compared to 1ms.
 */
#define BACKSTOP_NS_FIXED  10000000LL  /* 10ms  */

/*
 * Per-CPU timer state.
 * Only two timer sources are truly per-CPU: LAPIC and TSC deadline.
 * Global device timers (PIT/RTC/HPET) are handled separately by CPU0.
 */
enum {
	LTIMER_LAPIC = 0,
	LTIMER_DEADLINE,
	LTIMER_MAX
};

typedef struct LocalTimerQueue LocalTimerQueue;
struct LocalTimerQueue {
	Lock lock;
	vlong deadline[LTIMER_MAX];	/* 0 = disarmed, else absolute ns deadline */
	vlong armed;			/* last deadline handed to preempt_arm() */
};

static LocalTimerQueue ltq;
static int timer_initialized;

/* External dependencies */
extern u32int preempt_shift;
extern int debug;
extern int state;
extern u32int curcpuid;
extern int ctl(char *fmt, ...);

/* TSC conversion */
static uvlong tsc_freq;
static uvlong tsc_to_ns_mult;
static int tsc_to_ns_shift = 32;
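
/*
 * tsc_to_ns_mult is a 32.32 fixed-point factor set in timer_init():
 * delta_ns = (delta_tsc * tsc_to_ns_mult) >> 32. With the 2.9 GHz
 * fallback frequency, for example, tsc_to_ns_mult ~= 1.48e9, so
 * 2,900,000 ticks (1ms worth) map to ~1,000,000 ns. Note the product
 * overflows 64 bits for deltas more than a few seconds out.
 */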

/* Shared device state - defined elsewhere */
extern Hpet *hpet;
extern PitShared *pitshared; 
extern RtcShared *rtcshared;

/* ============================================================
 * HPET time conversion
 * ============================================================ */

#define HPET_TO_NS_MULT   1171873536ULL
#define HPET_TO_NS_SHIFT  24
#define NS_TO_HPET_MULT   61496551ULL
#define NS_TO_HPET_SHIFT  32
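
/*
 * These fixed-point factors correspond (to within ~0.01%) to the
 * canonical 14.31818 MHz HPET frequency: one tick is about 69.84 ns,
 * and 1171873536 / 2^24 ~= 69.85, while 61496551 / 2^32 ~= 0.0143182
 * ticks per ns. Round trip: ns_to_hpet(hpet_to_ns(x)) ~= x.
 */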

static uvlong
hpet_to_ns(u64int hpet_ticks)
{
	return (hpet_ticks * HPET_TO_NS_MULT) >> HPET_TO_NS_SHIFT;
}

static u64int
ns_to_hpet(uvlong ns)
{
	return (ns * NS_TO_HPET_MULT) >> NS_TO_HPET_SHIFT;
}

/* ============================================================
 * Global Timer Deadline Computation (CPU0 only)
 * ============================================================ */

/*
 * Compute next HPET deadline from shared state.
 * Returns VLONG_MAX if no HPET timer is pending.
 *
 * FIX #2: Re-enable locking for thread safety
 */
static vlong
hpet_next_deadline(void)
{
	vlong nearest = VLONG_MAX;
	u64int counter;
	uvlong now_ns, elapsed_ns;
	int i;
	
	if(hpet == nil || !hpet->enabled)
		return VLONG_MAX;
	
	lock(&hpet->lock);
	
	/* Compute current counter value */
	now_ns = nanosec();
	elapsed_ns = now_ns - hpet->time_at_enable;
	counter = hpet->counter_offset + ns_to_hpet(elapsed_ns);
	
	for(i = 0; i < 3; i++){
		HpetTimer *t = &hpet->timer[i];
		
		if(!t->active)
			continue;
		if(!(t->cfg & (1 << 2)))  /* TN_INT_ENB */
			continue;
		
		if(t->cmp <= counter){
			/* Already expired - fire ASAP */
			nearest = now_ns;
			break;
		} else {
			uvlong delta_ticks = t->cmp - counter;
			vlong deadline = now_ns + hpet_to_ns(delta_ticks);
			if(deadline < nearest)
				nearest = deadline;
		}
	}
	
	unlock(&hpet->lock);
	
	return nearest;
}

/*
 * Compute next PIT deadline from shared state.
 * Returns VLONG_MAX if PIT is not active.
 */
#define PIT_PERIOD_NS 838  /* ~1.193182 MHz */
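
/*
 * 10^9 ns / 1193182 Hz ~= 838.1 ns per tick; a full 16-bit reload of
 * 65536 therefore expires after ~54.9 ms, the classic 18.2 Hz rate.
 */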

static vlong
pit_next_deadline(void)
{
	vlong nearest = VLONG_MAX;
	PITChannel *p;
	
	if(pitshared == nil)
		return VLONG_MAX;
	
	lock(&pitshared->lock);
	
	/* Only channel 0 generates IRQ 0 */
	p = &pitshared->ch[0];
	if(p->state != 0 && p->count > 0){
		vlong deadline = p->lastnsec + (vlong)p->count * PIT_PERIOD_NS;
		if(deadline < nearest)
			nearest = deadline;
	}
	
	unlock(&pitshared->lock);
	
	return nearest;
}

/*
 * Compute next RTC deadline from shared state.
 * Returns VLONG_MAX if no RTC periodic interrupt is pending.
 */
static vlong
rtc_next_deadline(void)
{
	vlong nearest = VLONG_MAX;
	
	if(rtcshared == nil)
		return VLONG_MAX;
	
	lock(&rtcshared->lock);
	
	if(rtcshared->rtcnext != -1)
		nearest = rtcshared->rtcnext;
	
	unlock(&rtcshared->lock);
	
	return nearest;
}

/*
 * Compute the nearest global timer deadline.
 * Only called by CPU0.
 */
static vlong
global_timer_deadline(void)
{
	vlong nearest = VLONG_MAX;
	vlong d;
	
	d = hpet_next_deadline();
	if(d < nearest) nearest = d;
	
	d = pit_next_deadline();
	if(d < nearest) nearest = d;
	
	d = rtc_next_deadline();
	if(d < nearest) nearest = d;
	
	return nearest;
}

/* ============================================================
 * Core Timer Infrastructure
 * ============================================================ */

/*
 * Arm the VMX preemption timer for the given deadline.
 */
static void
preempt_arm(vlong deadline_ns)
{
	vlong now, delta_ns;
	uvlong delta_tsc, ticks;
	
	if(tsc_freq == 0)
		return;
	
	now = nanosec();
	
	if(deadline_ns <= now){
		/* Already expired - fire immediately (minimum 1 tick) */
		ctl("preempt 1");
		return;
	}
	
	delta_ns = deadline_ns - now;
	
	/* Convert ns to TSC ticks, then to preemption timer ticks */
	delta_tsc = (delta_ns * tsc_freq) / 1000000000ULL;
	ticks = delta_tsc >> preempt_shift;
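	/*
	 * Worked example, assuming tsc_freq = 2.9 GHz and preempt_shift = 5:
	 * the 10ms backstop yields delta_tsc = 29,000,000 and
	 * ticks = 29,000,000 >> 5 = 906,250, well inside 32 bits.
	 */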
	
	/* Clamp to 32-bit, minimum 1 */
	if(ticks > 0xFFFFFFFFULL)
		ticks = 0xFFFFFFFF;
	if(ticks == 0)
		ticks = 1;
	
	ctl("preempt %ud", (u32int)ticks);
}

/*
 * Recalculate and arm the nearest deadline.
 *
 * FIX #2: This function now properly manages locking.
 * Caller should NOT hold ltq.lock - we acquire it here.
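 *
 * The deadline starts at now + BACKSTOP_NS_FIXED and is only ever
 * lowered, so every CPU gets a wakeup within 10ms even when no guest
 * timer is armed.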
 */
static void
timer_rearm(void)
{
	vlong nearest;
	int i;

	lock(&ltq.lock);

	nearest = nanosec() + BACKSTOP_NS_FIXED;

	/* Global timers - ONLY CPU0 should check these!
	 * Non-CPU0 cannot process them anyway, so checking just
	 * creates race conditions where they see expired timers
	 * before CPU0 has advanced them. */
	if(curcpuid == 0){
		vlong global = global_timer_deadline();
		if(global < nearest)
			nearest = global;
	}

	/* Per-CPU deadlines */
	for(i = 0; i < LTIMER_MAX; i++){
		if(ltq.deadline[i] > 0 && ltq.deadline[i] < nearest)
			nearest = ltq.deadline[i];
	}

	ltq.armed = nearest;

	unlock(&ltq.lock);

	preempt_arm(nearest);
}

/*
 * Initialize timer subsystem - called once per CPU
 */
void
timer_init(void)
{
	if(timer_initialized)
		return;
	
	memset(&ltq, 0, sizeof(ltq));
	ltq.armed = -1;
	
	tsc_freq = _tos->cyclefreq;
	if(tsc_freq == 0)
		tsc_freq = 2900000000ULL;
	
	tsc_to_ns_mult = (1000000000ULL << 32) / tsc_freq;
	
	timer_initialized = 1;
	
	dprint("CPU%d: timer_init tsc_freq=%llud preempt_shift=%d backstop=%lldms\n", curcpuid, tsc_freq, preempt_shift, BACKSTOP_NS_FIXED / 1000000);
}

/*
 * Set a per-CPU timer deadline (LAPIC or TSC deadline only).
 */
void
timer_set(int source, vlong deadline_ns)
{
	int local_source;
	
	/* Map to local timer index */
	switch(source){
	case TIMER_LAPIC:
		local_source = LTIMER_LAPIC;
		break;
	case TIMER_DEADLINE:
		local_source = LTIMER_DEADLINE;
		break;
	default:
		/* Global timer - just trigger rearm */
		timer_rearm();
		return;
	}
	
	lock(&ltq.lock);
	ltq.deadline[local_source] = deadline_ns;
	unlock(&ltq.lock);
	
	timer_rearm();
}

/*
 * Cancel a timer
 */
void
timer_cancel(int source)
{
	timer_set(source, 0);
}

/*
 * Get nearest deadline (for HLT handler)
 */
vlong
timer_nearest(void)
{
	vlong nearest;
	int i;

	lock(&ltq.lock);

	nearest = VLONG_MAX;

	for(i = 0; i < LTIMER_MAX; i++){
		if(ltq.deadline[i] > 0 && ltq.deadline[i] < nearest)
			nearest = ltq.deadline[i];
	}

	unlock(&ltq.lock);

	/* Only CPU0 should check global timers */
	if(curcpuid == 0){
		vlong global = global_timer_deadline();
		if(global < nearest)
			nearest = global;
	}

	return nearest;
}

/*
 * Force rearm - call after modifying shared timer state
 */
void
timer_kick(void)
{
	timer_rearm();
}

/* ============================================================
 * LAPIC Timer
 * ============================================================ */

#define LAPIC_BUS_FREQ_HZ  100000000LL
#define NS_PER_LAPIC_TICK  (1000000000LL / LAPIC_BUS_FREQ_HZ)
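
/*
 * With the emulated 100 MHz bus, one timer tick is 10 ns: an initial
 * count of 62500 with divide-by-16, say, gives a period of
 * 62500 * 16 * 10 ns = 10 ms.
 */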

extern LApic lapic;
extern void ipi_queue(int, int);

u32int
lapic_divide_value(u32int divide_reg)
{
	static u32int divtab[] = {2, 4, 8, 16, 32, 64, 128, 1};
	u32int idx = (divide_reg & 3) | ((divide_reg >> 1) & 4);
	return divtab[idx];
}
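
/*
 * Note on the encoding above: the divide configuration register keeps
 * its value in bits 0, 1 and 3 (bit 2 is reserved), so idx packs bits
 * 1:0 with bit 3 shifted down to position 2; 0 selects divide-by-2 and
 * 0xB (1011b) divide-by-1, matching the Intel SDM encoding.
 */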

/*
 * Start the LAPIC timer
 */
void
lapic_timer_start(void)
{
	LApic *la = &lapic;
	u32int divide, initial;
	vlong period_ns;
	vlong now;
	
	if(la->timer_initial == 0){
		la->timer_active = 0;
		la->timer_deadline = -1;
		timer_cancel(TIMER_LAPIC);
		return;
	}
	
	divide = lapic_divide_value(la->timer_divide);
	initial = la->timer_initial;
	period_ns = (vlong)initial * (vlong)divide * NS_PER_LAPIC_TICK;
	
	now = nanosec();
	la->timer_deadline = now + period_ns;
	la->timer_active = 1;
	la->timer_current = initial;
	
	dprint("LAPIC%d: timer started, initial=%ud divide=%ud period=%lldns deadline=%lld\n", curcpuid, initial, divide, period_ns, la->timer_deadline);
	
	/* Only arm if not masked */
	if(!(la->lvt_timer & (1<<16)))
		timer_set(TIMER_LAPIC, la->timer_deadline);
}

/*
 * Advance LAPIC timer - called on VM exit
 */
void
lapic_timer_advance(void)
{
	LApic *la = &lapic;
	u32int vector;
	int periodic;
	vlong now;
	
	if(!la->timer_active || la->timer_deadline < 0)
		return;
	
	now = nanosec();
	if(now < la->timer_deadline)
		return;
	
	vector = la->lvt_timer & 0xFF;
	periodic = TIMER_MODE(la->lvt_timer) == MODE_PERIODIC;
	
	dprint("LAPIC%d: FIRING timer! now=%lld deadline=%lld vec=%d\n", curcpuid, now, la->timer_deadline, vector);
	
	/* Fire the interrupt if not masked and vector is valid */
	if(vector != 0 && (la->lvt_timer & (1<<16)) == 0)
		ipi_queue(curcpuid, vector);
	
	la->timer_current = 0;
	
	if(periodic){
		/* Restart for next period */
		lapic_timer_start();
	} else {
		/* One-shot: disable */
		la->timer_active = 0;
		la->timer_deadline = -1;
		timer_cancel(TIMER_LAPIC);
	}
}

u32int
lapic_read_current_count(void)
{
	LApic *la = &lapic;
	
	if(la->timer_deadline > 0){
		vlong now = nanosec();
		vlong remain = la->timer_deadline - now;
		if(remain > 0){
			u32int div = lapic_divide_value(la->timer_divide);
			vlong ticks = remain / ((vlong)div * NS_PER_LAPIC_TICK);
			if(ticks > 0xFFFFFFFFLL)
				ticks = 0xFFFFFFFF;
			return (u32int)ticks;
		}
	}
	return 0;
}

void
lapic_write_initial_count(u32int val)
{
	LApic *la = &lapic;
	
	dprint("LAPIC%d: TIMER_INIT = %ud\n", curcpuid, val);
	
	la->timer_initial = val;
	lapic_timer_start();
}

void
lapic_write_divide_config(u32int val)
{
	lapic.timer_divide = val & 0xB;
}

void
lapic_write_lvt_timer(u32int val)
{
	LApic *la = &lapic;
	int old_mode = TIMER_MODE(la->lvt_timer);
	int new_mode = TIMER_MODE(val);
	
	la->lvt_timer = val;
	
	/* Mode change disarms deadline */
	if(old_mode != new_mode){
		la->tsc_deadline = 0;
		la->tsc_deadline_armed = 0;
		timer_cancel(TIMER_DEADLINE);
	}
	
	dprint("LAPIC%d: LVT_TIMER = %#x (vec=%d mode=%d mask=%d)\n", curcpuid, val, val & 0xFF, new_mode, (val >> 16) & 1);
	
	/* Only start counter-based timer if not in deadline mode */
	if(new_mode != MODE_DEADLINE && !(val & (1<<16)) && la->timer_initial > 0)
		lapic_timer_start();
}

void
lapic_init_cpu(void)
{
	lapic.id = curcpuid << 24;
	lapic.lvt_timer = 1 << 16;  /* Masked initially */
	lapic.timer_initial = 0;
	lapic.timer_current = 0;
	lapic.timer_divide = 0;
	lapic.timer_deadline = -1;
	lapic.timer_active = 0;
}

void
lapic_timer_init(void)
{
	extern u32int isr_bitmap[8];

	lapic_init_cpu();
	timer_init();

	memset(isr_bitmap, 0, sizeof(isr_bitmap));
}

/* ============================================================
 * TSC Deadline Mode
 * ============================================================ */
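
/*
 * The guest arms this mode by writing the IA32_TSC_DEADLINE MSR (0x6e0)
 * while the LVT timer is in MODE_DEADLINE; writing 0 disarms it.
 * Deadlines are compared against the guest TSC, i.e. host TSC plus
 * cached_tscoff.
 */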

void
lapic_write_tsc_deadline(uvlong val)
{
	LApic *la = &lapic;
	uvlong tsc_now, guest_tsc;

	dprint("CPU%d: TSC_DEADLINE=%llud\n", curcpuid, val);

	la->tsc_deadline = val;

	if(val == 0){
		la->tsc_deadline_armed = 0;
		timer_cancel(TIMER_DEADLINE);
		return;
	}

	if(TIMER_MODE(la->lvt_timer) == MODE_DEADLINE && !(la->lvt_timer & (1<<16))){
		la->tsc_deadline_armed = 1;

		cycles(&tsc_now);
		guest_tsc = tsc_now + cached_tscoff;

		if(val > guest_tsc){
			uvlong delta_tsc, delta_ns;

			delta_tsc = val - guest_tsc;
			delta_ns = (delta_tsc * tsc_to_ns_mult) >> tsc_to_ns_shift;
			timer_set(TIMER_DEADLINE, nanosec() + delta_ns);
		} else {
			/* Already expired */
			timer_set(TIMER_DEADLINE, nanosec());
		}
	}
}

uvlong
lapic_read_tsc_deadline(void)
{
	LApic *la = &lapic;
	
	if(TIMER_MODE(la->lvt_timer) != MODE_DEADLINE)
		return 0;
	
	return la->tsc_deadline;
}

void
lapic_tsc_deadline_check(void)
{
	LApic *la = &lapic;
	uvlong host_tsc, guest_tsc;
	u32int vector;
	
	if(!la->tsc_deadline_armed)
		return;
	
	if(TIMER_MODE(la->lvt_timer) != MODE_DEADLINE)
		return;
	
	cycles(&host_tsc);
	guest_tsc = host_tsc + cached_tscoff;
	
	if(guest_tsc >= la->tsc_deadline){
		vector = la->lvt_timer & 0xFF;
		
		dprint("LAPIC%d: TSC deadline fired! vec=%d\n", curcpuid, vector);
		
		la->tsc_deadline = 0;
		la->tsc_deadline_armed = 0;
		timer_cancel(TIMER_DEADLINE);
		
		if(!(la->lvt_timer & (1<<16)) && vector != 0)
			ipi_queue(curcpuid, vector);
	} else {
		/*
		 * TSC hasn't caught up yet - recalculate the nanosecond
		 * deadline from the remaining TSC ticks to prevent a
		 * tight loop.
		 */
		uvlong delta_tsc = la->tsc_deadline - guest_tsc;
		uvlong delta_ns = (delta_tsc * tsc_to_ns_mult) >> tsc_to_ns_shift;

		if(delta_ns < 100000)
			delta_ns = 100000;	/* Minimum 100µs to prevent spin */
		lock(&ltq.lock);
		ltq.deadline[LTIMER_DEADLINE] = nanosec() + delta_ns;
		unlock(&ltq.lock);
	}
}

/* ============================================================
 * Unified Timer Advance - Called on preempt exit and HLT
 * ============================================================ */

void
timers_advance(void)
{
	extern void hpet_advance(void);
	extern void pitadvance(void);
	extern void rtcadvance(void);
	
	if(curcpuid == 0){
		rtcadvance();
		pitadvance();
		hpet_advance();
	}

	/* Per-CPU timers (each CPU has its own copy due to fork) */
	lapic_timer_advance();
	lapic_tsc_deadline_check();
	
	/* Rearm for next deadline */
	timer_rearm();
}

/* ============================================================
 * vmtime - Unified time base
 * ============================================================ */

static int vmtime_initialized;

void
vmtime_init(void)
{
	vmtime_initialized = 1;
	dprint("vmtime_init: using nanosec() directly\n");
}

uvlong
vmtime_ns(void)
{
	if(!vmtime_initialized)
		return 0;
	return nanosec();
}