ref: e0e889fb69a6d4d3f2332244ef4f79f900a66d84
dir: /pvclock.c/
/*
* pvclock.c - KVM paravirtual clock
*
* The guest computes time as:
* delta = rdtsc - tsc_timestamp
* delta = tsc_shift >= 0 ? delta << tsc_shift : delta >> -tsc_shift
* time_ns = system_time + ((delta * tsc_to_system_mul) >> 32)
*
* CRITICAL: tsc_timestamp must be in GUEST TSC units (host + offset)
* because that's what guest's rdtsc instruction returns.
*/
#include <u.h>
#include <libc.h>
#include <tos.h>
#include <thread.h>
#include "dat.h"
#include "fns.h"
/* MSR numbers handled by pvclock_msr(). */
/* A write stores the value as the guest-physical address of the PVWallClock. */
#define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00
/* A write stores the guest-physical address of the vCPU's PVClock; bit 0 is the enable flag. */
#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
/*
 * Per-vCPU time record that pvclock_update() fills in inside guest memory.
 * The guest reads this structure directly, so the field order and widths
 * are part of the interface with the guest.
 */
typedef struct PVClock PVClock;
struct PVClock {
/* odd while pvclock_update() is rewriting the record, even once it is stable */
u32int version;
u32int pad0;
/* guest TSC value (host TSC + cached_tscoff) sampled at the update */
u64int tsc_timestamp;
/* nanoseconds elapsed since pvclock_init() at the moment of the update */
u64int system_time;
/* multiplier of the TSC -> nanosecond conversion (see compute_tsc_params) */
u32int tsc_to_system_mul;
/* shift of the TSC -> nanosecond conversion (see compute_tsc_params) */
s8int tsc_shift;
/* pvclock_update() always writes 1 here ("TSC stable") */
u8int flags;
u8int pad[2];
};
/*
 * Wall-clock record that wallclock_update() writes into guest memory at
 * the address the guest supplied through MSR_KVM_WALL_CLOCK_NEW.
 */
typedef struct PVWallClock PVWallClock;
struct PVWallClock {
/* odd while wallclock_update() is writing the record, even once it is complete */
u32int version;
/* wall-clock seconds at the boot reference: time(nil) minus the seconds since pvclock_init() */
u32int sec;
/* wallclock_update() always writes 0 here */
u32int nsec;
};
/* Guest-physical address of each vCPU's PVClock, taken from the SYSTEM_TIME MSR with bit 0 masked off */
static uvlong pvclock_gpa[MAXVCPU];
/* Guest-physical address of the PVWallClock, taken from the WALL_CLOCK MSR; 0 means none registered */
static uvlong wallclock_gpa;
/* Bit 0 of each vCPU's last SYSTEM_TIME MSR write; pvclock_update() does nothing while this is 0 */
static int pvclock_enabled[MAXVCPU];
/* Cached conversion parameters, filled in by compute_tsc_params() */
static u32int tsc_mul;
static s8int tsc_shift;
/* TSC frequency in cycles per second, from _tos->cyclefreq (a fixed fallback is used when that is 0) */
static uvlong tsc_freq;
/* Boot time reference - nanosec() reading taken once in pvclock_init() */
static uvlong boot_ns;
/* From vmx.c - the TSC offset applied to guest */
extern vlong cached_tscoff;
/*
 * Compute tsc_mul and tsc_shift for the TSC -> nanosecond conversion.
 *
 * The guest converts a TSC delta as
 *	delta = shift >= 0 ? delta << shift : delta >> -shift
 *	ns = (delta * mul) >> 32
 * so the pair must satisfy mul * 2^shift / 2^32 == 1e9 / tsc_freq.
 *
 * For frequencies of 1 GHz and above the multiplier fits in 32 bits
 * with shift 0.  Below 1 GHz it does not, so the multiplier is halved
 * and the delta is pre-shifted LEFT (positive shift) to compensate.
 * A negative shift here would scale the result down twice.
 */
static void
compute_tsc_params(void)
{
	uvlong mul64;
	int shift;

	tsc_freq = _tos->cyclefreq;
	/* fall back to a fixed frequency when the kernel reports none */
	if(tsc_freq == 0)
		tsc_freq = 2900000000ULL;

	shift = 0;
	mul64 = (1000000000ULL << 32) / tsc_freq;
	while(mul64 > 0xFFFFFFFFULL && shift < 10){
		shift++;
		mul64 = (1000000000ULL << (32 - shift)) / tsc_freq;
	}
	/* frequency too low even for the largest shift: saturate instead of truncating */
	if(mul64 > 0xFFFFFFFFULL)
		mul64 = 0xFFFFFFFFULL;

	tsc_shift = shift;
	tsc_mul = (u32int)mul64;
	dprint("pvclock: tsc_freq=%llud mul=%ud shift=%d\n",
		tsc_freq, tsc_mul, tsc_shift);
}
void
pvclock_init(void)
{
compute_tsc_params();
boot_ns = nanosec();
dprint("pvclock: boot_ns=%llud\n", boot_ns);
}
/*
 * Publish a fresh time reference point in the PVClock of one vCPU.
 *
 * Does nothing when cpu is out of range, when the vCPU has not enabled
 * its pvclock, or when the registered guest address cannot be mapped.
 *
 * The record is rewritten under a version protocol: the version is made
 * odd before the fields change and advanced to even afterwards, with a
 * coherence() barrier on each side of the field writes.
 */
void
pvclock_update(int cpu)
{
	PVClock *clk;
	uvlong tsc, guest, elapsed;

	if(cpu < 0 || cpu >= MAXVCPU)
		return;
	if(pvclock_enabled[cpu] == 0)
		return;
	clk = gptr(pvclock_gpa[cpu], sizeof(PVClock));
	if(clk == nil)
		return;

	/* odd version: update in progress */
	clk->version = clk->version | 1;
	coherence();

	/*
	 * The guest's rdtsc yields host TSC + offset, so the reference
	 * timestamp has to be expressed in those same guest units.
	 */
	cycles(&tsc);
	guest = tsc + cached_tscoff;

	/* nanoseconds elapsed since pvclock_init() */
	elapsed = nanosec() - boot_ns;

	clk->tsc_timestamp = guest;
	clk->system_time = elapsed;
	clk->tsc_to_system_mul = tsc_mul;
	clk->tsc_shift = tsc_shift;
	clk->flags = 1;	/* TSC stable */
	coherence();

	/* even version: record is stable again */
	clk->version = clk->version + 1;
}
static void
pvclock_setup(int cpu)
{
dprint("CPU%d: pvclock setup gpa=%#llux tscoff=%lld\n",
cpu, pvclock_gpa[cpu], (vlong)cached_tscoff);
pvclock_update(cpu);
}
/*
 * Write the wall-clock time of the boot reference into the guest's
 * PVWallClock.  Does nothing when no address has been registered or
 * the address cannot be mapped.
 *
 * The published value is the current time(nil) minus the whole seconds
 * elapsed since pvclock_init(); nsec is always written as 0.
 *
 * The version is advanced rather than set to fixed values: it is made
 * odd while the fields are written and incremented to even afterwards.
 * This way every update leaves a version different from the previous
 * one, so a guest that compares the version before and after reading
 * can detect a concurrent rewrite.  This mirrors pvclock_update().
 */
static void
wallclock_update(void)
{
	PVWallClock *wc;
	vlong wall_sec;
	uvlong ns_since_boot;

	if(wallclock_gpa == 0)
		return;
	wc = gptr(wallclock_gpa, sizeof(PVWallClock));
	if(wc == nil)
		return;

	/* odd version: update in progress */
	wc->version |= 1;
	coherence();

	wall_sec = time(nil);
	ns_since_boot = nanosec() - boot_ns;
	wall_sec -= ns_since_boot / 1000000000ULL;
	wc->sec = wall_sec;
	wc->nsec = 0;

	coherence();
	/* even version: record is complete */
	wc->version++;
	dprint("wallclock: sec=%lld at boot\n", wall_sec);
}
/*
 * Handle guest access to the paravirtual clock MSRs.
 *
 * cpu	 index of the accessing vCPU
 * write nonzero for a write (*val is the value written), zero for a
 *	 read (*val receives the result)
 * msr	 MSR number
 *
 * Returns 1 when the MSR was handled here, 0 when cpu is out of range
 * or the MSR is not one of the pvclock MSRs.
 */
int
pvclock_msr(int cpu, int write, u32int msr, u64int *val)
{
	if(cpu < 0 || cpu >= MAXVCPU)
		return 0;

	if(msr == MSR_KVM_SYSTEM_TIME_NEW){
		if(!write){
			/* read back the address with the enable flag in bit 0 */
			*val = pvclock_gpa[cpu] | pvclock_enabled[cpu];
			return 1;
		}
		/* bit 0 is the enable flag, the remaining bits are the address */
		pvclock_gpa[cpu] = *val & ~1ULL;
		pvclock_enabled[cpu] = *val & 1;
		dprint("CPU%d: pvclock MSR write gpa=%#llux enabled=%d\n",
			cpu, pvclock_gpa[cpu], pvclock_enabled[cpu]);
		if(pvclock_enabled[cpu])
			pvclock_setup(cpu);
		return 1;
	}

	if(msr == MSR_KVM_WALL_CLOCK_NEW){
		if(!write){
			*val = wallclock_gpa;
			return 1;
		}
		/* registering an address publishes the wall clock right away */
		wallclock_gpa = *val;
		dprint("wallclock: GPA set to %#llux\n", wallclock_gpa);
		wallclock_update();
		return 1;
	}

	return 0;
}