ref: 14cfa56253beb8a2ad8e4700c8c26a6ff6d16ee7
parent: 473a93d794e0e83295e012dd66969ad569044a8d
author: Ori Bernstein <ori@eigenstate.org>
date: Mon Apr 7 14:37:40 EDT 2025
kernel: add monotonic time to /dev/time, /dev/bintime (thanks rsc) In many places in the system, from benchmarking to scheduling events, we want to use monotonic time. We didn't have it, so until now, we've been using epoch nanoseconds. The problem with this is that time moves backward when aux/timesync moves the time-of-day backward. This change adds an extra field to /dev/time and /dev/bintime giving a monotonic time in nanoseconds since system boot. Programs that do long enough reads will get this extra field. Programs that don't do long enough reads will not. No programs should be confused. Speaking of being confused, also rename vlong2le etc to vlong2be since they are converting to big-endian not little-endian.
--- a/sys/man/3/cons
+++ b/sys/man/3/cons
@@ -209,8 +209,9 @@
back to zero between subsequent accesses.
A read of
.B bintime
-returns 24 bytes, three 8 byte numbers, representing nanoseconds
-since start of epoch, clock ticks, and clock frequency.
+returns 32 bytes, four 8 byte numbers, representing nanoseconds
+since start of epoch, clock ticks, clock frequency,
+and a monotonically increasing count of nanoseconds with an unspecified epoch.
.PP
A write to
.B bintime
@@ -260,8 +261,9 @@
The
.B time
file holds one 32-bit number representing the seconds since start of epoch
-and three 64-bit numbers, representing nanoseconds since
-start of epoch, clock ticks, and clock frequency.
+and four 64-bit numbers, representing nanoseconds since start of epoch,
+clock ticks, clock frequency,
+and a monotonically increasing count of nanoseconds with an unspecified epoch.
.PP
A write of a decimal number to
.B time
--- a/sys/src/9/arm64/trap.c
+++ b/sys/src/9/arm64/trap.c
@@ -221,7 +221,7 @@
up->procctl = Proc_stopme;
procctl();
splx(s);
- startns = todget(nil);
+ todget(nil, &startns);
}
if(scallnr >= nsyscall || systab[scallnr] == nil){
@@ -244,7 +244,7 @@
}
ureg->r0 = ret;
if(up->procctl == Proc_tracesyscall){
- stopns = todget(nil);
+ todget(nil, &stopns);
sysretfmt(scallnr, (va_list) up->s.args, ret, startns, stopns);
s = splhi();
up->procctl = Proc_stopme;
--- a/sys/src/9/cycv/trap.c
+++ b/sys/src/9/cycv/trap.c
@@ -233,7 +233,7 @@
up->procctl = Proc_stopme;
procctl();
splx(s);
- startns = todget(nil);
+ todget(nil, &startns);
}
if(scallnr >= nsyscall || systab[scallnr] == nil){
@@ -257,7 +257,7 @@
ureg->r0 = ret;
if(up->procctl == Proc_tracesyscall){
- stopns = todget(nil);
+ todget(nil, &stopns);
sysretfmt(scallnr, (va_list) up->s.args, ret, startns, stopns);
s = splhi();
up->procctl = Proc_stopme;
--- a/sys/src/9/kw/syscall.c
+++ b/sys/src/9/kw/syscall.c
@@ -187,7 +187,7 @@
up->procctl = Proc_stopme;
procctl();
splx(s);
- startns = todget(nil);
+ todget(nil, &startns);
}
if(scallnr >= nsyscall || systab[scallnr] == nil){
postnote(up, 1, "sys: bad sys call", NDebug);
@@ -219,7 +219,7 @@
ureg->r0 = ret;
if(up->procctl == Proc_tracesyscall){
- stopns = todget(nil);
+ todget(nil, &stopns);
sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns);
s = splhi();
up->procctl = Proc_stopme;
--- a/sys/src/9/mt7688/syscall.c
+++ b/sys/src/9/mt7688/syscall.c
@@ -53,7 +53,7 @@
up->procctl = Proc_stopme;
procctl();
splx(s);
- startns = todget(nil);
+ todget(nil, &startns);
}
if(scallnr >= nsyscall || systab[scallnr] == nil){
@@ -89,7 +89,7 @@
ureg->r1 = ret;
if(up->procctl == Proc_tracesyscall){
- stopns = todget(nil);
+ todget(nil, &stopns);
sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns);
s = splhi();
up->procctl = Proc_stopme;
--- a/sys/src/9/pc/devlml.c
+++ b/sys/src/9/pc/devlml.c
@@ -396,7 +396,7 @@
statcom = lml->codedata->statCom[fno];
jpgheader = (FrameHeader *)(lml->codedata->frag[fno].hdr + 2);
jpgheader->frameNo = lml->jpgframeno;
- jpgheader->ftime = todget(nil);
+ jpgheader->ftime = todget(nil, nil);
jpgheader->frameSize = (statcom & 0x00ffffff) >> 1;
jpgheader->frameSeqNo = statcom >> 24;
wakeup(&lml->sleepjpg);
--- a/sys/src/9/pc/trap.c
+++ b/sys/src/9/pc/trap.c
@@ -493,7 +493,7 @@
up->procctl = Proc_stopme;
procctl();
splx(s);
- startns = todget(nil);
+ todget(nil, &startns);
}
if(scallnr >= nsyscall || systab[scallnr] == nil){
@@ -528,7 +528,7 @@
ureg->ax = ret;
if(up->procctl == Proc_tracesyscall){
- stopns = todget(nil);
+ todget(nil, &stopns);
sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns);
s = splhi();
up->procctl = Proc_stopme;
--- a/sys/src/9/pc64/main.c
+++ b/sys/src/9/pc64/main.c
@@ -184,7 +184,7 @@
i8250console();
quotefmtinstall();
screeninit();
- print("\nPlan 9\n");
+ print("\nPlan X\n");
cpuidentify();
meminit0();
archinit();
--- a/sys/src/9/pc64/trap.c
+++ b/sys/src/9/pc64/trap.c
@@ -472,7 +472,7 @@
up->procctl = Proc_stopme;
procctl();
splx(s);
- startns = todget(nil);
+ todget(nil, &startns);
}
if(scallnr >= nsyscall || systab[scallnr] == nil){
postnote(up, 1, "sys: bad sys call", NDebug);
@@ -504,7 +504,7 @@
}
if(up->procctl == Proc_tracesyscall){
- stopns = todget(nil);
+ todget(nil, &stopns);
sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns);
s = splhi();
up->procctl = Proc_stopme;
--- a/sys/src/9/port/devcons.c
+++ b/sys/src/9/port/devcons.c
@@ -817,7 +817,7 @@
static uvlong uvorder = 0x0001020304050607ULL;
static uchar*
-le2vlong(vlong *to, uchar *f)
+be2vlong(vlong *to, uchar *f)
{
uchar *t, *o;
int i;
@@ -830,7 +830,7 @@
}
static uchar*
-vlong2le(uchar *t, vlong from)
+vlong2be(uchar *t, vlong from)
{
uchar *f, *o;
int i;
@@ -845,7 +845,7 @@
static long order = 0x00010203;
static uchar*
-le2long(long *to, uchar *f)
+be2long(long *to, uchar *f)
{
uchar *t, *o;
int i;
@@ -857,19 +857,6 @@
return f+sizeof(long);
}
-static uchar*
-long2le(uchar *t, long from)
-{
- uchar *f, *o;
- int i;
-
- f = (uchar*)&from;
- o = (uchar*)&order;
- for(i = 0; i < sizeof(long); i++)
- t[i] = f[o[i]];
- return t+sizeof(long);
-}
-
char *Ebadtimectl = "bad time control";
/*
@@ -880,19 +867,20 @@
static int
readtime(ulong off, char *buf, int n)
{
- vlong nsec, ticks;
+ vlong nsec, ticks, mono;
long sec;
- char str[7*NUMSIZE];
+ char str[9*NUMSIZE];
- nsec = todget(&ticks);
+ nsec = todget(&ticks, &mono);
if(fasthz == 0LL)
fastticks((uvlong*)&fasthz);
sec = nsec/1000000000ULL;
- snprint(str, sizeof(str), "%*lud %*llud %*llud %*llud ",
+ snprint(str, sizeof(str), "%*lud %*llud %*llud %*llud %*llud ",
NUMSIZE-1, sec,
VLNUMSIZE-1, nsec,
VLNUMSIZE-1, ticks,
- VLNUMSIZE-1, fasthz);
+ VLNUMSIZE-1, fasthz,
+ VLNUMSIZE-1, mono);
return readstr(off, buf, n, str);
}
@@ -926,23 +914,27 @@
readbintime(char *buf, int n)
{
int i;
- vlong nsec, ticks;
+ vlong nsec, ticks, mono;
uchar *b = (uchar*)buf;
i = 0;
if(fasthz == 0LL)
fastticks((uvlong*)&fasthz);
- nsec = todget(&ticks);
+ nsec = todget(&ticks, &mono);
+ if(n >= 4*sizeof(uvlong)){
+ vlong2be(b+3*sizeof(uvlong), mono);
+ i += sizeof(uvlong);
+ }
if(n >= 3*sizeof(uvlong)){
- vlong2le(b+2*sizeof(uvlong), fasthz);
+ vlong2be(b+2*sizeof(uvlong), fasthz);
i += sizeof(uvlong);
}
if(n >= 2*sizeof(uvlong)){
- vlong2le(b+sizeof(uvlong), ticks);
+ vlong2be(b+sizeof(uvlong), ticks);
i += sizeof(uvlong);
}
if(n >= 8){
- vlong2le(b, nsec);
+ vlong2be(b, nsec);
i += sizeof(vlong);
}
return i;
@@ -968,20 +960,20 @@
case 'n':
if(n < sizeof(vlong))
error(Ebadtimectl);
- le2vlong(&delta, p);
+ be2vlong(&delta, p);
todset(delta, 0, 0);
break;
case 'd':
if(n < sizeof(vlong)+sizeof(long))
error(Ebadtimectl);
- p = le2vlong(&delta, p);
- le2long(&period, p);
+ p = be2vlong(&delta, p);
+ be2long(&period, p);
todset(-1, delta, period);
break;
case 'f':
if(n < sizeof(uvlong))
error(Ebadtimectl);
- le2vlong(&fasthz, p);
+ be2vlong(&fasthz, p);
if(fasthz <= 0)
error(Ebadtimectl);
todsetfreq(fasthz);
--- a/sys/src/9/port/devloopback.c
+++ b/sys/src/9/port/devloopback.c
@@ -553,7 +553,7 @@
bp = padblock(bp, Tmsize);
if(BLEN(bp) < lb->minmtu)
bp = adjustblock(bp, lb->minmtu);
- ptime(bp->rp, todget(nil));
+ ptime(bp->rp, todget(nil, nil));
link->packets++;
link->bytes += n;
--- a/sys/src/9/port/devproc.c
+++ b/sys/src/9/port/devproc.c
@@ -283,7 +283,7 @@
te->pid = p->pid;
te->etype = etype;
if (ts == 0)
- te->time = todget(nil);
+ todget(nil, &te->time);
else
te->time = ts;
tproduced++;
--- a/sys/src/9/port/edf.c
+++ b/sys/src/9/port/edf.c
@@ -195,7 +195,7 @@
DPRINT("%lud release %lud[%s], r=%lud, d=%lud, t=%lud, S=%lud\n",
now, p->pid, statename[p->state], e->r, e->d, e->t, e->S);
if(pt = proctrace){
- nowns = todget(nil);
+ todget(nil, &nowns);
pt(p, SRelease, nowns);
pt(p, SDeadline, nowns + 1000LL*e->D);
}
@@ -291,6 +291,7 @@
Edf *e;
void (*pt)(Proc*, int, vlong);
long tns;
+ vlong tnow;
e = p->edf;
/* Called with edflock held */
@@ -315,8 +316,10 @@
}else{
DPRINT("v");
}
- if(p->trace && (pt = proctrace))
- pt(p, SInte, todget(nil) + e->tns);
+ if(p->trace && (pt = proctrace)){
+ todget(nil, &tnow);
+ pt(p, SInte, tnow + e->tns);
+ }
e->tmode = Trelative;
e->tf = deadlineintr;
e->ta = p;
--- a/sys/src/9/port/portfns.h
+++ b/sys/src/9/port/portfns.h
@@ -366,7 +366,7 @@
ulong tk2ms(ulong);
#define TK2MS(x) ((x)*(1000/HZ))
uvlong tod2fastticks(vlong);
-vlong todget(vlong*);
+vlong todget(vlong*, vlong*);
void todsetfreq(vlong);
void todinit(void);
void todset(vlong, vlong, int);
--- a/sys/src/9/port/sysproc.c
+++ b/sys/src/9/port/sysproc.c
@@ -1257,12 +1257,12 @@
/* return in register on 64bit machine */
if(sizeof(uintptr) == sizeof(vlong)){
USED(list);
- return (uintptr)todget(nil);
+ return (uintptr)todget(nil, nil);
}
v = va_arg(list, vlong*);
evenaddr((uintptr)v);
validaddr((uintptr)v, sizeof(vlong), 1);
- *v = todget(nil);
+ *v = todget(nil, nil);
return 0;
}
--- a/sys/src/9/port/taslock.c
+++ b/sys/src/9/port/taslock.c
@@ -36,6 +36,7 @@
lock(Lock *l)
{
int i;
+ vlong mono;
uintptr pc;
pc = getcallerpc(&l);
--- a/sys/src/9/port/tod.c
+++ b/sys/src/9/port/tod.c
@@ -44,7 +44,9 @@
uvlong udivider; /* ticks = (µdivider*µs)>>31 */
vlong hz; /* frequency of fast clock */
vlong last; /* last reading of fast clock */
- vlong off; /* offset from epoch to last */
+ vlong off; /* offset from epoch to last (ns) */
+ vlong monolast; /* last reading of fast clock for monotonic time */
+ vlong monooff; /* offset from 0 to monolast (ns) */
vlong lasttime; /* last return value from todget */
vlong delta; /* add 'delta' each slow clock tick from sstart to send */
ulong sstart; /* ... */
@@ -61,6 +63,7 @@
ilock(&tod);
tod.init = 1; /* prevent reentry via fastticks */
tod.last = fastticks((uvlong *)&tod.hz);
+ tod.monolast = tod.last;
iunlock(&tod);
todsetfreq(tod.hz);
addclock0link(todfix, 100);
@@ -67,14 +70,36 @@
}
/*
+ * return monotonic ns; tod must be locked
+ */
+static vlong
+todmono(vlong ticks)
+{
+ uvlong x;
+ vlong diff;
+
+ if(tod.hz == 0) /* called from first todsetfreq */
+ return 0;
+ diff = ticks - tod.monolast;
+ mul64fract(&x, diff, tod.multiplier);
+ x += tod.monooff;
+ return x;
+}
+
+/*
* calculate multiplier
*/
void
todsetfreq(vlong f)
{
+ vlong ticks;
+
if (f <= 0)
panic("todsetfreq: freq %lld <= 0", f);
ilock(&tod);
+ ticks = fastticks(nil);
+ tod.monooff = todmono(ticks);
+ tod.monolast = ticks;
tod.hz = f;
/* calculate multiplier for time conversion */
@@ -125,10 +150,10 @@
* get time of day
*/
vlong
-todget(vlong *ticksp)
+todget(vlong *ticksp, vlong *monop)
{
uvlong x;
- vlong ticks, diff;
+ vlong ticks, diff, mono;
ulong t;
if(!tod.init)
@@ -159,16 +184,21 @@
mul64fract(&x, diff, tod.multiplier);
x += tod.off;
- /* time can't go backwards */
+ /* time can't go backwards (except when /dev/[bin]time is written) */
if(x < tod.lasttime)
x = tod.lasttime;
else
tod.lasttime = x;
+ mono = 0;
+ if(monop != nil)
+ mono = todmono(ticks);
iunlock(&tod);
if(ticksp != nil)
*ticksp = ticks;
+ if(monop != nil)
+ *monop = mono;
return x;
}
@@ -219,7 +249,7 @@
long
seconds(void)
{
- return (vlong)todget(nil) / TODFREQ;
+ return (vlong)todget(nil, nil) / TODFREQ;
}
uvlong
--- a/sys/src/9/sgi/trap.c
+++ b/sys/src/9/sgi/trap.c
@@ -612,7 +612,7 @@
if(up->syscalltrace)
free(up->syscalltrace);
up->syscalltrace = nil;
- *startnsp = todget(nil);
+ todget(nil, startnsp);
}
}
@@ -619,12 +619,14 @@
static void
sctracefinish(ulong scallnr, ulong sp, int ret, vlong startns)
{
+ vlong stopns;
int s;
if(up->procctl == Proc_tracesyscall){
+ todget(nil, &stopns);
up->procctl = Proc_stopme;
sysretfmt(scallnr, (va_list)(sp+BY2WD), ret,
- startns, todget(nil));
+ startns, stopns);
s = splhi();
procctl();
splx(s);
--- a/sys/src/9/teg2/syscall.c
+++ b/sys/src/9/teg2/syscall.c
@@ -199,7 +199,7 @@
up->nerrlab = 0;
ret = -1;
- startns = todget(nil);
+ todget(nil, &startns);
l1cache->wb(); /* system is more stable with this */
if(!waserror()){
@@ -237,7 +237,7 @@
ureg->r0 = ret;
if(up->procctl == Proc_tracesyscall){
- stopns = todget(nil);
+ todget(nil, &stopns);
sysretfmt(scallnr, (va_list)(sp+BY2WD), ret, startns, stopns);
s = splhi();
up->procctl = Proc_stopme;
--- a/sys/src/9/zynq/trap.c
+++ b/sys/src/9/zynq/trap.c
@@ -252,7 +252,7 @@
up->procctl = Proc_stopme;
procctl();
splx(s);
- startns = todget(nil);
+ todget(nil, &startns);
}
if(scallnr >= nsyscall || systab[scallnr] == nil){
postnote(up, 1, "sys: bad sys call", NDebug);
@@ -275,7 +275,7 @@
ureg->r0 = ret;
if(up->procctl == Proc_tracesyscall){
- stopns = todget(nil);
+ todget(nil, &stopns);
sysretfmt(scallnr, (va_list) up->s.args, ret, startns, stopns);
s = splhi();
up->procctl = Proc_stopme;
--
⑨