shithub: riscv

Download patch

ref: 14cfa56253beb8a2ad8e4700c8c26a6ff6d16ee7
parent: 473a93d794e0e83295e012dd66969ad569044a8d
author: Ori Bernstein <ori@eigenstate.org>
date: Mon Apr 7 14:37:40 EDT 2025

kernel: add monotonic time to /dev/time, /dev/bintime (thanks rsc)

In many places in the system, from benchmarking to scheduling
events, we want to use monotonic time. We didn't have it, so
until now, we've been using epoch nanoseconds. The problem
with this is that time moves backward when aux/timesync moves
the time-of-day backward.

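This change adds an extra field to /dev/time and /dev/bintime
giving a monotonic time in nanoseconds since system boot.
The new field is appended after the existing ones, so programs
that do long enough reads will get it, and programs that do
shorter reads will see exactly the fields they saw before.
No programs should be confused.

The kernel's own trace timestamps (syscall tracing, proc trace
events, and the EDF scheduler trace) now take the monotonic
value from todget instead of the time of day.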

Speaking of being confused, also rename vlong2le etc to vlong2be
since they are converting to big-endian not little-endian, and
delete long2le rather than renaming it.

--- a/sys/man/3/cons
+++ b/sys/man/3/cons
@@ -209,8 +209,9 @@
 back to zero between subsequent accesses.
 A read of
 .B bintime
-returns 24 bytes, three 8 byte numbers, representing nanoseconds
-since start of epoch, clock ticks, and clock frequency.
+returns 32 bytes, four 8 byte numbers, representing nanoseconds
+since start of epoch, clock ticks, clock frequency,
+and a monotonically increasing count of nanoseconds with an unspecified epoch.
 .PP
 A write to
 .B bintime
@@ -260,8 +261,9 @@
 The
 .B time
 file holds one 32-bit number representing the seconds since start of epoch
-and three 64-bit numbers, representing nanoseconds since
-start of epoch, clock ticks, and clock frequency.
+and four 64-bit numbers, representing nanoseconds since start of epoch,
+clock ticks, clock frequency,
+and a monotonically increasing count of nanoseconds with an unspecified epoch.
 .PP
 A write of a decimal number to
 .B time
--- a/sys/src/9/arm64/trap.c
+++ b/sys/src/9/arm64/trap.c
@@ -221,7 +221,7 @@
 			up->procctl = Proc_stopme;
 			procctl();
 			splx(s);
-			startns = todget(nil);
+			todget(nil, &startns);
 		}
 		
 		if(scallnr >= nsyscall || systab[scallnr] == nil){
@@ -244,7 +244,7 @@
 	}
 	ureg->r0 = ret;
 	if(up->procctl == Proc_tracesyscall){
-		stopns = todget(nil);
+		todget(nil, &stopns);
 		sysretfmt(scallnr, (va_list) up->s.args, ret, startns, stopns);
 		s = splhi();
 		up->procctl = Proc_stopme;
--- a/sys/src/9/cycv/trap.c
+++ b/sys/src/9/cycv/trap.c
@@ -233,7 +233,7 @@
 			up->procctl = Proc_stopme;
 			procctl();
 			splx(s);
-			startns = todget(nil);
+			todget(nil, &startns);
 		}
 		
 		if(scallnr >= nsyscall || systab[scallnr] == nil){
@@ -257,7 +257,7 @@
 	
 	ureg->r0 = ret;
 	if(up->procctl == Proc_tracesyscall){
-		stopns = todget(nil);
+		todget(nil, &stopns);
 		sysretfmt(scallnr, (va_list) up->s.args, ret, startns, stopns);
 		s = splhi();
 		up->procctl = Proc_stopme;
--- a/sys/src/9/kw/syscall.c
+++ b/sys/src/9/kw/syscall.c
@@ -187,7 +187,7 @@
 			up->procctl = Proc_stopme;
 			procctl();
 			splx(s);
-			startns = todget(nil);
+			todget(nil, &startns);
 		}
 		if(scallnr >= nsyscall || systab[scallnr] == nil){
 			postnote(up, 1, "sys: bad sys call", NDebug);
@@ -219,7 +219,7 @@
 	ureg->r0 = ret;
 
 	if(up->procctl == Proc_tracesyscall){
-		stopns = todget(nil);
+		todget(nil, &stopns);
 		sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns);
 		s = splhi();
 		up->procctl = Proc_stopme;
--- a/sys/src/9/mt7688/syscall.c
+++ b/sys/src/9/mt7688/syscall.c
@@ -53,7 +53,7 @@
 			up->procctl = Proc_stopme;
 			procctl();
 			splx(s);
-			startns = todget(nil);
+			todget(nil, &startns);
 		}
 
 		if(scallnr >= nsyscall || systab[scallnr] == nil){
@@ -89,7 +89,7 @@
 	ureg->r1 = ret;
 
 	if(up->procctl == Proc_tracesyscall){
-		stopns = todget(nil);
+		todget(nil, &stopns);
 		sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns);
 		s = splhi();
 		up->procctl = Proc_stopme;
--- a/sys/src/9/pc/devlml.c
+++ b/sys/src/9/pc/devlml.c
@@ -396,7 +396,7 @@
 		statcom = lml->codedata->statCom[fno];
 		jpgheader = (FrameHeader *)(lml->codedata->frag[fno].hdr + 2);
 		jpgheader->frameNo = lml->jpgframeno;
-		jpgheader->ftime  = todget(nil);
+		jpgheader->ftime  = todget(nil, nil);
 		jpgheader->frameSize = (statcom & 0x00ffffff) >> 1;
 		jpgheader->frameSeqNo = statcom >> 24;
 		wakeup(&lml->sleepjpg);
--- a/sys/src/9/pc/trap.c
+++ b/sys/src/9/pc/trap.c
@@ -493,7 +493,7 @@
 			up->procctl = Proc_stopme;
 			procctl();
 			splx(s);
-			startns = todget(nil);
+			todget(nil, &startns);
 		}
 
 		if(scallnr >= nsyscall || systab[scallnr] == nil){
@@ -528,7 +528,7 @@
 	ureg->ax = ret;
 
 	if(up->procctl == Proc_tracesyscall){
-		stopns = todget(nil);
+		todget(nil, &stopns);
 		sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns);
 		s = splhi();
 		up->procctl = Proc_stopme;
--- a/sys/src/9/pc64/main.c
+++ b/sys/src/9/pc64/main.c
@@ -184,7 +184,7 @@
 	i8250console();
 	quotefmtinstall();
 	screeninit();
-	print("\nPlan 9\n");
+	print("\nPlan X\n");
 	cpuidentify();
 	meminit0();
 	archinit();
--- a/sys/src/9/pc64/trap.c
+++ b/sys/src/9/pc64/trap.c
@@ -472,7 +472,7 @@
 			up->procctl = Proc_stopme;
 			procctl();
 			splx(s);
-			startns = todget(nil);
+			todget(nil, &startns);
 		}
 		if(scallnr >= nsyscall || systab[scallnr] == nil){
 			postnote(up, 1, "sys: bad sys call", NDebug);
@@ -504,7 +504,7 @@
 	}
 
 	if(up->procctl == Proc_tracesyscall){
-		stopns = todget(nil);
+		todget(nil, &stopns);
 		sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns);
 		s = splhi();
 		up->procctl = Proc_stopme;
--- a/sys/src/9/port/devcons.c
+++ b/sys/src/9/port/devcons.c
@@ -817,7 +817,7 @@
 static uvlong uvorder = 0x0001020304050607ULL;
 
 static uchar*
-le2vlong(vlong *to, uchar *f)
+be2vlong(vlong *to, uchar *f)
 {
 	uchar *t, *o;
 	int i;
@@ -830,7 +830,7 @@
 }
 
 static uchar*
-vlong2le(uchar *t, vlong from)
+vlong2be(uchar *t, vlong from)
 {
 	uchar *f, *o;
 	int i;
@@ -845,7 +845,7 @@
 static long order = 0x00010203;
 
 static uchar*
-le2long(long *to, uchar *f)
+be2long(long *to, uchar *f)
 {
 	uchar *t, *o;
 	int i;
@@ -857,19 +857,6 @@
 	return f+sizeof(long);
 }
 
-static uchar*
-long2le(uchar *t, long from)
-{
-	uchar *f, *o;
-	int i;
-
-	f = (uchar*)&from;
-	o = (uchar*)&order;
-	for(i = 0; i < sizeof(long); i++)
-		t[i] = f[o[i]];
-	return t+sizeof(long);
-}
-
 char *Ebadtimectl = "bad time control";
 
 /*
@@ -880,19 +867,20 @@
 static int
 readtime(ulong off, char *buf, int n)
 {
-	vlong	nsec, ticks;
+	vlong	nsec, ticks, mono;
 	long sec;
-	char str[7*NUMSIZE];
+	char str[9*NUMSIZE];
 
-	nsec = todget(&ticks);
+	nsec = todget(&ticks, &mono);
 	if(fasthz == 0LL)
 		fastticks((uvlong*)&fasthz);
 	sec = nsec/1000000000ULL;
-	snprint(str, sizeof(str), "%*lud %*llud %*llud %*llud ",
+	snprint(str, sizeof(str), "%*lud %*llud %*llud %*llud %*llud ",
 		NUMSIZE-1, sec,
 		VLNUMSIZE-1, nsec,
 		VLNUMSIZE-1, ticks,
-		VLNUMSIZE-1, fasthz);
+		VLNUMSIZE-1, fasthz,
+		VLNUMSIZE-1, mono);
 	return readstr(off, buf, n, str);
 }
 
@@ -926,23 +914,27 @@
 readbintime(char *buf, int n)
 {
 	int i;
-	vlong nsec, ticks;
+	vlong nsec, ticks, mono;
 	uchar *b = (uchar*)buf;
 
 	i = 0;
 	if(fasthz == 0LL)
 		fastticks((uvlong*)&fasthz);
-	nsec = todget(&ticks);
+	nsec = todget(&ticks, &mono);
+	if(n >= 4*sizeof(uvlong)){
+		vlong2be(b+3*sizeof(uvlong), mono);
+		i += sizeof(uvlong);
+	}
 	if(n >= 3*sizeof(uvlong)){
-		vlong2le(b+2*sizeof(uvlong), fasthz);
+		vlong2be(b+2*sizeof(uvlong), fasthz);
 		i += sizeof(uvlong);
 	}
 	if(n >= 2*sizeof(uvlong)){
-		vlong2le(b+sizeof(uvlong), ticks);
+		vlong2be(b+sizeof(uvlong), ticks);
 		i += sizeof(uvlong);
 	}
 	if(n >= 8){
-		vlong2le(b, nsec);
+		vlong2be(b, nsec);
 		i += sizeof(vlong);
 	}
 	return i;
@@ -968,20 +960,20 @@
 	case 'n':
 		if(n < sizeof(vlong))
 			error(Ebadtimectl);
-		le2vlong(&delta, p);
+		be2vlong(&delta, p);
 		todset(delta, 0, 0);
 		break;
 	case 'd':
 		if(n < sizeof(vlong)+sizeof(long))
 			error(Ebadtimectl);
-		p = le2vlong(&delta, p);
-		le2long(&period, p);
+		p = be2vlong(&delta, p);
+		be2long(&period, p);
 		todset(-1, delta, period);
 		break;
 	case 'f':
 		if(n < sizeof(uvlong))
 			error(Ebadtimectl);
-		le2vlong(&fasthz, p);
+		be2vlong(&fasthz, p);
 		if(fasthz <= 0)
 			error(Ebadtimectl);
 		todsetfreq(fasthz);
--- a/sys/src/9/port/devloopback.c
+++ b/sys/src/9/port/devloopback.c
@@ -553,7 +553,7 @@
 	bp = padblock(bp, Tmsize);
 	if(BLEN(bp) < lb->minmtu)
 		bp = adjustblock(bp, lb->minmtu);
-	ptime(bp->rp, todget(nil));
+	ptime(bp->rp, todget(nil, nil));
 
 	link->packets++;
 	link->bytes += n;
--- a/sys/src/9/port/devproc.c
+++ b/sys/src/9/port/devproc.c
@@ -283,7 +283,7 @@
 	te->pid = p->pid;
 	te->etype = etype;
 	if (ts == 0)
-		te->time = todget(nil);
+		todget(nil, &te->time);
 	else
 		te->time = ts;
 	tproduced++;
--- a/sys/src/9/port/edf.c
+++ b/sys/src/9/port/edf.c
@@ -195,7 +195,7 @@
 		DPRINT("%lud release %lud[%s], r=%lud, d=%lud, t=%lud, S=%lud\n",
 			now, p->pid, statename[p->state], e->r, e->d, e->t, e->S);
 		if(pt = proctrace){
-			nowns = todget(nil);
+			todget(nil, &nowns);
 			pt(p, SRelease, nowns);
 			pt(p, SDeadline, nowns + 1000LL*e->D);
 		}
@@ -291,6 +291,7 @@
 	Edf *e;
 	void (*pt)(Proc*, int, vlong);
 	long tns;
+	vlong tnow;
 
 	e = p->edf;
 	/* Called with edflock held */
@@ -315,8 +316,10 @@
 		}else{
 			DPRINT("v");
 		}
-		if(p->trace && (pt = proctrace))
-			pt(p, SInte, todget(nil) + e->tns);
+		if(p->trace && (pt = proctrace)){
+			todget(nil, &tnow);
+			pt(p, SInte, tnow + e->tns);
+		}
 		e->tmode = Trelative;
 		e->tf = deadlineintr;
 		e->ta = p;
--- a/sys/src/9/port/portfns.h
+++ b/sys/src/9/port/portfns.h
@@ -366,7 +366,7 @@
 ulong		tk2ms(ulong);
 #define		TK2MS(x) ((x)*(1000/HZ))
 uvlong		tod2fastticks(vlong);
-vlong		todget(vlong*);
+vlong		todget(vlong*, vlong*);
 void		todsetfreq(vlong);
 void		todinit(void);
 void		todset(vlong, vlong, int);
--- a/sys/src/9/port/sysproc.c
+++ b/sys/src/9/port/sysproc.c
@@ -1257,12 +1257,12 @@
 	/* return in register on 64bit machine */
 	if(sizeof(uintptr) == sizeof(vlong)){
 		USED(list);
-		return (uintptr)todget(nil);
+		return (uintptr)todget(nil, nil);
 	}
 
 	v = va_arg(list, vlong*);
 	evenaddr((uintptr)v);
 	validaddr((uintptr)v, sizeof(vlong), 1);
-	*v = todget(nil);
+	*v = todget(nil, nil);
 	return 0;
 }
--- a/sys/src/9/port/taslock.c
+++ b/sys/src/9/port/taslock.c
@@ -36,6 +36,7 @@
 lock(Lock *l)
 {
 	int i;
+	vlong mono;
 	uintptr pc;
 
 	pc = getcallerpc(&l);
--- a/sys/src/9/port/tod.c
+++ b/sys/src/9/port/tod.c
@@ -44,7 +44,9 @@
 	uvlong	udivider;	/* ticks = (µdivider*µs)>>31 */
 	vlong	hz;		/* frequency of fast clock */
 	vlong	last;		/* last reading of fast clock */
-	vlong	off;		/* offset from epoch to last */
+	vlong	off;		/* offset from epoch to last (ns) */
+	vlong	monolast;	/* last reading of fast clocks for monotonic time */
+	vlong	monooff;	/* offset from 0 to monolast (ns) */
 	vlong	lasttime;	/* last return value from todget */
 	vlong	delta;	/* add 'delta' each slow clock tick from sstart to send */
 	ulong	sstart;		/* ... */
@@ -61,6 +63,7 @@
 	ilock(&tod);
 	tod.init = 1;			/* prevent reentry via fastticks */
 	tod.last = fastticks((uvlong *)&tod.hz);
+	tod.monolast = tod.last;
 	iunlock(&tod);
 	todsetfreq(tod.hz);
 	addclock0link(todfix, 100);
@@ -67,14 +70,36 @@
 }
 
 /*
+ *  return monotonic ns for fast clock reading 'ticks'; tod must be locked
+ */
+static vlong
+todmono(vlong ticks)
+{
+	uvlong x;
+	vlong diff;
+
+	if(tod.hz == 0) /* first todsetfreq calls us before setting tod.hz */
+		return 0;
+	diff = ticks - tod.monolast;
+	mul64fract(&x, diff, tod.multiplier);
+	x += tod.monooff;
+	return x;
+}
+
+/*
  *  calculate multiplier
  */
 void
 todsetfreq(vlong f)
 {
+	vlong ticks;
+
 	if (f <= 0)
 		panic("todsetfreq: freq %lld <= 0", f);
 	ilock(&tod);
+	ticks = fastticks(nil);
+	tod.monooff = todmono(ticks);
+	tod.monolast = ticks;
 	tod.hz = f;
 
 	/* calculate multiplier for time conversion */
@@ -125,10 +150,10 @@
  *  get time of day
  */
 vlong
-todget(vlong *ticksp)
+todget(vlong *ticksp, vlong *monop)
 {
 	uvlong x;
-	vlong ticks, diff;
+	vlong ticks, diff, mono;
 	ulong t;
 
 	if(!tod.init)
@@ -159,16 +184,21 @@
 	mul64fract(&x, diff, tod.multiplier);
 	x += tod.off;
 
-	/* time can't go backwards */
+	/* time can't go backwards (except when /dev/[bin]time is written) */
 	if(x < tod.lasttime)
 		x = tod.lasttime;
 	else
 		tod.lasttime = x;
 
+	mono = 0;
+	if(monop != nil)
+		mono = todmono(ticks);
 	iunlock(&tod);
 
 	if(ticksp != nil)
 		*ticksp = ticks;
+	if(monop != nil)
+		*monop = mono;
 
 	return x;
 }
@@ -219,7 +249,7 @@
 long
 seconds(void)
 {
-	return (vlong)todget(nil) / TODFREQ;
+	return (vlong)todget(nil, nil) / TODFREQ;
 }
 
 uvlong
--- a/sys/src/9/sgi/trap.c
+++ b/sys/src/9/sgi/trap.c
@@ -612,7 +612,7 @@
 		if(up->syscalltrace)
 			free(up->syscalltrace);
 		up->syscalltrace = nil;
-		*startnsp = todget(nil);
+		todget(nil, startnsp);
 	}
 }
 
@@ -619,12 +619,14 @@
 static void
 sctracefinish(ulong scallnr, ulong sp, int ret, vlong startns)
 {
+	vlong stopns;
 	int s;
 
 	if(up->procctl == Proc_tracesyscall){
+		todget(nil, &stopns);
 		up->procctl = Proc_stopme;
 		sysretfmt(scallnr, (va_list)(sp+BY2WD), ret,
-			startns, todget(nil));
+			startns, stopns);
 		s = splhi();
 		procctl();
 		splx(s);
--- a/sys/src/9/teg2/syscall.c
+++ b/sys/src/9/teg2/syscall.c
@@ -199,7 +199,7 @@
 
 	up->nerrlab = 0;
 	ret = -1;
-	startns = todget(nil);
+	todget(nil, &startns);
 
 	l1cache->wb();			/* system is more stable with this */
 	if(!waserror()){
@@ -237,7 +237,7 @@
 	ureg->r0 = ret;
 
 	if(up->procctl == Proc_tracesyscall){
-		stopns = todget(nil);
+		todget(nil, &stopns);
 		sysretfmt(scallnr, (va_list)(sp+BY2WD), ret, startns, stopns);
 		s = splhi();
 		up->procctl = Proc_stopme;
--- a/sys/src/9/zynq/trap.c
+++ b/sys/src/9/zynq/trap.c
@@ -252,7 +252,7 @@
 			up->procctl = Proc_stopme;
 			procctl();
 			splx(s);
-			startns = todget(nil);
+			todget(nil, &startns);
 		}
 		if(scallnr >= nsyscall || systab[scallnr] == nil){
 			postnote(up, 1, "sys: bad sys call", NDebug);
@@ -275,7 +275,7 @@
 	
 	ureg->r0 = ret;
 	if(up->procctl == Proc_tracesyscall){
-		stopns = todget(nil);
+		todget(nil, &stopns);
 		sysretfmt(scallnr, (va_list) up->s.args, ret, startns, stopns);
 		s = splhi();
 		up->procctl = Proc_stopme;
--