shithub: front

Download patch

ref: 49f1b55b7a3ddc7bb97a53ab70fc3e88e3a897aa
parent: 7b4febff70a365887d380ed4f50c7027ae982b37
author: mia soweli <mia@soweli.net>
date: Fri Aug 15 14:09:55 EDT 2025

libc: import atomics from gefs (thanks ori)

extract, fix, and document the atomic operations from gefs
and put them in libc. the underlying types have been
wrapped in a structure to prevent accidental misuse.

thanks to ori for doing the tests and the gefs part of this work.

--- a/sys/include/libc.h
+++ b/sys/include/libc.h
@@ -546,6 +546,42 @@
 #pragma varargck argpos procsetname 1
 
 /*
+ * atomic operations
+ */
+
+typedef struct Along
+{
+	long v;	/* wrapped value, operated on by agetl, aswapl, aincl and acasl */
+} Along;
+
+typedef struct Avlong
+{
+	vlong v;	/* wrapped value, operated on by agetv, aswapv, aincv and acasv */
+} Avlong;
+
+typedef struct Aptr
+{
+	void *v;	/* wrapped pointer, operated on by agetp, aswapp and acasp */
+} Aptr;
+
+extern	long	agetl(Along *);
+extern	vlong	agetv(Avlong *);
+extern	void*	agetp(Aptr *);
+
+extern	long	aswapl(Along *, long);
+extern	vlong	aswapv(Avlong *, vlong);
+extern	void*	aswapp(Aptr *, void*);
+
+extern	long	aincl(Along *, long);
+extern	vlong	aincv(Avlong *, vlong);
+
+extern	int	acasl(Along *, long, long);
+extern	int	acasv(Avlong *, vlong, vlong);
+extern	int	acasp(Aptr *, void*, void*);
+
+extern	void	coherence(void);
+
+/*
  *  network dialing
  */
 #define NETPATHLEN 40
--- /dev/null
+++ b/sys/man/2/atomic
@@ -1,0 +1,95 @@
+.TH atomic 2
+.SH NAME
+agetl,
+agetv,
+agetp,
+aswapl,
+aswapv,
+aswapp,
+aincl,
+aincv,
+acasl,
+acasv,
+acasp,
+coherence
+\- atomic operations
+.SH SYNOPSIS
+.ta 0.7i +0.7i +0.7i +0.7i +0.7i +0.7i +0.7i
+.EX
+#include <u.h>
+#include <libc.h>
+
+typedef struct Along Along;
+typedef struct Avlong Avlong;
+typedef struct Aptr Aptr;
+
+long	agetl(Along *);
+vlong	agetv(Avlong *);
+void*	agetp(Aptr *);
+
+long	aswapl(Along *, long new);
+vlong	aswapv(Avlong *, vlong new);
+void*	aswapp(Aptr *, void* new);
+
+long	aincl(Along *, long);
+vlong	aincv(Avlong *, vlong);
+
+int	acasl(Along *, long old, long new);
+int	acasv(Avlong *, vlong old, vlong new);
+int	acasp(Aptr *, void* old, void* new);
+
+void	coherence(void);
+.EE
+
+.SH DESCRIPTION
+.P
+These routines provide atomic operations that can be used to synchronize processes.
+They are sequentially consistent, that is, operations in a multi-process program are executed in some sequential order; operations in each process are executed in program order. No non-atomic operations will be re-ordered to be observable before or after an atomic operation.
+.P
+.IR Agetl ,
+.IR agetv ,
+and
+.I agetp
+perform an atomic read.
+.IR Aswapl ,
+.IR aswapv ,
+and
+.I aswapp
+perform an atomic swap, replacing the value with
+.B new
+then returning the previous value.
+.I Aincl
+and
+.I aincv
+perform an atomic add, replacing the value with the sum of it and
+the second argument,
+then returning the result.
+
+.P
+.IR Acasl ,
+.IR acasv ,
+and
+.I acasp
+perform an atomic compare and swap.
+If the value is equal to
+.B old
+then it is replaced by
+.B new
+and the function returns 1.
+If not, the function returns 0.
+
+.P
+.I Coherence
+provides only the re-ordering barrier.
+
+.SH SOURCE
+.B /sys/src/libc/port/atomic.c
+.br
+.B /sys/src/libc/port/atomic64.c
+.br
+.B /sys/src/libc/$objtype/atomic.s
+.br
+.B /sys/src/libc/$objtype/atomic64.s
+
+.SH BUGS
+The vlong operations may be emulated in software and therefore must not be called from a note handler.
--- a/sys/src/cmd/gefs/atomic-386.s
+++ /dev/null
@@ -1,100 +1,0 @@
-/*  get variants */
-TEXT ageti+0(SB),1,$0
-TEXT agetl+0(SB),1,$0
-TEXT agetp+0(SB),1,$0
-	MOVL	p+0(FP), AX
-	MOVL	0(AX), AX
-	RET
-
-TEXT agetv+0(SB),1,$0
-	MOVL	r+0(FP), AX
-	MOVL	p+4(FP), BX
-	FMOVD	(BX), F0
-	FMOVDP	F0, (AX)
-	RET
-
-/*  set variants */
-TEXT aseti+0(SB),1,$0
-TEXT asetl+0(SB),1,$0
-TEXT asetp+0(SB),1,$0
-	MOVL		p+0(FP), BX
-	MOVL		v+4(FP), AX
-	LOCK; XCHGL	(BX), AX
-	RET
-
-TEXT asetv+0(SB),1,$0
-	MOVL	p+4(FP), DI
-	MOVL	nv+8(FP), BX
-	MOVL	nv+12(FP), CX
-	MOVL	0(DI), AX
-	MOVL	4(DI), DX
-loop:
-	LOCK;	CMPXCHG8B (DI)
-        JNE     loop
-	MOVL	p+0(FP),DI
-	MOVL	AX, 0(DI)
-	MOVL	DX, 4(DI)
-	RET
-
-/*  inc variants */
-TEXT ainci+0(SB),1,$0
-TEXT aincl+0(SB),1,$0
-TEXT aincp+0(SB),1,$0
-	MOVL	p+0(FP), BX
-	MOVL	v+4(FP), CX
-	MOVL	CX, AX
-	LOCK; XADDL AX, (BX)
-	ADDL	CX, AX
-	RET
-
-TEXT aincv+0(SB),1,$0
-	MOVL	p+4(FP), DI
-retry:
-	MOVL	0(DI), AX
-	MOVL	4(DI), DX
-	MOVL 	AX, BX
-	MOVL	DX, CX
-	ADDL	v+8(FP), BX
-	ADCL	v+12(FP), CX
-	LOCK; CMPXCHG8B (DI)
-	JNE	retry
-	MOVL	r+0(FP), DI
-	MOVL	BX, 0x0(DI)
-	MOVL	CX, 0x4(DI)
-	RET
-
-/*  cas variants */
-TEXT acasi+0(SB),1,$0
-TEXT acasl+0(SB),1,$0
-TEXT acasp+0(SB),1,$0
-	MOVL	p+0(FP), CX
-	MOVL	ov+4(FP), AX
-	MOVL	nv+8(FP), DX
-	LOCK; CMPXCHGL DX, (CX)
-	JNE	fail32
-	MOVL	$1,AX
-	RET
-fail32:
-	MOVL	$0,AX
-	RET
-
-TEXT acasv+0(SB),1,$0
-	MOVL	p+0(FP), DI
-	MOVL	ov+4(FP), AX
-	MOVL	ov+8(FP), DX
-	MOVL	nv+12(FP), BX
-	MOVL	nv+16(FP), CX
-	LOCK; CMPXCHG8B (DI)
-	JNE	fail64
-	MOVL	$1,AX
-	RET
-fail64:
-	MOVL	$0,AX
-	RET
-
-/* barriers (do we want to distinguish types?) */
-TEXT coherence+0(SB),1,$0
-	/* this is essentially mfence but that requires sse2 */
-	XORL	AX, AX
-	LOCK; XADDL AX, (SP)
-	RET
--- a/sys/src/cmd/gefs/atomic-amd64.s
+++ /dev/null
@@ -1,59 +1,0 @@
-/*  get variants */
-TEXT agetl+0(SB),1,$0
-	MOVL	(RARG), AX
-	RET
-TEXT agetv+0(SB),1,$0
-TEXT agetp+0(SB),1,$0
-	MOVQ	(RARG), AX
-	RET
-
-/*  set variants */
-TEXT asetl+0(SB),1,$0
-	MOVL		v+8(FP), AX
-	LOCK; XCHGL	(RARG), AX
-	RET
-
-TEXT asetv+0(SB),1,$0
-TEXT asetp+0(SB),1,$0
-	MOVQ		v+8(FP), AX
-	LOCK; XCHGQ	(RARG), AX
-	RET
-
-/*  inc variants */
-TEXT aincl+0(SB),1,$0
-	MOVQ		v+8(FP), BX
-	MOVQ		BX, AX
-	LOCK; XADDL	AX, (RARG)
-	ADDQ		BX, AX
-	RET
-
-TEXT aincv+0(SB),1,$0
-TEXT aincp+0(SB),1,$0
-	MOVQ		v+8(FP), BX
-	MOVQ		BX, AX
-	LOCK; XADDQ	AX, (RARG)
-	ADDQ		BX, AX
-	RET
-
-/*  cas variants */
-TEXT acasl+0(SB),1,$0
-	MOVL	c+8(FP), AX
-	MOVL	v+16(FP), BX
-	LOCK; CMPXCHGL	BX, (RARG)
-	SETEQ	AX
-	MOVBLZX	AX, AX
-	RET
-
-TEXT acasv+0(SB),1,$0
-TEXT acasp+0(SB),1,$0
-	MOVQ	c+8(FP), AX
-	MOVQ	v+16(FP), BX
-	LOCK; CMPXCHGQ BX, (RARG)
-	SETEQ	AX
-	MOVBLZX	AX, AX
-	RET
-
-/* barriers (do we want to distinguish types?) */
-TEXT coherence+0(SB),1,$0
-	MFENCE
-	RET
--- a/sys/src/cmd/gefs/atomic-arm.c
+++ /dev/null
@@ -1,95 +1,0 @@
-#include <u.h>
-#include <libc.h>
-
-#include "atomic.h"
-
-static Lock locktab[128];
-
-static u32int
-ihash(void *p)
-{
-	uintptr x = (uintptr)p;
-
-	/* constants from splitmix32 rng */
-	x = (x ^ (x >> 16)) * 0x85ebca6b;
-	x = (x ^ (x >> 13)) * 0xc2b2ae35;
-	x = (x ^ (x >> 16));
-	return x & (nelem(locktab)-1);
-}
-
-#define GET(T, n) \
-	T n(T *p)			\
-	{				\
-		uintptr h;		\
-		T r;			\
-					\
-		h = ihash(p);		\
-		lock(&locktab[h]);	\
-		r = *p;			\
-		unlock(&locktab[h]);	\
-		return r;		\
-	}
-
-#define SET(T, n) \
-	T n(T *p, T v)			\
-	{				\
-		uintptr h;		\
-		T r;			\
-					\
-		h = ihash(p);		\
-		lock(&locktab[h]);	\
-		r = *p;			\
-		*p = v;			\
-		unlock(&locktab[h]);	\
-		return r;		\
-	}
-
-#define INC(T, n) \
-	T n(T *p, T dv)			\
-	{				\
-		uintptr h;		\
-		T r;			\
-					\
-		h = ihash(p);		\
-		lock(&locktab[h]);	\
-		*p += dv;		\
-		r = *p;			\
-		unlock(&locktab[h]);	\
-		return r;		\
-	}
-
-#define CAS(T, n) \
-	int n(T *p, T ov, T nv)		\
-	{				\
-		uintptr h;		\
-		int r;			\
-					\
-		h = ihash(p);		\
-		lock(&locktab[h]);	\
-		if(*p == ov){		\
-			*p = nv;	\
-			r = 1;		\
-		}else			\
-			r = 0;		\
-		unlock(&locktab[h]);	\
-		return r;		\
-	}
-
-GET(int, ageti)
-GET(long, agetl)
-GET(vlong, agetv)
-GET(void*, agetp)
-
-SET(int, aseti)
-SET(long, asetl)
-SET(vlong, asetv)
-SET(void*, asetp)
-
-INC(int, ainci)
-INC(long, aincl)
-INC(vlong, aincv)
-
-CAS(int, acasi)
-CAS(long, acasl)
-CAS(vlong, acasv)
-CAS(void*, acasp)
--- a/sys/src/cmd/gefs/atomic-arm64.s
+++ /dev/null
@@ -1,79 +1,0 @@
-/*  get variants */
-TEXT agetl+0(SB),1,$0
-	MOVW	(R0), R0
-	RETURN
-TEXT agetv+0(SB),1,$0
-TEXT agetp+0(SB),1,$0
-	MOV	(R0), R0
-	RETURN
-
-/*  set variants */
-TEXT asetl+0(SB),1,$0
-	MOV	0x08(FP), R1
-	MOV	R0, R2
-_setl:
-	LDAXRW	(R2), R0
-	STLXRW	R1, (R2), R3
-	CBNZW	R3, _setl
-	RETURN
-TEXT asetv+0(SB),1,$0
-TEXT asetp+0(SB),1,$0
-	MOV	0x08(FP), R1
-	MOV	R0, R2
-_setp:
-	LDAXR	(R2), R0
-	STLXR	R1, (R2), R3
-	CBNZW	R3, _setp
-	RETURN
-
-/*  inc variants */
-TEXT aincl+0(SB),1,$0
-	MOV	0x08(FP), R1
-	MOV	R0, R2
-_incl:
-	LDAXRW	(R2), R0
-	ADDW	R1, R0, R3
-	STLXRW	R3, (R2), R4
-	CBNZW	R4, _incl
-	RETURN
-TEXT aincv+0(SB),1,$0
-TEXT aincp+0(SB),1,$0
-	MOV	0x08(FP), R1
-	MOV	R0, R2
-_incp:
-	LDAXR	(R2), R0
-	ADD	R1, R0, R3
-	STLXR	R3, (R2), R4
-	CBNZW	R4, _incp
-	RETURN
-
-/*  cas variants */
-TEXT acasl+0(SB),1,$0
-	MOV	0x08(FP), R1
-	MOV	0x10(FP), R2
-	LDAXRW	(R0), R3
-	CMPW	R1, R3
-	BNE	_casl
-	STLXRW	R2, (R0), R4
-	CMPW	$0, R4
-_casl:
-	CSETW	EQ, R0
-	RETURN
-TEXT acasv+0(SB),1,$0
-TEXT acasp+0(SB),1,$0
-	MOV	0x08(FP), R1
-	MOV	0x10(FP), R2
-	LDAXR	(R0), R3
-	CMP	R1, R3
-	BNE	_casp
-	STLXR	R2, (R0), R4
-	CMPW	$0, R4
-_casp:
-	CSETW	EQ, R0
-	RETURN
-
-/* barriers */
-#define ISH	(2<<2 | 3)
-TEXT coherence+0(SB),1,$0
-	DMB	$ISH
-	RETURN
--- a/sys/src/cmd/gefs/atomic-mips.c
+++ /dev/null
@@ -1,95 +1,0 @@
-#include <u.h>
-#include <libc.h>
-
-#include "atomic.h"
-
-static Lock locktab[128];
-
-static u32int
-ihash(void *p)
-{
-	uintptr x = (uintptr)p;
-
-	/* constants from splitmix32 rng */
-	x = (x ^ (x >> 16)) * 0x85ebca6b;
-	x = (x ^ (x >> 13)) * 0xc2b2ae35;
-	x = (x ^ (x >> 16));
-	return x & (nelem(locktab)-1);
-}
-
-#define GET(T, n) \
-	T n(T *p)			\
-	{				\
-		uintptr h;		\
-		T r;			\
-					\
-		h = ihash(p);		\
-		lock(&locktab[h]);	\
-		r = *p;			\
-		unlock(&locktab[h]);	\
-		return r;		\
-	}
-
-#define SET(T, n) \
-	T n(T *p, T v)			\
-	{				\
-		uintptr h;		\
-		T r;			\
-					\
-		h = ihash(p);		\
-		lock(&locktab[h]);	\
-		r = *p;			\
-		*p = v;			\
-		unlock(&locktab[h]);	\
-		return r;		\
-	}
-
-#define INC(T, n) \
-	T n(T *p, T dv)			\
-	{				\
-		uintptr h;		\
-		T r;			\
-					\
-		h = ihash(p);		\
-		lock(&locktab[h]);	\
-		*p += dv;		\
-		r = *p;			\
-		unlock(&locktab[h]);	\
-		return r;		\
-	}
-
-#define CAS(T, n) \
-	int n(T *p, T ov, T nv)		\
-	{				\
-		uintptr h;		\
-		int r;			\
-					\
-		h = ihash(p);		\
-		lock(&locktab[h]);	\
-		if(*p == ov){		\
-			*p = nv;	\
-			r = 1;		\
-		}else			\
-			r = 0;		\
-		unlock(&locktab[h]);	\
-		return r;		\
-	}
-
-GET(int, ageti)
-GET(long, agetl)
-GET(vlong, agetv)
-GET(void*, agetp)
-
-SET(int, aseti)
-SET(long, asetl)
-SET(vlong, asetv)
-SET(void*, asetp)
-
-INC(int, ainci)
-INC(long, aincl)
-INC(vlong, aincv)
-
-CAS(int, acasi)
-CAS(long, acasl)
-CAS(vlong, acasv)
-CAS(void*, acasp)
--- a/sys/src/cmd/gefs/atomic-power64.s
+++ /dev/null
@@ -1,101 +1,0 @@
-/*  get variants */
-TEXT agetl+0(SB),1,$0
-	SYNC
-	// See ISA 3.0B section B.2.3, "Safe Fetch"
-	MOVWZ	0(RARG), RARG
-	CMPW	RARG, RARG, CR7
-	BC	4, 30, 1(PC) // bne- cr7,0x4
-	ISYNC
-	RETURN
-
-TEXT agetv+0(SB),1,$0
-TEXT agetp+0(SB),1,$0
-	SYNC
-	// See ISA 3.0B section B.2.3, "Safe Fetch"
-	MOVD	0(RARG), RARG
-	CMP	RARG, RARG, CR7
-	BC	4, 30, 1(PC) // bne- cr7,0x4
-	ISYNC
-	RETURN
-
-/*  set variants */
-TEXT asetl+0(SB),1,$0
-	MOVW	val+8(FP), R4
-	SYNC
-	MOVW	R4, 0(RARG)
-	RETURN
-
-TEXT asetv+0(SB),1,$0
-TEXT asetp+0(SB),1,$0
-	MOVD	val+8(FP), R4
-	SYNC
-	MOVD	R4, 0(RARG)
-	RETURN
-
-/*  inc variants */
-TEXT aincl+0(SB),1,$0
-	MOVD	RARG, R4
-	MOVW	delta+8(FP), R5
-	LWSYNC
-	LWAR	(R4), RARG
-	ADD	R5, RARG
-	STWCCC	RARG, (R4)
-	BNE	-3(PC)
-	RETURN
-
-TEXT aincv+0(SB),1,$0
-TEXT aincp+0(SB),1,$0
-	MOVD	RARG, R4
-	MOVD	delta+8(FP), R5
-	LWSYNC
-	LDAR	(R4), RARG
-	ADD	R5, RARG
-	STDCCC	RARG, (R4)
-	BNE	-3(PC)
-	RETURN
-
-/*  cas variants */
-TEXT acasl+0(SB),1,$0
-	MOVWZ	old+8(FP), R4
-	MOVWZ	new+16(FP), R5
-	LWSYNC
-casagain:
-	LWAR	(RARG), R6
-	CMPW	R6, R4
-	BNE	casfail
-	STWCCC	R5, (RARG)
-	BNE	casagain
-	MOVD	$1, RARG
-	LWSYNC
-	RETURN
-casfail:
-	LWSYNC
-	AND	R0, RARG
-	RETURN
-
-TEXT acasv+0(SB),1,$0
-TEXT acasp+0(SB),1,$0
-	MOVD	old+8(FP), R4
-	MOVD	new+16(FP), R5
-	LWSYNC
-cas64again:
-	LDAR	(RARG), R6
-	CMP	R6, R4
-	BNE	cas64fail
-	STDCCC	R5, (RARG)
-	BNE	cas64again
-	MOVD	$1, RARG
-	LWSYNC
-	RETURN
-cas64fail:
-	LWSYNC
-	AND	R0, RARG
-	RETURN
-
-/* barriers */
-TEXT coherence+0(SB),1,$0
-	// LWSYNC is the "export" barrier recommended by Power ISA
-	// v2.07 book II, appendix B.2.2.2.
-	// LWSYNC is a load/load, load/store, and store/store barrier.
-	LWSYNC
-	RETURN
--- a/sys/src/cmd/gefs/atomic-spim.c
+++ /dev/null
@@ -1,95 +1,0 @@
-#include <u.h>
-#include <libc.h>
-
-#include "atomic.h"
-
-static Lock locktab[128];
-
-static u32int
-ihash(void *p)
-{
-	uintptr x = (uintptr)p;
-
-	/* constants from splitmix32 rng */
-	x = (x ^ (x >> 16)) * 0x85ebca6b;
-	x = (x ^ (x >> 13)) * 0xc2b2ae35;
-	x = (x ^ (x >> 16));
-	return x & (nelem(locktab)-1);
-}
-
-#define GET(T, n) \
-	T n(T *p)			\
-	{				\
-		uintptr h;		\
-		T r;			\
-					\
-		h = ihash(p);		\
-		lock(&locktab[h]);	\
-		r = *p;			\
-		unlock(&locktab[h]);	\
-		return r;		\
-	}
-
-#define SET(T, n) \
-	T n(T *p, T v)			\
-	{				\
-		uintptr h;		\
-		T r;			\
-					\
-		h = ihash(p);		\
-		lock(&locktab[h]);	\
-		r = *p;			\
-		*p = v;			\
-		unlock(&locktab[h]);	\
-		return r;		\
-	}
-
-#define INC(T, n) \
-	T n(T *p, T dv)			\
-	{				\
-		uintptr h;		\
-		T r;			\
-					\
-		h = ihash(p);		\
-		lock(&locktab[h]);	\
-		*p += dv;		\
-		r = *p;			\
-		unlock(&locktab[h]);	\
-		return r;		\
-	}
-
-#define CAS(T, n) \
-	int n(T *p, T ov, T nv)		\
-	{				\
-		uintptr h;		\
-		int r;			\
-					\
-		h = ihash(p);		\
-		lock(&locktab[h]);	\
-		if(*p == ov){		\
-			*p = nv;	\
-			r = 1;		\
-		}else			\
-			r = 0;		\
-		unlock(&locktab[h]);	\
-		return r;		\
-	}
-
-GET(int, ageti)
-GET(long, agetl)
-GET(vlong, agetv)
-GET(void*, agetp)
-
-SET(int, aseti)
-SET(long, asetl)
-SET(vlong, asetv)
-SET(void*, asetp)
-
-INC(int, ainci)
-INC(long, aincl)
-INC(vlong, aincv)
-
-CAS(int, acasi)
-CAS(long, acasl)
-CAS(vlong, acasv)
-CAS(void*, acasp)
--- a/sys/src/cmd/gefs/atomic.h
+++ /dev/null
@@ -1,16 +1,0 @@
-long	agetl(long*);
-vlong	agetv(vlong*);
-void*	agetp(void**);
-
-long	asetl(long*, long);
-vlong	asetv(vlong*, vlong);
-void*	asetp(void**, void*);
-
-long	aincl(long*, long);
-vlong	aincv(vlong*, vlong);
-
-int	acasl(long*, long, long);
-int	acasv(vlong*, vlong, vlong);
-int	acasp(void**, void*, void*);
-
-void	coherence(void);
--- a/sys/src/cmd/gefs/blk.c
+++ b/sys/src/cmd/gefs/blk.c
@@ -5,7 +5,6 @@
 
 #include "dat.h"
 #include "fns.h"
-#include "atomic.h"
 
 static vlong	blkalloc_lk(Arena*, int);
 static vlong	blkalloc(int, uint, int);
@@ -239,8 +238,8 @@
 	lb = a->logbuf[0];
 	if(lb == a->logtl)
 		lb = a->logbuf[1];
-	assert(lb->ref == 1);
-	lb->flag = Bstatic;
+	assert(agetl(&lb->ref) == 1);
+	aswapl(&lb->flag, Bstatic);
 	initblk(lb, o, -1, Tlog);
 	traceb("logblk" , lb->bp);
 	lb->lasthold0 = lb->lasthold;
@@ -271,7 +270,7 @@
 		assert(off < end);
 	}
 	lb = a->logtl;
-	assert(lb->ref > 0);
+	assert(agetl(&lb->ref) > 0);
 	assert(lb->type == Tlog);
 	assert(lb->logsz >= 0);
 	dprint("logop %d: %llx+%llx@%x\n", op, off, len, lb->logsz);
@@ -351,7 +350,7 @@
 			case LogSync:
 				gen = ent >> 8;
 				dprint("\tlog@%x: sync %lld\n", i, gen);
-				if(gen >= fs->qgen){
+				if(gen >= agetv(&fs->qgen)){
 					if(a->logtl == nil){
 						b->logsz = i;
 						a->logtl = b;
@@ -752,7 +751,7 @@
 Blk*
 holdblk(Blk *b)
 {
-	ainc(&b->ref);
+	aincl(&b->ref, 1);
 	b->lasthold = getcallerpc(&b);
 	return b;
 }
@@ -763,7 +762,7 @@
 	if(b == nil)
 		return;
 	b->lastdrop = getcallerpc(&b);
-	if(adec(&b->ref) != 0)
+	if(aincl(&b->ref, -1) != 0)
 		return;
 	/*
 	 * freed blocks go to the LRU bottom
@@ -801,7 +800,7 @@
 		p = agetp(&fs->limbo[ge]);
 		l->next = p;
 		if(acasp(&fs->limbo[ge], p, l)){
-			ainc(&fs->nlimbo);
+			aincl(&fs->nlimbo, 1);
 			break;
 		}
 	}
@@ -819,7 +818,7 @@
 	tracex("freeb", b->bp, getcallerpc(&t), -1);
 	setflag(b, Blimbo, 0);
 	holdblk(b);
-	assert(b->ref > 1);
+	assert(agetl(&b->ref) > 1);
 	limbo(DFblk, b);
 }
 
@@ -853,7 +852,7 @@
 
 	assert(tid >= 0);
 	ge = agetl(&fs->epoch);
-	asetl(&fs->lepoch[tid], ge | Eactive);
+	aswapl(&fs->lepoch[tid], ge | Eactive);
 }
 
 void
@@ -863,7 +862,7 @@
 
 	assert(tid >= 0);
 	le = agetl(&fs->lepoch[tid]);
-	asetl(&fs->lepoch[tid], le &~ Eactive);
+	aswapl(&fs->lepoch[tid], le &~ Eactive);
 }
 
 void
@@ -875,7 +874,7 @@
 	delay = 0;
 Again:
 	ge = agetl(&fs->epoch);
-	for(i = 0; i < fs->nworker; i++){
+	for(i = 0; i < agetl(&fs->nworker); i++){
 		e = agetl(&fs->lepoch[i]);
 		if((e & Eactive) && e != (ge | Eactive)){
 			if(delay < 1000)
@@ -901,7 +900,7 @@
 
 	c = agetl(&fs->nlimbo);
 	ge = agetl(&fs->epoch);
-	for(i = 0; i < fs->nworker; i++){
+	for(i = 0; i < agetl(&fs->nworker); i++){
 		e = agetl(&fs->lepoch[i]);
 		if((e & Eactive) && e != (ge | Eactive)){
 			if(c < fs->cmax/4)
@@ -910,8 +909,8 @@
 		}
 	}
 	epochwait();
-	p = asetp(&fs->limbo[(ge+1)%3], nil);
-	asetl(&fs->epoch, (ge+1)%3);
+	p = aswapp(&fs->limbo[(ge+1)%3], nil);
+	aswapl(&fs->epoch, (ge+1)%3);
 
 	for(; p != nil; p = n){
 		n = p->next;
@@ -952,7 +951,7 @@
 		default:
 			abort();
 		}
-		adec(&fs->nlimbo);
+		aincl(&fs->nlimbo, -1);
 	}
 }
 
@@ -1014,7 +1013,7 @@
 	else
 		abort();
 	if(qe.b != nil)
-		assert(qe.b->ref > 0);
+		assert(agetl(&qe.b->ref) > 0);
 	qlock(&q->lk);
 	qe.qgen = agetv(&fs->qgen);
 	while(q->nheap == q->heapsz)
@@ -1079,7 +1078,7 @@
 
 	q = p;
 	if(waserror()){
-		ainc(&fs->rdonly);
+		aincl(&fs->rdonly, 1);
 		fprint(2, "error syncing: %s\n", errmsg());
 		return;
 	}
--- a/sys/src/cmd/gefs/cache.c
+++ b/sys/src/cmd/gefs/cache.c
@@ -33,7 +33,7 @@
 	 */
 	assert(b->magic == Magic);
 	assert(checkflag(b, 0, Bstatic));
-	if(b->ref != 0){
+	if(agetl(&b->ref) != 0){
 		qunlock(&fs->lrulk);
 		return;
 	}
@@ -60,7 +60,7 @@
 	 */
 	assert(b->magic == Magic);
 	assert(checkflag(b, 0, Bstatic));
-	if(b->ref != 0){
+	if(agetl(&b->ref) != 0){
 		qunlock(&fs->lrulk);
 		return;
 	}
@@ -172,7 +172,7 @@
 
 	b = fs->ctail;
 	assert(b->magic == Magic);
-	assert(b->ref == 0);
+	assert(agetl(&b->ref) == 0);
 	if(checkflag(b, Bcached, 0))
 		cachedel_lk(b->bp.addr);
 	if(checkflag(b, Bcached, 0))
@@ -179,7 +179,7 @@
 		fprint(2, "%B cached %#p freed %#p\n", b->bp, b->cached, b->freed);
 	assert(checkflag(b, 0, Bcached));
 	lrudel(b);
-	b->flag = 0;
+	aswapl(&b->flag, 0);
 	b->lasthold = 0;
 	b->lastdrop = 0;
 	b->freed = 0;
--- a/sys/src/cmd/gefs/check.c
+++ b/sys/src/cmd/gefs/check.c
@@ -2,7 +2,6 @@
 #include <libc.h>
 #include <fcall.h>
 #include <avl.h>
-#include <atomic.h>
 
 #include "dat.h"
 #include "fns.h"
--- a/sys/src/cmd/gefs/cons.c
+++ b/sys/src/cmd/gefs/cons.c
@@ -137,7 +137,7 @@
 		clunkmount(mnt);
 		return;
 	}
-	loadusers(fd, mnt->root);
+	loadusers(fd, agetp(&mnt->root));
 	fprint(fd, "refreshed users\n");
 	clunkmount(mnt);
 }
@@ -147,18 +147,20 @@
 {
 	char *p, fbuf[8];
 	Blk *b;
+	int f;
 
 	for(b = blkbuf; b != blkbuf+fs->cmax; b++){
 		p = fbuf;
-		if(b->flag & Bdirty)	*p++ = 'd';
-		if(b->flag & Bfinal)	*p++ = 'f';
-		if(b->flag & Bfreed)	*p++ = 'F';
-		if(b->flag & Bcached)	*p++ = 'c';
-		if(b->flag & Bqueued)	*p++ = 'q';
-		if(b->flag & Blimbo)	*p++ = 'L';
+		f = agetl(&b->flag);
+		if(f & Bdirty)	*p++ = 'd';
+		if(f & Bfinal)	*p++ = 'f';
+		if(f & Bfreed)	*p++ = 'F';
+		if(f & Bcached)	*p++ = 'c';
+		if(f & Bqueued)	*p++ = 'q';
+		if(f & Blimbo)	*p++ = 'L';
 		*p = 0;
 		fprint(fd, "blk %#p type %d flag %s bp %B ref %ld alloc %#p queued %#p, hold %#p drop %#p cached %#p\n",
-			b, b->type, fbuf, b->bp, b->ref, b->alloced, b->queued, b->lasthold, b->lastdrop, b->cached);
+			b, b->type, fbuf, b->bp, agetl(&b->ref), b->alloced, b->queued, b->lasthold, b->lastdrop, b->cached);
 	}
 }
 
@@ -295,6 +297,7 @@
 {
 	Biobuf *bfd;
 	Trace *t;
+	long ti;
 	int i;
 
 	if(na == 0)
@@ -306,7 +309,8 @@
 		return;
 	}
 	for(i = 0; i < fs->ntrace; i++){
-		t = &fs->trace[(fs->traceidx + i) % fs->ntrace];
+		ti = agetl(&fs->traceidx);
+		t = &fs->trace[(ti+ i) % fs->ntrace];
 		if(t->msg[0] == 0)
 			continue;
 		Bprint(bfd, "[%d@%d] %s", t->tid, t->qgen, t->msg);
--- a/sys/src/cmd/gefs/dat.h
+++ b/sys/src/cmd/gefs/dat.h
@@ -514,9 +514,9 @@
 	Dlist	snapdl;
 	int	narena;
 	vlong	flag;
-	vlong	nextqid;
-	vlong	nextgen;
-	vlong	qgen;
+	vlong	nextqid;	/* protected by mutlk */
+	vlong	nextgen;	/* protected by mutlk */
+	Avlong	qgen;
 	Bptr	*arenabp;
 
 	/* superblocks */
@@ -529,13 +529,13 @@
 	long	syncing;
 	long	nsyncers;
 	long	nreaders;
-	long	nprocs;
+	Along	nprocs;
 
 	QLock	synclk;
 	Rendez	syncrz;
 
 	QLock	mountlk;
-	Mount	*mounts;
+	Aptr	mounts;	/* Mount* */
 	Mount	*snapmnt;
 	Lock	connlk;
 	Conn	*conns;
@@ -545,16 +545,16 @@
 	Chan	**rdchan;
 
 	QLock	mutlk;
-	long	nworker;
-	long	epoch;
-	long	lepoch[32];
-	Limbo	*limbo[3];
-	long	nlimbo;
+	Along	nworker;
+	Along	epoch;
+	Along	lepoch[32];
+	Aptr	limbo[3]; /* Limbo* */
+	Along	nlimbo;
 
 	Syncq	syncq[32];
 
 	int	fd;
-	long	rdonly;
+	Along	rdonly;
 	int	noauth;
 
 	/* user list */
@@ -589,7 +589,7 @@
 	RWLock	flushq[Nflushtab];
 
 	Trace	*trace;
-	long	traceidx;
+	Along	traceidx;
 	long	ntrace;
 };
 
@@ -662,7 +662,7 @@
 	long	ref;
 	vlong	gen;
 	char	name[64];
-	Tree	*root;	/* EBR protected */
+	Aptr	root;	/* Tree*, EBR protected */
 	int	flag;
 
 	/* open directory entries */
@@ -785,8 +785,8 @@
 	uintptr	freed;
 
 	Bptr	bp;
-	long	ref;
-	long	flag;
+	Along	ref;
+	Along	flag;
 	char	*data;
 	char	buf[Blksz];
 	vlong	magic;
@@ -794,8 +794,8 @@
 
 struct Chan {
 	int	size;	/* size of queue */
-	long	count;	/* how many in queue (semaphore) */
-	long	avail;	/* how many available to send (semaphore) */
+	Along	count;	/* how many in queue (semaphore) */
+	Along	avail;	/* how many available to send (semaphore) */
 	Lock	rl, wl;	/* circular pointers */
 	void	**rp;
 	void	**wp;
--- a/sys/src/cmd/gefs/fns.h
+++ b/sys/src/cmd/gefs/fns.h
@@ -74,7 +74,7 @@
 Mount*	getmount(char*);
 void	clunkmount(Mount*);
 
-void	updatesnap(Tree**, Tree*, char*, int);
+Tree*	updatesnap(Tree*, char*, int);
 void	tagsnap(Tree*, char*, int);
 void	delsnap(Tree*, vlong, char*);
 void	freedl(Dlist*, int);
--- a/sys/src/cmd/gefs/fs.c
+++ b/sys/src/cmd/gefs/fs.c
@@ -6,7 +6,6 @@
 
 #include "dat.h"
 #include "fns.h"
-#include "atomic.h"
 
 static void	respond(Fmsg*, Fcall*);
 static void	rerror(Fmsg*, char*, ...);
@@ -53,7 +52,7 @@
 static void
 wrbarrier(void)
 {
-	tracev("barrier", fs->qgen);
+	tracev("barrier", agetv(&fs->qgen));
 	aincv(&fs->qgen, 1);
 }
 
@@ -63,7 +62,7 @@
 	Qent qe;
 	int i;
 
-	tracev("wrwait", fs->qgen);
+	tracev("wrwait", agetv(&fs->qgen));
 	aincv(&fs->qgen, 1);
 	fs->syncing = fs->nsyncers;
 	for(i = 0; i < fs->nsyncers; i++){
@@ -77,7 +76,7 @@
 	aincv(&fs->qgen, 1);
 	while(fs->syncing != 0)
 		rsleep(&fs->syncrz);
-	tracev("flushed", fs->qgen);
+	tracev("flushed", agetv(&fs->qgen));
 }
 
 static void
@@ -86,6 +85,7 @@
 	Mount *mnt;
 	Arena *a;
 	Dlist dl;
+	Tree *r;
 	int i;
 
 	if(agetl(&fs->rdonly))
@@ -119,8 +119,12 @@
 	}
 	tracem("packb");
 
-	for(mnt = agetp(&fs->mounts); mnt != nil; mnt = mnt->next)
-		updatesnap(&mnt->root, mnt->root, mnt->name, mnt->flag);
+	for(mnt = agetp(&fs->mounts); mnt != nil; mnt = mnt->next){
+		r = agetp(&mnt->root);
+		r = updatesnap(r, mnt->name, mnt->flag);
+		aswapp(&mnt->root, r);
+	}
+
 	/*
 	 * Now that we've updated the snaps, we can sync the
 	 * dlist; the snap tree will not change from here.
@@ -217,9 +221,10 @@
 	*tp = nil;
 	for(mnt = agetp(&fs->mounts); mnt != nil; mnt = mnt->next){
 		if(strcmp(a->old, mnt->name) == 0){
-			updatesnap(&mnt->root, mnt->root, mnt->name, mnt->flag);
 			t = agetp(&mnt->root);
+			t = updatesnap(t, mnt->name, mnt->flag);
 			ainc(&t->memref);
+			aswapp(&mnt->root, t);
 			break;
 		}
 	}
@@ -302,9 +307,9 @@
 
 	if((c = mallocz(sizeof(Chan) + size*sizeof(void*), 1)) == nil)
 		sysfatal("create channel");
+	aswapl(&c->avail, size);
+	aswapl(&c->count, 0);
 	c->size = size;
-	c->avail = size;
-	c->count = 0;
 	c->rp = c->args;
 	c->wp = c->args;
 	return c;
@@ -319,13 +324,13 @@
 
 	v = agetl(&c->count);
 	if(v == 0 || !acasl(&c->count, v, v-1))
-		semacquire(&c->count, 1);
+		semacquire(&c->count.v, 1);
 	lock(&c->rl);
 	a = *c->rp;
 	if(++c->rp >= &c->args[c->size])
 		c->rp = c->args;
 	unlock(&c->rl);
-	semrelease(&c->avail, 1);
+	semrelease(&c->avail.v, 1);
 	return a;
 }
 
@@ -337,7 +342,7 @@
 
 	v = agetl(&c->avail);
 	if(v == 0 || !acasl(&c->avail, v, v-1)){
-		while((r = semacquire(&c->avail, block)) == -1)
+		while((r = semacquire(&c->avail.v, block)) == -1)
 			continue;
 		if(r == 0)
 			return 0;
@@ -347,7 +352,7 @@
 	if(++c->wp >= &c->args[c->size])
 		c->wp = c->args;
 	unlock(&c->wl);
-	semrelease(&c->count, 1);
+	semrelease(&c->count.v, 1);
 	return 1;
 }
 
@@ -424,11 +429,16 @@
 static void
 upsert(Mount *mnt, Msg *m, int nm)
 {
+	Tree *r;
+
 	if(!(mnt->flag & Lmut))
 		error(Erdonly);
-	if(mnt->root->nlbl != 1 || mnt->root->nref != 0)
-		updatesnap(&mnt->root, mnt->root, mnt->name, mnt->flag);
-	btupsert(mnt->root, m, nm);
+	r = agetp(&mnt->root);
+	if(r->nlbl != 1 || r->nref != 0) {
+		r = updatesnap(r, mnt->name, mnt->flag);
+		aswapp(&mnt->root, r);
+	}
+	btupsert(r, m, nm);
 }
 
 /*
@@ -511,14 +521,15 @@
 		seq = 1;
 	else
 		seq = 0;
-	b = newdblk(f->mnt->root, f->qpath, seq);
+	r = agetp(&f->mnt->root);
+	b = newdblk(r, f->qpath, seq);
 	if(waserror()){
-		freeblk(f->mnt->root, b);
+		freeblk(r, b);
 		dropblk(b);
 		nexterror();
 	}
 	t = nil;
-	r = f->mnt->root;
+	r = agetp(&f->mnt->root);
 	if(btlookup(r, m, &kv, buf, sizeof(buf))){
 		bp = unpackbp(kv.v, kv.nv);
 		if(fb < sz && (fo != 0 || n != Blksz)){
@@ -634,7 +645,7 @@
 	n = snprint(pfx+1, sizeof(pfx)-1, "retain");
 	kv.k = pfx;
 	kv.nk = n+1;
-	if(btlookup(mnt->root, &kv, &r, rbuf, sizeof(rbuf)-1)
+	if(btlookup(agetp(&mnt->root), &kv, &r, rbuf, sizeof(rbuf)-1)
 	|| btlookup(&fs->snap, &kv, &r, rbuf, sizeof(rbuf)-1)){
 		p = r.v;
 		p[r.nv] = 0;
@@ -681,7 +692,7 @@
 Mount *
 getmount(char *name)
 {
-	Mount *mnt;
+	Mount *mnt, *hd;
 	Tree *t;
 	int flg;
 
@@ -691,7 +702,8 @@
 	}
 
 	qlock(&fs->mountlk);
-	for(mnt = fs->mounts; mnt != nil; mnt = mnt->next){
+	hd = agetp(&fs->mounts);
+	for(mnt = hd; mnt != nil; mnt = mnt->next){
 		if(strcmp(name, mnt->name) == 0){
 			ainc(&mnt->ref);
 			qunlock(&fs->mountlk);
@@ -709,11 +721,11 @@
 	if((t = opensnap(name, &flg)) == nil)
 		error(Enosnap);
 	mnt->flag = flg;
-	mnt->root = t;
-	mnt->next = fs->mounts;
+	aswapp(&mnt->root, t);
+	mnt->next = hd;
 	loadautos(mnt);
 
-	asetp(&fs->mounts, mnt);
+	aswapp(&fs->mounts, mnt);
 	qunlock(&fs->mountlk);
 	poperror();
 	return mnt;
@@ -722,19 +734,24 @@
 void
 clunkmount(Mount *mnt)
 {
-	Mount *me, **p;
+	Mount *p, **pp;
 
 	if(mnt == nil)
 		return;
 	qlock(&fs->mountlk);
 	if(adec(&mnt->ref) == 0){
-		for(p = &fs->mounts; (me = *p) != nil; p = &me->next){
-			if(me == mnt)
+		pp = nil;
+		for(p = agetp(&fs->mounts); p != nil; p = p->next){
+			if(p == mnt)
 				break;
+			pp = &p->next;
 		}
-		assert(me != nil);
-		*p = me->next;
-		limbo(DFmnt, me);
+		assert(p != nil);
+		if(pp == nil)
+			aswapp(&fs->mounts, p->next);
+		else
+			*pp = p->next;
+		limbo(DFmnt, p);
 	}
 	qunlock(&fs->mountlk);
 }
@@ -1083,7 +1100,9 @@
 	}
 	de->ref = 0;
 	de->qid.type = QTAUTH;
-	de->qid.path = aincv(&fs->nextqid, 1);
+	qlock(&fs->mutlk);
+	de->qid.path = fs->nextqid++;
+	qunlock(&fs->mutlk);
 	de->qid.vers = 0;
 	de->length = 0;
 	de->k = nil;
@@ -1370,7 +1389,7 @@
 	}
 	if(o->mode != -1)
 		error(Einuse);
-	t = o->mnt->root;
+	t = agetp(&o->mnt->root);
 	mnt = o->mnt;
 	up = o->pqpath;
 	prev = o->qpath;
@@ -1401,7 +1420,7 @@
 			mnt = getmount(name);	/* mnt leaked on error() */
 			name = "";
 			prev = -1ULL;
-			t = mnt->root;
+			t = agetp(&mnt->root);
 		}
 		up = prev;
 		duid = d.uid;
@@ -1799,7 +1818,7 @@
 	if(f->mode != -1)
 		error(Einuse);
 	de = f->dent;
-	if(walk1(f->mnt->root, f->qpath, m->name, &old, &oldlen) == 0)
+	if(walk1(agetp(&f->mnt->root), f->qpath, m->name, &old, &oldlen) == 0)
 		error(Eexist);
 	rlock(de);
 	if(fsaccess(f, de->mode, de->uid, de->gid, DMWRITE) == -1){
@@ -1821,7 +1840,7 @@
 		d.qid.type |= QTEXCL;
 	if(m->perm & DMTMP)
 		d.qid.type |= QTTMP;
-	d.qid.path = aincv(&fs->nextqid, 1);
+	d.qid.path = fs->nextqid++;
 	d.qid.vers = 0;
 	d.mode = m->perm;
 	if(m->perm & DMDIR)
@@ -1922,7 +1941,7 @@
 
 	if((f = getfid(m->conn, m->fid)) == nil)
 		error(Enofid);
-	t = f->mnt->root;
+	t = agetp(&f->mnt->root);
 	nm = 0;
 	wlock(f);
 	clunkfid(m->conn, f, ao);
@@ -2377,7 +2396,7 @@
 		kv[i].v = vbuf[i];
 		kv[i].nv = sizeof(vbuf[i]);
 		if(waserror()){
-			if(!fs->rdonly)
+			if(!agetl(&fs->rdonly))
 				for(j = 0; j < i; j++)
 					freebp(t, bp[j]);
 			nexterror();
@@ -2598,7 +2617,7 @@
 	while(1){
 		a = nil;
 		m = chrecv(fs->wrchan);
-		if(fs->rdonly){
+		if(agetl(&fs->rdonly)){
 			/*
 			 * special case: even if Tremove fails, we need
 			 * to clunk the fid.
@@ -2744,7 +2763,7 @@
 	}
 	if(snap[0] != 0){
 		mnt = getmount(snap);
-		t = mnt->root;
+		t = agetp(&mnt->root);
 	}
 	kbuf[0] = Kconf;
 	strecpy(kbuf+1, kbuf+sizeof(kbuf), key);
@@ -2792,7 +2811,7 @@
 			break;
 		case AOhalt:
 			if(!agetl(&fs->rdonly)){
-				ainc(&fs->rdonly);
+				aincl(&fs->rdonly, 1);
 				/* cycle through all epochs to clear them.  */
 				for(i = 0; i < 4; i++){
 					epochwait();
@@ -2809,7 +2828,7 @@
 				fprint(2, "sync error: %s\n", errmsg());
 				if(am->m != nil)
 					rerror(am->m, Eio);
-				ainc(&fs->rdonly);
+				aincl(&fs->rdonly, 1);
 				break;
 			}
 			if(!fs->snap.dirty || agetl(&fs->rdonly))
@@ -2878,7 +2897,7 @@
 			}
 			if(waserror()){
 				fprint(2, "taking snap: %s\n", errmsg());
-				ainc(&fs->rdonly);
+				aincl(&fs->rdonly, 1);
 				break;
 			}
 
@@ -2943,7 +2962,7 @@
 			tracem("bgclear");
 			if(waserror()){
 				fprint(2, "clear file %llx: %s\n", am->qpath, errmsg());
-				ainc(&fs->rdonly);
+				aincl(&fs->rdonly, 1);
 				break;
 			}
 			if(am->dent != nil){
@@ -3060,7 +3079,7 @@
 	tmnow(&tm, nil);
 	while(1){
 		sleep(5000);
-		if(fs->rdonly)
+		if(agetl(&fs->rdonly))
 			continue;
 		if(waserror()){
 			fprint(2, "task error: %s\n", errmsg());
--- a/sys/src/cmd/gefs/load.c
+++ b/sys/src/cmd/gefs/load.c
@@ -83,7 +83,7 @@
 	snprint(dump->name, sizeof(dump->name), "dump");
 	dump->ref = 1;
 	dump->gen = -1;
-	dump->root = &fs->snap;
+	aswapp(&dump->root, &fs->snap);
 
 	fs->snapmnt = dump;
 	fs->narena = 1;
@@ -136,7 +136,7 @@
 	fprint(2, "\tnarenas:\t%d\n", fs->narena);
 	fprint(2, "\tfeatures:\t%lld\n", fs->flag);
 	fprint(2, "\tnextqid:\t%lld\n", fs->nextqid);
-	fprint(2, "\tlastqgen:\t%lld\n", fs->qgen);
+	fprint(2, "\tlastqgen:\t%lld\n", agetv(&fs->qgen));
 	fprint(2, "\tnextgen:\t%lld\n", fs->nextgen);
 	fprint(2, "\tblocksize:\t%lld\n", Blksz);
 	fprint(2, "\tcachesz:\t%lld MiB\n", fs->cmax*Blksz/MiB);
--- a/sys/src/cmd/gefs/main.c
+++ b/sys/src/cmd/gefs/main.c
@@ -6,7 +6,6 @@
 
 #include "dat.h"
 #include "fns.h"
-#include "atomic.h"
 
 Gefs *fs;
 
@@ -100,7 +99,7 @@
 {
 	va_list ap;
 
-	ainc(&fs->rdonly);
+	aincl(&fs->rdonly, 1);
 	va_start(ap, fmt);
 	errorv(fmt, ap, 1);
 }
@@ -148,7 +147,8 @@
 		fs->trace = emalloc(tracesz, 1);
 		fs->ntrace = tracesz/sizeof(Trace);
 	}
-	fs->rdonly = rdonly;
+
+	aswapl(&fs->rdonly, rdonly);
 	fs->lrurz.l = &fs->lrulk;
 	fs->syncrz.l = &fs->synclk;
 	fs->bfreerz.l = &fs->bfreelk;
--- a/sys/src/cmd/gefs/mkfile
+++ b/sys/src/cmd/gefs/mkfile
@@ -18,12 +18,9 @@
 	snap.$O\
 	tree.$O\
 	user.$O\
-	\
-	atomic-$objtype.$O
 
 HFILES=\
 	dat.h\
 	fns.h\
-	atomic.h
 
 </sys/src/cmd/mkone
--- a/sys/src/cmd/gefs/pack.c
+++ b/sys/src/cmd/gefs/pack.c
@@ -436,6 +436,7 @@
 char*
 packsb(char *p0, int sz, Gefs *fi)
 {
+	vlong nextqid, nextgen, qgen;
 	uvlong h;
 	char *p;
 	int i;
@@ -443,6 +444,9 @@
 	assert(sz == Blksz);
 	assert(fi->narena < 512);
 	p = p0;
+	nextqid = fi->nextqid;
+	nextgen = fi->nextgen;
+	qgen = agetv(&fi->qgen);
 	memcpy(p, "gefs9.00", 8);	p += 8;
 	PACK32(p, Blksz);		p += 4;
 	PACK32(p, Bufspc);		p += 4;
@@ -455,9 +459,9 @@
 	PACK64(p, fi->snapdl.tl.addr);	p += 8;
 	PACK64(p, fi->snapdl.tl.hash);	p += 8;
 	PACK64(p, fi->flag);		p += 8;
-	PACK64(p, fi->nextqid);		p += 8;
-	PACK64(p, fi->nextgen);		p += 8;
-	PACK64(p, fi->qgen);		p += 8;
+	PACK64(p, nextqid);		p += 8;
+	PACK64(p, nextgen);		p += 8;
+	PACK64(p, qgen);		p += 8;
 	for(i = 0; i < fi->narena; i++){
 		PACK64(p, fi->arenabp[i].addr);	p += 8;
 		PACK64(p, fi->arenabp[i].hash);	p += 8;
@@ -470,6 +474,7 @@
 char*
 unpacksb(Gefs *fi, char *p0, int sz)
 {
+	vlong qgen;
 	uvlong dh, xh;
 	char *p;
 	int i;
@@ -497,7 +502,8 @@
 	fi->flag = UNPACK64(p);			p += 8;
 	fi->nextqid = UNPACK64(p);		p += 8;
 	fi->nextgen = UNPACK64(p);		p += 8;
-	fi->qgen = UNPACK64(p);	p += 8;
+	qgen = UNPACK64(p);			p += 8;
+	aswapv(&fi->qgen, qgen);	/* qgen is an Avlong: store it into the Gefs being unpacked */
 	fi->arenabp = emalloc(fi->narena * sizeof(Bptr), 0);
 	for(i = 0; i < fi->narena; i++){
 		fi->arenabp[i].addr = UNPACK64(p);	p += 8;
--- a/sys/src/cmd/gefs/ream.c
+++ b/sys/src/cmd/gefs/ream.c
@@ -229,6 +229,7 @@
 	vlong sz, asz, off;
 	Mount *mnt, *adm;
 	Arena *a;
+	Tree *r;
 	char *utab;
 	Dir *d;
 	int i;
@@ -246,9 +247,9 @@
 	if(sz < 128*MiB+Blksz)
 		sysfatal("ream: disk too small");
 	mnt = emalloc(sizeof(Mount), 1);
-	mnt->root = mallocz(sizeof(Tree), 1);
+	aswapp(&mnt->root, mallocz(sizeof(Tree), 1));
 	adm = mallocz(sizeof(Mount), 1);
-	adm->root = mallocz(sizeof(Tree), 1);
+	aswapp(&adm->root, mallocz(sizeof(Tree), 1));
 
 	sz = sz - sz%Blksz - 2*Blksz;
 	fs->narena = (sz + 4096ULL*GiB - 1) / (4096ULL*GiB);
@@ -284,7 +285,7 @@
 		loadlog(a, a->loghd);
 	}
 
-	if((mb = newblk(mnt->root, Tleaf)) == nil)
+	if((mb = newblk(agetp(&mnt->root), Tleaf)) == nil)
 		sysfatal("ream: allocate root: %r");
 	holdblk(mb);
 	initroot(mb);
@@ -291,12 +292,14 @@
 	finalize(mb);
 	syncblk(mb);
 
-	mnt->root->ht = 1;
-	mnt->root->bp = mb->bp;
+	r = agetp(&mnt->root);
+	r->ht = 1;
+	r->bp = mb->bp;
 
-	if((ab = newblk(adm->root, Tleaf)) == nil)
+	r = agetp(&adm->root);
+	if((ab = newblk(r, Tleaf)) == nil)
 		sysfatal("ream: allocate root: %r");
-	if((ub = newdblk(adm->root, 0, 1)) == nil)
+	if((ub = newdblk(r, 0, 1)) == nil)
 		sysfatal("ream: allocate root: %r");
 	holdblk(ab);
 	holdblk(ub);
@@ -312,8 +315,8 @@
 	finalize(ab);
 	syncblk(ab);
 
-	adm->root->ht = 1;
-	adm->root->bp = ab->bp;
+	r->ht = 1;
+	r->bp = ab->bp;
 
 	/*
 	 * Now that we have a completely empty fs, give it
@@ -320,7 +323,7 @@
 	 * a single snap block that the tree will insert
 	 * into, and take a snapshot as the initial state.
 	 */
-	if((tb = newblk(mnt->root, Tleaf)) == nil)
+	if((tb = newblk(agetp(&mnt->root), Tleaf)) == nil)
 		sysfatal("ream: allocate snaps: %r");
 	holdblk(tb);
 	initsnap(tb, mb, ab);
@@ -333,7 +336,6 @@
 	fs->snapdl.hd.hash = -1;
 	fs->snapdl.tl.addr = -1;
 	fs->snapdl.tl.hash = -1;
-	fs->nextqid = Nreamqid;
 
 	dropblk(mb);
 	dropblk(ab);
--- a/sys/src/cmd/gefs/snap.c
+++ b/sys/src/cmd/gefs/snap.c
@@ -3,7 +3,6 @@
 #include <fcall.h>
 #include <avl.h>
 
-#include "atomic.h"
 #include "dat.h"
 #include "fns.h"
 
@@ -319,6 +318,7 @@
 	char *p, buf[4][Kvmax];
 	int nm, deltree;
 	Mount *mnt;
+	Tree *r;
 	Msg m[4];
 
 	nm = 0;
@@ -363,10 +363,11 @@
 	if(deltree){
 		reclaimblocks(t->gen, succ, t->pred);
 		for(mnt = agetp(&fs->mounts); mnt != nil; mnt = mnt->next){
-			if(mnt->root->gen == t->succ)
-				mnt->root->pred = t->pred;
-			if(mnt->root->gen == t->pred)
-				mnt->root->succ = t->succ;
+			r = agetp(&mnt->root);
+			if(r->gen == t->succ)
+				r->pred = t->pred;
+			if(r->gen == t->pred)
+				r->succ = t->succ;
 		}
 	}
 }
@@ -406,8 +407,8 @@
 		n->succ = -1;
 		n->pred = t->gen;
 		n->base = t->gen;
-		n->gen = aincv(&fs->nextgen, 1);
-		n->memgen = aincv(&fs->nextgen, 1);
+		n->gen = fs->nextgen++;
+		n->memgen = fs->nextgen++;
 
 		t->nref++;
 		m[i].op = Orelink;
@@ -443,8 +444,8 @@
  * list; once it's observable by a derived snapshot it must be
  * immutable.
  */
-void
-updatesnap(Tree **r, Tree *o, char *lbl, int flg)
+Tree*
+updatesnap(Tree *o, char *lbl, int flg)
 {
 	char buf[4][Kvmax];
 	Msg m[4];
@@ -452,9 +453,9 @@
 	int i;
 
 	if(!o->dirty)
-		return;
+		return o;
 
-	traceb("updatesnap", o->bp);
+	tracex("updatesnap", o->bp, o->memgen, getcallerpc(&o));
 	/* update the old kvp */
 	o->nlbl--;
 	o->nref++;
@@ -476,7 +477,7 @@
 	t->succ = -1;
 	t->base = o->base;
 	t->gen = o->memgen;
-	t->memgen = aincv(&fs->nextgen, 1);
+	t->memgen = fs->nextgen++;
 
 	i = 0;
 	m[i].op = Orelink;
@@ -504,8 +505,8 @@
 	if(o->nlbl == 0 && o->nref == 1)
 		delsnap(o, t->gen, nil);
 	closesnap(o);
-	asetp(r, t);
 	poperror();
+	return t;
 }
 
 /*
@@ -544,7 +545,7 @@
 		broke(Efs);
 	unpacktree(t, kv.v, kv.nv);
 	t->memref = 1;
-	t->memgen = aincv(&fs->nextgen, 1);
+	t->memgen = fs->nextgen++;
 	poperror();
 	return t;
 }
--- a/sys/src/cmd/gefs/tree.c
+++ b/sys/src/cmd/gefs/tree.c
@@ -3,7 +3,6 @@
 #include <fcall.h>
 #include <avl.h>
 
-#include "atomic.h"
 #include "dat.h"
 #include "fns.h"
 
--- /dev/null
+++ b/sys/src/libc/386/atomic.s
@@ -1,0 +1,46 @@
+/* get variants: load the 32-bit word at *p and leave it in AX; the labels share one body */
+TEXT ageti+0(SB),1,$0
+TEXT agetl+0(SB),1,$0
+TEXT agetp+0(SB),1,$0
+	MOVL	p+0(FP), AX
+	MOVL	0(AX), AX
+	RET
+
+/* set variants: exchange *p with v; the previous value is left in AX */
+TEXT aseti+0(SB),1,$0
+TEXT aswapl+0(SB),1,$0
+TEXT aswapp+0(SB),1,$0
+	MOVL		p+0(FP), BX
+	MOVL		v+4(FP), AX
+	LOCK; XCHGL	(BX), AX
+	RET
+
+/* inc variants: add v to *p and leave the updated value in AX */
+TEXT aincl+0(SB),1,$0
+	MOVL	p+0(FP), BX
+	MOVL	v+4(FP), CX
+	MOVL	CX, AX
+	LOCK; XADDL AX, (BX)	/* AX = old *p, *p = old + v */
+	ADDL	CX, AX		/* AX = old + v */
+	RET
+
+/* cas variants: if *p == ov store nv; AX is 1 on success and 0 otherwise */
+TEXT acasl+0(SB),1,$0
+TEXT acasp+0(SB),1,$0
+	MOVL	p+0(FP), CX
+	MOVL	ov+4(FP), AX
+	MOVL	nv+8(FP), DX
+	LOCK; CMPXCHGL DX, (CX)	/* compares AX with *p, stores DX when they match */
+	JNE	fail32
+	MOVL	$1,AX
+	RET
+fail32:
+	MOVL	$0,AX
+	RET
+
+/* barriers */
+TEXT coherence+0(SB),1,$0
+	/* this is essentially mfence but that requires sse2 */
+	XORL	AX, AX
+	LOCK; XADDL AX, (SP)	/* adds zero to the word at SP: memory is left unchanged */
+	RET
--- /dev/null
+++ b/sys/src/libc/386/atomic64.s
@@ -1,0 +1,54 @@
+/* get variants: r+0(FP) is where the vlong result goes, p+4(FP) is the atomic */
+TEXT agetv+0(SB),1,$0
+	MOVL	r+0(FP), AX
+	MOVL	p+4(FP), BX
+	FMOVV	(BX), F0	/* one 8-byte integer load, exact for every bit pattern */
+	FMOVVP	F0, (AX)	/* a floating FMOVD load can trap on or alter signaling-NaN patterns */
+	RET
+
+/* set variants: store nv into *p and hand back the previous contents */
+TEXT aswapv+0(SB),1,$0
+	MOVL	p+4(FP), DI
+	MOVL	nv+8(FP), BX
+	MOVL	nv+12(FP), CX
+	MOVL	0(DI), AX	/* first guess at the old value; need not be atomic */
+	MOVL	4(DI), DX
+loop:
+	LOCK;	CMPXCHG8B (DI)	/* a mismatch reloads DX:AX from *p */
+	JNE	loop
+	MOVL	p+0(FP),DI	/* slot 0 is the result location */
+	MOVL	AX, 0(DI)
+	MOVL	DX, 4(DI)
+	RET
+
+/* inc variants: add v to *p and hand back the updated value */
+TEXT aincv+0(SB),1,$0
+	MOVL	p+4(FP), DI
+retry:
+	MOVL	0(DI), AX
+	MOVL	4(DI), DX
+	MOVL 	AX, BX
+	MOVL	DX, CX
+	ADDL	v+8(FP), BX
+	ADCL	v+12(FP), CX	/* CX:BX = DX:AX + v */
+	LOCK; CMPXCHG8B (DI)	/* stores CX:BX only if *p still equals DX:AX */
+	JNE	retry
+	MOVL	r+0(FP), DI
+	MOVL	BX, 0x0(DI)
+	MOVL	CX, 0x4(DI)
+	RET
+
+/* cas variants: the result is an int in AX, so p is the first slot at 0(FP) */
+TEXT acasv+0(SB),1,$0
+	MOVL	p+0(FP), DI
+	MOVL	ov+4(FP), AX
+	MOVL	ov+8(FP), DX
+	MOVL	nv+12(FP), BX
+	MOVL	nv+16(FP), CX
+	LOCK; CMPXCHG8B (DI)
+	JNE	fail64
+	MOVL	$1,AX
+	RET
+fail64:
+	MOVL	$0,AX
+	RET
--- a/sys/src/libc/386/mkfile
+++ b/sys/src/libc/386/mkfile
@@ -5,6 +5,8 @@
 SFILES=\
 	argv0.s\
 	atom.s\
+	atomic.s\
+	atomic64.s\
 	getfcr.s\
 	main9.s\
 	main9p.s\
--- /dev/null
+++ b/sys/src/libc/amd64/atomic.s
@@ -1,0 +1,48 @@
+/* get variants: RARG holds the pointer; the loaded value is left in AX */
+TEXT agetl+0(SB),1,$0
+	MOVL	(RARG), AX
+	RET
+TEXT agetp+0(SB),1,$0
+	MOVQ	(RARG), AX
+	RET
+
+/* set variants: exchange *p with v, leaving the previous value in AX */
+TEXT aswapl+0(SB),1,$0
+	MOVL		v+8(FP), AX
+	LOCK; XCHGL	(RARG), AX
+	RET
+
+TEXT aswapp+0(SB),1,$0
+	MOVQ		v+8(FP), AX
+	LOCK; XCHGQ	(RARG), AX
+	RET
+
+/* inc variants: add v to *p and leave the updated value in AX */
+TEXT aincl+0(SB),1,$0
+	MOVQ		v+8(FP), BX
+	MOVQ		BX, AX
+	LOCK; XADDL	AX, (RARG)	/* AX = old *p, *p = old + v (32-bit) */
+	ADDQ		BX, AX		/* low 32 bits of AX = old + v */
+	RET
+
+/* cas variants: if *p == c store v; AX is 1 on success and 0 otherwise */
+TEXT acasl+0(SB),1,$0
+	MOVL	c+8(FP), AX
+	MOVL	v+16(FP), BX
+	LOCK; CMPXCHGL	BX, (RARG)
+	SETEQ	AX		/* ZF is set when the exchange happened */
+	MOVBLZX	AX, AX
+	RET
+
+TEXT acasp+0(SB),1,$0
+	MOVQ	c+8(FP), AX
+	MOVQ	v+16(FP), BX
+	LOCK; CMPXCHGQ BX, (RARG)
+	SETEQ	AX		/* ZF is set when the exchange happened */
+	MOVBLZX	AX, AX
+	RET
+
+/* barriers */
+TEXT coherence+0(SB),1,$0
+	MFENCE
+	RET
--- /dev/null
+++ b/sys/src/libc/amd64/atomic64.s
@@ -1,0 +1,27 @@
+/* get variants: RARG holds the pointer; the 64-bit value is left in AX */
+TEXT agetv+0(SB),1,$0
+	MOVQ	(RARG), AX
+	RET
+
+/* set variants: exchange *p with v, leaving the previous value in AX */
+TEXT aswapv+0(SB),1,$0
+	MOVQ		v+8(FP), AX
+	LOCK; XCHGQ	(RARG), AX
+	RET
+
+/* inc variants: add v to *p and leave the updated value in AX */
+TEXT aincv+0(SB),1,$0
+	MOVQ		v+8(FP), BX
+	MOVQ		BX, AX
+	LOCK; XADDQ	AX, (RARG)	/* AX = old *p, *p = old + v */
+	ADDQ		BX, AX		/* AX = old + v */
+	RET
+
+/* cas variants: if *p == c store v; AX is 1 on success and 0 otherwise */
+TEXT acasv+0(SB),1,$0
+	MOVQ	c+8(FP), AX
+	MOVQ	v+16(FP), BX
+	LOCK; CMPXCHGQ BX, (RARG)
+	SETEQ	AX		/* ZF is set when the exchange happened */
+	MOVBLZX	AX, AX
+	RET
--- a/sys/src/libc/amd64/mkfile
+++ b/sys/src/libc/amd64/mkfile
@@ -5,6 +5,8 @@
 SFILES=\
 	argv0.s\
 	atom.s\
+	atomic.s\
+	atomic64.s\
 	cycles.s\
 	getfcr.s\
 	main9.s\
--- /dev/null
+++ b/sys/src/libc/arm/atomic.s
@@ -1,0 +1,69 @@
+#define ISH	(2<<2|3)
+
+/*
+ * R0 carries the pointer argument on entry and the result on return.
+ * Operations that store issue DMB on both sides so that ordinary
+ * accesses around them cannot be reordered across the update.
+ */
+
+/* get variants */
+TEXT agetl+0(SB),1,$0
+TEXT agetp+0(SB),1,$0
+	MOVW	(R0), R0
+	DMB	$ISH
+	RET
+
+/* set variants: exchange *p with new, return the previous value */
+TEXT aswapl+0(SB),1,$0
+TEXT aswapp+0(SB),1,$0
+	MOVW	new+4(FP), R1
+	DMB	$ISH
+_aswapl:
+	LDREX	(R0), R2
+	STREX	R1, (R0), R4	/* R4 = 0 only if nobody wrote *p since the LDREX */
+	CMP	$0, R4
+	BNE	_aswapl
+	MOVW	R2, R0
+	DMB	$ISH
+	RET
+
+/* inc variants: add delta to *p, return the updated value */
+TEXT aincl+0(SB),1,$0
+	MOVW	delta+4(FP), R1
+	DMB	$ISH
+_aincl:
+	LDREX	(R0), R3
+	ADD	R1, R3
+	STREX	R3, (R0), R4
+	CMP	$0, R4
+	BNE	_aincl
+	MOVW	R3, R0
+	DMB	$ISH
+	RET
+
+/* cas variants: if *p == old store new; return 1 on success, 0 otherwise */
+TEXT acasl+0(SB),1,$0
+TEXT acasp+0(SB),1,$0
+	MOVW	old+4(FP), R1
+	MOVW	new+8(FP), R2
+	DMB	$ISH
+_acasl:
+	LDREX	(R0), R3
+	CMP	R1, R3
+	BNE	_acaslf
+	STREX	R2, (R0), R4
+	CMP	$0, R4
+	BNE	_acasl
+	MOVW	$1, R0
+	DMB	$ISH
+	RET
+_acaslf:
+	CLREX			/* drop the reservation taken by LDREX */
+	MOVW	$0, R0
+	DMB	$ISH
+	RET
+
+/* barriers */
+TEXT coherence+0(SB),1,$0
+	DMB	$ISH
+	RET
--- a/sys/src/libc/arm/mkfile
+++ b/sys/src/libc/arm/mkfile
@@ -5,6 +5,7 @@
 SFILES=\
 	argv0.s\
 	atom.s\
+	atomic.s\
 	div.s\
 	getcallerpc.s\
 	getfcr.s\
--- /dev/null
+++ b/sys/src/libc/arm64/atomic.s
@@ -1,0 +1,93 @@
+#define ISH	(2<<2|3)
+
+/* get variants: R0 holds the pointer on entry and the loaded value on return */
+TEXT agetl+0(SB),1,$0
+	LDARW	(R0), R0
+	DMB	$ISH
+	RETURN
+
+TEXT agetp+0(SB),1,$0
+	LDAR	(R0), R0
+	DMB	$ISH
+	RETURN
+
+/* set variants: exchange *p with the new value, return the previous one */
+TEXT aswapl+0(SB),1,$0
+	MOV	0x08(FP), R1
+	MOV	R0, R2
+	DMB	$ISH		/* earlier stores must be visible before the swap is */
+_setl:
+	LDXRW	(R2), R0
+	STXRW	R1, (R2), R3
+	CBNZW	R3, _setl	/* nonzero status: the exclusive store failed */
+	DMB	$ISH
+	RETURN
+
+TEXT aswapp+0(SB),1,$0
+	MOV	0x08(FP), R1
+	MOV	R0, R2
+	DMB	$ISH		/* earlier stores must be visible before the swap is */
+_setp:
+	LDXR	(R2), R0
+	STXR	R1, (R2), R3
+	CBNZW	R3, _setp
+	DMB	$ISH
+	RETURN
+
+/* inc variants: add the delta to *p, return the updated value */
+TEXT aincl+0(SB),1,$0
+	MOV	0x08(FP), R1
+	MOV	R0, R2
+	DMB	$ISH		/* earlier stores must be visible before the update is */
+_incl:
+	LDXRW	(R2), R0
+	ADDW	R1, R0, R3
+	STXRW	R3, (R2), R4
+	CBNZW	R4, _incl
+	DMB	$ISH
+	MOVW	R3, R0
+	RETURN
+
+/* cas variants: if *p equals the old value store the new one; return 1 or 0 */
+TEXT acasl+0(SB),1,$0
+	MOV	0x08(FP), R1
+	MOV	0x10(FP), R2
+	DMB	$ISH
+_casl:
+	LDXRW	(R0), R3
+	CMPW	R1, R3
+	BNE	_caslf
+	STXRW	R2, (R0), R4
+	CBNZW	R4, _casl
+	MOV	$1, R0
+	DMB	$ISH
+	RETURN
+_caslf:
+	CLREX			/* drop the reservation taken by LDXRW */
+	MOV	$0, R0
+	DMB	$ISH
+	RETURN
+
+TEXT acasp+0(SB),1,$0
+	MOV	0x08(FP), R1
+	MOV	0x10(FP), R2
+	DMB	$ISH
+_casp:
+	LDXR	(R0), R3
+	CMP	R1, R3
+	BNE	_caspf
+	STXR	R2, (R0), R4
+	CBNZW	R4, _casp
+	MOV	$1, R0
+	DMB	$ISH
+	RETURN
+_caspf:
+	CLREX			/* drop the reservation taken by LDXR */
+	MOV	$0, R0
+	DMB	$ISH
+	RETURN
+
+/* barriers */
+TEXT coherence+0(SB),1,$0
+	DMB	$ISH
+	RETURN
--- /dev/null
+++ b/sys/src/libc/arm64/atomic64.s
@@ -1,0 +1,53 @@
+#define ISH	(2<<2|3)
+
+/* get variants: R0 holds the pointer on entry and the loaded value on return */
+TEXT agetv+0(SB),1,$0
+	LDAR	(R0), R0
+	DMB	$ISH
+	RETURN
+
+/* set variants: exchange *p with the new value, return the previous one */
+TEXT aswapv+0(SB),1,$0
+	MOV	0x08(FP), R1
+	MOV	R0, R2
+	DMB	$ISH		/* earlier stores must be visible before the swap is */
+_setv:
+	LDXR	(R2), R0
+	STXR	R1, (R2), R3
+	CBNZW	R3, _setv	/* nonzero status: the exclusive store failed */
+	DMB	$ISH
+	RETURN
+
+/* inc variants: add the delta to *p, return the updated value */
+TEXT aincv+0(SB),1,$0
+	MOV	0x08(FP), R1
+	MOV	R0, R2
+	DMB	$ISH		/* earlier stores must be visible before the update is */
+_incv:
+	LDXR	(R2), R0
+	ADD	R1, R0, R3
+	STXR	R3, (R2), R4
+	CBNZW	R4, _incv
+	DMB	$ISH
+	MOV	R3, R0
+	RETURN
+
+/* cas variants: if *p equals the old value store the new one; return 1 or 0 */
+TEXT acasv+0(SB),1,$0
+	MOV	0x08(FP), R1
+	MOV	0x10(FP), R2
+	DMB	$ISH
+_casv:
+	LDXR	(R0), R3
+	CMP	R1, R3
+	BNE	_casvf
+	STXR	R2, (R0), R4
+	CBNZW	R4, _casv
+	MOV	$1, R0
+	DMB	$ISH
+	RETURN
+_casvf:
+	CLREX			/* drop the reservation taken by LDXR */
+	MOV	$0, R0
+	DMB	$ISH
+	RETURN
--- a/sys/src/libc/arm64/mkfile
+++ b/sys/src/libc/arm64/mkfile
@@ -6,6 +6,8 @@
 SFILES=\
 	argv0.s\
 	atom.s\
+	atomic.s\
+	atomic64.s\
 	cycles.s\
 	fabs.s\
 	floor.s\
--- /dev/null
+++ b/sys/src/libc/mips/atomic.s
@@ -1,0 +1,55 @@
+/* get variants: R1 holds the pointer on entry and the loaded value on return */
+TEXT agetl+0(SB),1,$0
+TEXT agetp+0(SB),1,$0
+	SYNC
+	LL	(R1), R1
+	SYNC
+	RET
+
+/* set variants: exchange *p with new, return the previous value */
+TEXT aswapl+0(SB),1,$0
+TEXT aswapp+0(SB),1,$0
+	MOVW	new+4(FP), R2
+	SYNC
+_aswapl:
+	LL	(R1), R3
+	MOVW	R2, R4		/* work on a copy: SC replaces its source with the status */
+	SC	R4, (R1)
+	BEQ	R4, _aswapl	/* zero status: the store did not happen, retry */
+	SYNC
+	MOVW	R3, R1
+	RET
+
+/* inc variants: add delta to *p, return the updated value */
+TEXT aincl+0(SB),1,$0
+	MOVW	delta+4(FP), R2
+	SYNC
+_aincl:
+	LL	(R1), R3
+	ADD	R3, R2, R4
+	MOVW	R4, R5		/* keep the sum in R4; SC clobbers R5 with the status */
+	SC	R5, (R1)
+	BEQ	R5, _aincl
+	SYNC
+	MOVW	R4, R1
+	RET
+
+/* cas variants: if *p == old store new; return 1 on success, 0 otherwise */
+TEXT acasl+0(SB),1,$0
+TEXT acasp+0(SB),1,$0
+	MOVW	old+4(FP), R2
+	MOVW	new+8(FP), R3
+	SYNC
+_acasl:
+	LL	(R1), R4
+	BNE	R4, R2, _acaslf
+	MOVW	R3, R5		/* SC clobbers R5, so new stays intact in R3 for a retry */
+	SC	R5, (R1)
+	BEQ	R5, _acasl
+	SYNC
+	MOVW	$1, R1
+	RET
+_acaslf:
+	SYNC
+	MOVW	$0, R1
+	RET
--- a/sys/src/libc/mips/mkfile
+++ b/sys/src/libc/mips/mkfile
@@ -5,6 +5,7 @@
 SFILES=\
 	argv0.s\
 	atom.s\
+	atomic.s\
 	getcallerpc.s\
 	getfcr.s\
 	main9.s\
--- /dev/null
+++ b/sys/src/libc/port/atomic64.c
@@ -1,0 +1,89 @@
+#include <u.h>
+#include <libc.h>
+
+/* locks standing in for 64-bit atomic instructions; chosen by address hash */
+static Lock locktab[128];
+
+/* scramble the address of an atomic into an index into locktab */
+static u32int
+ihash(void *p)
+{
+	uintptr x;
+
+	x = (uintptr)p;
+	/* constants from splitmix32 rng */
+	x ^= x >> 16;
+	x *= 0x85ebca6b;
+	x ^= x >> 13;
+	x *= 0xc2b2ae35;
+	x ^= x >> 16;
+	return x & (nelem(locktab)-1);
+}
+
+/* GET defines n(p): return the current value of p->v */
+#define GET(A, T, n) \
+	T n(A *p)			\
+	{				\
+		Lock *l;		\
+		T cur;			\
+					\
+		l = locktab + ihash(p);	\
+		lock(l);		\
+		cur = p->v;		\
+		unlock(l);		\
+		return cur;		\
+	}
+
+/* SET defines n(p, v): store v into p->v and return what was there before */
+#define SET(A, T, n) \
+	T n(A *p, T v)			\
+	{				\
+		Lock *l;		\
+		T old;			\
+					\
+		l = locktab + ihash(p);	\
+		lock(l);		\
+		old = p->v;		\
+		p->v = v;		\
+		unlock(l);		\
+		return old;		\
+	}
+
+/* INC defines n(p, dv): add dv to p->v and return the updated value */
+#define INC(A, T, n) \
+	T n(A *p, T dv)			\
+	{				\
+		Lock *l;		\
+		T sum;			\
+					\
+		l = locktab + ihash(p);	\
+		lock(l);		\
+		sum = p->v + dv;	\
+		p->v = sum;		\
+		unlock(l);		\
+		return sum;		\
+	}
+
+/* CAS defines n(p, ov, nv): if p->v equals ov store nv; return 1 if stored, else 0 */
+#define CAS(A, T, n) \
+	int n(A *p, T ov, T nv)		\
+	{				\
+		Lock *l;		\
+		int hit;		\
+					\
+		l = locktab + ihash(p);	\
+		lock(l);		\
+		hit = p->v == ov;	\
+		if(hit)			\
+			p->v = nv;	\
+		unlock(l);		\
+		return hit;		\
+	}
+
+GET(Avlong, vlong, agetv)
+
+SET(Avlong, vlong, aswapv)
+
+INC(Avlong, vlong, aincv)
+
+CAS(Avlong, vlong, acasv)
--- a/sys/src/libc/port/mkfile
+++ b/sys/src/libc/port/mkfile
@@ -12,6 +12,7 @@
 	atof.c\
 	atol.c\
 	atoll.c\
+	atomic64.c\
 	calloc.c\
 	cistrcmp.c\
 	cistrncmp.c\
--- a/sys/src/libc/port/reduce
+++ b/sys/src/libc/port/reduce
@@ -2,7 +2,7 @@
 shift
 objtype=$1
 shift
-if(ls -p ../$objtype/*.[cs] >[2]/dev/null | sed 's/..$//;s/^/^/' > /tmp/reduce.$pid) {
+if(ls -p ../$objtype/*.[cs] >[2]/dev/null | sed 's/..$/\$/;s/^/^/' > /tmp/reduce.$pid) {
 	echo $* | tr ' ' \012 | grep -v -f /tmp/reduce.$pid | tr \012 ' '
 }
 if not {
@@ -10,3 +10,4 @@
 	echo $*
 }
 rm /tmp/reduce.$pid
+
--- /dev/null
+++ b/sys/src/libc/power/atomic.s
@@ -1,0 +1,58 @@
+/* get variants: R3 holds the pointer on entry and the loaded value on return */
+TEXT agetl+0(SB),1,$0
+TEXT agetp+0(SB),1,$0
+	SYNC
+	LWAR	(R3), R3
+	CMP	R3, R3		/* always equal, so the branch below is never taken; */
+	BNE	-1(PC)		/* it only makes the following ISYNC depend on the load */
+	ISYNC
+	RETURN
+
+/* set variants: exchange *p with val, return the previous value */
+TEXT aswapl+0(SB),1,$0
+TEXT aswapp+0(SB),1,$0
+	MOVW	R3, R4		/* move the pointer aside; R3 receives the result */
+	MOVW	val+4(FP), R5
+	SYNC
+_aswapl:
+	LWAR	(R4), R3
+	STWCCC	R5, (R4)
+	BNE	_aswapl		/* conditional store failed, retry */
+	RETURN			/* NOTE(review): no barrier after the store, unlike acasl; confirm intended */
+
+/* inc variants: add delta to *p, return the updated value */
+TEXT aincl+0(SB),1,$0
+	MOVW	R3, R4		/* move the pointer aside; R3 receives the result */
+	MOVW	delta+4(FP), R5
+	SYNC
+_aincl:
+	LWAR	(R4), R3
+	ADD	R5, R3
+	STWCCC	R3, (R4)
+	BNE	_aincl		/* conditional store failed, retry */
+	RETURN			/* NOTE(review): no barrier after the store, unlike acasl; confirm intended */
+
+/* cas variants: if *p == old store new; return 1 on success, 0 otherwise */
+TEXT acasl+0(SB),1,$0
+TEXT acasp+0(SB),1,$0
+	MOVW	old+4(FP), R4
+	MOVW	new+8(FP), R5
+	SYNC
+_casl:
+	LWAR	(R3), R6
+	CMP	R6, R4
+	BNE	_caslf
+	STWCCC	R5, (R3)
+	BNE	_casl
+	MOVW	$1, R3
+	SYNC
+	RETURN
+_caslf:
+	SYNC
+	AND	R0, R3		/* presumably R0 is zero here, making the result 0; TODO confirm */
+	RETURN
+
+/* barriers */
+TEXT coherence+0(SB),1,$0
+	SYNC
+	RETURN
--- a/sys/src/libc/power/mkfile
+++ b/sys/src/libc/power/mkfile
@@ -5,6 +5,7 @@
 SFILES=\
 	argv0.s\
 	atom.s\
+	atomic.s\
 	cycles.s\
 	getcallerpc.s\
 	getfcr.s\
--- /dev/null
+++ b/sys/src/libc/power64/atomic.s
@@ -1,0 +1,91 @@
+/* get variants: RARG holds the pointer on entry and the loaded value on return */
+TEXT agetl+0(SB),1,$0
+	SYNC
+	LWAR	(RARG), RARG
+	CMPW	RARG, RARG	/* always equal, so the branch below is never taken; */
+	BNE	-1(PC)		/* it only makes the following ISYNC depend on the load */
+	ISYNC
+	RETURN
+
+TEXT agetp+0(SB),1,$0
+	SYNC
+	LDAR	(RARG), RARG
+	CMP	RARG, RARG	/* same never-taken branch idiom as agetl */
+	BNE	-1(PC)
+	ISYNC
+	RETURN
+
+/* set variants: exchange *p with val, return the previous value */
+TEXT aswapl+0(SB),1,$0
+	MOVD	RARG, R4	/* move the pointer aside; RARG receives the result */
+	MOVW	val+8(FP), R5
+	SYNC
+_aswapl:
+	LWAR	(R4), RARG
+	STWCCC	R5, (R4)
+	BNE	_aswapl		/* conditional store failed, retry */
+	RETURN			/* NOTE(review): no barrier after the store, unlike acasl; confirm intended */
+
+TEXT aswapp+0(SB),1,$0
+	MOVD	RARG, R4	/* move the pointer aside; RARG receives the result */
+	MOVD	val+8(FP), R5
+	SYNC
+_aswapp:
+	LDAR	(R4), RARG
+	STDCCC	R5, (R4)
+	BNE	_aswapp		/* conditional store failed, retry */
+	RETURN			/* NOTE(review): no barrier after the store, unlike acasp; confirm intended */
+
+/* inc variants: add delta to *p, return the updated value */
+TEXT aincl+0(SB),1,$0
+	MOVD	RARG, R4	/* move the pointer aside; RARG receives the result */
+	MOVW	delta+8(FP), R5
+	LWSYNC
+_aincl:
+	LWAR	(R4), RARG
+	ADD	R5, RARG
+	STWCCC	RARG, (R4)
+	BNE	_aincl		/* conditional store failed, retry */
+	RETURN			/* NOTE(review): no barrier after the store, unlike acasl; confirm intended */
+
+/* cas variants: if *p == old store new; return 1 on success, 0 otherwise */
+TEXT acasl+0(SB),1,$0
+	MOVWZ	old+8(FP), R4
+	MOVWZ	new+16(FP), R5
+	LWSYNC
+_casl:
+	LWAR	(RARG), R6
+	CMPW	R6, R4
+	BNE	_caslf
+	STWCCC	R5, (RARG)
+	BNE	_casl
+	MOVD	$1, RARG
+	LWSYNC
+	RETURN
+_caslf:
+	LWSYNC
+	AND	R0, RARG	/* presumably R0 is zero here, making the result 0; TODO confirm */
+	RETURN
+
+TEXT acasp+0(SB),1,$0
+	MOVD	old+8(FP), R4
+	MOVD	new+16(FP), R5
+	LWSYNC
+_casp:
+	LDAR	(RARG), R6
+	CMP	R6, R4
+	BNE	_caspf
+	STDCCC	R5, (RARG)
+	BNE	_casp
+	MOVD	$1, RARG
+	LWSYNC
+	RETURN
+_caspf:
+	LWSYNC
+	AND	R0, RARG	/* presumably R0 is zero here, making the result 0; TODO confirm */
+	RETURN
+
+/* barriers */
+TEXT coherence+0(SB),1,$0
+	SYNC
+	RETURN
--- /dev/null
+++ b/sys/src/libc/power64/atomic64.s
@@ -1,0 +1,50 @@
+/* get variants: RARG holds the pointer on entry and the loaded value on return */
+TEXT agetv+0(SB),1,$0
+	SYNC
+	LDAR	(RARG), RARG
+	CMP	RARG, RARG	/* always equal, so the branch below is never taken; */
+	BNE	-1(PC)		/* it only makes the following ISYNC depend on the load */
+	ISYNC
+	RETURN
+
+/* set variants: exchange *p with val, return the previous value */
+TEXT aswapv+0(SB),1,$0
+	MOVD	RARG, R4	/* move the pointer aside; RARG receives the result */
+	MOVD	val+8(FP), R5
+	SYNC
+_aswapv:
+	LDAR	(R4), RARG
+	STDCCC	R5, (R4)
+	BNE	_aswapv		/* conditional store failed, retry */
+	RETURN
+
+/* inc variants: add delta to *p, return the updated value */
+TEXT aincv+0(SB),1,$0
+	MOVD	RARG, R4	/* move the pointer aside; RARG receives the result */
+	MOVD	delta+8(FP), R5
+	LWSYNC
+_aincv:
+	LDAR	(R4), RARG
+	ADD	R5, RARG
+	STDCCC	RARG, (R4)
+	BNE	_aincv		/* conditional store failed, retry */
+	RETURN
+
+/* cas variants: if *p == old store new; return 1 on success, 0 otherwise */
+TEXT acasv+0(SB),1,$0
+	MOVD	old+8(FP), R4
+	MOVD	new+16(FP), R5
+	LWSYNC
+_casv:
+	LDAR	(RARG), R6
+	CMP	R6, R4
+	BNE	_casvf
+	STDCCC	R5, (RARG)
+	BNE	_casv
+	MOVD	$1, RARG
+	LWSYNC
+	RETURN
+_casvf:
+	LWSYNC
+	AND	R0, RARG	/* presumably R0 is zero here, making the result 0; TODO confirm */
+	RETURN
--- a/sys/src/libc/power64/mkfile
+++ b/sys/src/libc/power64/mkfile
@@ -4,11 +4,13 @@
 LIB=/$objtype/lib/libc.a
 SFILES=\
 	argv0.s\
+	atom.s\
+	atomic.s\
+	atomic64.s\
 	cycles.s\
 	getcallerpc.s\
 	getfcr.s\
 	main9.s\
-	atom.s\
 	main9p.s\
 	setjmp.s\
 	tas.s\
--- a/sys/src/libc/spim/mkfile
+++ b/sys/src/libc/spim/mkfile
@@ -5,6 +5,7 @@
 MIPSSFILES=\
 	argv0.s\
 	atom.s\
+	atomic.s\
 	getcallerpc.s\
 	getfcr.s\
 	main9.s\
--- /dev/null
+++ b/sys/src/libc/test/atomic.c
@@ -1,0 +1,188 @@
+#include <u.h>
+#include <libc.h>
+
+Along	counter;
+Along	done0;
+Along	done1;
+long	val0;
+long	val1;
+
+/*
+ * Run f(p) in a child process that shares memory with the
+ * caller and return the child's pid.  The child exits with an
+ * empty status so reap can tell a clean exit from a failure.
+ */
+int
+spawn(void (*f)(void*), void *p)
+{
+	int pid;
+
+	pid = rfork(RFMEM|RFPROC);
+	switch(pid){
+	case -1:
+		sysfatal("rfork: %r");
+	case 0:
+		f(p);
+		exits(nil);
+	default:
+		return pid;
+	}
+}
+
+/*
+ * Collect one child and fail the whole test if it did not
+ * exit cleanly, e.g. because one of its assertions fired.
+ */
+void
+reap(void)
+{
+	Waitmsg *w;
+
+	if((w = wait()) == nil)
+		sysfatal("wait: %r");
+	if(w->msg[0] != '\0')
+		sysfatal("child %d: %s", w->pid, w->msg);
+	free(w);
+}
+
+/* worker: 10000 atomic increments of the Along at p */
+void
+inc(void *p)
+{
+	int i;
+
+	for(i = 0; i < 10000; i++)
+		aincl(p, 1);
+}
+
+/* worker: 10000 atomic decrements of the Along at p */
+void
+dec(void *p)
+{
+	int i;
+
+	for(i = 0; i < 10000; i++)
+		aincl(p, -1);
+}
+
+/* worker: 10000 increments, each built from a get/cas retry loop */
+void
+casinc(void *p)
+{
+	int i;
+	long v;
+
+	for(i = 0; i < 10000; i++) do{
+		v = agetl(p);
+	}while(!acasl(p, v, v+1));
+}
+
+/* worker: 10000 decrements, each built from a get/cas retry loop */
+void
+casdec(void *p)
+{
+	int i;
+	long v;
+
+	for(i = 0; i < 10000; i++) do{
+		v = agetl(p);
+	}while(!acasl(p, v, v-1));
+}
+
+/*
+ * toggle0 and toggle1 each make a plain store, raise their own
+ * atomic flag, and wait for the other's flag.  If the atomics
+ * order surrounding accesses, the peer's plain store is visible.
+ */
+void
+toggle0(void*)
+{
+	val0 = 1;
+	aswapl(&done0, 1);
+	while(agetl(&done1) != 1)
+		/* wait */;
+	assert(val1 == 1);
+}
+
+void
+toggle1(void*)
+{
+	val1 = 1;
+	aswapl(&done1, 1);
+	while(agetl(&done0) != 1)
+		/* wait */;
+	assert(val0 == 1);
+}
+
+void
+main(void)
+{
+	Along l;
+	Avlong v;
+	Aptr p;
+	int i;
+
+	/* smoke test: does it work at all */
+	aswapl(&l, 1);
+	assert(agetl(&l) == 1);
+	assert(aincl(&l, 1) == 2);
+	assert(aincl(&l, -1) == 1);
+	assert(acasl(&l, 42, 123) == 0);
+	assert(agetl(&l) == 1);
+	assert(aswapl(&l, 77) == 1);
+	assert(acasl(&l, 77, 42) == 1);
+	assert(agetl(&l) == 42);
+
+	aswapv(&v, 1);
+	assert(agetv(&v) == 1);
+	assert(aincv(&v, 1) == 2);
+	assert(aincv(&v, -1) == 1);
+	assert(acasv(&v, 42, 123) == 0);
+	assert(agetv(&v) == 1);
+	assert(aswapv(&v, 77) == 1);
+	assert(acasv(&v, 77, 42) == 1);
+	assert(agetv(&v) == 42);
+
+	aswapp(&p, &v);
+	assert(agetp(&p) == &v);
+	assert(acasp(&p, &l, &i) == 0);
+	assert(agetp(&p) == &v);
+	assert(acasp(&p, &v, &i) == 1);
+	assert(agetp(&p) == &i);
+
+	/* do our counters look atomic */
+	for(i = 0; i < 10; i++){
+		spawn(inc, &counter);
+		spawn(dec, &counter);
+	}
+	for(i = 0; i < 10; i++){
+		reap();
+		reap();
+	}
+	assert(agetl(&counter) == 0);
+
+	/* how about when cas'ing */
+	for(i = 0; i < 1000; i++){
+		spawn(casinc, &counter);
+		spawn(casdec, &counter);
+	}
+	for(i = 0; i < 1000; i++){
+		reap();
+		reap();
+	}
+	assert(agetl(&counter) == 0);
+
+	/* do the atomics act as barriers? */
+	for(i = 0; i < 10000; i++){
+		/* rearm: flags left set by the last round would make this one pass trivially */
+		val0 = 0;
+		val1 = 0;
+		aswapl(&done0, 0);
+		aswapl(&done1, 0);
+		spawn(toggle0, nil);
+		spawn(toggle1, nil);
+		reap();
+		reap();
+	}
+	exits(nil);
+}
--- a/sys/src/libc/test/mkfile
+++ b/sys/src/libc/test/mkfile
@@ -1,6 +1,7 @@
 </$objtype/mkfile
 
 TEST=\
+	atomic\
 	date\
 	pow\
 	runebreak\
--