ref: e10e93451a5c77dbd68748ebda4752956ba6357d
parent: 521281c4ce23a8b69ca7c30d5c852e18d6847912
author: Ori Bernstein <ori@eigenstate.org>
date: Tue Feb 25 11:02:21 EST 2025
gefs: optimize hash function Before: hash 1000000 N 1724 ns/op 6604 cy/op After: hash 1000000 N 1199 ns/op 4596 cy/op Tested on a Ryzen 7.
--- a/sys/src/cmd/gefs/hash.c
+++ b/sys/src/cmd/gefs/hash.c
@@ -45,43 +45,44 @@
uvlong
metrohash64_1(void * key, u64int len, u32int seed)
{
- static const u64int k0 = 0xC83A91E1;
- static const u64int k1 = 0x8648DBDB;
- static const u64int k2 = 0x7BDEC03B;
- static const u64int k3 = 0x2F5870A5;
+ enum {
+ k0 = 0xC83A91E1ULL,
+ k1 = 0x8648DBDBULL,
+ k2 = 0x7BDEC03BULL,
+ k3 = 0x2F5870A5ULL,
+ };
const uchar * ptr = key;
const uchar * const end = ptr + len;
+ u64int v0, v1, v2, v3, t;
u64int hash = ((((u64int) seed) + k2) * k0) + len;
if(len >= 32){
- u64int v[4];
- v[0] = hash;
- v[1] = hash;
- v[2] = hash;
- v[3] = hash;
+ v0 = hash;
+ v1 = hash;
+ v2 = hash;
+ v3 = hash;
do{
- v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2];
- v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3];
- v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0];
- v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1];
- }
- while(ptr <= (end - 32));
+ v0 += read_u64(ptr) * k0; v0 = rotate_right(v0, 29) + v2; ptr += 8;
+ v1 += read_u64(ptr) * k1; v1 = rotate_right(v1, 29) + v3; ptr += 8;
+ v2 += read_u64(ptr) * k2; v2 = rotate_right(v2, 29) + v0; ptr += 8;
+ v3 += read_u64(ptr) * k3; v3 = rotate_right(v3, 29) + v1; ptr += 8;
+ }while(ptr <= (end - 32));
- v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 33) * k1;
- v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 33) * k0;
- v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 33) * k1;
- v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 33) * k0;
- hash += v[0] ^ v[1];
+ t = ((v0 + v3) * k0) + v1; v2 ^= rotate_right(t, 33) * k1;
+ t = ((v1 + v2) * k1) + v0; v3 ^= rotate_right(t, 33) * k0;
+ t = ((v0 + v2) * k0) + v3; v0 ^= rotate_right(t, 33) * k1;
+ t = ((v1 + v3) * k1) + v2; v1 ^= rotate_right(t, 33) * k0;
+ hash += v0 ^ v1;
}
if((end - ptr) >= 16){
- u64int v0 = hash + (read_u64(ptr) * k0); ptr += 8; v0 = rotate_right(v0,33) * k1;
- u64int v1 = hash + (read_u64(ptr) * k1); ptr += 8; v1 = rotate_right(v1,33) * k2;
- v0 ^= rotate_right(v0 * k0, 35) + v1;
- v1 ^= rotate_right(v1 * k3, 35) + v0;
+ v0 = hash + (read_u64(ptr) * k0); ptr += 8; v0 = rotate_right(v0, 33) * k1;
+ v1 = hash + (read_u64(ptr) * k1); ptr += 8; v1 = rotate_right(v1, 33) * k2;
+ t = v0 * k0; v0 ^= rotate_right(t, 35) + v1;
+ t = v1 * k3; v1 ^= rotate_right(t, 35) + v0;
hash += v1;
}
--
⑨