ref: ba327e76c5b110044ec8ebb75630e79506dfbed8
parent: 1be291400068cd14762c723c7ecb582bd5c0ffe0
author: Runxi Yu <runxiyu@umich.edu>
date: Tue Jan 6 08:01:54 EST 2026
adler32: Drop SSE3 support
--- a/internal/adler32/adler32_amd64.go
+++ b/internal/adler32/adler32_amd64.go
@@ -15,7 +15,6 @@
const Size = 4
var (
- hasSSE3 = cpu.X86.HasSSE3
hasAVX2 = cpu.X86.HasAVX2
)
@@ -27,7 +26,7 @@
// New returns a new hash.Hash32 computing the Adler-32 checksum.
func New() hash.Hash32 {- if !hasSSE3 {+ if !hasAVX2 {return adler32.New()
}
d := new(digest)
@@ -60,11 +59,7 @@
func (d *digest) Write(data []byte) (nn int, err error) { if len(data) >= 64 {var h uint32
- if hasAVX2 {- h = adler32_avx2(uint32(*d), data)
- } else {- h = adler32_sse3(uint32(*d), data)
- }
+ h = adler32_avx2(uint32(*d), data)
*d = digest(h)
} else {h := update(uint32(*d), data)
@@ -82,12 +77,9 @@
// Checksum returns the Adler-32 checksum of data.
func Checksum(data []byte) uint32 {- if !hasSSE3 || len(data) < 64 {+ if !hasAVX2 || len(data) < 64 {return update(1, data)
}
- if hasAVX2 {- return adler32_avx2(1, data)
- }
- return adler32_sse3(1, data)
+ return adler32_avx2(1, data)
}
--- a/internal/adler32/adler32_sse3.go
+++ /dev/null
@@ -1,6 +1,0 @@
-//go:build !purego && amd64
-
-package adler32
-
-//go:noescape
-func adler32_sse3(in uint32, buf []byte) uint32
--- a/internal/adler32/adler32_sse3.s
+++ /dev/null
@@ -1,214 +1,0 @@
-//go:build !purego && amd64
-
-#include "textflag.h"
-
-DATA weights_17_32<>+0x00(SB)/8, $0x191a1b1c1d1e1f20
-DATA weights_17_32<>+0x08(SB)/8, $0x1112131415161718
-GLOBL weights_17_32<>(SB), (RODATA|NOPTR), $16
-
-DATA ones_u16<>+0x00(SB)/8, $0x0001000100010001
-DATA ones_u16<>+0x08(SB)/8, $0x0001000100010001
-GLOBL ones_u16<>(SB), (RODATA|NOPTR), $16
-
-DATA weights_1_16<>+0x00(SB)/8, $0x090a0b0c0d0e0f10
-DATA weights_1_16<>+0x08(SB)/8, $0x0102030405060708
-GLOBL weights_1_16<>(SB), (RODATA|NOPTR), $16
-
-TEXT ·adler32_sse3(SB), NOSPLIT, $0-36
- MOVLQZX in+0(FP), DI
- MOVQ buf_base+8(FP), SI
- MOVQ buf_len+16(FP), DX
- MOVQ buf_cap+24(FP), CX
- NOP
- NOP
- NOP
- WORD $0xf889
- LONG $0xc8b70f44
- WORD $0xe8c1; BYTE $0x10
- WORD $0xd189
- WORD $0xe183; BYTE $0x1f
- CMPQ DX, $0x20
- JAE block_loop_setup
- WORD $0x8944; BYTE $0xcf
- JMP tail_entry
-
-block_loop_setup:
- SHRQ $0x5, DX
- LONG $0xc0ef0f66
- MOVO weights_17_32<>(SB), X1
- MOVO ones_u16<>(SB), X2
- MOVO weights_1_16<>(SB), X3
- LONG $0x8071b841; WORD $0x8007
-
-block_outer_loop:
- CMPQ DX, $0xad
- LONG $0x00adba41; WORD $0x0000
- LONG $0xd2420f4c
- WORD $0x8944; BYTE $0xcf
- LONG $0xfaaf0f41
- LONG $0xef6e0f66
- LONG $0xe06e0f66
- WORD $0x8944; BYTE $0xd0
- LONG $0xf6ef0f66
-
-block_inner_loop:
- LONG $0x3e6f0ff3
- LONG $0x6f0f4466; BYTE $0xc7
- LONG $0x04380f66; BYTE $0xf9
- LONG $0xfaf50f66
- LONG $0xfcfe0f66
- LONG $0x666f0ff3; BYTE $0x10
- LONG $0xeefe0f66
- LONG $0xf60f4466; BYTE $0xc0
- LONG $0xfe0f4466; BYTE $0xc6
- LONG $0xf46f0f66
- LONG $0xf0f60f66
- LONG $0xfe0f4166; BYTE $0xf0
- LONG $0x04380f66; BYTE $0xe3
- LONG $0xe2f50f66
- LONG $0xe7fe0f66
- ADDQ $0x20, SI
- WORD $0xc8ff
- JNE block_inner_loop
- LONG $0xf5720f66; BYTE $0x05
- LONG $0xe5fe0f66
- LONG $0xee700f66; BYTE $0xb1
- LONG $0xeefe0f66
- LONG $0xf5700f66; BYTE $0xee
- LONG $0xf5fe0f66
- LONG $0xf77e0f66
- WORD $0x0144; BYTE $0xcf
- LONG $0xec700f66; BYTE $0xb1
- LONG $0xecfe0f66
- LONG $0xe5700f66; BYTE $0xee
- LONG $0xe5fe0f66
- LONG $0xe07e0f66
- MOVQ DI, R9
- IMULQ R8, R9
- SHRQ $0x2f, R9
- LONG $0xf1c96945; WORD $0x00ff; BYTE $0x00
- WORD $0x2944; BYTE $0xcf
- MOVQ AX, R9
- IMULQ R8, R9
- SHRQ $0x2f, R9
- LONG $0xf1c96945; WORD $0x00ff; BYTE $0x00
- WORD $0x2944; BYTE $0xc8
- WORD $0x8941; BYTE $0xf9
- SUBQ R10, DX
- JNE block_outer_loop
-
-tail_entry:
- WORD $0x8548; BYTE $0xc9
- JE return_result
- CMPL CX, $0x10
- JB tail_bytes_setup
- WORD $0xb60f; BYTE $0x16
- WORD $0xd701
- WORD $0xf801
- LONG $0x0156b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x027eb60f
- WORD $0xd701
- WORD $0xf801
- LONG $0x0356b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x047eb60f
- WORD $0xd701
- WORD $0xf801
- LONG $0x0556b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x067eb60f
- WORD $0xd701
- WORD $0xf801
- LONG $0x0756b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x087eb60f
- WORD $0xd701
- WORD $0xf801
- LONG $0x0956b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x0a7eb60f
- WORD $0xd701
- WORD $0xf801
- LONG $0x0b56b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x0c7eb60f
- WORD $0xd701
- WORD $0xf801
- LONG $0x0d56b60f
- WORD $0xfa01
- WORD $0xd001
- LONG $0x46b60f44; BYTE $0x0e
- WORD $0x0141; BYTE $0xd0
- WORD $0x0144; BYTE $0xc0
- LONG $0x0f7eb60f
- WORD $0x0144; BYTE $0xc7
- WORD $0xf801
- ADDQ $-0x10, CX
- JE final_reduce
- ADDQ $0x10, SI
-
-tail_bytes_setup:
- LEAQ -0x1(CX), DX
- MOVQ CX, R9
- ANDQ $0x3, R9
- JE tail_dword_setup
- XORL R8, R8
-
-tail_byte_loop:
- LONG $0x14b60f46; BYTE $0x06
- WORD $0x0144; BYTE $0xd7
- WORD $0xf801
- INCQ R8
- CMPQ R9, R8
- JNE tail_byte_loop
- ADDQ R8, SI
- SUBQ R8, CX
-
-tail_dword_setup:
- CMPQ DX, $0x3
- JB final_reduce
- XORL DX, DX
-
-tail_dword_loop:
- LONG $0x04b60f44; BYTE $0x16
- WORD $0x0141; BYTE $0xf8
- WORD $0x0144; BYTE $0xc0
- LONG $0x167cb60f; BYTE $0x01
- WORD $0x0144; BYTE $0xc7
- WORD $0xf801
- LONG $0x44b60f44; WORD $0x0216
- WORD $0x0141; BYTE $0xf8
- WORD $0x0144; BYTE $0xc0
- LONG $0x167cb60f; BYTE $0x03
- WORD $0x0144; BYTE $0xc7
- WORD $0xf801
- ADDQ $0x4, DX
- CMPQ CX, DX
- JNE tail_dword_loop
-
-final_reduce:
- LONG $0x000f8f8d; WORD $0xffff
- CMPL DI, $0xfff1
- WORD $0x420f; BYTE $0xcf
- WORD $0xc289
- LONG $0x078071be; BYTE $0x80
- IMULQ DX, SI
- SHRQ $0x2f, SI
- LONG $0xfff1d669; WORD $0x0000
- WORD $0xd029
- WORD $0xcf89
-
-return_result:
- WORD $0xe0c1; BYTE $0x10
- WORD $0xf809
- NOP
- NOP
- MOVL AX, ret+32(FP)
- RET
--
⑨