shithub: furgit

Download patch

ref: ba327e76c5b110044ec8ebb75630e79506dfbed8
parent: 1be291400068cd14762c723c7ecb582bd5c0ffe0
author: Runxi Yu <runxiyu@umich.edu>
date: Tue Jan 6 08:01:54 EST 2026

adler32: Drop SSE3 support

--- a/internal/adler32/adler32_amd64.go
+++ b/internal/adler32/adler32_amd64.go
@@ -15,7 +15,6 @@
 const Size = 4
 
 var (
-	hasSSE3 = cpu.X86.HasSSE3
 	hasAVX2 = cpu.X86.HasAVX2
 )
 
@@ -27,7 +26,7 @@
 
 // New returns a new hash.Hash32 computing the Adler-32 checksum.
 func New() hash.Hash32 {
-	if !hasSSE3 {
+	if !hasAVX2 {
 		return adler32.New()
 	}
 	d := new(digest)
@@ -60,11 +59,7 @@
 func (d *digest) Write(data []byte) (nn int, err error) {
 	if len(data) >= 64 {
 		var h uint32
-		if hasAVX2 {
-			h = adler32_avx2(uint32(*d), data)
-		} else {
-			h = adler32_sse3(uint32(*d), data)
-		}
+		h = adler32_avx2(uint32(*d), data)
 		*d = digest(h)
 	} else {
 		h := update(uint32(*d), data)
@@ -82,12 +77,9 @@
 
 // Checksum returns the Adler-32 checksum of data.
 func Checksum(data []byte) uint32 {
-	if !hasSSE3 || len(data) < 64 {
+	if !hasAVX2 || len(data) < 64 {
 		return update(1, data)
 	}
 
-	if hasAVX2 {
-		return adler32_avx2(1, data)
-	}
-	return adler32_sse3(1, data)
+	return adler32_avx2(1, data)
 }
--- a/internal/adler32/adler32_sse3.go
+++ /dev/null
@@ -1,6 +1,0 @@
-//go:build !purego && amd64
-
-package adler32
-
-//go:noescape
-func adler32_sse3(in uint32, buf []byte) uint32
--- a/internal/adler32/adler32_sse3.s
+++ /dev/null
@@ -1,214 +1,0 @@
-//go:build !purego && amd64
-
-#include "textflag.h"
-
-DATA weights_17_32<>+0x00(SB)/8, $0x191a1b1c1d1e1f20
-DATA weights_17_32<>+0x08(SB)/8, $0x1112131415161718
-GLOBL weights_17_32<>(SB), (RODATA|NOPTR), $16
-
-DATA ones_u16<>+0x00(SB)/8, $0x0001000100010001
-DATA ones_u16<>+0x08(SB)/8, $0x0001000100010001
-GLOBL ones_u16<>(SB), (RODATA|NOPTR), $16
-
-DATA weights_1_16<>+0x00(SB)/8, $0x090a0b0c0d0e0f10
-DATA weights_1_16<>+0x08(SB)/8, $0x0102030405060708
-GLOBL weights_1_16<>(SB), (RODATA|NOPTR), $16
-
-TEXT ·adler32_sse3(SB), NOSPLIT, $0-36
-	MOVLQZX in+0(FP), DI
-	MOVQ    buf_base+8(FP), SI
-	MOVQ    buf_len+16(FP), DX
-	MOVQ    buf_cap+24(FP), CX
-	NOP
-	NOP
-	NOP
-	WORD    $0xf889
-	LONG    $0xc8b70f44
-	WORD    $0xe8c1; BYTE $0x10
-	WORD    $0xd189
-	WORD    $0xe183; BYTE $0x1f
-	CMPQ    DX, $0x20
-	JAE     block_loop_setup
-	WORD    $0x8944; BYTE $0xcf
-	JMP     tail_entry
-
-block_loop_setup:
-	SHRQ $0x5, DX
-	LONG $0xc0ef0f66
-	MOVO weights_17_32<>(SB), X1
-	MOVO ones_u16<>(SB), X2
-	MOVO weights_1_16<>(SB), X3
-	LONG $0x8071b841; WORD $0x8007
-
-block_outer_loop:
-	CMPQ DX, $0xad
-	LONG $0x00adba41; WORD $0x0000
-	LONG $0xd2420f4c
-	WORD $0x8944; BYTE $0xcf
-	LONG $0xfaaf0f41
-	LONG $0xef6e0f66
-	LONG $0xe06e0f66
-	WORD $0x8944; BYTE $0xd0
-	LONG $0xf6ef0f66
-
-block_inner_loop:
-	LONG  $0x3e6f0ff3
-	LONG  $0x6f0f4466; BYTE $0xc7
-	LONG  $0x04380f66; BYTE $0xf9
-	LONG  $0xfaf50f66
-	LONG  $0xfcfe0f66
-	LONG  $0x666f0ff3; BYTE $0x10
-	LONG  $0xeefe0f66
-	LONG  $0xf60f4466; BYTE $0xc0
-	LONG  $0xfe0f4466; BYTE $0xc6
-	LONG  $0xf46f0f66
-	LONG  $0xf0f60f66
-	LONG  $0xfe0f4166; BYTE $0xf0
-	LONG  $0x04380f66; BYTE $0xe3
-	LONG  $0xe2f50f66
-	LONG  $0xe7fe0f66
-	ADDQ  $0x20, SI
-	WORD  $0xc8ff
-	JNE   block_inner_loop
-	LONG  $0xf5720f66; BYTE $0x05
-	LONG  $0xe5fe0f66
-	LONG  $0xee700f66; BYTE $0xb1
-	LONG  $0xeefe0f66
-	LONG  $0xf5700f66; BYTE $0xee
-	LONG  $0xf5fe0f66
-	LONG  $0xf77e0f66
-	WORD  $0x0144; BYTE $0xcf
-	LONG  $0xec700f66; BYTE $0xb1
-	LONG  $0xecfe0f66
-	LONG  $0xe5700f66; BYTE $0xee
-	LONG  $0xe5fe0f66
-	LONG  $0xe07e0f66
-	MOVQ  DI, R9
-	IMULQ R8, R9
-	SHRQ  $0x2f, R9
-	LONG  $0xf1c96945; WORD $0x00ff; BYTE $0x00
-	WORD  $0x2944; BYTE $0xcf
-	MOVQ  AX, R9
-	IMULQ R8, R9
-	SHRQ  $0x2f, R9
-	LONG  $0xf1c96945; WORD $0x00ff; BYTE $0x00
-	WORD  $0x2944; BYTE $0xc8
-	WORD  $0x8941; BYTE $0xf9
-	SUBQ  R10, DX
-	JNE   block_outer_loop
-
-tail_entry:
-	WORD $0x8548; BYTE $0xc9
-	JE   return_result
-	CMPL CX, $0x10
-	JB   tail_bytes_setup
-	WORD $0xb60f; BYTE $0x16
-	WORD $0xd701
-	WORD $0xf801
-	LONG $0x0156b60f
-	WORD $0xfa01
-	WORD $0xd001
-	LONG $0x027eb60f
-	WORD $0xd701
-	WORD $0xf801
-	LONG $0x0356b60f
-	WORD $0xfa01
-	WORD $0xd001
-	LONG $0x047eb60f
-	WORD $0xd701
-	WORD $0xf801
-	LONG $0x0556b60f
-	WORD $0xfa01
-	WORD $0xd001
-	LONG $0x067eb60f
-	WORD $0xd701
-	WORD $0xf801
-	LONG $0x0756b60f
-	WORD $0xfa01
-	WORD $0xd001
-	LONG $0x087eb60f
-	WORD $0xd701
-	WORD $0xf801
-	LONG $0x0956b60f
-	WORD $0xfa01
-	WORD $0xd001
-	LONG $0x0a7eb60f
-	WORD $0xd701
-	WORD $0xf801
-	LONG $0x0b56b60f
-	WORD $0xfa01
-	WORD $0xd001
-	LONG $0x0c7eb60f
-	WORD $0xd701
-	WORD $0xf801
-	LONG $0x0d56b60f
-	WORD $0xfa01
-	WORD $0xd001
-	LONG $0x46b60f44; BYTE $0x0e
-	WORD $0x0141; BYTE $0xd0
-	WORD $0x0144; BYTE $0xc0
-	LONG $0x0f7eb60f
-	WORD $0x0144; BYTE $0xc7
-	WORD $0xf801
-	ADDQ $-0x10, CX
-	JE   final_reduce
-	ADDQ $0x10, SI
-
-tail_bytes_setup:
-	LEAQ -0x1(CX), DX
-	MOVQ CX, R9
-	ANDQ $0x3, R9
-	JE   tail_dword_setup
-	XORL R8, R8
-
-tail_byte_loop:
-	LONG $0x14b60f46; BYTE $0x06
-	WORD $0x0144; BYTE $0xd7
-	WORD $0xf801
-	INCQ R8
-	CMPQ R9, R8
-	JNE  tail_byte_loop
-	ADDQ R8, SI
-	SUBQ R8, CX
-
-tail_dword_setup:
-	CMPQ DX, $0x3
-	JB   final_reduce
-	XORL DX, DX
-
-tail_dword_loop:
-	LONG $0x04b60f44; BYTE $0x16
-	WORD $0x0141; BYTE $0xf8
-	WORD $0x0144; BYTE $0xc0
-	LONG $0x167cb60f; BYTE $0x01
-	WORD $0x0144; BYTE $0xc7
-	WORD $0xf801
-	LONG $0x44b60f44; WORD $0x0216
-	WORD $0x0141; BYTE $0xf8
-	WORD $0x0144; BYTE $0xc0
-	LONG $0x167cb60f; BYTE $0x03
-	WORD $0x0144; BYTE $0xc7
-	WORD $0xf801
-	ADDQ $0x4, DX
-	CMPQ CX, DX
-	JNE  tail_dword_loop
-
-final_reduce:
-	LONG  $0x000f8f8d; WORD $0xffff
-	CMPL  DI, $0xfff1
-	WORD  $0x420f; BYTE $0xcf
-	WORD  $0xc289
-	LONG  $0x078071be; BYTE $0x80
-	IMULQ DX, SI
-	SHRQ  $0x2f, SI
-	LONG  $0xfff1d669; WORD $0x0000
-	WORD  $0xd029
-	WORD  $0xcf89
-
-return_result:
-	WORD $0xe0c1; BYTE $0x10
-	WORD $0xf809
-	NOP
-	NOP
-	MOVL AX, ret+32(FP)
-	RET
--