Text file src/internal/runtime/maps/memhash_amd64.s

     1  // Copyright 2026 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // func memHashAES(p unsafe.Pointer, h, s uintptr) uintptr
     8  // hash function using AES hardware instructions
        //
        // Hashes the s bytes at p, seeded with h, and returns the low 64 bits of
        // the final 128-bit state. The code dispatches on the length: 0, 1-15,
        // 16, 17-32, 33-64, 65-128 and 129+ bytes each have a dedicated path.
        // Inputs longer than 16 bytes are covered by loads taken from both the
        // start and the end of the buffer; those loads may overlap.
        //
        // Modifies AX, CX, the flags and X0-X15; BX is only read. X15 is cleared
        // again on the paths that use it.
     9  TEXT ·memHashAES<ABIInternal>(SB),NOSPLIT,$0-32
    10  	// AX: data
    11  	// BX: hash seed
    12  	// CX: length
    13  	// At return: AX = return value
    14  
    15  	// Fill an SSE register with our seeds.
    16  	MOVQ	BX, X0				// 64 bits of per-table hash seed
    17  	PINSRW	$4, CX, X0			// 16 bits of length
    18  	PSHUFHW $0, X0, X0			// repeat length 4 times total
    19  	MOVO	X0, X1				// save unscrambled seed
    20  	PXOR	·aeskeysched(SB), X0	// xor in per-process seed
    21  	AESENC	X0, X0				// scramble seed
    22  
        	// Dispatch on length. All comparisons are unsigned.
    23  	CMPQ	CX, $16
    24  	JB	aes0to15
    25  	JE	aes16
    26  	CMPQ	CX, $32
    27  	JBE	aes17to32
    28  	CMPQ	CX, $64
    29  	JBE	aes33to64
    30  	CMPQ	CX, $128
    31  	JBE	aes65to128
    32  	JMP	aes129plus
    33  
    34  aes0to15:
    35  	TESTQ	CX, CX
    36  	JE	aes0
    37  
        	// AX = p+16. Bits 4-11 of p+16 are all zero exactly when p lies in
        	// the last 16 bytes of a 4096-byte-aligned region; in that case a
        	// 16-byte load starting at p could run into the next region.
    38  	ADDQ	$16, AX
    39  	TESTW	$0xff0, AX
    40  	JE	endofpage
    41  
    42  	// 16 bytes loaded at this address won't cross
    43  	// a page boundary, so we can load it directly.
    44  	MOVOU	-16(AX), X1	// 16 bytes starting at p
    45  	ADDQ	CX, CX		// CX = 2*len, so CX*8 = 16*len below
    46  	MOVQ	$masks<>(SB), AX
    47  	PAND	(AX)(CX*8), X1	// keep only the low len bytes
    48  final1:
        	// Shared tail for the 1-15 and 16 byte cases: X1 holds the data.
    49  	PXOR	X0, X1	// xor data with seed
    50  	AESENC	X1, X1	// scramble combo 3 times
    51  	AESENC	X1, X1
    52  	AESENC	X1, X1
    53  	MOVQ	X1, AX	// return X1
    54  	RET
    55  
    56  endofpage:
    57  	// The low 12 bits of p are 11111111xxxx, so p may be up against
    58  	// a page boundary. Load the 16 bytes ending at the last data byte
    59  	// (p+len-16), then shift the data bytes down using pshufb.
    60  	MOVOU	-32(AX)(CX*1), X1	// AX is p+16 here
    61  	ADDQ	CX, CX			// CX = 2*len, so CX*8 = 16*len below
    62  	MOVQ	$shifts<>(SB), AX
    63  	PSHUFB	(AX)(CX*8), X1	// top len bytes -> low len bytes, rest zeroed
    64  	JMP	final1
    65  
    66  aes0:
    67  	// Return scrambled input seed
    68  	AESENC	X0, X0
    69  	MOVQ	X0, AX	// return X0
    70  	RET
    71  
    72  aes16:
    73  	MOVOU	(AX), X1
    74  	JMP	final1
    75  
    76  aes17to32:
    77  	// make second starting seed
    78  	PXOR	·aeskeysched+16(SB), X1
    79  	AESENC	X1, X1
    80  
    81  	// load data to be hashed
        	// (first 16 and last 16 bytes; they overlap when len < 32)
    82  	MOVOU	(AX), X2
    83  	MOVOU	-16(AX)(CX*1), X3
    84  
    85  	// xor with seed
    86  	PXOR	X0, X2
    87  	PXOR	X1, X3
    88  
    89  	// scramble 3 times
    90  	AESENC	X2, X2
    91  	AESENC	X3, X3
    92  	AESENC	X2, X2
    93  	AESENC	X3, X3
    94  	AESENC	X2, X2
    95  	AESENC	X3, X3
    96  
    97  	// combine results
    98  	PXOR	X3, X2
    99  	MOVQ	X2, AX	// return X2
   100  	RET
   101  
   102  aes33to64:
   103  	// make 3 more starting seeds
   104  	MOVO	X1, X2
   105  	MOVO	X1, X3
   106  	PXOR	·aeskeysched+16(SB), X1
   107  	PXOR	·aeskeysched+32(SB), X2
   108  	PXOR	·aeskeysched+48(SB), X3
   109  	AESENC	X1, X1
   110  	AESENC	X2, X2
   111  	AESENC	X3, X3
   112  
        	// load data: first 32 and last 32 bytes (overlapping when len < 64)
   113  	MOVOU	(AX), X4
   114  	MOVOU	16(AX), X5
   115  	MOVOU	-32(AX)(CX*1), X6
   116  	MOVOU	-16(AX)(CX*1), X7
   117  
        	// xor with seeds
   118  	PXOR	X0, X4
   119  	PXOR	X1, X5
   120  	PXOR	X2, X6
   121  	PXOR	X3, X7
   122  
        	// scramble 3 times
   123  	AESENC	X4, X4
   124  	AESENC	X5, X5
   125  	AESENC	X6, X6
   126  	AESENC	X7, X7
   127  
   128  	AESENC	X4, X4
   129  	AESENC	X5, X5
   130  	AESENC	X6, X6
   131  	AESENC	X7, X7
   132  
   133  	AESENC	X4, X4
   134  	AESENC	X5, X5
   135  	AESENC	X6, X6
   136  	AESENC	X7, X7
   137  
        	// combine results
   138  	PXOR	X6, X4
   139  	PXOR	X7, X5
   140  	PXOR	X5, X4
   141  	MOVQ	X4, AX	// return X4
   142  	RET
   143  
   144  aes65to128:
   145  	// make 7 more starting seeds
   146  	MOVO	X1, X2
   147  	MOVO	X1, X3
   148  	MOVO	X1, X4
   149  	MOVO	X1, X5
   150  	MOVO	X1, X6
   151  	MOVO	X1, X7
   152  	PXOR	·aeskeysched+16(SB), X1
   153  	PXOR	·aeskeysched+32(SB), X2
   154  	PXOR	·aeskeysched+48(SB), X3
   155  	PXOR	·aeskeysched+64(SB), X4
   156  	PXOR	·aeskeysched+80(SB), X5
   157  	PXOR	·aeskeysched+96(SB), X6
   158  	PXOR	·aeskeysched+112(SB), X7
   159  	AESENC	X1, X1
   160  	AESENC	X2, X2
   161  	AESENC	X3, X3
   162  	AESENC	X4, X4
   163  	AESENC	X5, X5
   164  	AESENC	X6, X6
   165  	AESENC	X7, X7
   166  
   167  	// load data
        	// (first 64 and last 64 bytes; they overlap when len < 128)
   168  	MOVOU	(AX), X8
   169  	MOVOU	16(AX), X9
   170  	MOVOU	32(AX), X10
   171  	MOVOU	48(AX), X11
   172  	MOVOU	-64(AX)(CX*1), X12
   173  	MOVOU	-48(AX)(CX*1), X13
   174  	MOVOU	-32(AX)(CX*1), X14
   175  	MOVOU	-16(AX)(CX*1), X15
   176  
   177  	// xor with seed
   178  	PXOR	X0, X8
   179  	PXOR	X1, X9
   180  	PXOR	X2, X10
   181  	PXOR	X3, X11
   182  	PXOR	X4, X12
   183  	PXOR	X5, X13
   184  	PXOR	X6, X14
   185  	PXOR	X7, X15
   186  
   187  	// scramble 3 times
   188  	AESENC	X8, X8
   189  	AESENC	X9, X9
   190  	AESENC	X10, X10
   191  	AESENC	X11, X11
   192  	AESENC	X12, X12
   193  	AESENC	X13, X13
   194  	AESENC	X14, X14
   195  	AESENC	X15, X15
   196  
   197  	AESENC	X8, X8
   198  	AESENC	X9, X9
   199  	AESENC	X10, X10
   200  	AESENC	X11, X11
   201  	AESENC	X12, X12
   202  	AESENC	X13, X13
   203  	AESENC	X14, X14
   204  	AESENC	X15, X15
   205  
   206  	AESENC	X8, X8
   207  	AESENC	X9, X9
   208  	AESENC	X10, X10
   209  	AESENC	X11, X11
   210  	AESENC	X12, X12
   211  	AESENC	X13, X13
   212  	AESENC	X14, X14
   213  	AESENC	X15, X15
   214  
   215  	// combine results
   216  	PXOR	X12, X8
   217  	PXOR	X13, X9
   218  	PXOR	X14, X10
   219  	PXOR	X15, X11
   220  	PXOR	X10, X8
   221  	PXOR	X11, X9
   222  	PXOR	X9, X8
   223  	// X15 must be zero on return
   224  	PXOR	X15, X15
   225  	MOVQ	X8, AX	// return X8
   226  	RET
   227  
   228  aes129plus:
   229  	// make 7 more starting seeds
   230  	MOVO	X1, X2
   231  	MOVO	X1, X3
   232  	MOVO	X1, X4
   233  	MOVO	X1, X5
   234  	MOVO	X1, X6
   235  	MOVO	X1, X7
   236  	PXOR	·aeskeysched+16(SB), X1
   237  	PXOR	·aeskeysched+32(SB), X2
   238  	PXOR	·aeskeysched+48(SB), X3
   239  	PXOR	·aeskeysched+64(SB), X4
   240  	PXOR	·aeskeysched+80(SB), X5
   241  	PXOR	·aeskeysched+96(SB), X6
   242  	PXOR	·aeskeysched+112(SB), X7
   243  	AESENC	X1, X1
   244  	AESENC	X2, X2
   245  	AESENC	X3, X3
   246  	AESENC	X4, X4
   247  	AESENC	X5, X5
   248  	AESENC	X6, X6
   249  	AESENC	X7, X7
   250  
   251  	// start with last (possibly overlapping) block
        	// X8-X15 form the running 128-byte state from here on.
   252  	MOVOU	-128(AX)(CX*1), X8
   253  	MOVOU	-112(AX)(CX*1), X9
   254  	MOVOU	-96(AX)(CX*1), X10
   255  	MOVOU	-80(AX)(CX*1), X11
   256  	MOVOU	-64(AX)(CX*1), X12
   257  	MOVOU	-48(AX)(CX*1), X13
   258  	MOVOU	-32(AX)(CX*1), X14
   259  	MOVOU	-16(AX)(CX*1), X15
   260  
   261  	// xor in seed
   262  	PXOR	X0, X8
   263  	PXOR	X1, X9
   264  	PXOR	X2, X10
   265  	PXOR	X3, X11
   266  	PXOR	X4, X12
   267  	PXOR	X5, X13
   268  	PXOR	X6, X14
   269  	PXOR	X7, X15
   270  
   271  	// compute number of remaining 128-byte blocks
        	// CX = (len-1)/128: the blocks, read from the start of the buffer,
        	// that are not fully covered by the final block loaded above.
        	// This is at least 1 because len >= 129 on this path.
   272  	DECQ	CX
   273  	SHRQ	$7, CX
   274  
   275  	PCALIGN $16
   276  aesloop:
   277  	// scramble state
   278  	AESENC	X8, X8
   279  	AESENC	X9, X9
   280  	AESENC	X10, X10
   281  	AESENC	X11, X11
   282  	AESENC	X12, X12
   283  	AESENC	X13, X13
   284  	AESENC	X14, X14
   285  	AESENC	X15, X15
   286  
   287  	// scramble state, xor in a block
        	// The seeds in X0-X7 were already folded into the state, so those
        	// registers are reused here to hold the next 128 bytes of data.
   288  	MOVOU	(AX), X0
   289  	MOVOU	16(AX), X1
   290  	MOVOU	32(AX), X2
   291  	MOVOU	48(AX), X3
   292  	AESENC	X0, X8
   293  	AESENC	X1, X9
   294  	AESENC	X2, X10
   295  	AESENC	X3, X11
   296  	MOVOU	64(AX), X4
   297  	MOVOU	80(AX), X5
   298  	MOVOU	96(AX), X6
   299  	MOVOU	112(AX), X7
   300  	AESENC	X4, X12
   301  	AESENC	X5, X13
   302  	AESENC	X6, X14
   303  	AESENC	X7, X15
   304  
   305  	ADDQ	$128, AX	// advance to the next block
   306  	DECQ	CX
   307  	JNE	aesloop
   308  
   309  	// 3 more scrambles to finish
   310  	AESENC	X8, X8
   311  	AESENC	X9, X9
   312  	AESENC	X10, X10
   313  	AESENC	X11, X11
   314  	AESENC	X12, X12
   315  	AESENC	X13, X13
   316  	AESENC	X14, X14
   317  	AESENC	X15, X15
   318  	AESENC	X8, X8
   319  	AESENC	X9, X9
   320  	AESENC	X10, X10
   321  	AESENC	X11, X11
   322  	AESENC	X12, X12
   323  	AESENC	X13, X13
   324  	AESENC	X14, X14
   325  	AESENC	X15, X15
   326  	AESENC	X8, X8
   327  	AESENC	X9, X9
   328  	AESENC	X10, X10
   329  	AESENC	X11, X11
   330  	AESENC	X12, X12
   331  	AESENC	X13, X13
   332  	AESENC	X14, X14
   333  	AESENC	X15, X15
   334  
        	// combine results: fold the eight state registers into X8
   335  	PXOR	X12, X8
   336  	PXOR	X13, X9
   337  	PXOR	X14, X10
   338  	PXOR	X15, X11
   339  	PXOR	X10, X8
   340  	PXOR	X11, X9
   341  	PXOR	X9, X8
   342  	// X15 must be zero on return
   343  	PXOR	X15, X15
   344  	MOVQ	X8, AX	// return X8
   345  	RET
   346  
   347  // simple mask to get rid of data in the high part of the register.
        // The table holds sixteen 16-byte entries. Entry n (at offset 16*n) has
        // its low n bytes set to 0xff and all higher bytes zero, so ANDing a
        // 16-byte value with entry n keeps only its low n bytes.
        // memHashAES indexes it with 16*length for lengths 1-15.
   348  DATA masks<>+0x00(SB)/8, $0x0000000000000000
   349  DATA masks<>+0x08(SB)/8, $0x0000000000000000
   350  DATA masks<>+0x10(SB)/8, $0x00000000000000ff
   351  DATA masks<>+0x18(SB)/8, $0x0000000000000000
   352  DATA masks<>+0x20(SB)/8, $0x000000000000ffff
   353  DATA masks<>+0x28(SB)/8, $0x0000000000000000
   354  DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
   355  DATA masks<>+0x38(SB)/8, $0x0000000000000000
   356  DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
   357  DATA masks<>+0x48(SB)/8, $0x0000000000000000
   358  DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
   359  DATA masks<>+0x58(SB)/8, $0x0000000000000000
   360  DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
   361  DATA masks<>+0x68(SB)/8, $0x0000000000000000
   362  DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
   363  DATA masks<>+0x78(SB)/8, $0x0000000000000000
   364  DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
   365  DATA masks<>+0x88(SB)/8, $0x0000000000000000
   366  DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
   367  DATA masks<>+0x98(SB)/8, $0x00000000000000ff
   368  DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
   369  DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
   370  DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
   371  DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
   372  DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
   373  DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
   374  DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
   375  DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
   376  DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
   377  DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
   378  DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
   379  DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
   380  GLOBL masks<>(SB),RODATA,$256
   381  
   382  // these are arguments to pshufb. They move data down from
   383  // the high bytes of the register to the low bytes of the register.
   384  // index is how many bytes to move.
        // The table holds sixteen 16-byte entries. In entry n (at offset 16*n)
        // the low n control bytes select source bytes 16-n through 15, and the
        // remaining control bytes are 0xff, which makes pshufb write zero to
        // those destination bytes. memHashAES indexes it with 16*length.
   385  DATA shifts<>+0x00(SB)/8, $0x0000000000000000
   386  DATA shifts<>+0x08(SB)/8, $0x0000000000000000
   387  DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
   388  DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
   389  DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
   390  DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
   391  DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
   392  DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
   393  DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
   394  DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
   395  DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
   396  DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
   397  DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
   398  DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
   399  DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
   400  DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
   401  DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
   402  DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
   403  DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
   404  DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
   405  DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
   406  DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
   407  DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
   408  DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
   409  DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
   410  DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
   411  DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
   412  DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
   413  DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
   414  DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
   415  DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
   416  DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
   417  GLOBL shifts<>(SB),RODATA,$256
   418  
   419  TEXT ·checkMasksAndShiftsAlignment<ABIInternal>(SB),NOSPLIT,$0-1
   420  	// Report whether both the masks<> and shifts<> tables start on a 16-byte boundary.
   421  	LEAQ	masks<>(SB), AX
   422  	LEAQ	shifts<>(SB), BX
   423  	ORQ	BX, AX		// a low bit set in either address survives the OR
   424  	TESTQ	$15, AX		// low four bits clear <=> both addresses are 16-byte aligned
   425  	SETEQ	AX		// result: 1 if aligned, 0 otherwise
   426  	RET
   427  

View as plain text