Text file src/internal/runtime/maps/memhash_386.s

     1  // Copyright 2026 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // Hash functions using AES hardware instructions.
     8  
     9  // func memHash32AES(k uint32, h uintptr) uintptr
        //
        // memHash32AES hashes the 32-bit key k with the seed h. It packs h and k
        // into X0, applies three AESENC rounds keyed from ·aeskeysched (whose
        // definition is not shown here), and returns the low 32 bits of X0.
    10  TEXT ·memHash32AES(SB),NOSPLIT,$0-12
    11  	MOVL	h+4(FP), X0     // seed goes in the low 32 bits of X0
    12  	PINSRD	$1, k+0(FP), X0	// data (k) goes in dword lane 1
        	// Three AES rounds over X0; the round keys are the 16-byte
        	// slices of ·aeskeysched at offsets 0, 16 and 32.
    13  	AESENC	·aeskeysched+0(SB), X0
    14  	AESENC	·aeskeysched+16(SB), X0
    15  	AESENC	·aeskeysched+32(SB), X0
    16  	MOVL	X0, ret+8(FP)   // return the low 32 bits of the state
    17  	RET
    18  
    19  // func memHash64AES(k uint64, h uintptr) uintptr
        //
        // memHash64AES hashes the 64-bit key k with the seed h. It packs k and h
        // into X0, applies three AESENC rounds keyed from ·aeskeysched (whose
        // definition is not shown here), and returns the low 32 bits of X0.
    20  TEXT ·memHash64AES(SB),NOSPLIT,$0-16
    21  	MOVQ	k+0(FP), X0     // data (k) goes in the low 64 bits of X0
    22  	PINSRD	$2, h+8(FP), X0	// seed goes in dword lane 2, just above the data
        	// Three AES rounds over X0; the round keys are the 16-byte
        	// slices of ·aeskeysched at offsets 0, 16 and 32.
    23  	AESENC	·aeskeysched+0(SB), X0
    24  	AESENC	·aeskeysched+16(SB), X0
    25  	AESENC	·aeskeysched+32(SB), X0
    26  	MOVL	X0, ret+12(FP)  // return the low 32 bits of the state
    27  	RET
    28  
    29  // func memHashAES(p unsafe.Pointer, h, size uintptr) uintptr
        //
        // memHashAES hashes the size bytes starting at p with the seed h and
        // stores the low 32 bits of the final AES state in the result slot.
        // The seed is first mixed with the low 16 bits of size and with
        // ·aeskeysched; the code then takes one of five paths depending on
        // size: 0-15, exactly 16, 17-32, 33-64, or 65 and more bytes.
    30  TEXT ·memHashAES(SB),NOSPLIT,$0-16
    31  	// AX: data pointer (p)
    32  	// BX: size in bytes
    33  	// DX: address to put return value
    34  	MOVL	p+0(FP), AX
    35  	MOVL	s+8(FP), BX	// NOTE(review): the prototype comment calls this parameter size, the operand says s - confirm against the Go declaration
    36  	LEAL	ret+12(FP), DX
    37  
    38  	MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
    39  	PINSRW	$4, BX, X0	            // 16 bits of length
    40  	PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
    41  	MOVO	X0, X1                      // save unscrambled seed
    42  	PXOR	·aeskeysched(SB), X0 // xor in per-process seed
    43  	AESENC	X0, X0                      // scramble seed
    44  
        	// Dispatch on size. JB/JBE are unsigned comparisons.
    45  	CMPL	BX, $16
    46  	JB	aes0to15
    47  	JE	aes16
    48  	CMPL	BX, $32
    49  	JBE	aes17to32
    50  	CMPL	BX, $64
    51  	JBE	aes33to64
    52  	JMP	aes65plus
    53  
    54  aes0to15:
    55  	TESTL	BX, BX
    56  	JE	aes0	// size 0: no data to load
    57  
    58  	ADDL	$16, AX	// AX = p+16
    59  	TESTW	$0xff0, AX	// are bits 4-11 of p+16 all zero?
    60  	JE	endofpage
    61  
    62  	// 16 bytes loaded at this address won't cross
    63  	// a page boundary, so we can load it directly.
    64  	MOVOU	-16(AX), X1	// 16 bytes starting at p
    65  	ADDL	BX, BX	// BX = 2*size, so BX*8 below is size*16
    66  	PAND	masks<>(SB)(BX*8), X1	// masks<> entry for this size: keep only the low size bytes
    67  
    68  final1:
        	// Shared tail for the 1-16 byte paths: X0 is the scrambled seed,
        	// X1 is the (masked or shifted) data.
    69  	PXOR	X0, X1	// xor data with seed
    70  	AESENC	X1, X1  // scramble combo 3 times
    71  	AESENC	X1, X1
    72  	AESENC	X1, X1
    73  	MOVL	X1, (DX)	// store the low 32 bits as the result
    74  	RET
    75  
    76  endofpage:
    77  	// address ends in 1111xxxx. Might be up against
    78  	// a page boundary, so load ending at last byte.
    79  	// Then shift bytes down using pshufb.
    80  	MOVOU	-32(AX)(BX*1), X1	// AX is p+16 here, so this loads the 16 bytes ending at p+size
    81  	ADDL	BX, BX	// BX = 2*size, so BX*8 below is size*16
    82  	PSHUFB	shifts<>(SB)(BX*8), X1	// shifts<> entry for this size
    83  	JMP	final1
    84  
    85  aes0:
    86  	// Return scrambled input seed
    87  	AESENC	X0, X0
    88  	MOVL	X0, (DX)
    89  	RET
    90  
    91  aes16:
    92  	MOVOU	(AX), X1	// exactly 16 bytes: no masking needed
    93  	JMP	final1
    94  
    95  aes17to32:
    96  	// make second starting seed
        	// (X1 still holds the unscrambled seed saved before the dispatch)
    97  	PXOR	·aeskeysched+16(SB), X1
    98  	AESENC	X1, X1
    99  
   100  	// load data to be hashed
   101  	MOVOU	(AX), X2	// first 16 bytes
   102  	MOVOU	-16(AX)(BX*1), X3	// last 16 bytes; overlaps the first load when size < 32
   103  
   104  	// xor with seed
   105  	PXOR	X0, X2
   106  	PXOR	X1, X3
   107  
   108  	// scramble 3 times
   109  	AESENC	X2, X2
   110  	AESENC	X3, X3
   111  	AESENC	X2, X2
   112  	AESENC	X3, X3
   113  	AESENC	X2, X2
   114  	AESENC	X3, X3
   115  
   116  	// combine results
   117  	PXOR	X3, X2
   118  	MOVL	X2, (DX)
   119  	RET
   120  
   121  aes33to64:
   122  	// make 3 more starting seeds
        	// (each is the unscrambled seed xored with a different 16-byte
        	// slice of ·aeskeysched, then scrambled once)
   123  	MOVO	X1, X2
   124  	MOVO	X1, X3
   125  	PXOR	·aeskeysched+16(SB), X1
   126  	PXOR	·aeskeysched+32(SB), X2
   127  	PXOR	·aeskeysched+48(SB), X3
   128  	AESENC	X1, X1
   129  	AESENC	X2, X2
   130  	AESENC	X3, X3
   131  
        	// first 32 bytes and last 32 bytes; the two halves overlap when size < 64
   132  	MOVOU	(AX), X4
   133  	MOVOU	16(AX), X5
   134  	MOVOU	-32(AX)(BX*1), X6
   135  	MOVOU	-16(AX)(BX*1), X7
   136  
        	// xor each data block with its own seed
   137  	PXOR	X0, X4
   138  	PXOR	X1, X5
   139  	PXOR	X2, X6
   140  	PXOR	X3, X7
   141  
        	// scramble each lane 3 times
   142  	AESENC	X4, X4
   143  	AESENC	X5, X5
   144  	AESENC	X6, X6
   145  	AESENC	X7, X7
   146  
   147  	AESENC	X4, X4
   148  	AESENC	X5, X5
   149  	AESENC	X6, X6
   150  	AESENC	X7, X7
   151  
   152  	AESENC	X4, X4
   153  	AESENC	X5, X5
   154  	AESENC	X6, X6
   155  	AESENC	X7, X7
   156  
        	// fold the four lanes into X4 and return its low 32 bits
   157  	PXOR	X6, X4
   158  	PXOR	X7, X5
   159  	PXOR	X5, X4
   160  	MOVL	X4, (DX)
   161  	RET
   162  
   163  aes65plus:
   164  	// make 3 more starting seeds
   165  	MOVO	X1, X2
   166  	MOVO	X1, X3
   167  	PXOR	·aeskeysched+16(SB), X1
   168  	PXOR	·aeskeysched+32(SB), X2
   169  	PXOR	·aeskeysched+48(SB), X3
   170  	AESENC	X1, X1
   171  	AESENC	X2, X2
   172  	AESENC	X3, X3
   173  
   174  	// start with last (possibly overlapping) block
        	// (the 64 bytes ending at p+size)
   175  	MOVOU	-64(AX)(BX*1), X4
   176  	MOVOU	-48(AX)(BX*1), X5
   177  	MOVOU	-32(AX)(BX*1), X6
   178  	MOVOU	-16(AX)(BX*1), X7
   179  
   180  	// scramble state once
        	// (X4-X7 are the running state; the four seeds are the round keys)
   181  	AESENC	X0, X4
   182  	AESENC	X1, X5
   183  	AESENC	X2, X6
   184  	AESENC	X3, X7
   185  
   186  	// compute number of remaining 64-byte blocks
        	// BX = (size-1)/64, which is at least 1 because size >= 65 here
   187  	DECL	BX
   188  	SHRL	$6, BX
   189  
   190  aesloop:
   191  	// scramble state, xor in a block
        	// (these loads overwrite X0-X3; the seeds they held were already
        	// consumed above)
   192  	MOVOU	(AX), X0
   193  	MOVOU	16(AX), X1
   194  	MOVOU	32(AX), X2
   195  	MOVOU	48(AX), X3
   196  	AESENC	X0, X4
   197  	AESENC	X1, X5
   198  	AESENC	X2, X6
   199  	AESENC	X3, X7
   200  
   201  	// scramble state
   202  	AESENC	X4, X4
   203  	AESENC	X5, X5
   204  	AESENC	X6, X6
   205  	AESENC	X7, X7
   206  
   207  	ADDL	$64, AX	// advance to the next 64-byte block
   208  	DECL	BX
   209  	JNE	aesloop	// repeat until the block count reaches zero
   210  
   211  	// 3 more scrambles to finish
   212  	AESENC	X4, X4
   213  	AESENC	X5, X5
   214  	AESENC	X6, X6
   215  	AESENC	X7, X7
   216  
   217  	AESENC	X4, X4
   218  	AESENC	X5, X5
   219  	AESENC	X6, X6
   220  	AESENC	X7, X7
   221  
   222  	AESENC	X4, X4
   223  	AESENC	X5, X5
   224  	AESENC	X6, X6
   225  	AESENC	X7, X7
   226  
        	// fold the four lanes into X4 and return its low 32 bits
   227  	PXOR	X6, X4
   228  	PXOR	X7, X5
   229  	PXOR	X5, X4
   230  	MOVL	X4, (DX)
   231  	RET
   232  
   233  // simple mask to get rid of data in the high part of the register.
        //
        // masks<> holds 16 entries of 16 bytes each. Entry n (at offset n*16) has
        // its low n bytes set to 0xff and the remaining bytes zero; each DATA line
        // below stores one little-endian 32-bit word of an entry. memHashAES ANDs
        // a 16-byte load with the entry for its size (1-15) to clear the bytes
        // past the end of the data. Entry 0 is never indexed there, because size 0
        // branches away before the table is used.
   234  DATA masks<>+0x00(SB)/4, $0x00000000	// entry 0
   235  DATA masks<>+0x04(SB)/4, $0x00000000
   236  DATA masks<>+0x08(SB)/4, $0x00000000
   237  DATA masks<>+0x0c(SB)/4, $0x00000000
   238  
   239  DATA masks<>+0x10(SB)/4, $0x000000ff	// entry 1
   240  DATA masks<>+0x14(SB)/4, $0x00000000
   241  DATA masks<>+0x18(SB)/4, $0x00000000
   242  DATA masks<>+0x1c(SB)/4, $0x00000000
   243  
   244  DATA masks<>+0x20(SB)/4, $0x0000ffff	// entry 2
   245  DATA masks<>+0x24(SB)/4, $0x00000000
   246  DATA masks<>+0x28(SB)/4, $0x00000000
   247  DATA masks<>+0x2c(SB)/4, $0x00000000
   248  
   249  DATA masks<>+0x30(SB)/4, $0x00ffffff	// entry 3
   250  DATA masks<>+0x34(SB)/4, $0x00000000
   251  DATA masks<>+0x38(SB)/4, $0x00000000
   252  DATA masks<>+0x3c(SB)/4, $0x00000000
   253  
   254  DATA masks<>+0x40(SB)/4, $0xffffffff	// entry 4
   255  DATA masks<>+0x44(SB)/4, $0x00000000
   256  DATA masks<>+0x48(SB)/4, $0x00000000
   257  DATA masks<>+0x4c(SB)/4, $0x00000000
   258  
   259  DATA masks<>+0x50(SB)/4, $0xffffffff	// entry 5
   260  DATA masks<>+0x54(SB)/4, $0x000000ff
   261  DATA masks<>+0x58(SB)/4, $0x00000000
   262  DATA masks<>+0x5c(SB)/4, $0x00000000
   263  
   264  DATA masks<>+0x60(SB)/4, $0xffffffff	// entry 6
   265  DATA masks<>+0x64(SB)/4, $0x0000ffff
   266  DATA masks<>+0x68(SB)/4, $0x00000000
   267  DATA masks<>+0x6c(SB)/4, $0x00000000
   268  
   269  DATA masks<>+0x70(SB)/4, $0xffffffff	// entry 7
   270  DATA masks<>+0x74(SB)/4, $0x00ffffff
   271  DATA masks<>+0x78(SB)/4, $0x00000000
   272  DATA masks<>+0x7c(SB)/4, $0x00000000
   273  
   274  DATA masks<>+0x80(SB)/4, $0xffffffff	// entry 8
   275  DATA masks<>+0x84(SB)/4, $0xffffffff
   276  DATA masks<>+0x88(SB)/4, $0x00000000
   277  DATA masks<>+0x8c(SB)/4, $0x00000000
   278  
   279  DATA masks<>+0x90(SB)/4, $0xffffffff	// entry 9
   280  DATA masks<>+0x94(SB)/4, $0xffffffff
   281  DATA masks<>+0x98(SB)/4, $0x000000ff
   282  DATA masks<>+0x9c(SB)/4, $0x00000000
   283  
   284  DATA masks<>+0xa0(SB)/4, $0xffffffff	// entry 10
   285  DATA masks<>+0xa4(SB)/4, $0xffffffff
   286  DATA masks<>+0xa8(SB)/4, $0x0000ffff
   287  DATA masks<>+0xac(SB)/4, $0x00000000
   288  
   289  DATA masks<>+0xb0(SB)/4, $0xffffffff	// entry 11
   290  DATA masks<>+0xb4(SB)/4, $0xffffffff
   291  DATA masks<>+0xb8(SB)/4, $0x00ffffff
   292  DATA masks<>+0xbc(SB)/4, $0x00000000
   293  
   294  DATA masks<>+0xc0(SB)/4, $0xffffffff	// entry 12
   295  DATA masks<>+0xc4(SB)/4, $0xffffffff
   296  DATA masks<>+0xc8(SB)/4, $0xffffffff
   297  DATA masks<>+0xcc(SB)/4, $0x00000000
   298  
   299  DATA masks<>+0xd0(SB)/4, $0xffffffff	// entry 13
   300  DATA masks<>+0xd4(SB)/4, $0xffffffff
   301  DATA masks<>+0xd8(SB)/4, $0xffffffff
   302  DATA masks<>+0xdc(SB)/4, $0x000000ff
   303  
   304  DATA masks<>+0xe0(SB)/4, $0xffffffff	// entry 14
   305  DATA masks<>+0xe4(SB)/4, $0xffffffff
   306  DATA masks<>+0xe8(SB)/4, $0xffffffff
   307  DATA masks<>+0xec(SB)/4, $0x0000ffff
   308  
   309  DATA masks<>+0xf0(SB)/4, $0xffffffff	// entry 15
   310  DATA masks<>+0xf4(SB)/4, $0xffffffff
   311  DATA masks<>+0xf8(SB)/4, $0xffffffff
   312  DATA masks<>+0xfc(SB)/4, $0x00ffffff
   313  
   314  GLOBL masks<>(SB),RODATA,$256	// 16 entries x 16 bytes, read-only
   315  
   316  // these are arguments to pshufb. They move data down from
   317  // the high bytes of the register to the low bytes of the register.
   318  // index is how many bytes to move.
        //
        // shifts<> holds 16 entries of 16 bytes each; each DATA line below stores
        // one little-endian 32-bit word of an entry. In entry n (at offset n*16),
        // control bytes 0..n-1 hold the source indices 16-n..15 and the remaining
        // control bytes are 0xff, for which PSHUFB writes a zero byte. memHashAES
        // uses the entry for its size (1-15) on a load that ends at the last data
        // byte. Entry 0 is all zero and is never indexed there, because size 0
        // branches away before the table is used.
   319  DATA shifts<>+0x00(SB)/4, $0x00000000	// entry 0
   320  DATA shifts<>+0x04(SB)/4, $0x00000000
   321  DATA shifts<>+0x08(SB)/4, $0x00000000
   322  DATA shifts<>+0x0c(SB)/4, $0x00000000
   323  
   324  DATA shifts<>+0x10(SB)/4, $0xffffff0f	// entry 1
   325  DATA shifts<>+0x14(SB)/4, $0xffffffff
   326  DATA shifts<>+0x18(SB)/4, $0xffffffff
   327  DATA shifts<>+0x1c(SB)/4, $0xffffffff
   328  
   329  DATA shifts<>+0x20(SB)/4, $0xffff0f0e	// entry 2
   330  DATA shifts<>+0x24(SB)/4, $0xffffffff
   331  DATA shifts<>+0x28(SB)/4, $0xffffffff
   332  DATA shifts<>+0x2c(SB)/4, $0xffffffff
   333  
   334  DATA shifts<>+0x30(SB)/4, $0xff0f0e0d	// entry 3
   335  DATA shifts<>+0x34(SB)/4, $0xffffffff
   336  DATA shifts<>+0x38(SB)/4, $0xffffffff
   337  DATA shifts<>+0x3c(SB)/4, $0xffffffff
   338  
   339  DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c	// entry 4
   340  DATA shifts<>+0x44(SB)/4, $0xffffffff
   341  DATA shifts<>+0x48(SB)/4, $0xffffffff
   342  DATA shifts<>+0x4c(SB)/4, $0xffffffff
   343  
   344  DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b	// entry 5
   345  DATA shifts<>+0x54(SB)/4, $0xffffff0f
   346  DATA shifts<>+0x58(SB)/4, $0xffffffff
   347  DATA shifts<>+0x5c(SB)/4, $0xffffffff
   348  
   349  DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a	// entry 6
   350  DATA shifts<>+0x64(SB)/4, $0xffff0f0e
   351  DATA shifts<>+0x68(SB)/4, $0xffffffff
   352  DATA shifts<>+0x6c(SB)/4, $0xffffffff
   353  
   354  DATA shifts<>+0x70(SB)/4, $0x0c0b0a09	// entry 7
   355  DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
   356  DATA shifts<>+0x78(SB)/4, $0xffffffff
   357  DATA shifts<>+0x7c(SB)/4, $0xffffffff
   358  
   359  DATA shifts<>+0x80(SB)/4, $0x0b0a0908	// entry 8
   360  DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
   361  DATA shifts<>+0x88(SB)/4, $0xffffffff
   362  DATA shifts<>+0x8c(SB)/4, $0xffffffff
   363  
   364  DATA shifts<>+0x90(SB)/4, $0x0a090807	// entry 9
   365  DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
   366  DATA shifts<>+0x98(SB)/4, $0xffffff0f
   367  DATA shifts<>+0x9c(SB)/4, $0xffffffff
   368  
   369  DATA shifts<>+0xa0(SB)/4, $0x09080706	// entry 10
   370  DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
   371  DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
   372  DATA shifts<>+0xac(SB)/4, $0xffffffff
   373  
   374  DATA shifts<>+0xb0(SB)/4, $0x08070605	// entry 11
   375  DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
   376  DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
   377  DATA shifts<>+0xbc(SB)/4, $0xffffffff
   378  
   379  DATA shifts<>+0xc0(SB)/4, $0x07060504	// entry 12
   380  DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
   381  DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
   382  DATA shifts<>+0xcc(SB)/4, $0xffffffff
   383  
   384  DATA shifts<>+0xd0(SB)/4, $0x06050403	// entry 13
   385  DATA shifts<>+0xd4(SB)/4, $0x0a090807
   386  DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
   387  DATA shifts<>+0xdc(SB)/4, $0xffffff0f
   388  
   389  DATA shifts<>+0xe0(SB)/4, $0x05040302	// entry 14
   390  DATA shifts<>+0xe4(SB)/4, $0x09080706
   391  DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
   392  DATA shifts<>+0xec(SB)/4, $0xffff0f0e
   393  
   394  DATA shifts<>+0xf0(SB)/4, $0x04030201	// entry 15
   395  DATA shifts<>+0xf4(SB)/4, $0x08070605
   396  DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
   397  DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
   398  
   399  GLOBL shifts<>(SB),RODATA,$256	// 16 entries x 16 bytes, read-only
   400  
   401  TEXT ·checkMasksAndShiftsAlignment(SB),NOSPLIT,$0-1
   402  	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
        	// boundaries. memHashAES uses both tables as memory operands of
        	// PAND and PSHUFB. Takes no arguments and writes a one-byte result;
        	// presumably declared in Go as returning bool - TODO confirm.
   403  	MOVL	$masks<>(SB), AX	// AX = address of masks<>
   404  	MOVL	$shifts<>(SB), BX	// BX = address of shifts<>
   405  	ORL	BX, AX	// a low bit set in either address survives the OR
   406  	TESTL	$15, AX	// low 4 bits all zero means both addresses are multiples of 16
   407  	SETEQ   ret+0(FP)	// result byte is 1 when both are aligned, 0 otherwise
   408  	RET
   409  

View as plain text