// Code generated by command: go run sha512block_amd64_asm.go -out ../sha512block_amd64.s. DO NOT EDIT.

//go:build !purego

#include "textflag.h"

// func blockAVX2(dig *Digest, p []byte)
// Requires: AVX, AVX2, BMI2
//
// blockAVX2 folds every complete 128-byte block of p into the eight 64-bit
// words stored at the start of *dig. len(p) is rounded down to a multiple of
// 128; trailing bytes are ignored, and nothing is done when fewer than 128
// bytes are supplied.
//
// Register use:
//   SI        pointer to the eight state words
//   DI        input pointer at block entry; reused as a scratch register
//             inside the rounds (the pointer is saved at 40(SP))
//   BP        cursor into the ·_K table (presumably the SHA-512 round
//             constants; the table is defined outside this file)
//   AX BX CX R8 DX R9 R10 R11
//             working variables, loaded from state words 0..7; in FIPS 180-4
//             terms they start as a b c d e f g h and rotate roles each round
//   R12-R15   round temporaries
//   Y4-Y7     sixteen message-schedule words, four per register
//   Y0-Y3, Y8 vector temporaries
//   Y9        byte-swap control for VPSHUFB
//
// Stack frame (56 bytes):
//   0..31(SP) schedule word + constant for the four rounds in flight
//   32(SP)    inner loop counter
//   40(SP)    saved input pointer for the current block
//   48(SP)    end-of-input pointer
TEXT ·blockAVX2(SB), NOSPLIT, $56-32
	MOVQ dig+0(FP), SI
	MOVQ p_base+8(FP), DI
	MOVQ p_len+16(FP), DX

	// Round the length down to a multiple of 128 bytes; bail out if that is 0.
	SHRQ $0x07, DX
	SHLQ $0x07, DX
	JZ done_hash

	// 48(SP) = address one past the last complete block.
	ADDQ DI, DX
	MOVQ DX, 48(SP)

	// Load the eight state words into the working registers.
	MOVQ (SI), AX
	MOVQ 8(SI), BX
	MOVQ 16(SI), CX
	MOVQ 24(SI), R8
	MOVQ 32(SI), DX
	MOVQ 40(SI), R9
	MOVQ 48(SI), R10
	MOVQ 56(SI), R11
	VMOVDQU PSHUFFLE_BYTE_FLIP_MASK<>+0(SB), Y9

	// loop0: one iteration per 128-byte input block.
loop0:
	MOVQ $·_K+0(SB), BP

	// Load the block as sixteen 64-bit words and byte-swap each word.
	VMOVDQU (DI), Y4
	VPSHUFB Y9, Y4, Y4
	VMOVDQU 32(DI), Y5
	VPSHUFB Y9, Y5, Y5
	VMOVDQU 64(DI), Y6
	VPSHUFB Y9, Y6, Y6
	VMOVDQU 96(DI), Y7
	VPSHUFB Y9, Y7, Y7

	// DI is about to be clobbered by the rounds; keep the input pointer.
	MOVQ DI, 40(SP)

	// loop1 runs 4 times; each pass performs 16 rounds and, interleaved with
	// them, replaces Y4..Y7 with the next 16 schedule words.
	MOVQ $0x00000004, 32(SP)

loop1:
	// Group 1 of 4: spill Y4 + K[BP+0..31] for the next four rounds and
	// compute the replacement contents of Y4.
	VPADDQ (BP), Y4, Y0
	VMOVDQU Y0, (SP)
	VPERM2F128 $0x03, Y6, Y7, Y0
	VPALIGNR $0x08, Y6, Y0, Y0
	VPADDQ Y4, Y0, Y0
	VPERM2F128 $0x03, Y4, Y5, Y1
	VPALIGNR $0x08, Y4, Y1, Y1

	// sigma0 pieces of Y1: Y3 = Y1 ror 1, Y8 = Y1 >> 7.
	VPSRLQ $0x01, Y1, Y2
	VPSLLQ $0x3f, Y1, Y3
	VPOR Y2, Y3, Y3
	VPSRLQ $0x07, Y1, Y8

	// Scalar round using (SP). With a=AX, e=DX, h=R11 here:
	//   R13 = (e ror 41) ^ (e ror 18) ^ (e ror 14)
	//   R14 = (a ror 34) ^ (a ror 39) ^ (a ror 28)
	//   R15 = ((f ^ g) & e) ^ g
	//   DI  = ((a | c) & b) | (a & c)
	MOVQ AX, DI
	RORXQ $0x29, DX, R13
	RORXQ $0x12, DX, R14
	ADDQ (SP), R11
	ORQ CX, DI
	MOVQ R9, R15
	RORXQ $0x22, AX, R12
	XORQ R14, R13
	XORQ R10, R15
	RORXQ $0x0e, DX, R14
	ANDQ DX, R15
	XORQ R14, R13
	RORXQ $0x27, AX, R14
	ADDQ R11, R8
	ANDQ BX, DI
	XORQ R12, R14
	RORXQ $0x1c, AX, R12
	XORQ R10, R15
	XORQ R12, R14
	MOVQ AX, R12
	ANDQ CX, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, R11
	ADDQ R15, R8
	ADDQ R15, R11
	ADDQ DI, R11

	// Finish sigma0 (add Y1 ror 8), accumulate into Y0, and start sigma1 on
	// the upper half of Y7 (Y8 = x >> 6).
	VPSRLQ $0x08, Y1, Y2
	VPSLLQ $0x38, Y1, Y1
	VPOR Y2, Y1, Y1
	VPXOR Y8, Y3, Y3
	VPXOR Y1, Y3, Y1
	VPADDQ Y1, Y0, Y0
	VPERM2F128 $0x00, Y0, Y0, Y4
	VPAND MASK_YMM_LO<>+0(SB), Y0, Y0
	VPERM2F128 $0x11, Y7, Y7, Y2
	VPSRLQ $0x06, Y2, Y8

	// Scalar round using 8(SP).
	MOVQ R11, DI
	RORXQ $0x29, R8, R13
	RORXQ $0x12, R8, R14
	ADDQ 8(SP), R10
	ORQ BX, DI
	MOVQ DX, R15
	RORXQ $0x22, R11, R12
	XORQ R14, R13
	XORQ R9, R15
	RORXQ $0x0e, R8, R14
	XORQ R14, R13
	RORXQ $0x27, R11, R14
	ANDQ R8, R15
	ADDQ R10, CX
	ANDQ AX, DI
	XORQ R12, R14
	RORXQ $0x1c, R11, R12
	XORQ R9, R15
	XORQ R12, R14
	MOVQ R11, R12
	ANDQ BX, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, R10
	ADDQ R15, CX
	ADDQ R15, R10
	ADDQ DI, R10

	// sigma1: (x ror 19) ^ (x ror 61) ^ (x >> 6), added into Y4.
	VPSRLQ $0x13, Y2, Y3
	VPSLLQ $0x2d, Y2, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPSRLQ $0x3d, Y2, Y3
	VPSLLQ $0x03, Y2, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPADDQ Y8, Y4, Y4
	VPSRLQ $0x06, Y4, Y8

	// Scalar round using 16(SP).
	MOVQ R10, DI
	RORXQ $0x29, CX, R13
	ADDQ 16(SP), R9
	RORXQ $0x12, CX, R14
	ORQ AX, DI
	MOVQ R8, R15
	XORQ DX, R15
	RORXQ $0x22, R10, R12
	XORQ R14, R13
	ANDQ CX, R15
	RORXQ $0x0e, CX, R14
	ADDQ R9, BX
	ANDQ R11, DI
	XORQ R14, R13
	RORXQ $0x27, R10, R14
	XORQ DX, R15
	XORQ R12, R14
	RORXQ $0x1c, R10, R12
	XORQ R12, R14
	MOVQ R10, R12
	ANDQ AX, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, R9
	ADDQ R15, BX
	ADDQ R15, R9
	ADDQ DI, R9

	// Second sigma1 pass on the freshly computed words, then blend the upper
	// 128 bits of the result into Y4.
	VPSRLQ $0x13, Y4, Y3
	VPSLLQ $0x2d, Y4, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPSRLQ $0x3d, Y4, Y3
	VPSLLQ $0x03, Y4, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPADDQ Y8, Y0, Y2
	VPBLENDD $0xf0, Y2, Y4, Y4

	// Scalar round using 24(SP).
	MOVQ R9, DI
	RORXQ $0x29, BX, R13
	RORXQ $0x12, BX, R14
	ADDQ 24(SP), DX
	ORQ R11, DI
	MOVQ CX, R15
	RORXQ $0x22, R9, R12
	XORQ R14, R13
	XORQ R8, R15
	RORXQ $0x0e, BX, R14
	ANDQ BX, R15
	ADDQ DX, AX
	ANDQ R10, DI
	XORQ R14, R13
	XORQ R8, R15
	RORXQ $0x27, R9, R14
	ADDQ R13, R15
	XORQ R12, R14
	ADDQ R15, AX
	RORXQ $0x1c, R9, R12
	XORQ R12, R14
	MOVQ R9, R12
	ANDQ R11, R12
	ORQ R12, DI
	ADDQ R14, DX
	ADDQ R15, DX
	ADDQ DI, DX

	// Group 2 of 4: same pattern with Y5 + K[BP+32..63]; Y5 is replaced.
	VPADDQ 32(BP), Y5, Y0
	VMOVDQU Y0, (SP)
	VPERM2F128 $0x03, Y7, Y4, Y0
	VPALIGNR $0x08, Y7, Y0, Y0
	VPADDQ Y5, Y0, Y0
	VPERM2F128 $0x03, Y5, Y6, Y1
	VPALIGNR $0x08, Y5, Y1, Y1
	VPSRLQ $0x01, Y1, Y2
	VPSLLQ $0x3f, Y1, Y3
	VPOR Y2, Y3, Y3
	VPSRLQ $0x07, Y1, Y8

	// Scalar round using (SP).
	MOVQ DX, DI
	RORXQ $0x29, AX, R13
	RORXQ $0x12, AX, R14
	ADDQ (SP), R8
	ORQ R10, DI
	MOVQ BX, R15
	RORXQ $0x22, DX, R12
	XORQ R14, R13
	XORQ CX, R15
	RORXQ $0x0e, AX, R14
	ANDQ AX, R15
	XORQ R14, R13
	RORXQ $0x27, DX, R14
	ADDQ R8, R11
	ANDQ R9, DI
	XORQ R12, R14
	RORXQ $0x1c, DX, R12
	XORQ CX, R15
	XORQ R12, R14
	MOVQ DX, R12
	ANDQ R10, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, R8
	ADDQ R15, R11
	ADDQ R15, R8
	ADDQ DI, R8
	VPSRLQ $0x08, Y1, Y2
	VPSLLQ $0x38, Y1, Y1
	VPOR Y2, Y1, Y1
	VPXOR Y8, Y3, Y3
	VPXOR Y1, Y3, Y1
	VPADDQ Y1, Y0, Y0
	VPERM2F128 $0x00, Y0, Y0, Y5
	VPAND MASK_YMM_LO<>+0(SB), Y0, Y0
	VPERM2F128 $0x11, Y4, Y4, Y2
	VPSRLQ $0x06, Y2, Y8

	// Scalar round using 8(SP).
	MOVQ R8, DI
	RORXQ $0x29, R11, R13
	RORXQ $0x12, R11, R14
	ADDQ 8(SP), CX
	ORQ R9, DI
	MOVQ AX, R15
	RORXQ $0x22, R8, R12
	XORQ R14, R13
	XORQ BX, R15
	RORXQ $0x0e, R11, R14
	XORQ R14, R13
	RORXQ $0x27, R8, R14
	ANDQ R11, R15
	ADDQ CX, R10
	ANDQ DX, DI
	XORQ R12, R14
	RORXQ $0x1c, R8, R12
	XORQ BX, R15
	XORQ R12, R14
	MOVQ R8, R12
	ANDQ R9, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, CX
	ADDQ R15, R10
	ADDQ R15, CX
	ADDQ DI, CX
	VPSRLQ $0x13, Y2, Y3
	VPSLLQ $0x2d, Y2, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPSRLQ $0x3d, Y2, Y3
	VPSLLQ $0x03, Y2, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPADDQ Y8, Y5, Y5
	VPSRLQ $0x06, Y5, Y8

	// Scalar round using 16(SP).
	MOVQ CX, DI
	RORXQ $0x29, R10, R13
	ADDQ 16(SP), BX
	RORXQ $0x12, R10, R14
	ORQ DX, DI
	MOVQ R11, R15
	XORQ AX, R15
	RORXQ $0x22, CX, R12
	XORQ R14, R13
	ANDQ R10, R15
	RORXQ $0x0e, R10, R14
	ADDQ BX, R9
	ANDQ R8, DI
	XORQ R14, R13
	RORXQ $0x27, CX, R14
	XORQ AX, R15
	XORQ R12, R14
	RORXQ $0x1c, CX, R12
	XORQ R12, R14
	MOVQ CX, R12
	ANDQ DX, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, BX
	ADDQ R15, R9
	ADDQ R15, BX
	ADDQ DI, BX
	VPSRLQ $0x13, Y5, Y3
	VPSLLQ $0x2d, Y5, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPSRLQ $0x3d, Y5, Y3
	VPSLLQ $0x03, Y5, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPADDQ Y8, Y0, Y2
	VPBLENDD $0xf0, Y2, Y5, Y5

	// Scalar round using 24(SP).
	MOVQ BX, DI
	RORXQ $0x29, R9, R13
	RORXQ $0x12, R9, R14
	ADDQ 24(SP), AX
	ORQ R8, DI
	MOVQ R10, R15
	RORXQ $0x22, BX, R12
	XORQ R14, R13
	XORQ R11, R15
	RORXQ $0x0e, R9, R14
	ANDQ R9, R15
	ADDQ AX, DX
	ANDQ CX, DI
	XORQ R14, R13
	XORQ R11, R15
	RORXQ $0x27, BX, R14
	ADDQ R13, R15
	XORQ R12, R14
	ADDQ R15, DX
	RORXQ $0x1c, BX, R12
	XORQ R12, R14
	MOVQ BX, R12
	ANDQ R8, R12
	ORQ R12, DI
	ADDQ R14, AX
	ADDQ R15, AX
	ADDQ DI, AX

	// Group 3 of 4: same pattern with Y6 + K[BP+64..95]; Y6 is replaced.
	VPADDQ 64(BP), Y6, Y0
	VMOVDQU Y0, (SP)
	VPERM2F128 $0x03, Y4, Y5, Y0
	VPALIGNR $0x08, Y4, Y0, Y0
	VPADDQ Y6, Y0, Y0
	VPERM2F128 $0x03, Y6, Y7, Y1
	VPALIGNR $0x08, Y6, Y1, Y1
	VPSRLQ $0x01, Y1, Y2
	VPSLLQ $0x3f, Y1, Y3
	VPOR Y2, Y3, Y3
	VPSRLQ $0x07, Y1, Y8

	// Scalar round using (SP).
	MOVQ AX, DI
	RORXQ $0x29, DX, R13
	RORXQ $0x12, DX, R14
	ADDQ (SP), R11
	ORQ CX, DI
	MOVQ R9, R15
	RORXQ $0x22, AX, R12
	XORQ R14, R13
	XORQ R10, R15
	RORXQ $0x0e, DX, R14
	ANDQ DX, R15
	XORQ R14, R13
	RORXQ $0x27, AX, R14
	ADDQ R11, R8
	ANDQ BX, DI
	XORQ R12, R14
	RORXQ $0x1c, AX, R12
	XORQ R10, R15
	XORQ R12, R14
	MOVQ AX, R12
	ANDQ CX, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, R11
	ADDQ R15, R8
	ADDQ R15, R11
	ADDQ DI, R11
	VPSRLQ $0x08, Y1, Y2
	VPSLLQ $0x38, Y1, Y1
	VPOR Y2, Y1, Y1
	VPXOR Y8, Y3, Y3
	VPXOR Y1, Y3, Y1
	VPADDQ Y1, Y0, Y0
	VPERM2F128 $0x00, Y0, Y0, Y6
	VPAND MASK_YMM_LO<>+0(SB), Y0, Y0
	VPERM2F128 $0x11, Y5, Y5, Y2
	VPSRLQ $0x06, Y2, Y8

	// Scalar round using 8(SP).
	MOVQ R11, DI
	RORXQ $0x29, R8, R13
	RORXQ $0x12, R8, R14
	ADDQ 8(SP), R10
	ORQ BX, DI
	MOVQ DX, R15
	RORXQ $0x22, R11, R12
	XORQ R14, R13
	XORQ R9, R15
	RORXQ $0x0e, R8, R14
	XORQ R14, R13
	RORXQ $0x27, R11, R14
	ANDQ R8, R15
	ADDQ R10, CX
	ANDQ AX, DI
	XORQ R12, R14
	RORXQ $0x1c, R11, R12
	XORQ R9, R15
	XORQ R12, R14
	MOVQ R11, R12
	ANDQ BX, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, R10
	ADDQ R15, CX
	ADDQ R15, R10
	ADDQ DI, R10
	VPSRLQ $0x13, Y2, Y3
	VPSLLQ $0x2d, Y2, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPSRLQ $0x3d, Y2, Y3
	VPSLLQ $0x03, Y2, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPADDQ Y8, Y6, Y6
	VPSRLQ $0x06, Y6, Y8

	// Scalar round using 16(SP).
	MOVQ R10, DI
	RORXQ $0x29, CX, R13
	ADDQ 16(SP), R9
	RORXQ $0x12, CX, R14
	ORQ AX, DI
	MOVQ R8, R15
	XORQ DX, R15
	RORXQ $0x22, R10, R12
	XORQ R14, R13
	ANDQ CX, R15
	RORXQ $0x0e, CX, R14
	ADDQ R9, BX
	ANDQ R11, DI
	XORQ R14, R13
	RORXQ $0x27, R10, R14
	XORQ DX, R15
	XORQ R12, R14
	RORXQ $0x1c, R10, R12
	XORQ R12, R14
	MOVQ R10, R12
	ANDQ AX, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, R9
	ADDQ R15, BX
	ADDQ R15, R9
	ADDQ DI, R9
	VPSRLQ $0x13, Y6, Y3
	VPSLLQ $0x2d, Y6, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPSRLQ $0x3d, Y6, Y3
	VPSLLQ $0x03, Y6, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPADDQ Y8, Y0, Y2
	VPBLENDD $0xf0, Y2, Y6, Y6

	// Scalar round using 24(SP).
	MOVQ R9, DI
	RORXQ $0x29, BX, R13
	RORXQ $0x12, BX, R14
	ADDQ 24(SP), DX
	ORQ R11, DI
	MOVQ CX, R15
	RORXQ $0x22, R9, R12
	XORQ R14, R13
	XORQ R8, R15
	RORXQ $0x0e, BX, R14
	ANDQ BX, R15
	ADDQ DX, AX
	ANDQ R10, DI
	XORQ R14, R13
	XORQ R8, R15
	RORXQ $0x27, R9, R14
	ADDQ R13, R15
	XORQ R12, R14
	ADDQ R15, AX
	RORXQ $0x1c, R9, R12
	XORQ R12, R14
	MOVQ R9, R12
	ANDQ R11, R12
	ORQ R12, DI
	ADDQ R14, DX
	ADDQ R15, DX
	ADDQ DI, DX

	// Group 4 of 4: same pattern with Y7 + K[BP+96..127]; Y7 is replaced.
	// BP then advances by 128 bytes (16 table entries) for the next pass.
	VPADDQ 96(BP), Y7, Y0
	VMOVDQU Y0, (SP)
	ADDQ $0x80, BP
	VPERM2F128 $0x03, Y5, Y6, Y0
	VPALIGNR $0x08, Y5, Y0, Y0
	VPADDQ Y7, Y0, Y0
	VPERM2F128 $0x03, Y7, Y4, Y1
	VPALIGNR $0x08, Y7, Y1, Y1
	VPSRLQ $0x01, Y1, Y2
	VPSLLQ $0x3f, Y1, Y3
	VPOR Y2, Y3, Y3
	VPSRLQ $0x07, Y1, Y8

	// Scalar round using (SP).
	MOVQ DX, DI
	RORXQ $0x29, AX, R13
	RORXQ $0x12, AX, R14
	ADDQ (SP), R8
	ORQ R10, DI
	MOVQ BX, R15
	RORXQ $0x22, DX, R12
	XORQ R14, R13
	XORQ CX, R15
	RORXQ $0x0e, AX, R14
	ANDQ AX, R15
	XORQ R14, R13
	RORXQ $0x27, DX, R14
	ADDQ R8, R11
	ANDQ R9, DI
	XORQ R12, R14
	RORXQ $0x1c, DX, R12
	XORQ CX, R15
	XORQ R12, R14
	MOVQ DX, R12
	ANDQ R10, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, R8
	ADDQ R15, R11
	ADDQ R15, R8
	ADDQ DI, R8
	VPSRLQ $0x08, Y1, Y2
	VPSLLQ $0x38, Y1, Y1
	VPOR Y2, Y1, Y1
	VPXOR Y8, Y3, Y3
	VPXOR Y1, Y3, Y1
	VPADDQ Y1, Y0, Y0
	VPERM2F128 $0x00, Y0, Y0, Y7
	VPAND MASK_YMM_LO<>+0(SB), Y0, Y0
	VPERM2F128 $0x11, Y6, Y6, Y2
	VPSRLQ $0x06, Y2, Y8

	// Scalar round using 8(SP).
	MOVQ R8, DI
	RORXQ $0x29, R11, R13
	RORXQ $0x12, R11, R14
	ADDQ 8(SP), CX
	ORQ R9, DI
	MOVQ AX, R15
	RORXQ $0x22, R8, R12
	XORQ R14, R13
	XORQ BX, R15
	RORXQ $0x0e, R11, R14
	XORQ R14, R13
	RORXQ $0x27, R8, R14
	ANDQ R11, R15
	ADDQ CX, R10
	ANDQ DX, DI
	XORQ R12, R14
	RORXQ $0x1c, R8, R12
	XORQ BX, R15
	XORQ R12, R14
	MOVQ R8, R12
	ANDQ R9, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, CX
	ADDQ R15, R10
	ADDQ R15, CX
	ADDQ DI, CX
	VPSRLQ $0x13, Y2, Y3
	VPSLLQ $0x2d, Y2, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPSRLQ $0x3d, Y2, Y3
	VPSLLQ $0x03, Y2, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPADDQ Y8, Y7, Y7
	VPSRLQ $0x06, Y7, Y8

	// Scalar round using 16(SP).
	MOVQ CX, DI
	RORXQ $0x29, R10, R13
	ADDQ 16(SP), BX
	RORXQ $0x12, R10, R14
	ORQ DX, DI
	MOVQ R11, R15
	XORQ AX, R15
	RORXQ $0x22, CX, R12
	XORQ R14, R13
	ANDQ R10, R15
	RORXQ $0x0e, R10, R14
	ADDQ BX, R9
	ANDQ R8, DI
	XORQ R14, R13
	RORXQ $0x27, CX, R14
	XORQ AX, R15
	XORQ R12, R14
	RORXQ $0x1c, CX, R12
	XORQ R12, R14
	MOVQ CX, R12
	ANDQ DX, R12
	ADDQ R13, R15
	ORQ R12, DI
	ADDQ R14, BX
	ADDQ R15, R9
	ADDQ R15, BX
	ADDQ DI, BX
	VPSRLQ $0x13, Y7, Y3
	VPSLLQ $0x2d, Y7, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPSRLQ $0x3d, Y7, Y3
	VPSLLQ $0x03, Y7, Y1
	VPOR Y1, Y3, Y3
	VPXOR Y3, Y8, Y8
	VPADDQ Y8, Y0, Y2
	VPBLENDD $0xf0, Y2, Y7, Y7

	// Scalar round using 24(SP).
	MOVQ BX, DI
	RORXQ $0x29, R9, R13
	RORXQ $0x12, R9, R14
	ADDQ 24(SP), AX
	ORQ R8, DI
	MOVQ R10, R15
	RORXQ $0x22, BX, R12
	XORQ R14, R13
	XORQ R11, R15
	RORXQ $0x0e, R9, R14
	ANDQ R9, R15
	ADDQ AX, DX
	ANDQ CX, DI
	XORQ R14, R13
	XORQ R11, R15
	RORXQ $0x27, BX, R14
	ADDQ R13, R15
	XORQ R12, R14
	ADDQ R15, DX
	RORXQ $0x1c, BX, R12
	XORQ R12, R14
	MOVQ BX, R12
	ANDQ R8, R12
	ORQ R12, DI
	ADDQ R14, AX
	ADDQ R15, AX
	ADDQ DI, AX
	SUBQ $0x01, 32(SP)
	JNE loop1

	// loop2 runs twice; each pass performs 8 rounds with no further schedule
	// expansion. The first pass consumes Y4/Y5, the second the words moved in
	// from Y6/Y7. Together with loop1 this gives 4*16 + 2*8 = 80 rounds.
	MOVQ $0x00000002, 32(SP)

loop2:
	// Four rounds on Y4 + K[BP+0..31]. In this loop the final "ADDQ DI, h" of
	// each round is deferred into the instruction stream of the next round.
	VPADDQ (BP), Y4, Y0
	VMOVDQU Y0, (SP)
	MOVQ R9, R15
	RORXQ $0x29, DX, R13
	RORXQ $0x12, DX, R14
	XORQ R10, R15
	XORQ R14, R13
	RORXQ $0x0e, DX, R14
	ANDQ DX, R15
	XORQ R14, R13
	RORXQ $0x22, AX, R12
	XORQ R10, R15
	RORXQ $0x27, AX, R14
	MOVQ AX, DI
	XORQ R12, R14
	RORXQ $0x1c, AX, R12
	ADDQ (SP), R11
	ORQ CX, DI
	XORQ R12, R14
	MOVQ AX, R12
	ANDQ BX, DI
	ANDQ CX, R12
	ADDQ R13, R15
	ADDQ R11, R8
	ORQ R12, DI
	ADDQ R14, R11
	ADDQ R15, R8
	ADDQ R15, R11
	MOVQ DX, R15
	RORXQ $0x29, R8, R13
	RORXQ $0x12, R8, R14
	XORQ R9, R15
	XORQ R14, R13
	RORXQ $0x0e, R8, R14
	ANDQ R8, R15
	ADDQ DI, R11
	XORQ R14, R13
	RORXQ $0x22, R11, R12
	XORQ R9, R15
	RORXQ $0x27, R11, R14
	MOVQ R11, DI
	XORQ R12, R14
	RORXQ $0x1c, R11, R12
	ADDQ 8(SP), R10
	ORQ BX, DI
	XORQ R12, R14
	MOVQ R11, R12
	ANDQ AX, DI
	ANDQ BX, R12
	ADDQ R13, R15
	ADDQ R10, CX
	ORQ R12, DI
	ADDQ R14, R10
	ADDQ R15, CX
	ADDQ R15, R10
	MOVQ R8, R15
	RORXQ $0x29, CX, R13
	RORXQ $0x12, CX, R14
	XORQ DX, R15
	XORQ R14, R13
	RORXQ $0x0e, CX, R14
	ANDQ CX, R15
	ADDQ DI, R10
	XORQ R14, R13
	RORXQ $0x22, R10, R12
	XORQ DX, R15
	RORXQ $0x27, R10, R14
	MOVQ R10, DI
	XORQ R12, R14
	RORXQ $0x1c, R10, R12
	ADDQ 16(SP), R9
	ORQ AX, DI
	XORQ R12, R14
	MOVQ R10, R12
	ANDQ R11, DI
	ANDQ AX, R12
	ADDQ R13, R15
	ADDQ R9, BX
	ORQ R12, DI
	ADDQ R14, R9
	ADDQ R15, BX
	ADDQ R15, R9
	MOVQ CX, R15
	RORXQ $0x29, BX, R13
	RORXQ $0x12, BX, R14
	XORQ R8, R15
	XORQ R14, R13
	RORXQ $0x0e, BX, R14
	ANDQ BX, R15
	ADDQ DI, R9
	XORQ R14, R13
	RORXQ $0x22, R9, R12
	XORQ R8, R15
	RORXQ $0x27, R9, R14
	MOVQ R9, DI
	XORQ R12, R14
	RORXQ $0x1c, R9, R12
	ADDQ 24(SP), DX
	ORQ R11, DI
	XORQ R12, R14
	MOVQ R9, R12
	ANDQ R10, DI
	ANDQ R11, R12
	ADDQ R13, R15
	ADDQ DX, AX
	ORQ R12, DI
	ADDQ R14, DX
	ADDQ R15, AX
	ADDQ R15, DX
	ADDQ DI, DX

	// Four rounds on Y5 + K[BP+32..63]; BP then advances by 64 bytes.
	VPADDQ 32(BP), Y5, Y0
	VMOVDQU Y0, (SP)
	ADDQ $0x40, BP
	MOVQ BX, R15
	RORXQ $0x29, AX, R13
	RORXQ $0x12, AX, R14
	XORQ CX, R15
	XORQ R14, R13
	RORXQ $0x0e, AX, R14
	ANDQ AX, R15
	XORQ R14, R13
	RORXQ $0x22, DX, R12
	XORQ CX, R15
	RORXQ $0x27, DX, R14
	MOVQ DX, DI
	XORQ R12, R14
	RORXQ $0x1c, DX, R12
	ADDQ (SP), R8
	ORQ R10, DI
	XORQ R12, R14
	MOVQ DX, R12
	ANDQ R9, DI
	ANDQ R10, R12
	ADDQ R13, R15
	ADDQ R8, R11
	ORQ R12, DI
	ADDQ R14, R8
	ADDQ R15, R11
	ADDQ R15, R8
	MOVQ AX, R15
	RORXQ $0x29, R11, R13
	RORXQ $0x12, R11, R14
	XORQ BX, R15
	XORQ R14, R13
	RORXQ $0x0e, R11, R14
	ANDQ R11, R15
	ADDQ DI, R8
	XORQ R14, R13
	RORXQ $0x22, R8, R12
	XORQ BX, R15
	RORXQ $0x27, R8, R14
	MOVQ R8, DI
	XORQ R12, R14
	RORXQ $0x1c, R8, R12
	ADDQ 8(SP), CX
	ORQ R9, DI
	XORQ R12, R14
	MOVQ R8, R12
	ANDQ DX, DI
	ANDQ R9, R12
	ADDQ R13, R15
	ADDQ CX, R10
	ORQ R12, DI
	ADDQ R14, CX
	ADDQ R15, R10
	ADDQ R15, CX
	MOVQ R11, R15
	RORXQ $0x29, R10, R13
	RORXQ $0x12, R10, R14
	XORQ AX, R15
	XORQ R14, R13
	RORXQ $0x0e, R10, R14
	ANDQ R10, R15
	ADDQ DI, CX
	XORQ R14, R13
	RORXQ $0x22, CX, R12
	XORQ AX, R15
	RORXQ $0x27, CX, R14
	MOVQ CX, DI
	XORQ R12, R14
	RORXQ $0x1c, CX, R12
	ADDQ 16(SP), BX
	ORQ DX, DI
	XORQ R12, R14
	MOVQ CX, R12
	ANDQ R8, DI
	ANDQ DX, R12
	ADDQ R13, R15
	ADDQ BX, R9
	ORQ R12, DI
	ADDQ R14, BX
	ADDQ R15, R9
	ADDQ R15, BX
	MOVQ R10, R15
	RORXQ $0x29, R9, R13
	RORXQ $0x12, R9, R14
	XORQ R11, R15
	XORQ R14, R13
	RORXQ $0x0e, R9, R14
	ANDQ R9, R15
	ADDQ DI, BX
	XORQ R14, R13
	RORXQ $0x22, BX, R12
	XORQ R11, R15
	RORXQ $0x27, BX, R14
	MOVQ BX, DI
	XORQ R12, R14
	RORXQ $0x1c, BX, R12
	ADDQ 24(SP), AX
	ORQ R8, DI
	XORQ R12, R14
	MOVQ BX, R12
	ANDQ CX, DI
	ANDQ R8, R12
	ADDQ R13, R15
	ADDQ AX, DX
	ORQ R12, DI
	ADDQ R14, AX
	ADDQ R15, DX
	ADDQ R15, AX
	ADDQ DI, AX

	// Shift the remaining schedule words down so the second pass uses them.
	VMOVDQU Y6, Y4
	VMOVDQU Y7, Y5
	SUBQ $0x01, 32(SP)
	JNE loop2

	// Add the working variables into the state and write it back; the
	// registers keep the updated state for the next block.
	ADDQ (SI), AX
	MOVQ AX, (SI)
	ADDQ 8(SI), BX
	MOVQ BX, 8(SI)
	ADDQ 16(SI), CX
	MOVQ CX, 16(SI)
	ADDQ 24(SI), R8
	MOVQ R8, 24(SI)
	ADDQ 32(SI), DX
	MOVQ DX, 32(SI)
	ADDQ 40(SI), R9
	MOVQ R9, 40(SI)
	ADDQ 48(SI), R10
	MOVQ R10, 48(SI)
	ADDQ 56(SI), R11
	MOVQ R11, 56(SI)

	// Restore the input pointer, step to the next block, and loop until the
	// end pointer saved at 48(SP) is reached.
	MOVQ 40(SP), DI
	ADDQ $0x80, DI
	CMPQ DI, 48(SP)
	JNE loop0

done_hash:
	// Zero the upper YMM halves before returning.
	VZEROUPPER
	RET

// VPSHUFB control loaded into Y9: within each 128-bit lane it reverses the
// byte order of each 64-bit word.
DATA PSHUFFLE_BYTE_FLIP_MASK<>+0(SB)/8, $0x0001020304050607
DATA PSHUFFLE_BYTE_FLIP_MASK<>+8(SB)/8, $0x08090a0b0c0d0e0f
DATA PSHUFFLE_BYTE_FLIP_MASK<>+16(SB)/8, $0x1011121314151617
DATA PSHUFFLE_BYTE_FLIP_MASK<>+24(SB)/8, $0x18191a1b1c1d1e1f
GLOBL PSHUFFLE_BYTE_FLIP_MASK<>(SB), RODATA|NOPTR, $32

// VPAND mask: the low 128 bits are zero and the high 128 bits are all ones,
// so ANDing with it clears the lower lane and keeps the upper lane.
DATA MASK_YMM_LO<>+0(SB)/8, $0x0000000000000000
DATA MASK_YMM_LO<>+8(SB)/8, $0x0000000000000000
DATA MASK_YMM_LO<>+16(SB)/8, $0xffffffffffffffff
DATA MASK_YMM_LO<>+24(SB)/8, $0xffffffffffffffff
GLOBL MASK_YMM_LO<>(SB), RODATA|NOPTR, $32