// Code generated by command: go run sha1block_amd64_asm.go -out ../sha1block_amd64.s -pkg sha1. DO NOT EDIT. //go:build !purego #include "textflag.h" // func blockAVX2(dig *digest, p []byte) // Requires: AVX, AVX2, BMI, BMI2, CMOV TEXT ·blockAVX2(SB), $1408-32 MOVQ dig+0(FP), DI MOVQ p_base+8(FP), SI MOVQ p_len+16(FP), DX SHRQ $0x06, DX SHLQ $0x06, DX LEAQ K_XMM_AR<>+0(SB), R8 MOVQ DI, R9 MOVQ SI, R10 LEAQ 64(SI), R13 ADDQ SI, DX ADDQ $0x40, DX MOVQ DX, R11 CMPQ R13, R11 CMOVQCC R8, R13 VMOVDQU BSWAP_SHUFB_CTL<>+0(SB), Y10 MOVL (R9), CX MOVL 4(R9), SI MOVL 8(R9), DI MOVL 12(R9), AX MOVL 16(R9), DX MOVQ SP, R14 LEAQ 672(SP), R15 VMOVDQU (R10), X0 VINSERTI128 $0x01, (R13), Y0, Y0 VPSHUFB Y10, Y0, Y15 VPADDD (R8), Y15, Y0 VMOVDQU Y0, (R14) VMOVDQU 16(R10), X0 VINSERTI128 $0x01, 16(R13), Y0, Y0 VPSHUFB Y10, Y0, Y14 VPADDD (R8), Y14, Y0 VMOVDQU Y0, 32(R14) VMOVDQU 32(R10), X0 VINSERTI128 $0x01, 32(R13), Y0, Y0 VPSHUFB Y10, Y0, Y13 VPADDD (R8), Y13, Y0 VMOVDQU Y0, 64(R14) VMOVDQU 48(R10), X0 VINSERTI128 $0x01, 48(R13), Y0, Y0 VPSHUFB Y10, Y0, Y12 VPADDD (R8), Y12, Y0 VMOVDQU Y0, 96(R14) VPALIGNR $0x08, Y15, Y14, Y8 VPSRLDQ $0x04, Y12, Y0 VPXOR Y13, Y8, Y8 VPXOR Y15, Y0, Y0 VPXOR Y0, Y8, Y8 VPSLLDQ $0x0c, Y8, Y9 VPSLLD $0x01, Y8, Y0 VPSRLD $0x1f, Y8, Y8 VPOR Y8, Y0, Y0 VPSLLD $0x02, Y9, Y8 VPSRLD $0x1e, Y9, Y9 VPXOR Y8, Y0, Y0 VPXOR Y9, Y0, Y8 VPADDD (R8), Y8, Y0 VMOVDQU Y0, 128(R14) VPALIGNR $0x08, Y14, Y13, Y7 VPSRLDQ $0x04, Y8, Y0 VPXOR Y12, Y7, Y7 VPXOR Y14, Y0, Y0 VPXOR Y0, Y7, Y7 VPSLLDQ $0x0c, Y7, Y9 VPSLLD $0x01, Y7, Y0 VPSRLD $0x1f, Y7, Y7 VPOR Y7, Y0, Y0 VPSLLD $0x02, Y9, Y7 VPSRLD $0x1e, Y9, Y9 VPXOR Y7, Y0, Y0 VPXOR Y9, Y0, Y7 VPADDD 32(R8), Y7, Y0 VMOVDQU Y0, 160(R14) VPALIGNR $0x08, Y13, Y12, Y5 VPSRLDQ $0x04, Y7, Y0 VPXOR Y8, Y5, Y5 VPXOR Y13, Y0, Y0 VPXOR Y0, Y5, Y5 VPSLLDQ $0x0c, Y5, Y9 VPSLLD $0x01, Y5, Y0 VPSRLD $0x1f, Y5, Y5 VPOR Y5, Y0, Y0 VPSLLD $0x02, Y9, Y5 VPSRLD $0x1e, Y9, Y9 VPXOR Y5, Y0, Y0 VPXOR Y9, Y0, Y5 VPADDD 32(R8), Y5, Y0 VMOVDQU Y0, 192(R14) VPALIGNR $0x08, Y12, Y8, Y3 VPSRLDQ $0x04, Y5, Y0 VPXOR Y7, Y3, Y3 VPXOR Y12, Y0, Y0 VPXOR Y0, Y3, Y3 VPSLLDQ $0x0c, Y3, Y9 VPSLLD $0x01, Y3, Y0 VPSRLD $0x1f, Y3, Y3 VPOR Y3, Y0, Y0 VPSLLD $0x02, Y9, Y3 VPSRLD $0x1e, Y9, Y9 VPXOR Y3, Y0, Y0 VPXOR Y9, Y0, Y3 VPADDD 32(R8), Y3, Y0 VMOVDQU Y0, 224(R14) VPALIGNR $0x08, Y5, Y3, Y0 VPXOR Y14, Y15, Y15 VPXOR Y8, Y0, Y0 VPXOR Y0, Y15, Y15 VPSLLD $0x02, Y15, Y0 VPSRLD $0x1e, Y15, Y15 VPOR Y15, Y0, Y15 VPADDD 32(R8), Y15, Y0 VMOVDQU Y0, 256(R14) VPALIGNR $0x08, Y3, Y15, Y0 VPXOR Y13, Y14, Y14 VPXOR Y7, Y0, Y0 VPXOR Y0, Y14, Y14 VPSLLD $0x02, Y14, Y0 VPSRLD $0x1e, Y14, Y14 VPOR Y14, Y0, Y14 VPADDD 32(R8), Y14, Y0 VMOVDQU Y0, 288(R14) VPALIGNR $0x08, Y15, Y14, Y0 VPXOR Y12, Y13, Y13 VPXOR Y5, Y0, Y0 VPXOR Y0, Y13, Y13 VPSLLD $0x02, Y13, Y0 VPSRLD $0x1e, Y13, Y13 VPOR Y13, Y0, Y13 VPADDD 64(R8), Y13, Y0 VMOVDQU Y0, 320(R14) VPALIGNR $0x08, Y14, Y13, Y0 VPXOR Y8, Y12, Y12 VPXOR Y3, Y0, Y0 VPXOR Y0, Y12, Y12 VPSLLD $0x02, Y12, Y0 VPSRLD $0x1e, Y12, Y12 VPOR Y12, Y0, Y12 VPADDD 64(R8), Y12, Y0 VMOVDQU Y0, 352(R14) VPALIGNR $0x08, Y13, Y12, Y0 VPXOR Y7, Y8, Y8 VPXOR Y15, Y0, Y0 VPXOR Y0, Y8, Y8 VPSLLD $0x02, Y8, Y0 VPSRLD $0x1e, Y8, Y8 VPOR Y8, Y0, Y8 VPADDD 64(R8), Y8, Y0 VMOVDQU Y0, 384(R14) VPALIGNR $0x08, Y12, Y8, Y0 VPXOR Y5, Y7, Y7 VPXOR Y14, Y0, Y0 VPXOR Y0, Y7, Y7 VPSLLD $0x02, Y7, Y0 VPSRLD $0x1e, Y7, Y7 VPOR Y7, Y0, Y7 VPADDD 64(R8), Y7, Y0 VMOVDQU Y0, 416(R14) VPALIGNR $0x08, Y8, Y7, Y0 VPXOR Y3, Y5, Y5 VPXOR Y13, Y0, Y0 VPXOR Y0, Y5, Y5 VPSLLD $0x02, Y5, Y0 VPSRLD $0x1e, Y5, Y5 VPOR Y5, Y0, Y5 VPADDD 64(R8), Y5, Y0 VMOVDQU Y0, 448(R14) VPALIGNR $0x08, Y7, Y5, Y0 VPXOR Y15, Y3, Y3 VPXOR Y12, Y0, Y0 VPXOR Y0, Y3, Y3 VPSLLD $0x02, Y3, Y0 VPSRLD $0x1e, Y3, Y3 VPOR Y3, Y0, Y3 VPADDD 96(R8), Y3, Y0 VMOVDQU Y0, 480(R14) VPALIGNR $0x08, Y5, Y3, Y0 VPXOR Y14, Y15, Y15 VPXOR Y8, Y0, Y0 VPXOR Y0, Y15, Y15 VPSLLD $0x02, Y15, Y0 VPSRLD $0x1e, Y15, Y15 VPOR Y15, Y0, Y15 VPADDD 96(R8), Y15, Y0 VMOVDQU Y0, 512(R14) VPALIGNR $0x08, Y3, Y15, Y0 VPXOR Y13, Y14, Y14 VPXOR Y7, Y0, Y0 VPXOR Y0, Y14, Y14 VPSLLD $0x02, Y14, Y0 VPSRLD $0x1e, Y14, Y14 VPOR Y14, Y0, Y14 VPADDD 96(R8), Y14, Y0 VMOVDQU Y0, 544(R14) VPALIGNR $0x08, Y15, Y14, Y0 VPXOR Y12, Y13, Y13 VPXOR Y5, Y0, Y0 VPXOR Y0, Y13, Y13 VPSLLD $0x02, Y13, Y0 VPSRLD $0x1e, Y13, Y13 VPOR Y13, Y0, Y13 VPADDD 96(R8), Y13, Y0 VMOVDQU Y0, 576(R14) VPALIGNR $0x08, Y14, Y13, Y0 VPXOR Y8, Y12, Y12 VPXOR Y3, Y0, Y0 VPXOR Y0, Y12, Y12 VPSLLD $0x02, Y12, Y0 VPSRLD $0x1e, Y12, Y12 VPOR Y12, Y0, Y12 VPADDD 96(R8), Y12, Y0 VMOVDQU Y0, 608(R14) XCHGQ R15, R14 loop: CMPQ R10, R8 JNE begin VZEROUPPER RET begin: MOVL SI, BX RORXL $0x02, SI, SI ANDNL AX, BX, BP ANDL DI, BX XORL BP, BX ADDL (R15), DX ANDNL DI, CX, BP LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX VMOVDQU 128(R10), X0 ANDL SI, CX XORL BP, CX LEAL (DX)(R12*1), DX ADDL 4(R15), AX ANDNL SI, DX, BP LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX VINSERTI128 $0x01, 128(R13), Y0, Y0 ANDL BX, DX XORL BP, DX LEAL (AX)(R12*1), AX ADDL 8(R15), DI ANDNL BX, AX, BP LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX VPSHUFB Y10, Y0, Y15 ANDL CX, AX XORL BP, AX LEAL (DI)(R12*1), DI ADDL 12(R15), SI ANDNL CX, DI, BP LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX ANDL DX, DI XORL BP, DI LEAL (SI)(R12*1), SI ADDL 32(R15), BX ANDNL DX, SI, BP LEAL (BX)(DI*1), BX RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI VPADDD (R8), Y15, Y0 ANDL AX, SI XORL BP, SI LEAL (BX)(R12*1), BX ADDL 36(R15), CX ANDNL AX, BX, BP LEAL (CX)(SI*1), CX RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI ANDL DI, BX XORL BP, BX LEAL (CX)(R12*1), CX ADDL 40(R15), DX ANDNL DI, CX, BP LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX ANDL SI, CX XORL BP, CX LEAL (DX)(R12*1), DX ADDL 44(R15), AX ANDNL SI, DX, BP LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX VMOVDQU Y0, (R14) ANDL BX, DX XORL BP, DX LEAL (AX)(R12*1), AX ADDL 64(R15), DI ANDNL BX, AX, BP LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX VMOVDQU 144(R10), X0 ANDL CX, AX XORL BP, AX LEAL (DI)(R12*1), DI ADDL 68(R15), SI ANDNL CX, DI, BP LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX VINSERTI128 $0x01, 144(R13), Y0, Y0 ANDL DX, DI XORL BP, DI LEAL (SI)(R12*1), SI ADDL 72(R15), BX ANDNL DX, SI, BP LEAL (BX)(DI*1), BX RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI VPSHUFB Y10, Y0, Y14 ANDL AX, SI XORL BP, SI LEAL (BX)(R12*1), BX ADDL 76(R15), CX ANDNL AX, BX, BP LEAL (CX)(SI*1), CX RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI ANDL DI, BX XORL BP, BX LEAL (CX)(R12*1), CX ADDL 96(R15), DX ANDNL DI, CX, BP LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX VPADDD (R8), Y14, Y0 ANDL SI, CX XORL BP, CX LEAL (DX)(R12*1), DX ADDL 100(R15), AX ANDNL SI, DX, BP LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX ANDL BX, DX XORL BP, DX LEAL (AX)(R12*1), AX ADDL 104(R15), DI ANDNL BX, AX, BP LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX ANDL CX, AX XORL BP, AX LEAL (DI)(R12*1), DI ADDL 108(R15), SI ANDNL CX, DI, BP LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX VMOVDQU Y0, 32(R14) ANDL DX, DI XORL BP, DI LEAL (SI)(R12*1), SI ADDL 128(R15), BX ANDNL DX, SI, BP LEAL (BX)(DI*1), BX RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI VMOVDQU 160(R10), X0 ANDL AX, SI XORL BP, SI LEAL (BX)(R12*1), BX ADDL 132(R15), CX ANDNL AX, BX, BP LEAL (CX)(SI*1), CX RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI VINSERTI128 $0x01, 160(R13), Y0, Y0 ANDL DI, BX XORL BP, BX LEAL (CX)(R12*1), CX ADDL 136(R15), DX ANDNL DI, CX, BP LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX VPSHUFB Y10, Y0, Y13 ANDL SI, CX XORL BP, CX LEAL (DX)(R12*1), DX ADDL 140(R15), AX LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX XORL BX, DX ADDL R12, AX XORL SI, DX ADDL 160(R15), DI LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX VPADDD (R8), Y13, Y0 XORL CX, AX ADDL R12, DI XORL BX, AX ADDL 164(R15), SI LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX XORL DX, DI ADDL R12, SI XORL CX, DI ADDL 168(R15), BX LEAL (BX)(DI*1), BX RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI XORL AX, SI ADDL R12, BX XORL DX, SI ADDL 172(R15), CX LEAL (CX)(SI*1), CX RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI VMOVDQU Y0, 64(R14) XORL DI, BX ADDL R12, CX XORL AX, BX ADDL 192(R15), DX LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX VMOVDQU 176(R10), X0 XORL SI, CX ADDL R12, DX XORL DI, CX ADDL 196(R15), AX LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX VINSERTI128 $0x01, 176(R13), Y0, Y0 XORL BX, DX ADDL R12, AX XORL SI, DX ADDL 200(R15), DI LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX VPSHUFB Y10, Y0, Y12 XORL CX, AX ADDL R12, DI XORL BX, AX ADDL 204(R15), SI LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX XORL DX, DI ADDL R12, SI XORL CX, DI ADDL 224(R15), BX LEAL (BX)(DI*1), BX RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI VPADDD (R8), Y12, Y0 XORL AX, SI ADDL R12, BX XORL DX, SI ADDL 228(R15), CX LEAL (CX)(SI*1), CX RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI XORL DI, BX ADDL R12, CX XORL AX, BX ADDL 232(R15), DX LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX XORL SI, CX ADDL R12, DX XORL DI, CX ADDL 236(R15), AX LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX VMOVDQU Y0, 96(R14) XORL BX, DX ADDL R12, AX XORL SI, DX ADDL 256(R15), DI LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX VPALIGNR $0x08, Y15, Y14, Y8 VPSRLDQ $0x04, Y12, Y0 XORL CX, AX ADDL R12, DI XORL BX, AX ADDL 260(R15), SI LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX VPXOR Y13, Y8, Y8 VPXOR Y15, Y0, Y0 XORL DX, DI ADDL R12, SI XORL CX, DI ADDL 264(R15), BX LEAL (BX)(DI*1), BX RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI VPXOR Y0, Y8, Y8 VPSLLDQ $0x0c, Y8, Y9 XORL AX, SI ADDL R12, BX XORL DX, SI ADDL 268(R15), CX LEAL (CX)(SI*1), CX RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI VPSLLD $0x01, Y8, Y0 VPSRLD $0x1f, Y8, Y8 XORL DI, BX ADDL R12, CX XORL AX, BX ADDL 288(R15), DX LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX VPOR Y8, Y0, Y0 VPSLLD $0x02, Y9, Y8 XORL SI, CX ADDL R12, DX XORL DI, CX ADDL 292(R15), AX LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX VPSRLD $0x1e, Y9, Y9 VPXOR Y8, Y0, Y0 XORL BX, DX ADDL R12, AX XORL SI, DX ADDL 296(R15), DI LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX XORL CX, AX ADDL R12, DI XORL BX, AX ADDL 300(R15), SI VPXOR Y9, Y0, Y8 VPADDD (R8), Y8, Y0 VMOVDQU Y0, 128(R14) LEAL (SI)(AX*1), SI MOVL DX, BP ORL DI, BP RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX ANDL CX, BP ANDL DX, DI ORL BP, DI ADDL R12, SI ADDL 320(R15), BX VPALIGNR $0x08, Y14, Y13, Y7 VPSRLDQ $0x04, Y8, Y0 LEAL (BX)(DI*1), BX MOVL AX, BP ORL SI, BP RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI ANDL DX, BP ANDL AX, SI ORL BP, SI ADDL R12, BX ADDL 324(R15), CX VPXOR Y12, Y7, Y7 VPXOR Y14, Y0, Y0 LEAL (CX)(SI*1), CX MOVL DI, BP ORL BX, BP RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI ANDL AX, BP ANDL DI, BX ORL BP, BX ADDL R12, CX ADDL 328(R15), DX VPXOR Y0, Y7, Y7 VPSLLDQ $0x0c, Y7, Y9 LEAL (DX)(BX*1), DX MOVL SI, BP ORL CX, BP RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX ANDL DI, BP ANDL SI, CX ORL BP, CX ADDL R12, DX ADDL 332(R15), AX VPSLLD $0x01, Y7, Y0 VPSRLD $0x1f, Y7, Y7 LEAL (AX)(CX*1), AX MOVL BX, BP ORL DX, BP RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX ANDL SI, BP ANDL BX, DX ORL BP, DX ADDL R12, AX ADDL 352(R15), DI VPOR Y7, Y0, Y0 VPSLLD $0x02, Y9, Y7 LEAL (DI)(DX*1), DI MOVL CX, BP ORL AX, BP RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX ANDL BX, BP ANDL CX, AX ORL BP, AX ADDL R12, DI ADDL 356(R15), SI VPSRLD $0x1e, Y9, Y9 VPXOR Y7, Y0, Y0 LEAL (SI)(AX*1), SI MOVL DX, BP ORL DI, BP RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX ANDL CX, BP ANDL DX, DI ORL BP, DI ADDL R12, SI ADDL 360(R15), BX LEAL (BX)(DI*1), BX MOVL AX, BP ORL SI, BP RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI ANDL DX, BP ANDL AX, SI ORL BP, SI ADDL R12, BX ADDL 364(R15), CX VPXOR Y9, Y0, Y7 VPADDD 32(R8), Y7, Y0 VMOVDQU Y0, 160(R14) LEAL (CX)(SI*1), CX MOVL DI, BP ORL BX, BP RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI ANDL AX, BP ANDL DI, BX ORL BP, BX ADDL R12, CX ADDL 384(R15), DX VPALIGNR $0x08, Y13, Y12, Y5 VPSRLDQ $0x04, Y7, Y0 LEAL (DX)(BX*1), DX MOVL SI, BP ORL CX, BP RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX ANDL DI, BP ANDL SI, CX ORL BP, CX ADDL R12, DX ADDL 388(R15), AX VPXOR Y8, Y5, Y5 VPXOR Y13, Y0, Y0 LEAL (AX)(CX*1), AX MOVL BX, BP ORL DX, BP RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX ANDL SI, BP ANDL BX, DX ORL BP, DX ADDL R12, AX ADDL 392(R15), DI VPXOR Y0, Y5, Y5 VPSLLDQ $0x0c, Y5, Y9 LEAL (DI)(DX*1), DI MOVL CX, BP ORL AX, BP RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX ANDL BX, BP ANDL CX, AX ORL BP, AX ADDL R12, DI ADDL 396(R15), SI VPSLLD $0x01, Y5, Y0 VPSRLD $0x1f, Y5, Y5 LEAL (SI)(AX*1), SI MOVL DX, BP ORL DI, BP RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX ANDL CX, BP ANDL DX, DI ORL BP, DI ADDL R12, SI ADDL 416(R15), BX VPOR Y5, Y0, Y0 VPSLLD $0x02, Y9, Y5 LEAL (BX)(DI*1), BX MOVL AX, BP ORL SI, BP RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI ANDL DX, BP ANDL AX, SI ORL BP, SI ADDL R12, BX ADDL 420(R15), CX VPSRLD $0x1e, Y9, Y9 VPXOR Y5, Y0, Y0 LEAL (CX)(SI*1), CX MOVL DI, BP ORL BX, BP RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI ANDL AX, BP ANDL DI, BX ORL BP, BX ADDL R12, CX ADDL 424(R15), DX LEAL (DX)(BX*1), DX MOVL SI, BP ORL CX, BP RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX ANDL DI, BP ANDL SI, CX ORL BP, CX ADDL R12, DX ADDL 428(R15), AX VPXOR Y9, Y0, Y5 VPADDD 32(R8), Y5, Y0 VMOVDQU Y0, 192(R14) LEAL (AX)(CX*1), AX MOVL BX, BP ORL DX, BP RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX ANDL SI, BP ANDL BX, DX ORL BP, DX ADDL R12, AX ADDL 448(R15), DI VPALIGNR $0x08, Y12, Y8, Y3 VPSRLDQ $0x04, Y5, Y0 LEAL (DI)(DX*1), DI MOVL CX, BP ORL AX, BP RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX ANDL BX, BP ANDL CX, AX ORL BP, AX ADDL R12, DI ADDL 452(R15), SI VPXOR Y7, Y3, Y3 VPXOR Y12, Y0, Y0 LEAL (SI)(AX*1), SI MOVL DX, BP ORL DI, BP RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX ANDL CX, BP ANDL DX, DI ORL BP, DI ADDL R12, SI ADDL 456(R15), BX VPXOR Y0, Y3, Y3 VPSLLDQ $0x0c, Y3, Y9 LEAL (BX)(DI*1), BX MOVL AX, BP ORL SI, BP RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI ANDL DX, BP ANDL AX, SI ORL BP, SI ADDL R12, BX ADDL 460(R15), CX LEAL (CX)(SI*1), CX RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI VPSLLD $0x01, Y3, Y0 VPSRLD $0x1f, Y3, Y3 XORL DI, BX ADDL R12, CX XORL AX, BX ADDQ $0x80, R10 CMPQ R10, R11 CMOVQCC R8, R10 ADDL 480(R15), DX LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX VPOR Y3, Y0, Y0 VPSLLD $0x02, Y9, Y3 XORL SI, CX ADDL R12, DX XORL DI, CX ADDL 484(R15), AX LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX VPSRLD $0x1e, Y9, Y9 VPXOR Y3, Y0, Y0 XORL BX, DX ADDL R12, AX XORL SI, DX ADDL 488(R15), DI LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX XORL CX, AX ADDL R12, DI XORL BX, AX ADDL 492(R15), SI LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX VPXOR Y9, Y0, Y3 VPADDD 32(R8), Y3, Y0 VMOVDQU Y0, 224(R14) XORL DX, DI ADDL R12, SI XORL CX, DI ADDL 512(R15), BX LEAL (BX)(DI*1), BX RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI VPALIGNR $0x08, Y5, Y3, Y0 XORL AX, SI ADDL R12, BX XORL DX, SI ADDL 516(R15), CX LEAL (CX)(SI*1), CX RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI VPXOR Y14, Y15, Y15 XORL DI, BX ADDL R12, CX XORL AX, BX ADDL 520(R15), DX LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX VPXOR Y8, Y0, Y0 XORL SI, CX ADDL R12, DX XORL DI, CX ADDL 524(R15), AX LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX VPXOR Y0, Y15, Y15 XORL BX, DX ADDL R12, AX XORL SI, DX ADDL 544(R15), DI LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX VPSLLD $0x02, Y15, Y0 XORL CX, AX ADDL R12, DI XORL BX, AX ADDL 548(R15), SI LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX VPSRLD $0x1e, Y15, Y15 VPOR Y15, Y0, Y15 XORL DX, DI ADDL R12, SI XORL CX, DI ADDL 552(R15), BX LEAL (BX)(DI*1), BX RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI XORL AX, SI ADDL R12, BX XORL DX, SI ADDL 556(R15), CX LEAL (CX)(SI*1), CX RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI VPADDD 32(R8), Y15, Y0 VMOVDQU Y0, 256(R14) XORL DI, BX ADDL R12, CX XORL AX, BX ADDL 576(R15), DX LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX VPALIGNR $0x08, Y3, Y15, Y0 XORL SI, CX ADDL R12, DX XORL DI, CX ADDL 580(R15), AX LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX VPXOR Y13, Y14, Y14 XORL BX, DX ADDL R12, AX XORL SI, DX ADDL 584(R15), DI LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX VPXOR Y7, Y0, Y0 XORL CX, AX ADDL R12, DI XORL BX, AX ADDL 588(R15), SI LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX VPXOR Y0, Y14, Y14 XORL DX, DI ADDL R12, SI XORL CX, DI ADDL 608(R15), BX LEAL (BX)(DI*1), BX RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI VPSLLD $0x02, Y14, Y0 XORL AX, SI ADDL R12, BX XORL DX, SI ADDL 612(R15), CX LEAL (CX)(SI*1), CX RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI VPSRLD $0x1e, Y14, Y14 VPOR Y14, Y0, Y14 XORL DI, BX ADDL R12, CX XORL AX, BX ADDL 616(R15), DX LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX XORL SI, CX ADDL R12, DX XORL DI, CX ADDL 620(R15), AX LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 VPADDD 32(R8), Y14, Y0 VMOVDQU Y0, 288(R14) ADDL R12, AX ADDL (R9), AX MOVL AX, (R9) ADDL 4(R9), DX MOVL DX, 4(R9) ADDL 8(R9), BX MOVL BX, 8(R9) ADDL 12(R9), SI MOVL SI, 12(R9) ADDL 16(R9), DI MOVL DI, 16(R9) CMPQ R10, R8 JE loop MOVL DX, CX MOVL CX, DX RORXL $0x02, CX, CX ANDNL SI, DX, BP ANDL BX, DX XORL BP, DX ADDL 16(R15), DI ANDNL BX, AX, BP LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX VPALIGNR $0x08, Y15, Y14, Y0 ANDL CX, AX XORL BP, AX LEAL (DI)(R12*1), DI ADDL 20(R15), SI ANDNL CX, DI, BP LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX VPXOR Y12, Y13, Y13 ANDL DX, DI XORL BP, DI LEAL (SI)(R12*1), SI ADDL 24(R15), BX ANDNL DX, SI, BP LEAL (BX)(DI*1), BX RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI VPXOR Y5, Y0, Y0 ANDL AX, SI XORL BP, SI LEAL (BX)(R12*1), BX ADDL 28(R15), CX ANDNL AX, BX, BP LEAL (CX)(SI*1), CX RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI VPXOR Y0, Y13, Y13 ANDL DI, BX XORL BP, BX LEAL (CX)(R12*1), CX ADDL 48(R15), DX ANDNL DI, CX, BP LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX VPSLLD $0x02, Y13, Y0 ANDL SI, CX XORL BP, CX LEAL (DX)(R12*1), DX ADDL 52(R15), AX ANDNL SI, DX, BP LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX VPSRLD $0x1e, Y13, Y13 VPOR Y13, Y0, Y13 ANDL BX, DX XORL BP, DX LEAL (AX)(R12*1), AX ADDL 56(R15), DI ANDNL BX, AX, BP LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX ANDL CX, AX XORL BP, AX LEAL (DI)(R12*1), DI ADDL 60(R15), SI ANDNL CX, DI, BP LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX VPADDD 64(R8), Y13, Y0 VMOVDQU Y0, 320(R14) ANDL DX, DI XORL BP, DI LEAL (SI)(R12*1), SI ADDL 80(R15), BX ANDNL DX, SI, BP LEAL (BX)(DI*1), BX RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI VPALIGNR $0x08, Y14, Y13, Y0 ANDL AX, SI XORL BP, SI LEAL (BX)(R12*1), BX ADDL 84(R15), CX ANDNL AX, BX, BP LEAL (CX)(SI*1), CX RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI VPXOR Y8, Y12, Y12 ANDL DI, BX XORL BP, BX LEAL (CX)(R12*1), CX ADDL 88(R15), DX ANDNL DI, CX, BP LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX VPXOR Y3, Y0, Y0 ANDL SI, CX XORL BP, CX LEAL (DX)(R12*1), DX ADDL 92(R15), AX ANDNL SI, DX, BP LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX VPXOR Y0, Y12, Y12 ANDL BX, DX XORL BP, DX LEAL (AX)(R12*1), AX ADDL 112(R15), DI ANDNL BX, AX, BP LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX VPSLLD $0x02, Y12, Y0 ANDL CX, AX XORL BP, AX LEAL (DI)(R12*1), DI ADDL 116(R15), SI ANDNL CX, DI, BP LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX VPSRLD $0x1e, Y12, Y12 VPOR Y12, Y0, Y12 ANDL DX, DI XORL BP, DI LEAL (SI)(R12*1), SI ADDL 120(R15), BX ANDNL DX, SI, BP LEAL (BX)(DI*1), BX RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI ANDL AX, SI XORL BP, SI LEAL (BX)(R12*1), BX ADDL 124(R15), CX ANDNL AX, BX, BP LEAL (CX)(SI*1), CX RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI VPADDD 64(R8), Y12, Y0 VMOVDQU Y0, 352(R14) ANDL DI, BX XORL BP, BX LEAL (CX)(R12*1), CX ADDL 144(R15), DX ANDNL DI, CX, BP LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX VPALIGNR $0x08, Y13, Y12, Y0 ANDL SI, CX XORL BP, CX LEAL (DX)(R12*1), DX ADDL 148(R15), AX ANDNL SI, DX, BP LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX VPXOR Y7, Y8, Y8 ANDL BX, DX XORL BP, DX LEAL (AX)(R12*1), AX ADDL 152(R15), DI ANDNL BX, AX, BP LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX VPXOR Y15, Y0, Y0 ANDL CX, AX XORL BP, AX LEAL (DI)(R12*1), DI ADDL 156(R15), SI LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX VPXOR Y0, Y8, Y8 XORL DX, DI ADDL R12, SI XORL CX, DI ADDL 176(R15), BX LEAL (BX)(DI*1), BX RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI VPSLLD $0x02, Y8, Y0 XORL AX, SI ADDL R12, BX XORL DX, SI ADDL 180(R15), CX LEAL (CX)(SI*1), CX RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI VPSRLD $0x1e, Y8, Y8 VPOR Y8, Y0, Y8 XORL DI, BX ADDL R12, CX XORL AX, BX ADDL 184(R15), DX LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX XORL SI, CX ADDL R12, DX XORL DI, CX ADDL 188(R15), AX LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX VPADDD 64(R8), Y8, Y0 VMOVDQU Y0, 384(R14) XORL BX, DX ADDL R12, AX XORL SI, DX ADDL 208(R15), DI LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX VPALIGNR $0x08, Y12, Y8, Y0 XORL CX, AX ADDL R12, DI XORL BX, AX ADDL 212(R15), SI LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX VPXOR Y5, Y7, Y7 XORL DX, DI ADDL R12, SI XORL CX, DI ADDL 216(R15), BX LEAL (BX)(DI*1), BX RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI VPXOR Y14, Y0, Y0 XORL AX, SI ADDL R12, BX XORL DX, SI ADDL 220(R15), CX LEAL (CX)(SI*1), CX RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI VPXOR Y0, Y7, Y7 XORL DI, BX ADDL R12, CX XORL AX, BX ADDL 240(R15), DX LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX VPSLLD $0x02, Y7, Y0 XORL SI, CX ADDL R12, DX XORL DI, CX ADDL 244(R15), AX LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX VPSRLD $0x1e, Y7, Y7 VPOR Y7, Y0, Y7 XORL BX, DX ADDL R12, AX XORL SI, DX ADDL 248(R15), DI LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX XORL CX, AX ADDL R12, DI XORL BX, AX ADDL 252(R15), SI LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX VPADDD 64(R8), Y7, Y0 VMOVDQU Y0, 416(R14) XORL DX, DI ADDL R12, SI XORL CX, DI ADDL 272(R15), BX LEAL (BX)(DI*1), BX RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI VPALIGNR $0x08, Y8, Y7, Y0 XORL AX, SI ADDL R12, BX XORL DX, SI ADDL 276(R15), CX LEAL (CX)(SI*1), CX RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI VPXOR Y3, Y5, Y5 XORL DI, BX ADDL R12, CX XORL AX, BX ADDL 280(R15), DX LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX VPXOR Y13, Y0, Y0 XORL SI, CX ADDL R12, DX XORL DI, CX ADDL 284(R15), AX LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX VPXOR Y0, Y5, Y5 XORL BX, DX ADDL R12, AX XORL SI, DX ADDL 304(R15), DI LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX VPSLLD $0x02, Y5, Y0 XORL CX, AX ADDL R12, DI XORL BX, AX ADDL 308(R15), SI LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX VPSRLD $0x1e, Y5, Y5 VPOR Y5, Y0, Y5 XORL DX, DI ADDL R12, SI XORL CX, DI ADDL 312(R15), BX LEAL (BX)(DI*1), BX RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI XORL AX, SI ADDL R12, BX XORL DX, SI ADDL 316(R15), CX VPADDD 64(R8), Y5, Y0 VMOVDQU Y0, 448(R14) LEAL (CX)(SI*1), CX MOVL DI, BP ORL BX, BP RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI ANDL AX, BP ANDL DI, BX ORL BP, BX ADDL R12, CX ADDL 336(R15), DX VPALIGNR $0x08, Y7, Y5, Y0 LEAL (DX)(BX*1), DX MOVL SI, BP ORL CX, BP RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX ANDL DI, BP ANDL SI, CX ORL BP, CX ADDL R12, DX ADDL 340(R15), AX VPXOR Y15, Y3, Y3 LEAL (AX)(CX*1), AX MOVL BX, BP ORL DX, BP RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX ANDL SI, BP ANDL BX, DX ORL BP, DX ADDL R12, AX ADDL 344(R15), DI VPXOR Y12, Y0, Y0 LEAL (DI)(DX*1), DI MOVL CX, BP ORL AX, BP RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX ANDL BX, BP ANDL CX, AX ORL BP, AX ADDL R12, DI ADDL 348(R15), SI VPXOR Y0, Y3, Y3 LEAL (SI)(AX*1), SI MOVL DX, BP ORL DI, BP RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX ANDL CX, BP ANDL DX, DI ORL BP, DI ADDL R12, SI ADDL 368(R15), BX VPSLLD $0x02, Y3, Y0 LEAL (BX)(DI*1), BX MOVL AX, BP ORL SI, BP RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI ANDL DX, BP ANDL AX, SI ORL BP, SI ADDL R12, BX ADDL 372(R15), CX VPSRLD $0x1e, Y3, Y3 VPOR Y3, Y0, Y3 LEAL (CX)(SI*1), CX MOVL DI, BP ORL BX, BP RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI ANDL AX, BP ANDL DI, BX ORL BP, BX ADDL R12, CX ADDL 376(R15), DX LEAL (DX)(BX*1), DX MOVL SI, BP ORL CX, BP RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX ANDL DI, BP ANDL SI, CX ORL BP, CX ADDL R12, DX ADDL 380(R15), AX VPADDD 96(R8), Y3, Y0 VMOVDQU Y0, 480(R14) LEAL (AX)(CX*1), AX MOVL BX, BP ORL DX, BP RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX ANDL SI, BP ANDL BX, DX ORL BP, DX ADDL R12, AX ADDL 400(R15), DI VPALIGNR $0x08, Y5, Y3, Y0 LEAL (DI)(DX*1), DI MOVL CX, BP ORL AX, BP RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX ANDL BX, BP ANDL CX, AX ORL BP, AX ADDL R12, DI ADDL 404(R15), SI VPXOR Y14, Y15, Y15 LEAL (SI)(AX*1), SI MOVL DX, BP ORL DI, BP RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX ANDL CX, BP ANDL DX, DI ORL BP, DI ADDL R12, SI ADDL 408(R15), BX VPXOR Y8, Y0, Y0 LEAL (BX)(DI*1), BX MOVL AX, BP ORL SI, BP RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI ANDL DX, BP ANDL AX, SI ORL BP, SI ADDL R12, BX ADDL 412(R15), CX VPXOR Y0, Y15, Y15 LEAL (CX)(SI*1), CX MOVL DI, BP ORL BX, BP RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI ANDL AX, BP ANDL DI, BX ORL BP, BX ADDL R12, CX ADDL 432(R15), DX VPSLLD $0x02, Y15, Y0 LEAL (DX)(BX*1), DX MOVL SI, BP ORL CX, BP RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX ANDL DI, BP ANDL SI, CX ORL BP, CX ADDL R12, DX ADDL 436(R15), AX VPSRLD $0x1e, Y15, Y15 VPOR Y15, Y0, Y15 LEAL (AX)(CX*1), AX MOVL BX, BP ORL DX, BP RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX ANDL SI, BP ANDL BX, DX ORL BP, DX ADDL R12, AX ADDL 440(R15), DI LEAL (DI)(DX*1), DI MOVL CX, BP ORL AX, BP RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX ANDL BX, BP ANDL CX, AX ORL BP, AX ADDL R12, DI ADDL 444(R15), SI VPADDD 96(R8), Y15, Y0 VMOVDQU Y0, 512(R14) LEAL (SI)(AX*1), SI MOVL DX, BP ORL DI, BP RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX ANDL CX, BP ANDL DX, DI ORL BP, DI ADDL R12, SI ADDL 464(R15), BX VPALIGNR $0x08, Y3, Y15, Y0 LEAL (BX)(DI*1), BX MOVL AX, BP ORL SI, BP RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI ANDL DX, BP ANDL AX, SI ORL BP, SI ADDL R12, BX ADDL 468(R15), CX VPXOR Y13, Y14, Y14 LEAL (CX)(SI*1), CX MOVL DI, BP ORL BX, BP RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI ANDL AX, BP ANDL DI, BX ORL BP, BX ADDL R12, CX ADDL 472(R15), DX VPXOR Y7, Y0, Y0 LEAL (DX)(BX*1), DX MOVL SI, BP ORL CX, BP RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX ANDL DI, BP ANDL SI, CX ORL BP, CX ADDL R12, DX ADDL 476(R15), AX LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX VPXOR Y0, Y14, Y14 XORL BX, DX ADDL R12, AX XORL SI, DX ADDQ $0x80, R13 CMPQ R13, R11 CMOVQCC R8, R10 ADDL 496(R15), DI LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX VPSLLD $0x02, Y14, Y0 XORL CX, AX ADDL R12, DI XORL BX, AX ADDL 500(R15), SI LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX VPSRLD $0x1e, Y14, Y14 VPOR Y14, Y0, Y14 XORL DX, DI ADDL R12, SI XORL CX, DI ADDL 504(R15), BX LEAL (BX)(DI*1), BX RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI XORL AX, SI ADDL R12, BX XORL DX, SI ADDL 508(R15), CX LEAL (CX)(SI*1), CX RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI VPADDD 96(R8), Y14, Y0 VMOVDQU Y0, 544(R14) XORL DI, BX ADDL R12, CX XORL AX, BX ADDL 528(R15), DX LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX VPALIGNR $0x08, Y15, Y14, Y0 XORL SI, CX ADDL R12, DX XORL DI, CX ADDL 532(R15), AX LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX VPXOR Y12, Y13, Y13 XORL BX, DX ADDL R12, AX XORL SI, DX ADDL 536(R15), DI LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX VPXOR Y5, Y0, Y0 XORL CX, AX ADDL R12, DI XORL BX, AX ADDL 540(R15), SI LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX VPXOR Y0, Y13, Y13 XORL DX, DI ADDL R12, SI XORL CX, DI ADDL 560(R15), BX LEAL (BX)(DI*1), BX RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI VPSLLD $0x02, Y13, Y0 XORL AX, SI ADDL R12, BX XORL DX, SI ADDL 564(R15), CX LEAL (CX)(SI*1), CX RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI VPSRLD $0x1e, Y13, Y13 VPOR Y13, Y0, Y13 XORL DI, BX ADDL R12, CX XORL AX, BX ADDL 568(R15), DX LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX XORL SI, CX ADDL R12, DX XORL DI, CX ADDL 572(R15), AX LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX VPADDD 96(R8), Y13, Y0 VMOVDQU Y0, 576(R14) XORL BX, DX ADDL R12, AX XORL SI, DX ADDL 592(R15), DI LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX VPALIGNR $0x08, Y14, Y13, Y0 XORL CX, AX ADDL R12, DI XORL BX, AX ADDL 596(R15), SI LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 RORXL $0x02, DI, AX VPXOR Y8, Y12, Y12 XORL DX, DI ADDL R12, SI XORL CX, DI ADDL 600(R15), BX LEAL (BX)(DI*1), BX RORXL $0x1b, SI, R12 RORXL $0x02, SI, DI VPXOR Y3, Y0, Y0 XORL AX, SI ADDL R12, BX XORL DX, SI ADDL 604(R15), CX LEAL (CX)(SI*1), CX RORXL $0x1b, BX, R12 RORXL $0x02, BX, SI VPXOR Y0, Y12, Y12 XORL DI, BX ADDL R12, CX XORL AX, BX ADDL 624(R15), DX LEAL (DX)(BX*1), DX RORXL $0x1b, CX, R12 RORXL $0x02, CX, BX VPSLLD $0x02, Y12, Y0 XORL SI, CX ADDL R12, DX XORL DI, CX ADDL 628(R15), AX LEAL (AX)(CX*1), AX RORXL $0x1b, DX, R12 RORXL $0x02, DX, CX VPSRLD $0x1e, Y12, Y12 VPOR Y12, Y0, Y12 XORL BX, DX ADDL R12, AX XORL SI, DX ADDL 632(R15), DI LEAL (DI)(DX*1), DI RORXL $0x1b, AX, R12 RORXL $0x02, AX, DX XORL CX, AX ADDL R12, DI XORL BX, AX ADDL 636(R15), SI LEAL (SI)(AX*1), SI RORXL $0x1b, DI, R12 VPADDD 96(R8), Y12, Y0 VMOVDQU Y0, 608(R14) ADDL R12, SI ADDL (R9), SI MOVL SI, (R9) ADDL 4(R9), DI MOVL DI, 4(R9) ADDL 8(R9), DX MOVL DX, 8(R9) ADDL 12(R9), CX MOVL CX, 12(R9) ADDL 16(R9), BX MOVL BX, 16(R9) MOVL SI, R12 MOVL DI, SI MOVL DX, DI MOVL BX, DX MOVL CX, AX MOVL R12, CX XCHGQ R15, R14 JMP loop DATA K_XMM_AR<>+0(SB)/4, $0x5a827999 DATA K_XMM_AR<>+4(SB)/4, $0x5a827999 DATA K_XMM_AR<>+8(SB)/4, $0x5a827999 DATA K_XMM_AR<>+12(SB)/4, $0x5a827999 DATA K_XMM_AR<>+16(SB)/4, $0x5a827999 DATA K_XMM_AR<>+20(SB)/4, $0x5a827999 DATA K_XMM_AR<>+24(SB)/4, $0x5a827999 DATA K_XMM_AR<>+28(SB)/4, $0x5a827999 DATA K_XMM_AR<>+32(SB)/4, $0x6ed9eba1 DATA K_XMM_AR<>+36(SB)/4, $0x6ed9eba1 DATA K_XMM_AR<>+40(SB)/4, $0x6ed9eba1 DATA K_XMM_AR<>+44(SB)/4, $0x6ed9eba1 DATA K_XMM_AR<>+48(SB)/4, $0x6ed9eba1 DATA K_XMM_AR<>+52(SB)/4, $0x6ed9eba1 DATA K_XMM_AR<>+56(SB)/4, $0x6ed9eba1 DATA K_XMM_AR<>+60(SB)/4, $0x6ed9eba1 DATA K_XMM_AR<>+64(SB)/4, $0x8f1bbcdc DATA K_XMM_AR<>+68(SB)/4, $0x8f1bbcdc DATA K_XMM_AR<>+72(SB)/4, $0x8f1bbcdc DATA K_XMM_AR<>+76(SB)/4, $0x8f1bbcdc DATA K_XMM_AR<>+80(SB)/4, $0x8f1bbcdc DATA K_XMM_AR<>+84(SB)/4, $0x8f1bbcdc DATA K_XMM_AR<>+88(SB)/4, $0x8f1bbcdc DATA K_XMM_AR<>+92(SB)/4, $0x8f1bbcdc DATA K_XMM_AR<>+96(SB)/4, $0xca62c1d6 DATA K_XMM_AR<>+100(SB)/4, $0xca62c1d6 DATA K_XMM_AR<>+104(SB)/4, $0xca62c1d6 DATA K_XMM_AR<>+108(SB)/4, $0xca62c1d6 DATA K_XMM_AR<>+112(SB)/4, $0xca62c1d6 DATA K_XMM_AR<>+116(SB)/4, $0xca62c1d6 DATA K_XMM_AR<>+120(SB)/4, $0xca62c1d6 DATA K_XMM_AR<>+124(SB)/4, $0xca62c1d6 GLOBL K_XMM_AR<>(SB), RODATA, $128 DATA BSWAP_SHUFB_CTL<>+0(SB)/4, $0x00010203 DATA BSWAP_SHUFB_CTL<>+4(SB)/4, $0x04050607 DATA BSWAP_SHUFB_CTL<>+8(SB)/4, $0x08090a0b DATA BSWAP_SHUFB_CTL<>+12(SB)/4, $0x0c0d0e0f DATA BSWAP_SHUFB_CTL<>+16(SB)/4, $0x00010203 DATA BSWAP_SHUFB_CTL<>+20(SB)/4, $0x04050607 DATA BSWAP_SHUFB_CTL<>+24(SB)/4, $0x08090a0b DATA BSWAP_SHUFB_CTL<>+28(SB)/4, $0x0c0d0e0f GLOBL BSWAP_SHUFB_CTL<>(SB), RODATA, $32 // func blockSHANI(dig *digest, p []byte) // Requires: AVX, SHA, SSE2, SSE4.1, SSSE3 TEXT ·blockSHANI(SB), $48-32 MOVQ dig+0(FP), DI MOVQ p_base+8(FP), SI MOVQ p_len+16(FP), DX CMPQ DX, $0x00 JEQ done ADDQ SI, DX // Allocate space on the stack for saving ABCD and E0, and align it to 16 bytes LEAQ 15(SP), AX MOVQ $0x000000000000000f, CX NOTQ CX ANDQ CX, AX // Load initial hash state PINSRD $0x03, 16(DI), X5 VMOVDQU (DI), X0 PAND upper_mask<>+0(SB), X5 PSHUFD $0x1b, X0, X0 VMOVDQA shuffle_mask<>+0(SB), X7 loop: // Save ABCD and E working values VMOVDQA X5, (AX) VMOVDQA X0, 16(AX) // Rounds 0-3 VMOVDQU (SI), X1 PSHUFB X7, X1 PADDD X1, X5 VMOVDQA X0, X6 SHA1RNDS4 $0x00, X5, X0 // Rounds 4-7 VMOVDQU 16(SI), X2 PSHUFB X7, X2 SHA1NEXTE X2, X6 VMOVDQA X0, X5 SHA1RNDS4 $0x00, X6, X0 SHA1MSG1 X2, X1 // Rounds 8-11 VMOVDQU 32(SI), X3 PSHUFB X7, X3 SHA1NEXTE X3, X5 VMOVDQA X0, X6 SHA1RNDS4 $0x00, X5, X0 SHA1MSG1 X3, X2 PXOR X3, X1 // Rounds 12-15 VMOVDQU 48(SI), X4 PSHUFB X7, X4 SHA1NEXTE X4, X6 VMOVDQA X0, X5 SHA1MSG2 X4, X1 SHA1RNDS4 $0x00, X6, X0 SHA1MSG1 X4, X3 PXOR X4, X2 // Rounds 16-19 SHA1NEXTE X1, X5 VMOVDQA X0, X6 SHA1MSG2 X1, X2 SHA1RNDS4 $0x00, X5, X0 SHA1MSG1 X1, X4 PXOR X1, X3 // Rounds 20-23 SHA1NEXTE X2, X6 VMOVDQA X0, X5 SHA1MSG2 X2, X3 SHA1RNDS4 $0x01, X6, X0 SHA1MSG1 X2, X1 PXOR X2, X4 // Rounds 24-27 SHA1NEXTE X3, X5 VMOVDQA X0, X6 SHA1MSG2 X3, X4 SHA1RNDS4 $0x01, X5, X0 SHA1MSG1 X3, X2 PXOR X3, X1 // Rounds 28-31 SHA1NEXTE X4, X6 VMOVDQA X0, X5 SHA1MSG2 X4, X1 SHA1RNDS4 $0x01, X6, X0 SHA1MSG1 X4, X3 PXOR X4, X2 // Rounds 32-35 SHA1NEXTE X1, X5 VMOVDQA X0, X6 SHA1MSG2 X1, X2 SHA1RNDS4 $0x01, X5, X0 SHA1MSG1 X1, X4 PXOR X1, X3 // Rounds 36-39 SHA1NEXTE X2, X6 VMOVDQA X0, X5 SHA1MSG2 X2, X3 SHA1RNDS4 $0x01, X6, X0 SHA1MSG1 X2, X1 PXOR X2, X4 // Rounds 40-43 SHA1NEXTE X3, X5 VMOVDQA X0, X6 SHA1MSG2 X3, X4 SHA1RNDS4 $0x02, X5, X0 SHA1MSG1 X3, X2 PXOR X3, X1 // Rounds 44-47 SHA1NEXTE X4, X6 VMOVDQA X0, X5 SHA1MSG2 X4, X1 SHA1RNDS4 $0x02, X6, X0 SHA1MSG1 X4, X3 PXOR X4, X2 // Rounds 48-51 SHA1NEXTE X1, X5 VMOVDQA X0, X6 SHA1MSG2 X1, X2 SHA1RNDS4 $0x02, X5, X0 SHA1MSG1 X1, X4 PXOR X1, X3 // Rounds 52-55 SHA1NEXTE X2, X6 VMOVDQA X0, X5 SHA1MSG2 X2, X3 SHA1RNDS4 $0x02, X6, X0 SHA1MSG1 X2, X1 PXOR X2, X4 // Rounds 56-59 SHA1NEXTE X3, X5 VMOVDQA X0, X6 SHA1MSG2 X3, X4 SHA1RNDS4 $0x02, X5, X0 SHA1MSG1 X3, X2 PXOR X3, X1 // Rounds 60-63 SHA1NEXTE X4, X6 VMOVDQA X0, X5 SHA1MSG2 X4, X1 SHA1RNDS4 $0x03, X6, X0 SHA1MSG1 X4, X3 PXOR X4, X2 // Rounds 64-67 SHA1NEXTE X1, X5 VMOVDQA X0, X6 SHA1MSG2 X1, X2 SHA1RNDS4 $0x03, X5, X0 SHA1MSG1 X1, X4 PXOR X1, X3 // Rounds 68-71 SHA1NEXTE X2, X6 VMOVDQA X0, X5 SHA1MSG2 X2, X3 SHA1RNDS4 $0x03, X6, X0 PXOR X2, X4 // Rounds 72-75 SHA1NEXTE X3, X5 VMOVDQA X0, X6 SHA1MSG2 X3, X4 SHA1RNDS4 $0x03, X5, X0 // Rounds 76-79 SHA1NEXTE X4, X6 VMOVDQA X0, X5 SHA1RNDS4 $0x03, X6, X0 // Add saved E and ABCD SHA1NEXTE (AX), X5 PADDD 16(AX), X0 // Check if we are done, if not return to the loop ADDQ $0x40, SI CMPQ SI, DX JNE loop // Write the hash state back to digest PSHUFD $0x1b, X0, X0 VMOVDQU X0, (DI) PEXTRD $0x03, X5, 16(DI) done: RET DATA upper_mask<>+0(SB)/8, $0x0000000000000000 DATA upper_mask<>+8(SB)/8, $0xffffffff00000000 GLOBL upper_mask<>(SB), RODATA, $16 DATA shuffle_mask<>+0(SB)/8, $0x08090a0b0c0d0e0f DATA shuffle_mask<>+8(SB)/8, $0x0001020304050607 GLOBL shuffle_mask<>(SB), RODATA, $16