#include
# Returns non-zero when OPENSSL_ia32cap_P reports all of AVX512F/DQ/BW/VL,
# VAES and VPCLMULQDQ (feature mask 0x600C0030000).
.globl	ossl_vaes_vpclmulqdq_capable
.type	ossl_vaes_vpclmulqdq_capable,@function
.align	32
ossl_vaes_vpclmulqdq_capable:
        movq OPENSSL_ia32cap_P+8(%rip),%rcx
        movq $6600291188736,%rdx
        xorl %eax,%eax
        andq %rdx,%rcx
        cmpq %rdx,%rcx
        cmoveq %rcx,%rax
        .byte 0xf3,0xc3
.size	ossl_vaes_vpclmulqdq_capable, .-ossl_vaes_vpclmulqdq_capable
.text
# Encrypts the all-zero block with the AES key schedule in %rdi to derive the
# GHASH hash key, then precomputes and caches powers of it in the context
# pointed to by %rsi.
.globl	ossl_aes_gcm_init_avx512
.type	ossl_aes_gcm_init_avx512,@function
.align	32
ossl_aes_gcm_init_avx512:
.cfi_startproc
        .byte 243,15,30,250
        vpxorq %xmm16,%xmm16,%xmm16
        movl 240(%rdi),%eax
        cmpl $9,%eax
        je .Laes_128_0
        cmpl $11,%eax
        je .Laes_192_0
        cmpl $13,%eax
        je .Laes_256_0
        jmp .Lexit_aes_0
.align	32
.Laes_128_0:
        vpxorq 0(%rdi),%xmm16,%xmm16
        vaesenc 16(%rdi),%xmm16,%xmm16
        vaesenc 32(%rdi),%xmm16,%xmm16
        vaesenc 48(%rdi),%xmm16,%xmm16
        vaesenc 64(%rdi),%xmm16,%xmm16
        vaesenc 80(%rdi),%xmm16,%xmm16
        vaesenc 96(%rdi),%xmm16,%xmm16
        vaesenc 112(%rdi),%xmm16,%xmm16
        vaesenc 128(%rdi),%xmm16,%xmm16
        vaesenc 144(%rdi),%xmm16,%xmm16
        vaesenclast 160(%rdi),%xmm16,%xmm16
        jmp .Lexit_aes_0
.align	32
.Laes_192_0:
        vpxorq 0(%rdi),%xmm16,%xmm16
        vaesenc 16(%rdi),%xmm16,%xmm16
        vaesenc 32(%rdi),%xmm16,%xmm16
        vaesenc 48(%rdi),%xmm16,%xmm16
        vaesenc 64(%rdi),%xmm16,%xmm16
        vaesenc 80(%rdi),%xmm16,%xmm16
        vaesenc 96(%rdi),%xmm16,%xmm16
        vaesenc 112(%rdi),%xmm16,%xmm16
        vaesenc 128(%rdi),%xmm16,%xmm16
        vaesenc 144(%rdi),%xmm16,%xmm16
        vaesenc 160(%rdi),%xmm16,%xmm16
        vaesenc 176(%rdi),%xmm16,%xmm16
        vaesenclast 192(%rdi),%xmm16,%xmm16
        jmp .Lexit_aes_0
.align	32
.Laes_256_0:
        vpxorq 0(%rdi),%xmm16,%xmm16
        vaesenc 16(%rdi),%xmm16,%xmm16
        vaesenc 32(%rdi),%xmm16,%xmm16
        vaesenc 48(%rdi),%xmm16,%xmm16
        vaesenc 64(%rdi),%xmm16,%xmm16
        vaesenc 80(%rdi),%xmm16,%xmm16
        vaesenc 96(%rdi),%xmm16,%xmm16
        vaesenc 112(%rdi),%xmm16,%xmm16
        vaesenc 128(%rdi),%xmm16,%xmm16
        vaesenc 144(%rdi),%xmm16,%xmm16
        vaesenc 160(%rdi),%xmm16,%xmm16
        vaesenc 176(%rdi),%xmm16,%xmm16
        vaesenc 192(%rdi),%xmm16,%xmm16
        vaesenc 208(%rdi),%xmm16,%xmm16
        vaesenclast 224(%rdi),%xmm16,%xmm16
        jmp .Lexit_aes_0
.Lexit_aes_0:
# Byte-reflect E_K(0) and double it in GF(2^128) (conditional reduction by
# POLY) to form the hash key; store it at 336(%rsi), then derive its powers.
        vpshufb SHUF_MASK(%rip),%xmm16,%xmm16
        vmovdqa64 %xmm16,%xmm2
        vpsllq $1,%xmm16,%xmm16
        vpsrlq $63,%xmm2,%xmm2
        vmovdqa %xmm2,%xmm1
        vpslldq $8,%xmm2,%xmm2
        vpsrldq $8,%xmm1,%xmm1
        vporq %xmm2,%xmm16,%xmm16
        vpshufd $36,%xmm1,%xmm2
        vpcmpeqd TWOONE(%rip),%xmm2,%xmm2
        vpand POLY(%rip),%xmm2,%xmm2
        vpxorq %xmm2,%xmm16,%xmm16
        vmovdqu64 %xmm16,336(%rsi)
        vshufi32x4 $0x00,%ymm16,%ymm16,%ymm4
        vmovdqa %ymm4,%ymm3
        vpclmulqdq $0x11,%ymm4,%ymm3,%ymm0
        vpclmulqdq $0x00,%ymm4,%ymm3,%ymm1
        vpclmulqdq $0x01,%ymm4,%ymm3,%ymm2
        vpclmulqdq $0x10,%ymm4,%ymm3,%ymm3
        vpxorq %ymm2,%ymm3,%ymm3
        vpsrldq $8,%ymm3,%ymm2
        vpslldq $8,%ymm3,%ymm3
        vpxorq %ymm2,%ymm0,%ymm0
        vpxorq %ymm1,%ymm3,%ymm3
        vmovdqu64 POLY2(%rip),%ymm2
        vpclmulqdq $0x01,%ymm3,%ymm2,%ymm1
        vpslldq $8,%ymm1,%ymm1
        vpxorq %ymm1,%ymm3,%ymm3
        vpclmulqdq $0x00,%ymm3,%ymm2,%ymm1
        vpsrldq $4,%ymm1,%ymm1
        vpclmulqdq $0x10,%ymm3,%ymm2,%ymm3
        vpslldq $4,%ymm3,%ymm3
        vpternlogq $0x96,%ymm1,%ymm0,%ymm3
        vmovdqu64 %xmm3,320(%rsi)
        vinserti64x2 $1,%xmm16,%ymm3,%ymm4
        vmovdqa64 %ymm4,%ymm5
        vpclmulqdq $0x11,%ymm3,%ymm4,%ymm0
        vpclmulqdq $0x00,%ymm3,%ymm4,%ymm1
        vpclmulqdq $0x01,%ymm3,%ymm4,%ymm2
        vpclmulqdq $0x10,%ymm3,%ymm4,%ymm4
        vpxorq %ymm2,%ymm4,%ymm4
        vpsrldq $8,%ymm4,%ymm2
        vpslldq $8,%ymm4,%ymm4
        vpxorq %ymm2,%ymm0,%ymm0
        vpxorq %ymm1,%ymm4,%ymm4
        vmovdqu64 POLY2(%rip),%ymm2
        vpclmulqdq $0x01,%ymm4,%ymm2,%ymm1
        vpslldq $8,%ymm1,%ymm1
        vpxorq %ymm1,%ymm4,%ymm4
        vpclmulqdq $0x00,%ymm4,%ymm2,%ymm1
        vpsrldq $4,%ymm1,%ymm1
        vpclmulqdq $0x10,%ymm4,%ymm2,%ymm4
        vpslldq $4,%ymm4,%ymm4
        vpternlogq $0x96,%ymm1,%ymm0,%ymm4
        vmovdqu64
%ymm4,288(%rsi) vinserti64x4 $1,%ymm5,%zmm4,%zmm4 vshufi64x2 $0x00,%zmm4,%zmm4,%zmm3 vmovdqa64 %zmm4,%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm0 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm1 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm2 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm2,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm2 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm2,%zmm0,%zmm0 vpxorq %zmm1,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm2 vpclmulqdq $0x01,%zmm4,%zmm2,%zmm1 vpslldq $8,%zmm1,%zmm1 vpxorq %zmm1,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm2,%zmm1 vpsrldq $4,%zmm1,%zmm1 vpclmulqdq $0x10,%zmm4,%zmm2,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm1,%zmm0,%zmm4 vmovdqu64 %zmm4,224(%rsi) vshufi64x2 $0x00,%zmm4,%zmm4,%zmm3 vpclmulqdq $0x11,%zmm3,%zmm5,%zmm0 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm1 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm2 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm2,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm2 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm2,%zmm0,%zmm0 vpxorq %zmm1,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm2 vpclmulqdq $0x01,%zmm5,%zmm2,%zmm1 vpslldq $8,%zmm1,%zmm1 vpxorq %zmm1,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm2,%zmm1 vpsrldq $4,%zmm1,%zmm1 vpclmulqdq $0x10,%zmm5,%zmm2,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm1,%zmm0,%zmm5 vmovdqu64 %zmm5,160(%rsi) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm0 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm1 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm2 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm2,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm2 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm2,%zmm0,%zmm0 vpxorq %zmm1,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm2 vpclmulqdq $0x01,%zmm4,%zmm2,%zmm1 vpslldq $8,%zmm1,%zmm1 vpxorq %zmm1,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm2,%zmm1 vpsrldq $4,%zmm1,%zmm1 vpclmulqdq $0x10,%zmm4,%zmm2,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm1,%zmm0,%zmm4 vmovdqu64 %zmm4,96(%rsi) vzeroupper .Labort_init: .byte 0xf3,0xc3 .cfi_endproc .size ossl_aes_gcm_init_avx512, .-ossl_aes_gcm_init_avx512 .globl ossl_aes_gcm_setiv_avx512 .type ossl_aes_gcm_setiv_avx512,@function .align 32 ossl_aes_gcm_setiv_avx512: .cfi_startproc .Lsetiv_seh_begin: .byte 243,15,30,250 pushq %rbx .cfi_adjust_cfa_offset 8 .cfi_offset %rbx,-16 .Lsetiv_seh_push_rbx: pushq %rbp .cfi_adjust_cfa_offset 8 .cfi_offset %rbp,-24 .Lsetiv_seh_push_rbp: pushq %r12 .cfi_adjust_cfa_offset 8 .cfi_offset %r12,-32 .Lsetiv_seh_push_r12: pushq %r13 .cfi_adjust_cfa_offset 8 .cfi_offset %r13,-40 .Lsetiv_seh_push_r13: pushq %r14 .cfi_adjust_cfa_offset 8 .cfi_offset %r14,-48 .Lsetiv_seh_push_r14: pushq %r15 .cfi_adjust_cfa_offset 8 .cfi_offset %r15,-56 .Lsetiv_seh_push_r15: leaq 0(%rsp),%rbp .cfi_def_cfa_register %rbp .Lsetiv_seh_setfp: .Lsetiv_seh_prolog_end: subq $820,%rsp andq $(-64),%rsp cmpq $12,%rcx je iv_len_12_init_IV vpxor %xmm2,%xmm2,%xmm2 movq %rdx,%r10 movq %rcx,%r11 orq %r11,%r11 jz .L_CALC_AAD_done_1 xorq %rbx,%rbx vmovdqa64 SHUF_MASK(%rip),%zmm16 .L_get_AAD_loop48x16_1: cmpq $768,%r11 jl .L_exit_AAD_loop48x16_1 vmovdqu64 0(%r10),%zmm11 vmovdqu64 64(%r10),%zmm3 vmovdqu64 128(%r10),%zmm4 vmovdqu64 192(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 testq %rbx,%rbx jnz .L_skip_hkeys_precomputation_2 vmovdqu64 288(%rsi),%zmm1 vmovdqu64 %zmm1,704(%rsp) vmovdqu64 224(%rsi),%zmm9 vmovdqu64 %zmm9,640(%rsp) vshufi64x2 $0x00,%zmm9,%zmm9,%zmm9 vmovdqu64 160(%rsi),%zmm10 vmovdqu64 %zmm10,576(%rsp) vmovdqu64 96(%rsi),%zmm12 vmovdqu64 %zmm12,512(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq 
$0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,448(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,384(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,320(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,256(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,192(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,128(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 
vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,64(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,0(%rsp) .L_skip_hkeys_precomputation_2: movq $1,%rbx vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 0(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 64(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpxorq %zmm17,%zmm10,%zmm7 vpxorq %zmm13,%zmm1,%zmm6 vpxorq %zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 128(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 192(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 256(%r10),%zmm11 vmovdqu64 320(%r10),%zmm3 vmovdqu64 384(%r10),%zmm4 vmovdqu64 448(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vmovdqu64 256(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 320(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 384(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 448(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 512(%r10),%zmm11 vmovdqu64 576(%r10),%zmm3 vmovdqu64 640(%r10),%zmm4 vmovdqu64 704(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vmovdqu64 512(%rsp),%zmm19 
vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 576(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 640(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 704(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vpsrldq $8,%zmm7,%zmm1 vpslldq $8,%zmm7,%zmm9 vpxorq %zmm1,%zmm6,%zmm6 vpxorq %zmm9,%zmm8,%zmm8 vextracti64x4 $1,%zmm6,%ymm1 vpxorq %ymm1,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm1 vpxorq %xmm1,%xmm6,%xmm6 vextracti64x4 $1,%zmm8,%ymm9 vpxorq %ymm9,%ymm8,%ymm8 vextracti32x4 $1,%ymm8,%xmm9 vpxorq %xmm9,%xmm8,%xmm8 vmovdqa64 POLY2(%rip),%xmm10 vpclmulqdq $0x01,%xmm8,%xmm10,%xmm1 vpslldq $8,%xmm1,%xmm1 vpxorq %xmm1,%xmm8,%xmm1 vpclmulqdq $0x00,%xmm1,%xmm10,%xmm9 vpsrldq $4,%xmm9,%xmm9 vpclmulqdq $0x10,%xmm1,%xmm10,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm6,%xmm9,%xmm2 subq $768,%r11 je .L_CALC_AAD_done_1 addq $768,%r10 jmp .L_get_AAD_loop48x16_1 .L_exit_AAD_loop48x16_1: cmpq $512,%r11 jl .L_less_than_32x16_1 vmovdqu64 0(%r10),%zmm11 vmovdqu64 64(%r10),%zmm3 vmovdqu64 128(%r10),%zmm4 vmovdqu64 192(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 testq %rbx,%rbx jnz .L_skip_hkeys_precomputation_3 vmovdqu64 288(%rsi),%zmm1 vmovdqu64 %zmm1,704(%rsp) vmovdqu64 224(%rsi),%zmm9 vmovdqu64 %zmm9,640(%rsp) vshufi64x2 $0x00,%zmm9,%zmm9,%zmm9 vmovdqu64 160(%rsi),%zmm10 vmovdqu64 %zmm10,576(%rsp) vmovdqu64 96(%rsi),%zmm12 vmovdqu64 %zmm12,512(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,448(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,384(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq 
%zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,320(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,256(%rsp) .L_skip_hkeys_precomputation_3: movq $1,%rbx vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 256(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 320(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpxorq %zmm17,%zmm10,%zmm7 vpxorq %zmm13,%zmm1,%zmm6 vpxorq %zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 384(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 448(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 256(%r10),%zmm11 vmovdqu64 320(%r10),%zmm3 vmovdqu64 384(%r10),%zmm4 vmovdqu64 448(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vmovdqu64 512(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 576(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 640(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 704(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vpsrldq $8,%zmm7,%zmm1 vpslldq $8,%zmm7,%zmm9 vpxorq %zmm1,%zmm6,%zmm6 vpxorq %zmm9,%zmm8,%zmm8 vextracti64x4 $1,%zmm6,%ymm1 vpxorq %ymm1,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm1 vpxorq %xmm1,%xmm6,%xmm6 vextracti64x4 $1,%zmm8,%ymm9 vpxorq %ymm9,%ymm8,%ymm8 vextracti32x4 
$1,%ymm8,%xmm9 vpxorq %xmm9,%xmm8,%xmm8 vmovdqa64 POLY2(%rip),%xmm10 vpclmulqdq $0x01,%xmm8,%xmm10,%xmm1 vpslldq $8,%xmm1,%xmm1 vpxorq %xmm1,%xmm8,%xmm1 vpclmulqdq $0x00,%xmm1,%xmm10,%xmm9 vpsrldq $4,%xmm9,%xmm9 vpclmulqdq $0x10,%xmm1,%xmm10,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm6,%xmm9,%xmm2 subq $512,%r11 je .L_CALC_AAD_done_1 addq $512,%r10 jmp .L_less_than_16x16_1 .L_less_than_32x16_1: cmpq $256,%r11 jl .L_less_than_16x16_1 vmovdqu64 0(%r10),%zmm11 vmovdqu64 64(%r10),%zmm3 vmovdqu64 128(%r10),%zmm4 vmovdqu64 192(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 96(%rsi),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 160(%rsi),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpxorq %zmm17,%zmm10,%zmm7 vpxorq %zmm13,%zmm1,%zmm6 vpxorq %zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 224(%rsi),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 288(%rsi),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vpsrldq $8,%zmm7,%zmm1 vpslldq $8,%zmm7,%zmm9 vpxorq %zmm1,%zmm6,%zmm6 vpxorq %zmm9,%zmm8,%zmm8 vextracti64x4 $1,%zmm6,%ymm1 vpxorq %ymm1,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm1 vpxorq %xmm1,%xmm6,%xmm6 vextracti64x4 $1,%zmm8,%ymm9 vpxorq %ymm9,%ymm8,%ymm8 vextracti32x4 $1,%ymm8,%xmm9 vpxorq %xmm9,%xmm8,%xmm8 vmovdqa64 POLY2(%rip),%xmm10 vpclmulqdq $0x01,%xmm8,%xmm10,%xmm1 vpslldq $8,%xmm1,%xmm1 vpxorq %xmm1,%xmm8,%xmm1 vpclmulqdq $0x00,%xmm1,%xmm10,%xmm9 vpsrldq $4,%xmm9,%xmm9 vpclmulqdq $0x10,%xmm1,%xmm10,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm6,%xmm9,%xmm2 subq $256,%r11 je .L_CALC_AAD_done_1 addq $256,%r10 .L_less_than_16x16_1: leaq byte64_len_to_mask_table(%rip),%r12 leaq (%r12,%r11,8),%r12 addl $15,%r11d shrl $4,%r11d cmpl $2,%r11d jb .L_AAD_blocks_1_1 je .L_AAD_blocks_2_1 cmpl $4,%r11d jb .L_AAD_blocks_3_1 je .L_AAD_blocks_4_1 cmpl $6,%r11d jb .L_AAD_blocks_5_1 je .L_AAD_blocks_6_1 cmpl $8,%r11d jb .L_AAD_blocks_7_1 je .L_AAD_blocks_8_1 cmpl $10,%r11d jb .L_AAD_blocks_9_1 je .L_AAD_blocks_10_1 cmpl $12,%r11d jb .L_AAD_blocks_11_1 je .L_AAD_blocks_12_1 cmpl $14,%r11d jb .L_AAD_blocks_13_1 je .L_AAD_blocks_14_1 cmpl $15,%r11d je .L_AAD_blocks_15_1 .L_AAD_blocks_16_1: subq $1536,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4 vmovdqu8 192(%r10),%zmm5{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 96(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 160(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 224(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq 
$0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm9,%zmm11,%zmm1 vpternlogq $0x96,%zmm10,%zmm3,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm12,%zmm11,%zmm7 vpternlogq $0x96,%zmm13,%zmm3,%zmm8 vmovdqu64 288(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm5,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm5,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm5,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm5,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_1 .L_AAD_blocks_15_1: subq $1536,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4 vmovdqu8 192(%r10),%zmm5{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 112(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 176(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 240(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vmovdqu64 304(%rsi),%ymm15 vinserti64x2 $2,336(%rsi),%zmm15,%zmm15 vpclmulqdq $0x01,%zmm15,%zmm5,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm5,%zmm8 vpclmulqdq $0x11,%zmm15,%zmm5,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm5,%zmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_1 .L_AAD_blocks_14_1: subq $1536,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4 vmovdqu8 192(%r10),%ymm5{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %ymm16,%ymm5,%ymm5 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 128(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 
vmovdqu64 192(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 256(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vmovdqu64 320(%rsi),%ymm15 vpclmulqdq $0x01,%ymm15,%ymm5,%ymm7 vpclmulqdq $0x10,%ymm15,%ymm5,%ymm8 vpclmulqdq $0x11,%ymm15,%ymm5,%ymm1 vpclmulqdq $0x00,%ymm15,%ymm5,%ymm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_1 .L_AAD_blocks_13_1: subq $1536,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4 vmovdqu8 192(%r10),%xmm5{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %xmm16,%xmm5,%xmm5 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 144(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 208(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 272(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vmovdqu64 336(%rsi),%xmm15 vpclmulqdq $0x01,%xmm15,%xmm5,%xmm7 vpclmulqdq $0x10,%xmm15,%xmm5,%xmm8 vpclmulqdq $0x11,%xmm15,%xmm5,%xmm1 vpclmulqdq $0x00,%xmm15,%xmm5,%xmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_1 .L_AAD_blocks_12_1: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 160(%rsi),%zmm15 
vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 224(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 288(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_1 .L_AAD_blocks_11_1: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 176(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 240(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vmovdqu64 304(%rsi),%ymm15 vinserti64x2 $2,336(%rsi),%zmm15,%zmm15 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm8 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_1 .L_AAD_blocks_10_1: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%ymm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %ymm16,%ymm4,%ymm4 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 192(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 256(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 
vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vmovdqu64 320(%rsi),%ymm15 vpclmulqdq $0x01,%ymm15,%ymm4,%ymm7 vpclmulqdq $0x10,%ymm15,%ymm4,%ymm8 vpclmulqdq $0x11,%ymm15,%ymm4,%ymm1 vpclmulqdq $0x00,%ymm15,%ymm4,%ymm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_1 .L_AAD_blocks_9_1: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%xmm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %xmm16,%xmm4,%xmm4 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 208(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 272(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vmovdqu64 336(%rsi),%xmm15 vpclmulqdq $0x01,%xmm15,%xmm4,%xmm7 vpclmulqdq $0x10,%xmm15,%xmm4,%xmm8 vpclmulqdq $0x11,%xmm15,%xmm4,%xmm1 vpclmulqdq $0x00,%xmm15,%xmm4,%xmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_1 .L_AAD_blocks_8_1: subq $512,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 224(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 288(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 
vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_1 .L_AAD_blocks_7_1: subq $512,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 240(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 vmovdqu64 304(%rsi),%ymm15 vinserti64x2 $2,336(%rsi),%zmm15,%zmm15 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm8 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_1 .L_AAD_blocks_6_1: subq $512,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%ymm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %ymm16,%ymm3,%ymm3 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 256(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 vmovdqu64 320(%rsi),%ymm15 vpclmulqdq $0x01,%ymm15,%ymm3,%ymm7 vpclmulqdq $0x10,%ymm15,%ymm3,%ymm8 vpclmulqdq $0x11,%ymm15,%ymm3,%ymm1 vpclmulqdq $0x00,%ymm15,%ymm3,%ymm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_1 .L_AAD_blocks_5_1: subq $512,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%xmm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %xmm16,%xmm3,%xmm3 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 272(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 vmovdqu64 336(%rsi),%xmm15 vpclmulqdq $0x01,%xmm15,%xmm3,%xmm7 vpclmulqdq $0x10,%xmm15,%xmm3,%xmm8 vpclmulqdq $0x11,%xmm15,%xmm3,%xmm1 vpclmulqdq $0x00,%xmm15,%xmm3,%xmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq 
%zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_1 .L_AAD_blocks_4_1: kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 288(%rsi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_1 .L_AAD_blocks_3_1: kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 304(%rsi),%ymm15 vinserti64x2 $2,336(%rsi),%zmm15,%zmm15 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_1 .L_AAD_blocks_2_1: kmovq (%r12),%k1 vmovdqu8 0(%r10),%ymm11{%k1}{z} vpshufb %ymm16,%ymm11,%ymm11 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 320(%rsi),%ymm15 vpclmulqdq $0x01,%ymm15,%ymm11,%ymm7 vpclmulqdq $0x10,%ymm15,%ymm11,%ymm8 vpclmulqdq $0x11,%ymm15,%ymm11,%ymm1 vpclmulqdq $0x00,%ymm15,%ymm11,%ymm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 jmp .L_CALC_AAD_done_1 
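# Single-block tail: load the last (possibly partial) 16 bytes of AAD under
# mask %k1, fold them into the GHASH accumulator and perform the final
# reduction before falling through to .L_CALC_AAD_done_1.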
.L_AAD_blocks_1_1: kmovq (%r12),%k1 vmovdqu8 0(%r10),%xmm11{%k1}{z} vpshufb %xmm16,%xmm11,%xmm11 vpxorq %zmm2,%zmm11,%zmm11 vmovdqu64 336(%rsi),%xmm15 vpclmulqdq $0x01,%xmm15,%xmm11,%xmm7 vpclmulqdq $0x10,%xmm15,%xmm11,%xmm8 vpclmulqdq $0x11,%xmm15,%xmm11,%xmm1 vpclmulqdq $0x00,%xmm15,%xmm11,%xmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm1,%xmm8,%xmm2 .L_CALC_AAD_done_1: movq %rcx,%r10 shlq $3,%r10 vmovq %r10,%xmm3 vpxorq %xmm2,%xmm3,%xmm2 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x11,%xmm1,%xmm2,%xmm11 vpclmulqdq $0x00,%xmm1,%xmm2,%xmm3 vpclmulqdq $0x01,%xmm1,%xmm2,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm2,%xmm2 vpxorq %xmm4,%xmm2,%xmm2 vpsrldq $8,%xmm2,%xmm4 vpslldq $8,%xmm2,%xmm2 vpxorq %xmm4,%xmm11,%xmm11 vpxorq %xmm3,%xmm2,%xmm2 vmovdqu64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm2,%xmm4,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm2,%xmm2 vpclmulqdq $0x00,%xmm2,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm2,%xmm4,%xmm2 vpslldq $4,%xmm2,%xmm2 vpternlogq $0x96,%xmm3,%xmm11,%xmm2 vpshufb SHUF_MASK(%rip),%xmm2,%xmm2 jmp skip_iv_len_12_init_IV iv_len_12_init_IV: vmovdqu8 ONEf(%rip),%xmm2 movq %rdx,%r11 movl $0x0000000000000fff,%r10d kmovq %r10,%k1 vmovdqu8 (%r11),%xmm2{%k1} skip_iv_len_12_init_IV: vmovdqu %xmm2,%xmm1 movl 240(%rdi),%r10d cmpl $9,%r10d je .Laes_128_4 cmpl $11,%r10d je .Laes_192_4 cmpl $13,%r10d je .Laes_256_4 jmp .Lexit_aes_4 .align 32 .Laes_128_4: vpxorq 0(%rdi),%xmm1,%xmm1 vaesenc 16(%rdi),%xmm1,%xmm1 vaesenc 32(%rdi),%xmm1,%xmm1 vaesenc 48(%rdi),%xmm1,%xmm1 vaesenc 64(%rdi),%xmm1,%xmm1 vaesenc 80(%rdi),%xmm1,%xmm1 vaesenc 96(%rdi),%xmm1,%xmm1 vaesenc 112(%rdi),%xmm1,%xmm1 vaesenc 128(%rdi),%xmm1,%xmm1 vaesenc 144(%rdi),%xmm1,%xmm1 vaesenclast 160(%rdi),%xmm1,%xmm1 jmp .Lexit_aes_4 .align 32 .Laes_192_4: vpxorq 0(%rdi),%xmm1,%xmm1 vaesenc 16(%rdi),%xmm1,%xmm1 vaesenc 32(%rdi),%xmm1,%xmm1 vaesenc 48(%rdi),%xmm1,%xmm1 vaesenc 64(%rdi),%xmm1,%xmm1 vaesenc 80(%rdi),%xmm1,%xmm1 vaesenc 96(%rdi),%xmm1,%xmm1 vaesenc 112(%rdi),%xmm1,%xmm1 vaesenc 128(%rdi),%xmm1,%xmm1 vaesenc 144(%rdi),%xmm1,%xmm1 vaesenc 160(%rdi),%xmm1,%xmm1 vaesenc 176(%rdi),%xmm1,%xmm1 vaesenclast 192(%rdi),%xmm1,%xmm1 jmp .Lexit_aes_4 .align 32 .Laes_256_4: vpxorq 0(%rdi),%xmm1,%xmm1 vaesenc 16(%rdi),%xmm1,%xmm1 vaesenc 32(%rdi),%xmm1,%xmm1 vaesenc 48(%rdi),%xmm1,%xmm1 vaesenc 64(%rdi),%xmm1,%xmm1 vaesenc 80(%rdi),%xmm1,%xmm1 vaesenc 96(%rdi),%xmm1,%xmm1 vaesenc 112(%rdi),%xmm1,%xmm1 vaesenc 128(%rdi),%xmm1,%xmm1 vaesenc 144(%rdi),%xmm1,%xmm1 vaesenc 160(%rdi),%xmm1,%xmm1 vaesenc 176(%rdi),%xmm1,%xmm1 vaesenc 192(%rdi),%xmm1,%xmm1 vaesenc 208(%rdi),%xmm1,%xmm1 vaesenclast 224(%rdi),%xmm1,%xmm1 jmp .Lexit_aes_4 .Lexit_aes_4: vmovdqu %xmm1,32(%rsi) vpshufb SHUF_MASK(%rip),%xmm2,%xmm2 vmovdqu %xmm2,0(%rsi) cmpq $256,%rcx jbe .Lskip_hkeys_cleanup_5 vpxor %xmm0,%xmm0,%xmm0 vmovdqa64 %zmm0,0(%rsp) vmovdqa64 %zmm0,64(%rsp) vmovdqa64 %zmm0,128(%rsp) vmovdqa64 %zmm0,192(%rsp) vmovdqa64 %zmm0,256(%rsp) vmovdqa64 %zmm0,320(%rsp) vmovdqa64 %zmm0,384(%rsp) vmovdqa64 
%zmm0,448(%rsp) vmovdqa64 %zmm0,512(%rsp) vmovdqa64 %zmm0,576(%rsp) vmovdqa64 %zmm0,640(%rsp) vmovdqa64 %zmm0,704(%rsp) .Lskip_hkeys_cleanup_5: vzeroupper leaq (%rbp),%rsp .cfi_def_cfa_register %rsp popq %r15 .cfi_adjust_cfa_offset -8 .cfi_restore %r15 popq %r14 .cfi_adjust_cfa_offset -8 .cfi_restore %r14 popq %r13 .cfi_adjust_cfa_offset -8 .cfi_restore %r13 popq %r12 .cfi_adjust_cfa_offset -8 .cfi_restore %r12 popq %rbp .cfi_adjust_cfa_offset -8 .cfi_restore %rbp popq %rbx .cfi_adjust_cfa_offset -8 .cfi_restore %rbx .Labort_setiv: .byte 0xf3,0xc3 .Lsetiv_seh_end: .cfi_endproc .size ossl_aes_gcm_setiv_avx512, .-ossl_aes_gcm_setiv_avx512 .globl ossl_aes_gcm_update_aad_avx512 .type ossl_aes_gcm_update_aad_avx512,@function .align 32 ossl_aes_gcm_update_aad_avx512: .cfi_startproc .Lghash_seh_begin: .byte 243,15,30,250 pushq %rbx .cfi_adjust_cfa_offset 8 .cfi_offset %rbx,-16 .Lghash_seh_push_rbx: pushq %rbp .cfi_adjust_cfa_offset 8 .cfi_offset %rbp,-24 .Lghash_seh_push_rbp: pushq %r12 .cfi_adjust_cfa_offset 8 .cfi_offset %r12,-32 .Lghash_seh_push_r12: pushq %r13 .cfi_adjust_cfa_offset 8 .cfi_offset %r13,-40 .Lghash_seh_push_r13: pushq %r14 .cfi_adjust_cfa_offset 8 .cfi_offset %r14,-48 .Lghash_seh_push_r14: pushq %r15 .cfi_adjust_cfa_offset 8 .cfi_offset %r15,-56 .Lghash_seh_push_r15: leaq 0(%rsp),%rbp .cfi_def_cfa_register %rbp .Lghash_seh_setfp: .Lghash_seh_prolog_end: subq $820,%rsp andq $(-64),%rsp vmovdqu64 64(%rdi),%xmm14 movq %rsi,%r10 movq %rdx,%r11 orq %r11,%r11 jz .L_CALC_AAD_done_6 xorq %rbx,%rbx vmovdqa64 SHUF_MASK(%rip),%zmm16 .L_get_AAD_loop48x16_6: cmpq $768,%r11 jl .L_exit_AAD_loop48x16_6 vmovdqu64 0(%r10),%zmm11 vmovdqu64 64(%r10),%zmm3 vmovdqu64 128(%r10),%zmm4 vmovdqu64 192(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 testq %rbx,%rbx jnz .L_skip_hkeys_precomputation_7 vmovdqu64 288(%rdi),%zmm1 vmovdqu64 %zmm1,704(%rsp) vmovdqu64 224(%rdi),%zmm9 vmovdqu64 %zmm9,640(%rsp) vshufi64x2 $0x00,%zmm9,%zmm9,%zmm9 vmovdqu64 160(%rdi),%zmm10 vmovdqu64 %zmm10,576(%rsp) vmovdqu64 96(%rdi),%zmm12 vmovdqu64 %zmm12,512(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,448(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,384(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq 
%zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,320(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,256(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,192(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,128(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,64(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,0(%rsp) .L_skip_hkeys_precomputation_7: movq $1,%rbx vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 0(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq 
$0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 64(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpxorq %zmm17,%zmm10,%zmm7 vpxorq %zmm13,%zmm1,%zmm6 vpxorq %zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 128(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 192(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 256(%r10),%zmm11 vmovdqu64 320(%r10),%zmm3 vmovdqu64 384(%r10),%zmm4 vmovdqu64 448(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vmovdqu64 256(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 320(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 384(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 448(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 512(%r10),%zmm11 vmovdqu64 576(%r10),%zmm3 vmovdqu64 640(%r10),%zmm4 vmovdqu64 704(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vmovdqu64 512(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 576(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 640(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 704(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vpsrldq $8,%zmm7,%zmm1 vpslldq $8,%zmm7,%zmm9 vpxorq %zmm1,%zmm6,%zmm6 vpxorq %zmm9,%zmm8,%zmm8 vextracti64x4 $1,%zmm6,%ymm1 vpxorq %ymm1,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm1 vpxorq %xmm1,%xmm6,%xmm6 vextracti64x4 $1,%zmm8,%ymm9 vpxorq %ymm9,%ymm8,%ymm8 vextracti32x4 $1,%ymm8,%xmm9 vpxorq %xmm9,%xmm8,%xmm8 vmovdqa64 
POLY2(%rip),%xmm10 vpclmulqdq $0x01,%xmm8,%xmm10,%xmm1 vpslldq $8,%xmm1,%xmm1 vpxorq %xmm1,%xmm8,%xmm1 vpclmulqdq $0x00,%xmm1,%xmm10,%xmm9 vpsrldq $4,%xmm9,%xmm9 vpclmulqdq $0x10,%xmm1,%xmm10,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm6,%xmm9,%xmm14 subq $768,%r11 je .L_CALC_AAD_done_6 addq $768,%r10 jmp .L_get_AAD_loop48x16_6 .L_exit_AAD_loop48x16_6: cmpq $512,%r11 jl .L_less_than_32x16_6 vmovdqu64 0(%r10),%zmm11 vmovdqu64 64(%r10),%zmm3 vmovdqu64 128(%r10),%zmm4 vmovdqu64 192(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 testq %rbx,%rbx jnz .L_skip_hkeys_precomputation_8 vmovdqu64 288(%rdi),%zmm1 vmovdqu64 %zmm1,704(%rsp) vmovdqu64 224(%rdi),%zmm9 vmovdqu64 %zmm9,640(%rsp) vshufi64x2 $0x00,%zmm9,%zmm9,%zmm9 vmovdqu64 160(%rdi),%zmm10 vmovdqu64 %zmm10,576(%rsp) vmovdqu64 96(%rdi),%zmm12 vmovdqu64 %zmm12,512(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,448(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,384(%rsp) vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 vpxorq %zmm17,%zmm10,%zmm10 vpsrldq $8,%zmm10,%zmm17 vpslldq $8,%zmm10,%zmm10 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm10,%zmm10 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm10,%zmm10 vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 vpslldq $4,%zmm10,%zmm10 vpternlogq $0x96,%zmm15,%zmm13,%zmm10 vmovdqu64 %zmm10,320(%rsp) vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 vpxorq %zmm17,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm17 vpslldq $8,%zmm12,%zmm12 vpxorq %zmm17,%zmm13,%zmm13 vpxorq %zmm15,%zmm12,%zmm12 vmovdqu64 POLY2(%rip),%zmm17 vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 vpslldq $8,%zmm15,%zmm15 vpxorq %zmm15,%zmm12,%zmm12 vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 vpsrldq $4,%zmm15,%zmm15 vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 vpslldq $4,%zmm12,%zmm12 vpternlogq $0x96,%zmm15,%zmm13,%zmm12 vmovdqu64 %zmm12,256(%rsp) .L_skip_hkeys_precomputation_8: movq $1,%rbx vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 256(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 
320(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpxorq %zmm17,%zmm10,%zmm7 vpxorq %zmm13,%zmm1,%zmm6 vpxorq %zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 384(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 448(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 256(%r10),%zmm11 vmovdqu64 320(%r10),%zmm3 vmovdqu64 384(%r10),%zmm4 vmovdqu64 448(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vmovdqu64 512(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 576(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 640(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 704(%rsp),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vpsrldq $8,%zmm7,%zmm1 vpslldq $8,%zmm7,%zmm9 vpxorq %zmm1,%zmm6,%zmm6 vpxorq %zmm9,%zmm8,%zmm8 vextracti64x4 $1,%zmm6,%ymm1 vpxorq %ymm1,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm1 vpxorq %xmm1,%xmm6,%xmm6 vextracti64x4 $1,%zmm8,%ymm9 vpxorq %ymm9,%ymm8,%ymm8 vextracti32x4 $1,%ymm8,%xmm9 vpxorq %xmm9,%xmm8,%xmm8 vmovdqa64 POLY2(%rip),%xmm10 vpclmulqdq $0x01,%xmm8,%xmm10,%xmm1 vpslldq $8,%xmm1,%xmm1 vpxorq %xmm1,%xmm8,%xmm1 vpclmulqdq $0x00,%xmm1,%xmm10,%xmm9 vpsrldq $4,%xmm9,%xmm9 vpclmulqdq $0x10,%xmm1,%xmm10,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm6,%xmm9,%xmm14 subq $512,%r11 je .L_CALC_AAD_done_6 addq $512,%r10 jmp .L_less_than_16x16_6 .L_less_than_32x16_6: cmpq $256,%r11 jl .L_less_than_16x16_6 vmovdqu64 0(%r10),%zmm11 vmovdqu64 64(%r10),%zmm3 vmovdqu64 128(%r10),%zmm4 vmovdqu64 192(%r10),%zmm5 vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 96(%rdi),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 vmovdqu64 160(%rdi),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 vpxorq %zmm17,%zmm10,%zmm7 vpxorq %zmm13,%zmm1,%zmm6 vpxorq %zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vmovdqu64 224(%rdi),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 
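# GHASH of a 16-block (256-byte) AAD tail: each 4-block group is carry-less multiplied
# (vpclmulqdq) against the matching precomputed powers of H in the key structure
# (96/160/224/288(%rdi)), and the partial products are accumulated with vpternlogq
# before the final reduction against POLY2.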
vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 vmovdqu64 288(%rdi),%zmm19 vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 vpternlogq $0x96,%zmm17,%zmm10,%zmm7 vpternlogq $0x96,%zmm13,%zmm1,%zmm6 vpternlogq $0x96,%zmm15,%zmm9,%zmm8 vpternlogq $0x96,%zmm18,%zmm12,%zmm7 vpsrldq $8,%zmm7,%zmm1 vpslldq $8,%zmm7,%zmm9 vpxorq %zmm1,%zmm6,%zmm6 vpxorq %zmm9,%zmm8,%zmm8 vextracti64x4 $1,%zmm6,%ymm1 vpxorq %ymm1,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm1 vpxorq %xmm1,%xmm6,%xmm6 vextracti64x4 $1,%zmm8,%ymm9 vpxorq %ymm9,%ymm8,%ymm8 vextracti32x4 $1,%ymm8,%xmm9 vpxorq %xmm9,%xmm8,%xmm8 vmovdqa64 POLY2(%rip),%xmm10 vpclmulqdq $0x01,%xmm8,%xmm10,%xmm1 vpslldq $8,%xmm1,%xmm1 vpxorq %xmm1,%xmm8,%xmm1 vpclmulqdq $0x00,%xmm1,%xmm10,%xmm9 vpsrldq $4,%xmm9,%xmm9 vpclmulqdq $0x10,%xmm1,%xmm10,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm6,%xmm9,%xmm14 subq $256,%r11 je .L_CALC_AAD_done_6 addq $256,%r10 .L_less_than_16x16_6: leaq byte64_len_to_mask_table(%rip),%r12 leaq (%r12,%r11,8),%r12 addl $15,%r11d shrl $4,%r11d cmpl $2,%r11d jb .L_AAD_blocks_1_6 je .L_AAD_blocks_2_6 cmpl $4,%r11d jb .L_AAD_blocks_3_6 je .L_AAD_blocks_4_6 cmpl $6,%r11d jb .L_AAD_blocks_5_6 je .L_AAD_blocks_6_6 cmpl $8,%r11d jb .L_AAD_blocks_7_6 je .L_AAD_blocks_8_6 cmpl $10,%r11d jb .L_AAD_blocks_9_6 je .L_AAD_blocks_10_6 cmpl $12,%r11d jb .L_AAD_blocks_11_6 je .L_AAD_blocks_12_6 cmpl $14,%r11d jb .L_AAD_blocks_13_6 je .L_AAD_blocks_14_6 cmpl $15,%r11d je .L_AAD_blocks_15_6 .L_AAD_blocks_16_6: subq $1536,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4 vmovdqu8 192(%r10),%zmm5{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 96(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 160(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 224(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm9,%zmm11,%zmm1 vpternlogq $0x96,%zmm10,%zmm3,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm12,%zmm11,%zmm7 vpternlogq $0x96,%zmm13,%zmm3,%zmm8 vmovdqu64 288(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm5,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm5,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm5,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm5,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_6 .L_AAD_blocks_15_6: subq $1536,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 
128(%r10),%zmm4 vmovdqu8 192(%r10),%zmm5{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %zmm16,%zmm5,%zmm5 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 112(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 176(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 240(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vmovdqu64 304(%rdi),%ymm15 vinserti64x2 $2,336(%rdi),%zmm15,%zmm15 vpclmulqdq $0x01,%zmm15,%zmm5,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm5,%zmm8 vpclmulqdq $0x11,%zmm15,%zmm5,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm5,%zmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_6 .L_AAD_blocks_14_6: subq $1536,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4 vmovdqu8 192(%r10),%ymm5{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %ymm16,%ymm5,%ymm5 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 128(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 192(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 256(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vmovdqu64 320(%rdi),%ymm15 vpclmulqdq $0x01,%ymm15,%ymm5,%ymm7 vpclmulqdq $0x10,%ymm15,%ymm5,%ymm8 vpclmulqdq $0x11,%ymm15,%ymm5,%ymm1 vpclmulqdq $0x00,%ymm15,%ymm5,%ymm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq 
$4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_6 .L_AAD_blocks_13_6: subq $1536,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4 vmovdqu8 192(%r10),%xmm5{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpshufb %xmm16,%xmm5,%xmm5 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 144(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 208(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 272(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vmovdqu64 336(%rdi),%xmm15 vpclmulqdq $0x01,%xmm15,%xmm5,%xmm7 vpclmulqdq $0x10,%xmm15,%xmm5,%xmm8 vpclmulqdq $0x11,%xmm15,%xmm5,%xmm1 vpclmulqdq $0x00,%xmm15,%xmm5,%xmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_6 .L_AAD_blocks_12_6: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 160(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 224(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vmovdqu64 288(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm1,%zmm11,%zmm9 vpternlogq $0x96,%zmm6,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 vpternlogq $0x96,%zmm7,%zmm11,%zmm12 vpternlogq $0x96,%zmm8,%zmm3,%zmm13 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp 
.L_CALC_AAD_done_6 .L_AAD_blocks_11_6: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%zmm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %zmm16,%zmm4,%zmm4 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 176(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 240(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vmovdqu64 304(%rdi),%ymm15 vinserti64x2 $2,336(%rdi),%zmm15,%zmm15 vpclmulqdq $0x01,%zmm15,%zmm4,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm4,%zmm8 vpclmulqdq $0x11,%zmm15,%zmm4,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm4,%zmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_6 .L_AAD_blocks_10_6: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%ymm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %ymm16,%ymm4,%ymm4 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 192(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 256(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vmovdqu64 320(%rdi),%ymm15 vpclmulqdq $0x01,%ymm15,%ymm4,%ymm7 vpclmulqdq $0x10,%ymm15,%ymm4,%ymm8 vpclmulqdq $0x11,%ymm15,%ymm4,%ymm1 vpclmulqdq $0x00,%ymm15,%ymm4,%ymm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_6 .L_AAD_blocks_9_6: subq $1024,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3 vmovdqu8 128(%r10),%xmm4{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpshufb %xmm16,%xmm4,%xmm4 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 
208(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 272(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vmovdqu64 336(%rdi),%xmm15 vpclmulqdq $0x01,%xmm15,%xmm4,%xmm7 vpclmulqdq $0x10,%xmm15,%xmm4,%xmm8 vpclmulqdq $0x11,%xmm15,%xmm4,%xmm1 vpclmulqdq $0x00,%xmm15,%xmm4,%xmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_6 .L_AAD_blocks_8_6: subq $512,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 224(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vmovdqu64 288(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 vpxorq %zmm9,%zmm1,%zmm9 vpxorq %zmm10,%zmm6,%zmm10 vpxorq %zmm12,%zmm7,%zmm12 vpxorq %zmm13,%zmm8,%zmm13 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_6 .L_AAD_blocks_7_6: subq $512,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%zmm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %zmm16,%zmm3,%zmm3 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 240(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 vmovdqu64 304(%rdi),%ymm15 vinserti64x2 $2,336(%rdi),%zmm15,%zmm15 vpclmulqdq $0x01,%zmm15,%zmm3,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm3,%zmm8 vpclmulqdq $0x11,%zmm15,%zmm3,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm3,%zmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq 
%xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_6 .L_AAD_blocks_6_6: subq $512,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%ymm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %ymm16,%ymm3,%ymm3 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 256(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 vmovdqu64 320(%rdi),%ymm15 vpclmulqdq $0x01,%ymm15,%ymm3,%ymm7 vpclmulqdq $0x10,%ymm15,%ymm3,%ymm8 vpclmulqdq $0x11,%ymm15,%ymm3,%ymm1 vpclmulqdq $0x00,%ymm15,%ymm3,%ymm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_6 .L_AAD_blocks_5_6: subq $512,%r12 kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11 vmovdqu8 64(%r10),%xmm3{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpshufb %xmm16,%xmm3,%xmm3 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 272(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 vmovdqu64 336(%rdi),%xmm15 vpclmulqdq $0x01,%xmm15,%xmm3,%xmm7 vpclmulqdq $0x10,%xmm15,%xmm3,%xmm8 vpclmulqdq $0x11,%xmm15,%xmm3,%xmm1 vpclmulqdq $0x00,%xmm15,%xmm3,%xmm6 vpxorq %zmm12,%zmm7,%zmm7 vpxorq %zmm13,%zmm8,%zmm8 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_6 .L_AAD_blocks_4_6: kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 288(%rdi),%zmm15 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 vpxorq %zmm13,%zmm12,%zmm12 vpsrldq $8,%zmm12,%zmm7 vpslldq $8,%zmm12,%zmm8 vpxorq %zmm7,%zmm9,%zmm1 vpxorq %zmm8,%zmm10,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 
$1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_6 .L_AAD_blocks_3_6: kmovq (%r12),%k1 vmovdqu8 0(%r10),%zmm11{%k1}{z} vpshufb %zmm16,%zmm11,%zmm11 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 304(%rdi),%ymm15 vinserti64x2 $2,336(%rdi),%zmm15,%zmm15 vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_6 .L_AAD_blocks_2_6: kmovq (%r12),%k1 vmovdqu8 0(%r10),%ymm11{%k1}{z} vpshufb %ymm16,%ymm11,%ymm11 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 320(%rdi),%ymm15 vpclmulqdq $0x01,%ymm15,%ymm11,%ymm7 vpclmulqdq $0x10,%ymm15,%ymm11,%ymm8 vpclmulqdq $0x11,%ymm15,%ymm11,%ymm1 vpclmulqdq $0x00,%ymm15,%ymm11,%ymm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 jmp .L_CALC_AAD_done_6 .L_AAD_blocks_1_6: kmovq (%r12),%k1 vmovdqu8 0(%r10),%xmm11{%k1}{z} vpshufb %xmm16,%xmm11,%xmm11 vpxorq %zmm14,%zmm11,%zmm11 vmovdqu64 336(%rdi),%xmm15 vpclmulqdq $0x01,%xmm15,%xmm11,%xmm7 vpclmulqdq $0x10,%xmm15,%xmm11,%xmm8 vpclmulqdq $0x11,%xmm15,%xmm11,%xmm1 vpclmulqdq $0x00,%xmm15,%xmm11,%xmm6 vpxorq %zmm8,%zmm7,%zmm7 vpsrldq $8,%zmm7,%zmm12 vpslldq $8,%zmm7,%zmm13 vpxorq %zmm12,%zmm1,%zmm1 vpxorq %zmm13,%zmm6,%zmm6 vextracti64x4 $1,%zmm1,%ymm12 vpxorq %ymm12,%ymm1,%ymm1 vextracti32x4 $1,%ymm1,%xmm12 vpxorq %xmm12,%xmm1,%xmm1 vextracti64x4 $1,%zmm6,%ymm13 vpxorq %ymm13,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm13 vpxorq %xmm13,%xmm6,%xmm6 vmovdqa64 POLY2(%rip),%xmm15 vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 vpslldq $8,%xmm7,%xmm7 vpxorq %xmm7,%xmm6,%xmm7 vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 vpsrldq $4,%xmm8,%xmm8 vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm1,%xmm8,%xmm14 .L_CALC_AAD_done_6: vmovdqu64 %xmm14,64(%rdi) cmpq $256,%rdx jbe .Lskip_hkeys_cleanup_9 vpxor %xmm0,%xmm0,%xmm0 vmovdqa64 %zmm0,0(%rsp) vmovdqa64 %zmm0,64(%rsp) vmovdqa64 %zmm0,128(%rsp) vmovdqa64 %zmm0,192(%rsp) vmovdqa64 %zmm0,256(%rsp) vmovdqa64 %zmm0,320(%rsp) vmovdqa64 %zmm0,384(%rsp) vmovdqa64 %zmm0,448(%rsp) 
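# AAD hashing done: the updated hash value has already been written back to the context
# (64(%rdi)); the stack slots that cached expanded hash-key powers are overwritten with
# zeros here before the epilogue.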
vmovdqa64 %zmm0,512(%rsp) vmovdqa64 %zmm0,576(%rsp) vmovdqa64 %zmm0,640(%rsp) vmovdqa64 %zmm0,704(%rsp) .Lskip_hkeys_cleanup_9: vzeroupper leaq (%rbp),%rsp .cfi_def_cfa_register %rsp popq %r15 .cfi_adjust_cfa_offset -8 .cfi_restore %r15 popq %r14 .cfi_adjust_cfa_offset -8 .cfi_restore %r14 popq %r13 .cfi_adjust_cfa_offset -8 .cfi_restore %r13 popq %r12 .cfi_adjust_cfa_offset -8 .cfi_restore %r12 popq %rbp .cfi_adjust_cfa_offset -8 .cfi_restore %rbp popq %rbx .cfi_adjust_cfa_offset -8 .cfi_restore %rbx .Lexit_update_aad: .byte 0xf3,0xc3 .Lghash_seh_end: .cfi_endproc .size ossl_aes_gcm_update_aad_avx512, .-ossl_aes_gcm_update_aad_avx512 .globl ossl_aes_gcm_encrypt_avx512 .type ossl_aes_gcm_encrypt_avx512,@function .align 32 ossl_aes_gcm_encrypt_avx512: .cfi_startproc .Lencrypt_seh_begin: .byte 243,15,30,250 pushq %rbx .cfi_adjust_cfa_offset 8 .cfi_offset %rbx,-16 .Lencrypt_seh_push_rbx: pushq %rbp .cfi_adjust_cfa_offset 8 .cfi_offset %rbp,-24 .Lencrypt_seh_push_rbp: pushq %r12 .cfi_adjust_cfa_offset 8 .cfi_offset %r12,-32 .Lencrypt_seh_push_r12: pushq %r13 .cfi_adjust_cfa_offset 8 .cfi_offset %r13,-40 .Lencrypt_seh_push_r13: pushq %r14 .cfi_adjust_cfa_offset 8 .cfi_offset %r14,-48 .Lencrypt_seh_push_r14: pushq %r15 .cfi_adjust_cfa_offset 8 .cfi_offset %r15,-56 .Lencrypt_seh_push_r15: leaq 0(%rsp),%rbp .cfi_def_cfa_register %rbp .Lencrypt_seh_setfp: .Lencrypt_seh_prolog_end: subq $1588,%rsp andq $(-64),%rsp movl 240(%rdi),%eax cmpl $9,%eax je .Laes_gcm_encrypt_128_avx512 cmpl $11,%eax je .Laes_gcm_encrypt_192_avx512 cmpl $13,%eax je .Laes_gcm_encrypt_256_avx512 xorl %eax,%eax jmp .Lexit_gcm_encrypt .align 32 .Laes_gcm_encrypt_128_avx512: orq %r8,%r8 je .L_enc_dec_done_10 xorq %r14,%r14 vmovdqu64 64(%rsi),%xmm14 movq (%rdx),%r11 orq %r11,%r11 je .L_partial_block_done_11 movl $16,%r10d leaq byte_len_to_mask_table(%rip),%r12 cmpq %r10,%r8 cmovcq %r8,%r10 kmovw (%r12,%r10,2),%k1 vmovdqu8 (%rcx),%xmm0{%k1}{z} vmovdqu64 16(%rsi),%xmm3 vmovdqu64 336(%rsi),%xmm4 leaq SHIFT_MASK(%rip),%r12 addq %r11,%r12 vmovdqu64 (%r12),%xmm5 vpshufb %xmm5,%xmm3,%xmm3 vpxorq %xmm0,%xmm3,%xmm3 leaq (%r8,%r11,1),%r13 subq $16,%r13 jge .L_no_extra_mask_11 subq %r13,%r12 .L_no_extra_mask_11: vmovdqu64 16(%r12),%xmm0 vpand %xmm0,%xmm3,%xmm3 vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 vpshufb %xmm5,%xmm3,%xmm3 vpxorq %xmm3,%xmm14,%xmm14 cmpq $0,%r13 jl .L_partial_incomplete_11 vpclmulqdq $0x11,%xmm4,%xmm14,%xmm7 vpclmulqdq $0x00,%xmm4,%xmm14,%xmm10 vpclmulqdq $0x01,%xmm4,%xmm14,%xmm11 vpclmulqdq $0x10,%xmm4,%xmm14,%xmm14 vpxorq %xmm11,%xmm14,%xmm14 vpsrldq $8,%xmm14,%xmm11 vpslldq $8,%xmm14,%xmm14 vpxorq %xmm11,%xmm7,%xmm7 vpxorq %xmm10,%xmm14,%xmm14 vmovdqu64 POLY2(%rip),%xmm11 vpclmulqdq $0x01,%xmm14,%xmm11,%xmm10 vpslldq $8,%xmm10,%xmm10 vpxorq %xmm10,%xmm14,%xmm14 vpclmulqdq $0x00,%xmm14,%xmm11,%xmm10 vpsrldq $4,%xmm10,%xmm10 vpclmulqdq $0x10,%xmm14,%xmm11,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm10,%xmm7,%xmm14 movq $0,(%rdx) movq %r11,%r12 movq $16,%r11 subq %r12,%r11 jmp .L_enc_dec_done_11 .L_partial_incomplete_11: addq %r8,(%rdx) movq %r8,%r11 .L_enc_dec_done_11: leaq byte_len_to_mask_table(%rip),%r12 kmovw (%r12,%r11,2),%k1 vmovdqu64 %xmm14,64(%rsi) vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 vpshufb %xmm5,%xmm3,%xmm3 movq %r9,%r12 vmovdqu8 %xmm3,(%r12){%k1} .L_partial_block_done_11: vmovdqu64 0(%rsi),%xmm2 subq %r11,%r8 je .L_enc_dec_done_10 cmpq $256,%r8 jbe .L_message_below_equal_16_blocks_10 vmovdqa64 SHUF_MASK(%rip),%zmm29 vmovdqa64 ddq_addbe_4444(%rip),%zmm27 vmovdqa64 ddq_addbe_1234(%rip),%zmm28 vmovd 
%xmm2,%r15d andl $255,%r15d vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpshufb %zmm29,%zmm2,%zmm2 cmpb $240,%r15b jae .L_next_16_overflow_12 vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_12 .L_next_16_overflow_12: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_12: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 0(%rcx,%r11,1),%zmm0 vmovdqu8 64(%rcx,%r11,1),%zmm3 vmovdqu8 128(%rcx,%r11,1),%zmm4 vmovdqu8 192(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,0(%r10,%r11,1) vmovdqu8 %zmm10,64(%r10,%r11,1) vmovdqu8 %zmm11,128(%r10,%r11,1) vmovdqu8 %zmm12,192(%r10,%r11,1) vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 vmovdqa64 %zmm7,768(%rsp) vmovdqa64 %zmm10,832(%rsp) vmovdqa64 %zmm11,896(%rsp) vmovdqa64 %zmm12,960(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_13 vmovdqu64 288(%rsi),%zmm0 vmovdqu64 %zmm0,704(%rsp) vmovdqu64 224(%rsi),%zmm3 vmovdqu64 %zmm3,640(%rsp) vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 160(%rsi),%zmm4 vmovdqu64 %zmm4,576(%rsp) vmovdqu64 96(%rsi),%zmm5 vmovdqu64 %zmm5,512(%rsp) .L_skip_hkeys_precomputation_13: cmpq $512,%r8 jb .L_message_below_32_blocks_10 cmpb $240,%r15b jae .L_next_16_overflow_14 vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_14 .L_next_16_overflow_14: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd 
%zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_14: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 256(%rcx,%r11,1),%zmm0 vmovdqu8 320(%rcx,%r11,1),%zmm3 vmovdqu8 384(%rcx,%r11,1),%zmm4 vmovdqu8 448(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,256(%r10,%r11,1) vmovdqu8 %zmm10,320(%r10,%r11,1) vmovdqu8 %zmm11,384(%r10,%r11,1) vmovdqu8 %zmm12,448(%r10,%r11,1) vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 vmovdqa64 %zmm7,1024(%rsp) vmovdqa64 %zmm10,1088(%rsp) vmovdqa64 %zmm11,1152(%rsp) vmovdqa64 %zmm12,1216(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_15 vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq 
$0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,192(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,128(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,64(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,0(%rsp) .L_skip_hkeys_precomputation_15: movq $1,%r14 addq $512,%r11 subq $512,%r8 cmpq $768,%r8 jb 
.L_no_more_big_nblocks_10 .L_encrypt_big_nblocks_10: cmpb $240,%r15b jae .L_16_blocks_overflow_16 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_16 .L_16_blocks_overflow_16: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_16: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 
%zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_17 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_17 .L_16_blocks_overflow_17: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_17: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 
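# Last AES-128 round for this batch of 16 counter blocks: the keystream is XORed with the
# plaintext loaded above, the ciphertext is written to the output buffer, and a
# byte-reflected (vpshufb) copy is parked on the stack so it can be folded into GHASH on
# the next pass of the main loop.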
vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_18 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_18 .L_16_blocks_overflow_18: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_18: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 512(%rcx,%r11,1),%zmm17 vmovdqu8 576(%rcx,%r11,1),%zmm19 vmovdqu8 640(%rcx,%r11,1),%zmm20 vmovdqu8 704(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpternlogq $0x96,%zmm15,%zmm12,%zmm6 vpxorq 
%zmm24,%zmm6,%zmm6 vpternlogq $0x96,%zmm10,%zmm13,%zmm7 vpxorq %zmm25,%zmm7,%zmm7 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vextracti64x4 $1,%zmm6,%ymm12 vpxorq %ymm12,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm12 vpxorq %xmm12,%xmm6,%xmm6 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm6 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,512(%r10,%r11,1) vmovdqu8 %zmm3,576(%r10,%r11,1) vmovdqu8 %zmm4,640(%r10,%r11,1) vmovdqu8 %zmm5,704(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1024(%rsp) vmovdqa64 %zmm3,1088(%rsp) vmovdqa64 %zmm4,1152(%rsp) vmovdqa64 %zmm5,1216(%rsp) vmovdqa64 %zmm6,%zmm14 addq $768,%r11 subq $768,%r8 cmpq $768,%r8 jae .L_encrypt_big_nblocks_10 .L_no_more_big_nblocks_10: cmpq $512,%r8 jae .L_encrypt_32_blocks_10 cmpq $256,%r8 jae .L_encrypt_16_blocks_10 .L_encrypt_0_blocks_ghash_32_10: movl %r8d,%r10d andl $~15,%r10d movl $256,%ebx subl %r10d,%ebx vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 addl $256,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_19 cmpl $8,%r10d je .L_last_num_blocks_is_8_19 jb .L_last_num_blocks_is_7_1_19 cmpl $12,%r10d je .L_last_num_blocks_is_12_19 jb .L_last_num_blocks_is_11_9_19 cmpl $15,%r10d je .L_last_num_blocks_is_15_19 ja .L_last_num_blocks_is_16_19 cmpl $14,%r10d je .L_last_num_blocks_is_14_19 jmp .L_last_num_blocks_is_13_19 .L_last_num_blocks_is_11_9_19: cmpl $10,%r10d je .L_last_num_blocks_is_10_19 ja .L_last_num_blocks_is_11_19 jmp .L_last_num_blocks_is_9_19 .L_last_num_blocks_is_7_1_19: cmpl $4,%r10d je .L_last_num_blocks_is_4_19 jb .L_last_num_blocks_is_3_1_19 cmpl $6,%r10d ja .L_last_num_blocks_is_7_19 je .L_last_num_blocks_is_6_19 jmp 
.L_last_num_blocks_is_5_19 .L_last_num_blocks_is_3_1_19: cmpl $2,%r10d ja .L_last_num_blocks_is_3_19 je .L_last_num_blocks_is_2_19 .L_last_num_blocks_is_1_19: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_20 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_20 .L_16_blocks_overflow_20: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_20: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_21 subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_21 
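/* Partial-final-block path for the 1-block tail (a sketch of what the code below
   appears to do): the leftover byte count is recorded through %rdx, the in-progress
   block (%xmm11) is saved into the context at 16(%rsi), the carried GHASH sums
   (%zmm24/%zmm25/%zmm26) are folded and reduced with POLY2, and the byte-reflected
   partial block (%xmm7) is XORed into the running hash so it is multiplied by H
   once the block is completed. */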
.L_small_initial_partial_block_21: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_21 .L_small_initial_compute_done_21: .L_after_reduction_21: jmp .L_last_blocks_done_19 .L_last_num_blocks_is_2_19: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_22 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_22 .L_16_blocks_overflow_22: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_22: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_23 subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq 
%zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_23 .L_small_initial_partial_block_23: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_23: orq %r8,%r8 je .L_after_reduction_23 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_23: jmp .L_last_blocks_done_19 .L_last_num_blocks_is_3_19: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_24 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_24 .L_16_blocks_overflow_24: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_24: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 
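/* Common pattern of these .L_last_num_blocks_is_N_19 tail cases: the AES rounds for
   the final counter blocks (round keys broadcast from the schedule at (%rdi)) are
   interleaved with GHASH of the 16 ciphertext blocks buffered on the stack at
   1024..1216(%rsp) by the previous pass, multiplied against the H powers staged at
   0..192(%rsp,%rbx,1).  A byte mask for the trailing, possibly partial 64-byte
   vector is fetched from byte64_len_to_mask_table, indexed by the residual length. */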
vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_25 subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_25 .L_small_initial_partial_block_25: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_25: orq %r8,%r8 je .L_after_reduction_25 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_25: jmp .L_last_blocks_done_19 .L_last_num_blocks_is_4_19: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_26 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_26 .L_16_blocks_overflow_26: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_26: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq 
$0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_27 subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_27 .L_small_initial_partial_block_27: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 
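/* Final fold of the two-step POLY2 reduction: vpternlogq with immediate 0x96 is a
   three-way XOR, combining the high half with the shifted reduction terms and
   leaving the reduced GHASH value in %xmm14. */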
vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_27: orq %r8,%r8 je .L_after_reduction_27 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_27: jmp .L_last_blocks_done_19 .L_last_num_blocks_is_5_19: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_28 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_28 .L_16_blocks_overflow_28: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_28: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_29 subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 
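/* 5-block tail, full final block: the four shuffled output blocks in %zmm17 are
   multiplied against the descending H powers loaded as one 512-bit operand from
   272(%rsi) (H^5..H^2 in this table layout), while the single remaining block in
   %xmm19 is multiplied against H^1 from 336(%rsi); the partial products are then
   merged with the carried sums and reduced. */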
vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_29 .L_small_initial_partial_block_29: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_29: orq %r8,%r8 je .L_after_reduction_29 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_29: jmp .L_last_blocks_done_19 .L_last_num_blocks_is_6_19: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_30 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_30 .L_16_blocks_overflow_30: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_30: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc 
%ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_31 subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_31 .L_small_initial_partial_block_31: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 
POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_31: orq %r8,%r8 je .L_after_reduction_31 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_31: jmp .L_last_blocks_done_19 .L_last_num_blocks_is_7_19: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_32 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_32 .L_16_blocks_overflow_32: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_32: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_33 subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq 
$0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_33 .L_small_initial_partial_block_33: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_33: orq %r8,%r8 je .L_after_reduction_33 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_33: jmp .L_last_blocks_done_19 .L_last_num_blocks_is_8_19: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_34 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_34 .L_16_blocks_overflow_34: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_34: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_35 subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_35 .L_small_initial_partial_block_35: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq 
$0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_35: orq %r8,%r8 je .L_after_reduction_35 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_35: jmp .L_last_blocks_done_19 .L_last_num_blocks_is_9_19: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_36 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_36 .L_16_blocks_overflow_36: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_36: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 
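/* The partial GHASH products of the stacked blocks are accumulated into the carried
   sums: high halves into %zmm24, low halves into %zmm25, middle terms into %zmm26;
   the reduction itself is deferred to the small-initial-blocks epilogue after the
   tail blocks have been encrypted and stored. */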
vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_37 subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_37 .L_small_initial_partial_block_37: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_37: orq %r8,%r8 je .L_after_reduction_37 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_37: jmp .L_last_blocks_done_19 .L_last_num_blocks_is_10_19: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_38 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_38 .L_16_blocks_overflow_38: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_38: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 
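/* After the masked stores, the output blocks are byte-reflected (vpshufb with the
   byte-swap mask kept in %zmm29) so they can be hashed, and %r8 is reduced by the
   whole blocks just handled; fewer than 16 remaining bytes means the last block is
   partial and takes the .L_small_initial_partial_block path. */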
vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_39 subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_39 .L_small_initial_partial_block_39: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_39: orq %r8,%r8 je .L_after_reduction_39 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_39: jmp .L_last_blocks_done_19 .L_last_num_blocks_is_11_19: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_40 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_40 .L_16_blocks_overflow_40: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 
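/* Counter-overflow path: when the low counter byte (tracked in %r15b) would wrap
   while generating these blocks, the counter is byte-swapped first so the 32-bit
   additions from ddq_add_1234/ddq_add_4444 carry correctly, then swapped back into
   the stored big-endian form before encryption. */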
vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_40: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_41 subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq 
%zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_41 .L_small_initial_partial_block_41: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_41: orq %r8,%r8 je .L_after_reduction_41 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_41: jmp .L_last_blocks_done_19 .L_last_num_blocks_is_12_19: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_42 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_42 .L_16_blocks_overflow_42: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_42: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq 
%zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_43 subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq 
%zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_43 .L_small_initial_partial_block_43: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_43: orq %r8,%r8 je .L_after_reduction_43 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_43: jmp .L_last_blocks_done_19 .L_last_num_blocks_is_13_19: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_44 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_44 .L_16_blocks_overflow_44: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_44: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 
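# 13-block tail, AES-128 key schedule (round keys at 0..160(%rdi)): the remaining
# vaesenc rounds for counter blocks zmm0/zmm3/zmm4/xmm5 continue below, interleaved
# with vpclmulqdq partial products that GHASH blocks and hash-key powers kept in the
# stack frame.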
vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_45 subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 
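# Fold the per-lane GHASH partial products into a single 128-bit value and reduce it
# modulo the GCM polynomial via the POLY2 constant; the running hash ends up in xmm14.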
vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_45 .L_small_initial_partial_block_45: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_45: orq %r8,%r8 je .L_after_reduction_45 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_45: jmp .L_last_blocks_done_19 .L_last_num_blocks_is_14_19: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_46 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_46 .L_16_blocks_overflow_46: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_46: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 
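# 14-block tail: 4+4+4 blocks in zmm0/zmm3/zmm4 plus two more in ymm5, handled with
# the same interleaved AES-CTR / GHASH pattern as the cases above.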
vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_47 subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 
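# Multiply the final two byte-reflected blocks by the hash-key pair loaded from
# 320(%rsi), then fold and reduce as in the previous cases.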
vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_47 .L_small_initial_partial_block_47: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_47: orq %r8,%r8 je .L_after_reduction_47 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_47: jmp .L_last_blocks_done_19 .L_last_num_blocks_is_15_19: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_48 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_48 .L_16_blocks_overflow_48: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_48: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 
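# 15-block tail: the last 64-byte load/store is masked through %k1 so only the
# remaining bytes are touched; xmm2 keeps the last counter block that was consumed.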
vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_49 subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq 
$0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_49 .L_small_initial_partial_block_49: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_49: orq %r8,%r8 je .L_after_reduction_49 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_49: jmp .L_last_blocks_done_19 .L_last_num_blocks_is_16_19: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_50 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_50 .L_16_blocks_overflow_50: 
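# Counter low byte would wrap for this batch (tracked in %r15d), so the increments
# are applied to a byte-swapped copy of the counter and the result is swapped back.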
vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_50: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 
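# Byte-reflect the ciphertext just written so it can feed the GHASH update; the
# 16-block tail always records its partial-block state below.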
vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_51: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_51: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_51: jmp .L_last_blocks_done_19 .L_last_num_blocks_is_0_19: vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq 
$0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_19: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_10 .L_encrypt_32_blocks_10: cmpb $240,%r15b jae .L_16_blocks_overflow_52 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_52 .L_16_blocks_overflow_52: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_52: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 
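# First 16-block batch of the 32-block path: XOR the keystream with the input, store
# the output, and save the byte-reflected result at 1280..1472(%rsp) so it can be
# hashed while the second batch is encrypted.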
vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_53 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_53 .L_16_blocks_overflow_53: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_53: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 
160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 subq $512,%r8 addq $512,%r11 movl %r8d,%r10d andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_54 cmpl $8,%r10d je .L_last_num_blocks_is_8_54 jb .L_last_num_blocks_is_7_1_54 cmpl $12,%r10d je .L_last_num_blocks_is_12_54 jb .L_last_num_blocks_is_11_9_54 cmpl $15,%r10d je .L_last_num_blocks_is_15_54 ja .L_last_num_blocks_is_16_54 cmpl $14,%r10d je .L_last_num_blocks_is_14_54 jmp .L_last_num_blocks_is_13_54 .L_last_num_blocks_is_11_9_54: cmpl $10,%r10d je .L_last_num_blocks_is_10_54 ja .L_last_num_blocks_is_11_54 jmp .L_last_num_blocks_is_9_54 .L_last_num_blocks_is_7_1_54: cmpl $4,%r10d je .L_last_num_blocks_is_4_54 jb .L_last_num_blocks_is_3_1_54 cmpl $6,%r10d ja .L_last_num_blocks_is_7_54 je .L_last_num_blocks_is_6_54 jmp .L_last_num_blocks_is_5_54 .L_last_num_blocks_is_3_1_54: cmpl $2,%r10d ja .L_last_num_blocks_is_3_54 je .L_last_num_blocks_is_2_54 .L_last_num_blocks_is_1_54: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_55 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_55 
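# One remaining block: encrypt a single masked counter block (xmm0) and update the
# hash; the overflow variant of the counter increment follows.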
.L_16_blocks_overflow_55: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_55: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_56 subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_56 .L_small_initial_partial_block_56: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq 
%xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_56 .L_small_initial_compute_done_56: .L_after_reduction_56: jmp .L_last_blocks_done_54 .L_last_num_blocks_is_2_54: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_57 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_57 .L_16_blocks_overflow_57: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_57: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_58 subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq 
$0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_58 .L_small_initial_partial_block_58: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_58: orq %r8,%r8 je .L_after_reduction_58 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_58: jmp .L_last_blocks_done_54 .L_last_num_blocks_is_3_54: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_59 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_59 .L_16_blocks_overflow_59: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_59: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 
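# 3-block tail: XOR with the masked input, store through %k1, byte-reflect, and
# extend the GHASH with entries from the precomputed hash-key table at (%rsi).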
vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_60 subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_60 .L_small_initial_partial_block_60: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_60: orq %r8,%r8 je .L_after_reduction_60 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_60: jmp .L_last_blocks_done_54 .L_last_num_blocks_is_4_54: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_61 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_61 .L_16_blocks_overflow_61: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_61: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_62 subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_62 .L_small_initial_partial_block_62: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_62: orq %r8,%r8 je .L_after_reduction_62 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_62: jmp .L_last_blocks_done_54 .L_last_num_blocks_is_5_54: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_63 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_63 
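# NOTE: each .L_last_num_blocks_is_N handler first builds a byte mask for the
# final partial 64-byte group by indexing byte64_len_to_mask_table with the
# remaining length in %r8 (minus the full groups already covered) and loading
# it into %k1.  The cmpl/jae pair appears to test whether stepping the counter
# by the number of tail blocks would wrap its low byte: if not, the counters
# are bumped directly with the pre-shuffled increments in %zmm28/%zmm27;
# otherwise the overflow label below byte-reflects the counter blocks, applies
# the ddq_add_* increment constants, and reflects them back.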
.L_16_blocks_overflow_63: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_63: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_64 subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq 
%ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_64 .L_small_initial_partial_block_64: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_64: orq %r8,%r8 je .L_after_reduction_64 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_64: jmp .L_last_blocks_done_54 .L_last_num_blocks_is_6_54: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_65 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_65 .L_16_blocks_overflow_65: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_65: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_66 subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_66 .L_small_initial_partial_block_66: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_66: orq %r8,%r8 je .L_after_reduction_66 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_66: jmp .L_last_blocks_done_54 .L_last_num_blocks_is_7_54: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl 
$249,%r15d jae .L_16_blocks_overflow_67 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_67 .L_16_blocks_overflow_67: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_67: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_68 subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 
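# NOTE: the shift/XOR sequence around this point is the tail of a Karatsuba-
# style carry-less multiply: the middle products are split with vpslldq/vpsrldq
# and folded into the high (%zmm0) and low (%zmm3) partial sums.  The
# vextracti64x4/vextracti32x4 + vpxorq steps that follow fold the four 128-bit
# lanes down to one, and the vpclmulqdq operations against POLY2 then reduce
# the 256-bit product modulo the GHASH polynomial, leaving the updated tag in
# %xmm14.  vpternlogq with immediate 0x96 is used throughout as a 3-way XOR.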
vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_68 .L_small_initial_partial_block_68: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_68: orq %r8,%r8 je .L_after_reduction_68 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_68: jmp .L_last_blocks_done_54 .L_last_num_blocks_is_8_54: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_69 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_69 .L_16_blocks_overflow_69: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_69: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq 
$0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_70 subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_70 .L_small_initial_partial_block_70: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 
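# NOTE: the .L_small_initial_partial_block_* path entered above handles a tail
# whose last block is shorter than 16 bytes: it records the leftover byte
# count at (%rdx), appears to stash the last (partial) ciphertext/keystream
# block at 16(%rsi) so it can be completed later, and folds only the preceding
# complete blocks into the hash, using one fewer power of H than the
# full-final-block path taken when the cmpq $16,%r8 test does not branch.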
vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_70: orq %r8,%r8 je .L_after_reduction_70 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_70: jmp .L_last_blocks_done_54 .L_last_num_blocks_is_9_54: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_71 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_71 .L_16_blocks_overflow_71: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_71: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) 
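# NOTE: only the last 64-byte group of the tail is loaded and stored through
# the %k1 byte mask; the full groups before it (%zmm0, %zmm3 here) use plain
# vmovdqu8 loads and stores.  Handlers for larger tails simply mask a
# correspondingly later group.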
vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_72 subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_72 .L_small_initial_partial_block_72: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_72: orq %r8,%r8 je .L_after_reduction_72 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_72: jmp .L_last_blocks_done_54 .L_last_num_blocks_is_10_54: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_73 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_73 .L_16_blocks_overflow_73: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb 
%zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_73: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_74 subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 
320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_74 .L_small_initial_partial_block_74: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_74: orq %r8,%r8 je .L_after_reduction_74 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_74: jmp .L_last_blocks_done_54 .L_last_num_blocks_is_11_54: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_75 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_75 .L_16_blocks_overflow_75: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_75: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 
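# NOTE: each group of four vpclmulqdq instructions with selectors 0x11, 0x00,
# 0x01 and 0x10 forms one 128x128-bit carry-less multiply per lane: high*high,
# low*low and the two cross products.  The cross terms are recombined later
# with vpslldq/vpsrldq and XORs, and the partial products of successive groups
# are accumulated with vpternlogq (3-way XOR) into %zmm24/%zmm25/%zmm26.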
vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_76 subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq 
%xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_76 .L_small_initial_partial_block_76: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_76: orq %r8,%r8 je .L_after_reduction_76 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_76: jmp .L_last_blocks_done_54 .L_last_num_blocks_is_12_54: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_77 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_77 .L_16_blocks_overflow_77: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_77: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 
64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_78 subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_78 .L_small_initial_partial_block_78: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 
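# NOTE: the powers of the hash key written out by ossl_aes_gcm_init_avx512 are
# read back from (%rsi): H itself sits at 336(%rsi) and higher powers appear
# to be stored at successively lower offsets.  A tail that hashes more blocks
# therefore starts its table loads at a smaller offset (160(%rsi) in the
# 12-block full path above, 176(%rsi) in the partial path here, which folds in
# one block fewer) and finishes with the short ymm/xmm entries next to
# 336(%rsi).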
vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_78: orq %r8,%r8 je .L_after_reduction_78 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_78: jmp .L_last_blocks_done_54 .L_last_num_blocks_is_13_54: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_79 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_79 .L_16_blocks_overflow_79: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_79: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 
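# NOTE: while the final counter blocks are being encrypted (the vaesenc chains
# on %zmm0/%zmm3/%zmm4/%xmm5), the handler interleaves GHASH work for the
# sixteen previously produced ciphertext blocks: they appear to be read back
# from the stack at 768(%rsp)..960(%rsp), with the running tag in %zmm14 XORed
# into the first of them, and multiplied by key powers kept at (%rsp,%rbx,1),
# so the AES and PCLMUL units are kept busy at the same time.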
vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_80 subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_80 .L_small_initial_partial_block_80: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_80: orq %r8,%r8 je .L_after_reduction_80 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_80: jmp .L_last_blocks_done_54 .L_last_num_blocks_is_14_54: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_81 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_81 .L_16_blocks_overflow_81: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_81: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 
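# NOTE: the fourth counter vector is processed at the width of the blocks it
# actually carries: the 13-block handler touches it as %xmm5, this 14-block
# handler as %ymm5, and the 15-block handler that follows as %zmm5.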
vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_82 subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_82 .L_small_initial_partial_block_82: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_82: orq %r8,%r8 je .L_after_reduction_82 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_82: jmp .L_last_blocks_done_54 .L_last_num_blocks_is_15_54: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_83 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_83 .L_16_blocks_overflow_83: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_83: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 
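# Interleaved with the AES rounds, the previous 16 byte-reflected ciphertext
# blocks (apparently parked at 768..1008(%rsp)) are multiplied by the stack
# copies of the hash-key powers selected through 0..192(%rsp,%rbx,1); the
# high, low and middle partial products are gathered into %zmm24, %zmm25 and
# %zmm26 for the final reduction.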
vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_84 subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq 
$8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_84 .L_small_initial_partial_block_84: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_84: orq %r8,%r8 je .L_after_reduction_84 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_84: jmp .L_last_blocks_done_54 .L_last_num_blocks_is_16_54: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_85 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_85 .L_16_blocks_overflow_85: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_85: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq 
$0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_86: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq 
$0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_86: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_86: jmp .L_last_blocks_done_54 .L_last_num_blocks_is_0_54: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_54: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_10 .L_encrypt_16_blocks_10: cmpb $240,%r15b jae .L_16_blocks_overflow_87 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_87 .L_16_blocks_overflow_87: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_87: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 
$255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 256(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 320(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq 
$0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 384(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 448(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 subq $256,%r8 addq $256,%r11 movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_88 cmpl $8,%r10d je .L_last_num_blocks_is_8_88 jb .L_last_num_blocks_is_7_1_88 cmpl $12,%r10d je .L_last_num_blocks_is_12_88 jb .L_last_num_blocks_is_11_9_88 cmpl $15,%r10d je .L_last_num_blocks_is_15_88 ja .L_last_num_blocks_is_16_88 cmpl $14,%r10d je .L_last_num_blocks_is_14_88 jmp .L_last_num_blocks_is_13_88 .L_last_num_blocks_is_11_9_88: cmpl $10,%r10d je .L_last_num_blocks_is_10_88 ja .L_last_num_blocks_is_11_88 jmp .L_last_num_blocks_is_9_88 .L_last_num_blocks_is_7_1_88: cmpl $4,%r10d je .L_last_num_blocks_is_4_88 jb .L_last_num_blocks_is_3_1_88 cmpl $6,%r10d ja .L_last_num_blocks_is_7_88 je .L_last_num_blocks_is_6_88 jmp .L_last_num_blocks_is_5_88 .L_last_num_blocks_is_3_1_88: cmpl $2,%r10d ja .L_last_num_blocks_is_3_88 je .L_last_num_blocks_is_2_88 .L_last_num_blocks_is_1_88: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_89 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_89 .L_16_blocks_overflow_89: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_89: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 
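# Tail bookkeeping shared by these paths: when the tail ends on a 16-byte
# boundary, 0 is stored through %rdx (no partial block outstanding); otherwise
# the leftover byte count in %r8 is stored through %rdx and the last ciphertext
# block (%xmm11) is apparently stashed at 16(%rsi) so the partial block can be
# completed later.  Either way the updated GHASH value is left in %xmm14.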
vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %xmm31,%xmm0,%xmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_90 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_90 .L_small_initial_partial_block_90: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_90 .L_small_initial_compute_done_90: .L_after_reduction_90: jmp .L_last_blocks_done_88 .L_last_num_blocks_is_2_88: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_91 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_91 .L_16_blocks_overflow_91: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_91: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 
80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %ymm31,%ymm0,%ymm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_92 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_92 .L_small_initial_partial_block_92: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_92: orq 
%r8,%r8 je .L_after_reduction_92 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_92: jmp .L_last_blocks_done_88 .L_last_num_blocks_is_3_88: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_93 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_93 .L_16_blocks_overflow_93: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_93: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_94 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq 
%zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_94 .L_small_initial_partial_block_94: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_94: orq %r8,%r8 je .L_after_reduction_94 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_94: jmp .L_last_blocks_done_88 .L_last_num_blocks_is_4_88: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_95 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_95 .L_16_blocks_overflow_95: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_95: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq 
$0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_96 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_96 .L_small_initial_partial_block_96: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_96: orq %r8,%r8 je .L_after_reduction_96 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_96: jmp .L_last_blocks_done_88 .L_last_num_blocks_is_5_88: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_97 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_97 .L_16_blocks_overflow_97: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb 
%xmm29,%xmm3,%xmm3 .L_16_blocks_ok_97: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_98 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq 
$0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_98 .L_small_initial_partial_block_98: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_98: orq %r8,%r8 je .L_after_reduction_98 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_98: jmp .L_last_blocks_done_88 .L_last_num_blocks_is_6_88: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_99 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_99 .L_16_blocks_overflow_99: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_99: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq 
$0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_100 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_100 .L_small_initial_partial_block_100: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq 
$0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_100: orq %r8,%r8 je .L_after_reduction_100 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_100: jmp .L_last_blocks_done_88 .L_last_num_blocks_is_7_88: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_101 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_101 .L_16_blocks_overflow_101: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_101: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 
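# Recurring reduction sequence: fold the 512-bit high/low GHASH accumulators
# down to 128 bits (vextracti64x4/vextracti32x4 plus vpxorq), then reduce
# modulo the GHASH polynomial with carry-less multiplies against the POLY2
# constant, leaving the updated hash in %xmm14.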
vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_102 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_102 .L_small_initial_partial_block_102: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_102: orq %r8,%r8 je .L_after_reduction_102 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_102: jmp .L_last_blocks_done_88 .L_last_num_blocks_is_8_88: leaq 
byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_103 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_103 .L_16_blocks_overflow_103: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_103: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq 
$16,%r8 jl .L_small_initial_partial_block_104 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_104 .L_small_initial_partial_block_104: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_104: orq %r8,%r8 je .L_after_reduction_104 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_104: jmp .L_last_blocks_done_88 .L_last_num_blocks_is_9_88: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_105 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_105 .L_16_blocks_overflow_105: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_105: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_106 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq 
$0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_106 .L_small_initial_partial_block_106: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_106: orq %r8,%r8 je .L_after_reduction_106 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_106: jmp .L_last_blocks_done_88 .L_last_num_blocks_is_10_88: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_107 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_107 .L_16_blocks_overflow_107: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_107: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 
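/* 10-block tail: the counters are split across %zmm0, %zmm3 and %ymm4. The AES
   rounds below stay interleaved with GHASH multiplies of the 16 blocks buffered
   on the stack at 1280..1472(%rsp) (apparently from the preceding 16-block pass)
   by the cached powers of the hash key held at 512..704(%rsp). */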
vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_108 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq 
$0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_108 .L_small_initial_partial_block_108: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_108: orq %r8,%r8 je .L_after_reduction_108 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_108: jmp .L_last_blocks_done_88 .L_last_num_blocks_is_11_88: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_109 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_109 .L_16_blocks_overflow_109: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_109: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_110 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 
304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_110 .L_small_initial_partial_block_110: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_110: orq %r8,%r8 je .L_after_reduction_110 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_110: jmp .L_last_blocks_done_88 .L_last_num_blocks_is_12_88: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_111 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_111 .L_16_blocks_overflow_111: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_111: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 
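/* Each buffered block group is multiplied by its matching power of H with four
   VPCLMULQDQ operations (0x11 high, 0x00 low, 0x01/0x10 cross terms); the
   partial products are later combined with VPTERNLOGQ three-way XORs before
   the final reduction. */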
vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_112 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 
288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_112 .L_small_initial_partial_block_112: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_112: orq %r8,%r8 je .L_after_reduction_112 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_112: jmp .L_last_blocks_done_88 .L_last_num_blocks_is_13_88: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_113 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_113 .L_16_blocks_overflow_113: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_113: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq 
%xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 
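/* 13-block tail: the freshly written output blocks are byte-reflected with
   vpshufb (%zmm29, the byte-order shuffle mask) so they can be folded into the
   GHASH state below; %xmm11 keeps the last output block in case it is only
   partially filled. */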
vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_114 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_114 .L_small_initial_partial_block_114: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_114: orq %r8,%r8 je .L_after_reduction_114 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_114: jmp .L_last_blocks_done_88 .L_last_num_blocks_is_14_88: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_115 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd 
%zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_115 .L_16_blocks_overflow_115: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_115: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq 
$8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_116 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_116 .L_small_initial_partial_block_116: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq 
%zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_116: orq %r8,%r8 je .L_after_reduction_116 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_116: jmp .L_last_blocks_done_88 .L_last_num_blocks_is_15_88: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_117 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_117 .L_16_blocks_overflow_117: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_117: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 
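/* 15-block tail: all four counter groups (%zmm0, %zmm3, %zmm4, %zmm5) are full
   512-bit vectors; only the last 64-byte load and store go through the %k1
   byte mask derived from byte64_len_to_mask_table. */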
vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_118 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_118 
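/* Partial-block variant for the 15-block tail: the byte count left in %r8 is
   recorded at (%rdx) and the last output block is stashed at 16(%rsi), then
   the same GHASH multiply/reduce sequence runs with the powers of H shifted
   by one position. */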
.L_small_initial_partial_block_118: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_118: orq %r8,%r8 je .L_after_reduction_118 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_118: jmp .L_last_blocks_done_88 .L_last_num_blocks_is_16_88: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_119 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_119 .L_16_blocks_overflow_119: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_119: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_120: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 
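/* Descriptive note (added): accumulate the remaining partial products
   (vpternlogq $0x96 = XOR of three sources), then build the final hash-key
   group: 32 bytes loaded from 304(%rsi) with the single key at 336(%rsi)
   inserted as lane 2, leaving lane 3 zero so the pending partial block
   contributes nothing to the hash yet. */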
vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_120: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_120: jmp .L_last_blocks_done_88 .L_last_num_blocks_is_0_88: vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_88: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_10 .L_message_below_32_blocks_10: subq $256,%r8 addq $256,%r11 movl %r8d,%r10d testq %r14,%r14 jnz .L_skip_hkeys_precomputation_121 vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq 
$0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .L_skip_hkeys_precomputation_121: movq $1,%r14 andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_122 cmpl $8,%r10d je .L_last_num_blocks_is_8_122 jb .L_last_num_blocks_is_7_1_122 cmpl $12,%r10d je .L_last_num_blocks_is_12_122 jb .L_last_num_blocks_is_11_9_122 cmpl $15,%r10d je .L_last_num_blocks_is_15_122 ja .L_last_num_blocks_is_16_122 cmpl $14,%r10d je .L_last_num_blocks_is_14_122 jmp .L_last_num_blocks_is_13_122 .L_last_num_blocks_is_11_9_122: cmpl $10,%r10d je .L_last_num_blocks_is_10_122 ja .L_last_num_blocks_is_11_122 jmp .L_last_num_blocks_is_9_122 .L_last_num_blocks_is_7_1_122: cmpl $4,%r10d je .L_last_num_blocks_is_4_122 jb .L_last_num_blocks_is_3_1_122 cmpl $6,%r10d ja .L_last_num_blocks_is_7_122 je .L_last_num_blocks_is_6_122 jmp .L_last_num_blocks_is_5_122 .L_last_num_blocks_is_3_1_122: cmpl $2,%r10d ja .L_last_num_blocks_is_3_122 je .L_last_num_blocks_is_2_122 .L_last_num_blocks_is_1_122: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_123 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_123 .L_16_blocks_overflow_123: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_123: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 
vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_124 subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_124 .L_small_initial_partial_block_124: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_124 .L_small_initial_compute_done_124: .L_after_reduction_124: jmp .L_last_blocks_done_122 .L_last_num_blocks_is_2_122: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl 
$254,%r15d jae .L_16_blocks_overflow_125 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_125 .L_16_blocks_overflow_125: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_125: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_126 subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_126 .L_small_initial_partial_block_126: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq 
%zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_126: orq %r8,%r8 je .L_after_reduction_126 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_126: jmp .L_last_blocks_done_122 .L_last_num_blocks_is_3_122: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_127 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_127 .L_16_blocks_overflow_127: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_127: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_128 subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq 
%zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_128 .L_small_initial_partial_block_128: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_128: orq %r8,%r8 je .L_after_reduction_128 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_128: jmp .L_last_blocks_done_122 .L_last_num_blocks_is_4_122: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_129 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_129 .L_16_blocks_overflow_129: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_129: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 
vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_130 subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_130 .L_small_initial_partial_block_130: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_130: orq %r8,%r8 je .L_after_reduction_130 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_130: jmp .L_last_blocks_done_122 .L_last_num_blocks_is_5_122: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_131 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_131 .L_16_blocks_overflow_131: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_131: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 
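/* Descriptive note (added): five-block tail. Four counter blocks travel in
   %zmm0 and the fifth in %xmm3, which is whitened here and carried through
   every AES round at XMM width alongside the full-width lane. */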
vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_132 subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_132 .L_small_initial_partial_block_132: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_132: orq %r8,%r8 je .L_after_reduction_132 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_132: jmp .L_last_blocks_done_122 .L_last_num_blocks_is_6_122: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_133 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_133 .L_16_blocks_overflow_133: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_133: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_134 subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_134 .L_small_initial_partial_block_134: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_134: orq %r8,%r8 je .L_after_reduction_134 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_134: jmp .L_last_blocks_done_122 .L_last_num_blocks_is_7_122: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_135 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_135 .L_16_blocks_overflow_135: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_135: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 
$0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_136 subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_136 .L_small_initial_partial_block_136: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_136: orq %r8,%r8 je .L_after_reduction_136 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_136: jmp .L_last_blocks_done_122 .L_last_num_blocks_is_8_122: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_137 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_137 .L_16_blocks_overflow_137: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_137: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 
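/* Descriptive note (added): eight-block tail. The second 64-byte half may be
   partial, so %k1 (looked up in byte64_len_to_mask_table from the remaining
   length) masks both the vmovdqu8 load above and the store that follows,
   touching only the valid bytes and zeroing the rest of the register. */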
vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_138 subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_138 .L_small_initial_partial_block_138: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_138: orq %r8,%r8 je .L_after_reduction_138 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_138: jmp .L_last_blocks_done_122 .L_last_num_blocks_is_9_122: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl 
$247,%r15d jae .L_16_blocks_overflow_139 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_139 .L_16_blocks_overflow_139: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_139: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_140 subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 
vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_140 .L_small_initial_partial_block_140: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_140: orq %r8,%r8 je .L_after_reduction_140 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_140: jmp .L_last_blocks_done_122 .L_last_num_blocks_is_10_122: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_141 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_141 .L_16_blocks_overflow_141: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_141: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 
32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_142 subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 
$1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_142 .L_small_initial_partial_block_142: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_142: orq %r8,%r8 je .L_after_reduction_142 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_142: jmp .L_last_blocks_done_122 .L_last_num_blocks_is_11_122: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_143 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_143 .L_16_blocks_overflow_143: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_143: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 
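# Annotation: the next stretch runs the remaining AES rounds on the three
# counter-block groups (%zmm0/%zmm3/%zmm4) with broadcast round keys from
# (%rdi), interleaved with the VPCLMULQDQ work that hashes the 16 ciphertext
# blocks previously saved on the stack; a masked vmovdqu8 ({%k1}{z}) picks up
# the final, possibly partial, 64-byte chunk of input.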
vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_144 subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_144 
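# Annotation: the .L_small_initial_partial_block_* paths below are taken when
# the last block of the tail is incomplete. The residual byte count is stored
# through %rdx and the last ciphertext block is stashed in the context; only
# the complete blocks are multiplied by their hash-key powers here, while the
# byte-reflected partial block is merely XORed into the hash state so its
# multiplication can be deferred until the block is completed.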
.L_small_initial_partial_block_144: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_144: orq %r8,%r8 je .L_after_reduction_144 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_144: jmp .L_last_blocks_done_122 .L_last_num_blocks_is_12_122: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_145 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_145 .L_16_blocks_overflow_145: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_145: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 
vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_146 subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_146 .L_small_initial_partial_block_146: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 
$2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_146: orq %r8,%r8 je .L_after_reduction_146 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_146: jmp .L_last_blocks_done_122 .L_last_num_blocks_is_13_122: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_147 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_147 .L_16_blocks_overflow_147: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_147: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_148 subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_148 .L_small_initial_partial_block_148: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq 
$0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_148: orq %r8,%r8 je .L_after_reduction_148 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_148: jmp .L_last_blocks_done_122 .L_last_num_blocks_is_14_122: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_149 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_149 .L_16_blocks_overflow_149: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_149: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 
64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_150 subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_150 .L_small_initial_partial_block_150: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 
vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_150: orq %r8,%r8 je .L_after_reduction_150 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_150: jmp .L_last_blocks_done_122 .L_last_num_blocks_is_15_122: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_151 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_151 .L_16_blocks_overflow_151: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_151: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc 
%zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_152 subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_152 .L_small_initial_partial_block_152: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_152: orq %r8,%r8 je .L_after_reduction_152 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_152: jmp .L_last_blocks_done_122 .L_last_num_blocks_is_16_122: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_153 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_153 .L_16_blocks_overflow_153: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_153: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 
192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_154: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq 
%zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_154: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_154: jmp .L_last_blocks_done_122 .L_last_num_blocks_is_0_122: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_122: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_10 .L_message_below_equal_16_blocks_10: movl %r8d,%r12d addl $15,%r12d shrl $4,%r12d cmpq $8,%r12 je .L_small_initial_num_blocks_is_8_155 jl .L_small_initial_num_blocks_is_7_1_155 cmpq $12,%r12 je .L_small_initial_num_blocks_is_12_155 jl .L_small_initial_num_blocks_is_11_9_155 cmpq $16,%r12 je .L_small_initial_num_blocks_is_16_155 cmpq $15,%r12 je .L_small_initial_num_blocks_is_15_155 cmpq $14,%r12 je .L_small_initial_num_blocks_is_14_155 jmp .L_small_initial_num_blocks_is_13_155 .L_small_initial_num_blocks_is_11_9_155: cmpq $11,%r12 je .L_small_initial_num_blocks_is_11_155 cmpq $10,%r12 je .L_small_initial_num_blocks_is_10_155 jmp .L_small_initial_num_blocks_is_9_155 .L_small_initial_num_blocks_is_7_1_155: cmpq $4,%r12 je .L_small_initial_num_blocks_is_4_155 jl .L_small_initial_num_blocks_is_3_1_155 cmpq $7,%r12 je .L_small_initial_num_blocks_is_7_155 cmpq $6,%r12 je .L_small_initial_num_blocks_is_6_155 jmp .L_small_initial_num_blocks_is_5_155 .L_small_initial_num_blocks_is_3_1_155: cmpq 
$3,%r12 je .L_small_initial_num_blocks_is_3_155 cmpq $2,%r12 je .L_small_initial_num_blocks_is_2_155 .L_small_initial_num_blocks_is_1_155: vmovdqa64 SHUF_MASK(%rip),%xmm29 vpaddd ONE(%rip),%xmm2,%xmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm0,%xmm2 vpshufb %xmm29,%xmm0,%xmm0 vmovdqu8 0(%rcx,%r11,1),%xmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %xmm15,%xmm0,%xmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %xmm15,%xmm0,%xmm0 vpxorq %xmm6,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm6 vextracti32x4 $0,%zmm6,%xmm13 cmpq $16,%r8 jl .L_small_initial_partial_block_156 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_156 .L_small_initial_partial_block_156: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %xmm13,%xmm14,%xmm14 jmp .L_after_reduction_156 .L_small_initial_compute_done_156: .L_after_reduction_156: jmp .L_small_initial_blocks_encrypted_155 .L_small_initial_num_blocks_is_2_155: vmovdqa64 SHUF_MASK(%rip),%ymm29 vshufi64x2 $0,%ymm2,%ymm2,%ymm0 vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm0,%xmm2 vpshufb %ymm29,%ymm0,%ymm0 vmovdqu8 0(%rcx,%r11,1),%ymm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %ymm15,%ymm0,%ymm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %ymm15,%ymm0,%ymm0 vpxorq %ymm6,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm6 vextracti32x4 $1,%zmm6,%xmm13 subq $16 
* (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_157 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_157 .L_small_initial_partial_block_157: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_157: orq %r8,%r8 je .L_after_reduction_157 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_157: jmp .L_small_initial_blocks_encrypted_155 .L_small_initial_num_blocks_is_3_155: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vextracti32x4 $2,%zmm6,%xmm13 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_158 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 
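# Annotation: each short-message (<= 16 blocks) case ends the same way: the
# wide GHASH accumulators are folded horizontally down to 128 bits, reduced
# with POLY2 into %xmm14, and, if a partial block remains (%r8 != 0), its
# byte-reflected ciphertext is XORed into the hash state for later completion.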
vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_158 .L_small_initial_partial_block_158: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_158: orq %r8,%r8 je .L_after_reduction_158 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_158: jmp .L_small_initial_blocks_encrypted_155 .L_small_initial_num_blocks_is_4_155: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vextracti32x4 $3,%zmm6,%xmm13 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_159 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 
vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_159 .L_small_initial_partial_block_159: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_159: orq %r8,%r8 je .L_after_reduction_159 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_159: jmp .L_small_initial_blocks_encrypted_155 .L_small_initial_num_blocks_is_5_155: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%xmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %xmm15,%xmm3,%xmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %xmm15,%xmm3,%xmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %xmm7,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %xmm29,%xmm3,%xmm7 vextracti32x4 $0,%zmm7,%xmm13 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_160 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq 
%zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_160 .L_small_initial_partial_block_160: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_160: orq %r8,%r8 je .L_after_reduction_160 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_160: jmp .L_small_initial_blocks_encrypted_155 .L_small_initial_num_blocks_is_6_155: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%ymm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %ymm15,%ymm3,%ymm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %ymm15,%ymm3,%ymm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %ymm7,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %ymm29,%ymm3,%ymm7 vextracti32x4 $1,%zmm7,%xmm13 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_161 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 
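/*
 * Six-block case, full final block: the GHASH multiply pulls a 64-byte slice
 * of the hash-key table at 256(%rsi) for the first vector and the 32-byte
 * entry at 320(%rsi) for the remaining two blocks. The offsets shrink as the
 * block count grows, which is consistent with the table holding increasing
 * powers of the hash key toward lower addresses (an assumption; the table
 * itself is filled in by the key-setup routine).
 */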
vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_161 .L_small_initial_partial_block_161: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_161: orq %r8,%r8 je .L_after_reduction_161 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_161: jmp .L_small_initial_blocks_encrypted_155 .L_small_initial_num_blocks_is_7_155: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 
128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vextracti32x4 $2,%zmm7,%xmm13 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_162 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_162 .L_small_initial_partial_block_162: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_162: orq %r8,%r8 je .L_after_reduction_162 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_162: jmp .L_small_initial_blocks_encrypted_155 .L_small_initial_num_blocks_is_8_155: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 
64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vextracti32x4 $3,%zmm7,%xmm13 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_163 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_163 .L_small_initial_partial_block_163: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_163: orq %r8,%r8 je .L_after_reduction_163 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_163: jmp .L_small_initial_blocks_encrypted_155 .L_small_initial_num_blocks_is_9_155: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%xmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %xmm15,%xmm4,%xmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %xmm15,%xmm4,%xmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %xmm10,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %xmm29,%xmm4,%xmm10 vextracti32x4 $0,%zmm10,%xmm13 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_164 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 
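/*
 * Nine blocks no longer fit one vector, so the products were accumulated in
 * three groups above: %zmm6 and %zmm7 against two 64-byte slices of the
 * hash-key table (208(%rsi) and 272(%rsi)) and the lone ninth block %xmm10
 * against the single entry at 336(%rsi). The lane folds around this point
 * merge all three before the usual POLY2 reduction.
 */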
vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_164 .L_small_initial_partial_block_164: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_164: orq %r8,%r8 je .L_after_reduction_164 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_164: jmp .L_small_initial_blocks_encrypted_155 .L_small_initial_num_blocks_is_10_155: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%ymm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %ymm15,%ymm4,%ymm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast 
%ymm15,%ymm4,%ymm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %ymm10,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %ymm29,%ymm4,%ymm10 vextracti32x4 $1,%zmm10,%xmm13 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_165 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_165 .L_small_initial_partial_block_165: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_165: orq %r8,%r8 je .L_after_reduction_165 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_165: jmp .L_small_initial_blocks_encrypted_155 
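/*
 * Every .L_small_initial_num_blocks_is_N_155 stanza below has the same shape:
 * build N big-endian counter blocks from %xmm2 with the ddq_add_* constants,
 * load the input through a byte mask taken from byte64_len_to_mask_table,
 * encrypt in counter mode with the 10-round AES-128 schedule at (%rdi), store
 * under the same mask, then GHASH the byte-reflected ciphertext. The
 * subq/cmpq on %r8 decides whether the final block was complete: the complete
 * path zeroes the partial-block length at (%rdx), while the partial path
 * records the remaining length and stashes the last block (%xmm12) at
 * 16(%rsi), apparently so a later call can finish that block.
 */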
.L_small_initial_num_blocks_is_11_155: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vextracti32x4 $2,%zmm10,%xmm13 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_166 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq 
$4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_166 .L_small_initial_partial_block_166: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_166: orq %r8,%r8 je .L_after_reduction_166 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_166: jmp .L_small_initial_blocks_encrypted_155 .L_small_initial_num_blocks_is_12_155: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 
vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vextracti32x4 $3,%zmm10,%xmm13 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_167 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_167 .L_small_initial_partial_block_167: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_167: orq %r8,%r8 je .L_after_reduction_167 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_167: jmp .L_small_initial_blocks_encrypted_155 .L_small_initial_num_blocks_is_13_155: vmovdqa64 SHUF_MASK(%rip),%zmm29 
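/*
 * Thirteen-block case: the counters are spread across %zmm0, %zmm3, %zmm4 and
 * a single leftover lane in %xmm5 (ddq_add_1234/5678/8888 supply the per-lane
 * increments), and %r15 = length - 192 indexes byte64_len_to_mask_table so the
 * load and store of the 13th block go through mask %k1.
 */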
vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%xmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %xmm15,%xmm5,%xmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %xmm15,%xmm5,%xmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %xmm11,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %xmm29,%xmm5,%xmm11 vextracti32x4 $0,%zmm11,%xmm13 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_168 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq 
%zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_168 .L_small_initial_partial_block_168: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_168: orq %r8,%r8 je .L_after_reduction_168 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_168: jmp .L_small_initial_blocks_encrypted_155 .L_small_initial_num_blocks_is_14_155: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%ymm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %ymm15,%ymm5,%ymm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 
vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %ymm15,%ymm5,%ymm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %ymm11,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %ymm29,%ymm5,%ymm11 vextracti32x4 $1,%zmm11,%xmm13 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_169 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_169 .L_small_initial_partial_block_169: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 
272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_169: orq %r8,%r8 je .L_after_reduction_169 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_169: jmp .L_small_initial_blocks_encrypted_155 .L_small_initial_num_blocks_is_15_155: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 
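/*
 * Fifteen-block keystream is ready; next the four keystream vectors are XORed
 * with the loaded input, written out (the last vector under mask %k1),
 * byte-reflected with SHUF_MASK, and fed into the same GHASH fold/reduce
 * sequence as the smaller cases.
 */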
vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %zmm29,%zmm5,%zmm11 vextracti32x4 $2,%zmm11,%xmm13 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_170 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_170 .L_small_initial_partial_block_170: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq 
%ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_170: orq %r8,%r8 je .L_after_reduction_170 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_170: jmp .L_small_initial_blocks_encrypted_155 .L_small_initial_num_blocks_is_16_155: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %zmm29,%zmm5,%zmm11 vextracti32x4 $3,%zmm11,%xmm13 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_171: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq 
$0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_171: vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_171: .L_small_initial_blocks_encrypted_155: .L_ghash_done_10: vmovdqu64 %xmm2,0(%rsi) vmovdqu64 %xmm14,64(%rsi) .L_enc_dec_done_10: jmp .Lexit_gcm_encrypt .align 32 .Laes_gcm_encrypt_192_avx512: orq %r8,%r8 je .L_enc_dec_done_172 xorq %r14,%r14 vmovdqu64 64(%rsi),%xmm14 movq (%rdx),%r11 orq %r11,%r11 je .L_partial_block_done_173 movl $16,%r10d leaq byte_len_to_mask_table(%rip),%r12 cmpq %r10,%r8 cmovcq %r8,%r10 kmovw (%r12,%r10,2),%k1 vmovdqu8 (%rcx),%xmm0{%k1}{z} vmovdqu64 16(%rsi),%xmm3 vmovdqu64 336(%rsi),%xmm4 leaq SHIFT_MASK(%rip),%r12 addq %r11,%r12 vmovdqu64 (%r12),%xmm5 vpshufb %xmm5,%xmm3,%xmm3 vpxorq %xmm0,%xmm3,%xmm3 leaq (%r8,%r11,1),%r13 subq $16,%r13 jge .L_no_extra_mask_173 subq %r13,%r12 .L_no_extra_mask_173: vmovdqu64 16(%r12),%xmm0 vpand %xmm0,%xmm3,%xmm3 vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 vpshufb %xmm5,%xmm3,%xmm3 vpxorq %xmm3,%xmm14,%xmm14 cmpq $0,%r13 jl .L_partial_incomplete_173 vpclmulqdq $0x11,%xmm4,%xmm14,%xmm7 vpclmulqdq $0x00,%xmm4,%xmm14,%xmm10 vpclmulqdq $0x01,%xmm4,%xmm14,%xmm11 vpclmulqdq $0x10,%xmm4,%xmm14,%xmm14 vpxorq %xmm11,%xmm14,%xmm14 vpsrldq $8,%xmm14,%xmm11 vpslldq $8,%xmm14,%xmm14 vpxorq %xmm11,%xmm7,%xmm7 vpxorq %xmm10,%xmm14,%xmm14 vmovdqu64 POLY2(%rip),%xmm11 vpclmulqdq $0x01,%xmm14,%xmm11,%xmm10 vpslldq $8,%xmm10,%xmm10 vpxorq %xmm10,%xmm14,%xmm14 vpclmulqdq $0x00,%xmm14,%xmm11,%xmm10 vpsrldq $4,%xmm10,%xmm10 vpclmulqdq $0x10,%xmm14,%xmm11,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm10,%xmm7,%xmm14 movq $0,(%rdx) movq %r11,%r12 movq $16,%r11 subq %r12,%r11 jmp .L_enc_dec_done_173 .L_partial_incomplete_173: addq %r8,(%rdx) movq %r8,%r11 .L_enc_dec_done_173: leaq byte_len_to_mask_table(%rip),%r12 kmovw (%r12,%r11,2),%k1 vmovdqu64 %xmm14,64(%rsi) vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 vpshufb %xmm5,%xmm3,%xmm3 movq %r9,%r12 vmovdqu8 %xmm3,(%r12){%k1} .L_partial_block_done_173: vmovdqu64 0(%rsi),%xmm2 subq %r11,%r8 je .L_enc_dec_done_172 cmpq $256,%r8 jbe .L_message_below_equal_16_blocks_172 vmovdqa64 SHUF_MASK(%rip),%zmm29 vmovdqa64 ddq_addbe_4444(%rip),%zmm27 vmovdqa64 ddq_addbe_1234(%rip),%zmm28 vmovd %xmm2,%r15d andl $255,%r15d vshufi64x2 $0,%zmm2,%zmm2,%zmm2 
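# Annotation (editor's comment only; no instructions added): the AES-192 bulk
# path .Laes_gcm_encrypt_192_avx512 begins above. %zmm2 now holds the current
# counter block broadcast to all four 128-bit lanes and %r15d its low byte;
# the cmpb $240 check below appears to decide whether 16 counter increments
# can be applied with the byte-swapped add constants (ddq_addbe_*) without an
# 8-bit carry, falling back to the *_overflow path otherwise.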
vpshufb %zmm29,%zmm2,%zmm2 cmpb $240,%r15b jae .L_next_16_overflow_174 vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_174 .L_next_16_overflow_174: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_174: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 0(%rcx,%r11,1),%zmm0 vmovdqu8 64(%rcx,%r11,1),%zmm3 vmovdqu8 128(%rcx,%r11,1),%zmm4 vmovdqu8 192(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 176(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 192(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,0(%r10,%r11,1) vmovdqu8 %zmm10,64(%r10,%r11,1) vmovdqu8 %zmm11,128(%r10,%r11,1) vmovdqu8 %zmm12,192(%r10,%r11,1) vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 vmovdqa64 %zmm7,768(%rsp) vmovdqa64 %zmm10,832(%rsp) vmovdqa64 %zmm11,896(%rsp) vmovdqa64 %zmm12,960(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_175 vmovdqu64 288(%rsi),%zmm0 vmovdqu64 %zmm0,704(%rsp) vmovdqu64 224(%rsi),%zmm3 vmovdqu64 %zmm3,640(%rsp) vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 160(%rsi),%zmm4 vmovdqu64 %zmm4,576(%rsp) vmovdqu64 96(%rsi),%zmm5 vmovdqu64 %zmm5,512(%rsp) .L_skip_hkeys_precomputation_175: cmpq $512,%r8 jb .L_message_below_32_blocks_172 cmpb $240,%r15b jae .L_next_16_overflow_176 vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd 
%zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_176 .L_next_16_overflow_176: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_176: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 256(%rcx,%r11,1),%zmm0 vmovdqu8 320(%rcx,%r11,1),%zmm3 vmovdqu8 384(%rcx,%r11,1),%zmm4 vmovdqu8 448(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 176(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 192(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,256(%r10,%r11,1) vmovdqu8 %zmm10,320(%r10,%r11,1) vmovdqu8 %zmm11,384(%r10,%r11,1) vmovdqu8 %zmm12,448(%r10,%r11,1) vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 vmovdqa64 %zmm7,1024(%rsp) vmovdqa64 %zmm10,1088(%rsp) vmovdqa64 %zmm11,1152(%rsp) vmovdqa64 %zmm12,1216(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_177 vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 
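# Annotation (editor's comment only; no instructions added): the surrounding
# repeated pattern is GHASH key expansion. Each vpclmulqdq quartet multiplies
# a cached power of the hash key H by the broadcast power held in %zmm3 as a
# carry-less (GF(2^128)) product, and the POLY2-based sequence that follows
# reduces the 256-bit result; the reduced powers are spilled to the stack
# (448(%rsp) down to 0(%rsp)) for use by the 48-block main loop.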
vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,192(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,128(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,64(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq 
%zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,0(%rsp) .L_skip_hkeys_precomputation_177: movq $1,%r14 addq $512,%r11 subq $512,%r8 cmpq $768,%r8 jb .L_no_more_big_nblocks_172 .L_encrypt_big_nblocks_172: cmpb $240,%r15b jae .L_16_blocks_overflow_178 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_178 .L_16_blocks_overflow_178: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_178: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc 
%zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_179 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_179 .L_16_blocks_overflow_179: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_179: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq 
$0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_180 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_180 .L_16_blocks_overflow_180: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_180: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 
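# Annotation (editor's comment only; no instructions added): inside
# .L_encrypt_big_nblocks_172 the AES rounds for 16 counter blocks (vaesenc on
# %zmm0/%zmm3/%zmm4/%zmm5) are interleaved with GHASH of the previous 16
# ciphertext blocks (vpclmulqdq against the H powers cached on the stack);
# vpternlogq with immediate 0x96 acts as a three-way XOR that folds the
# partial products into the running accumulators.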
vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 512(%rcx,%r11,1),%zmm17 vmovdqu8 576(%rcx,%r11,1),%zmm19 vmovdqu8 640(%rcx,%r11,1),%zmm20 vmovdqu8 704(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpternlogq $0x96,%zmm15,%zmm12,%zmm6 vpxorq %zmm24,%zmm6,%zmm6 vpternlogq $0x96,%zmm10,%zmm13,%zmm7 vpxorq %zmm25,%zmm7,%zmm7 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vextracti64x4 $1,%zmm6,%ymm12 vpxorq %ymm12,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm12 vpxorq %xmm12,%xmm6,%xmm6 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm6 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,512(%r10,%r11,1) vmovdqu8 %zmm3,576(%r10,%r11,1) vmovdqu8 %zmm4,640(%r10,%r11,1) vmovdqu8 %zmm5,704(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1024(%rsp) vmovdqa64 %zmm3,1088(%rsp) vmovdqa64 %zmm4,1152(%rsp) vmovdqa64 %zmm5,1216(%rsp) vmovdqa64 %zmm6,%zmm14 addq $768,%r11 subq $768,%r8 cmpq $768,%r8 jae .L_encrypt_big_nblocks_172 .L_no_more_big_nblocks_172: cmpq $512,%r8 jae .L_encrypt_32_blocks_172 cmpq $256,%r8 jae .L_encrypt_16_blocks_172 .L_encrypt_0_blocks_ghash_32_172: movl %r8d,%r10d andl $~15,%r10d movl $256,%ebx subl %r10d,%ebx vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq 
%zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 addl $256,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_181 cmpl $8,%r10d je .L_last_num_blocks_is_8_181 jb .L_last_num_blocks_is_7_1_181 cmpl $12,%r10d je .L_last_num_blocks_is_12_181 jb .L_last_num_blocks_is_11_9_181 cmpl $15,%r10d je .L_last_num_blocks_is_15_181 ja .L_last_num_blocks_is_16_181 cmpl $14,%r10d je .L_last_num_blocks_is_14_181 jmp .L_last_num_blocks_is_13_181 .L_last_num_blocks_is_11_9_181: cmpl $10,%r10d je .L_last_num_blocks_is_10_181 ja .L_last_num_blocks_is_11_181 jmp .L_last_num_blocks_is_9_181 .L_last_num_blocks_is_7_1_181: cmpl $4,%r10d je .L_last_num_blocks_is_4_181 jb .L_last_num_blocks_is_3_1_181 cmpl $6,%r10d ja .L_last_num_blocks_is_7_181 je .L_last_num_blocks_is_6_181 jmp .L_last_num_blocks_is_5_181 .L_last_num_blocks_is_3_1_181: cmpl $2,%r10d ja .L_last_num_blocks_is_3_181 je .L_last_num_blocks_is_2_181 .L_last_num_blocks_is_1_181: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_182 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_182 .L_16_blocks_overflow_182: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_182: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq 
$0x96,%zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_183 subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_183 .L_small_initial_partial_block_183: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_183 .L_small_initial_compute_done_183: .L_after_reduction_183: jmp .L_last_blocks_done_181 .L_last_num_blocks_is_2_181: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_184 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_184 .L_16_blocks_overflow_184: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_184: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_185 subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_185 .L_small_initial_partial_block_185: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_185: orq %r8,%r8 je .L_after_reduction_185 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_185: jmp .L_last_blocks_done_181 .L_last_num_blocks_is_3_181: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae 
.L_16_blocks_overflow_186 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_186 .L_16_blocks_overflow_186: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_186: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_187 subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_187 .L_small_initial_partial_block_187: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 
320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_187: orq %r8,%r8 je .L_after_reduction_187 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_187: jmp .L_last_blocks_done_181 .L_last_num_blocks_is_4_181: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_188 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_188 .L_16_blocks_overflow_188: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_188: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq 
$16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_189 subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_189 .L_small_initial_partial_block_189: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_189: orq %r8,%r8 je .L_after_reduction_189 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_189: jmp .L_last_blocks_done_181 .L_last_num_blocks_is_5_181: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_190 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_190 .L_16_blocks_overflow_190: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_190: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq 
$0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_191 subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_191 .L_small_initial_partial_block_191: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 
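# Annotation (editor's comment only; no instructions added): the
# vextracti64x4/vextracti32x4 + vpxorq sequence around this point folds the
# four 128-bit lanes of the GHASH accumulators down to a single 128-bit
# value, after which the vpclmulqdq operations against the POLY2 constant
# perform the final reduction modulo the GHASH polynomial into %xmm14.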
vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_191: orq %r8,%r8 je .L_after_reduction_191 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_191: jmp .L_last_blocks_done_181 .L_last_num_blocks_is_6_181: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_192 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_192 .L_16_blocks_overflow_192: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_192: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 
%ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_193 subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_193 .L_small_initial_partial_block_193: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_193: orq %r8,%r8 je .L_after_reduction_193 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_193: jmp .L_last_blocks_done_181 .L_last_num_blocks_is_7_181: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_194 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_194 .L_16_blocks_overflow_194: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_194: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq 
$0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_195 subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp 
.L_small_initial_compute_done_195 .L_small_initial_partial_block_195: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_195: orq %r8,%r8 je .L_after_reduction_195 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_195: jmp .L_last_blocks_done_181 .L_last_num_blocks_is_8_181: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_196 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_196 .L_16_blocks_overflow_196: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_196: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 
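/* finish the AES rounds (round keys up through 192(%rdi), i.e. a 12-round AES-192 schedule applied via VAESENC/VAESENCLAST); the interleaved VPTERNLOGQ $0x96 (3-way XOR) ops fold the GHASH partial products into zmm24/zmm25/zmm26 */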
vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_197 subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_197 .L_small_initial_partial_block_197: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_197: orq %r8,%r8 je .L_after_reduction_197 vpxorq %xmm7,%xmm14,%xmm14 
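/* the .L_last_num_blocks_is_9..16 handlers below follow the same pattern, spreading the counter blocks over a third vector (zmm4) and, for 13-16 blocks, a fourth (zmm5), and pulling the matching precomputed hash-key powers from (%rsi) */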
.L_after_reduction_197: jmp .L_last_blocks_done_181 .L_last_num_blocks_is_9_181: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_198 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_198 .L_16_blocks_overflow_198: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_198: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 
%zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_199 subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_199 .L_small_initial_partial_block_199: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_199: orq %r8,%r8 je .L_after_reduction_199 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_199: jmp .L_last_blocks_done_181 .L_last_num_blocks_is_10_181: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_200 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_200 .L_16_blocks_overflow_200: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb 
%zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_200: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_201 subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_201 .L_small_initial_partial_block_201: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_201: orq %r8,%r8 je .L_after_reduction_201 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_201: jmp .L_last_blocks_done_181 .L_last_num_blocks_is_11_181: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_202 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_202 .L_16_blocks_overflow_202: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_202: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 
64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_203 subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 
$2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_203 .L_small_initial_partial_block_203: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_203: orq %r8,%r8 je .L_after_reduction_203 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_203: jmp .L_last_blocks_done_181 .L_last_num_blocks_is_12_181: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_204 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_204 .L_16_blocks_overflow_204: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_204: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_205 subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq 
%zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_205 .L_small_initial_partial_block_205: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_205: orq %r8,%r8 je .L_after_reduction_205 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_205: jmp .L_last_blocks_done_181 .L_last_num_blocks_is_13_181: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_206 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_206 .L_16_blocks_overflow_206: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_206: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 
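/* 13-block tail: the AES rounds over zmm0/zmm3/zmm4/xmm5 are interleaved with VPCLMULQDQ partial products computed from data staged on the stack (1024..1216(%rsp) and 0..192(%rsp,%rbx)) */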
vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_207 subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 
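/* fold the hi/lo/mid halves of the partial products, collapse 512->128 bits with the extract/XOR ladder, and reduce modulo the GCM polynomial (POLY2) into xmm14 */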
vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_207 .L_small_initial_partial_block_207: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_207: orq %r8,%r8 je .L_after_reduction_207 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_207: jmp .L_last_blocks_done_181 .L_last_num_blocks_is_14_181: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_208 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_208 .L_16_blocks_overflow_208: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_208: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 
vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_209 subq 
$16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_209 .L_small_initial_partial_block_209: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_209: orq %r8,%r8 je .L_after_reduction_209 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_209: jmp .L_last_blocks_done_181 .L_last_num_blocks_is_15_181: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq 
$192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_210 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_210 .L_16_blocks_overflow_210: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_210: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_211 subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_211 .L_small_initial_partial_block_211: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq 
%zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_211: orq %r8,%r8 je .L_after_reduction_211 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_211: jmp .L_last_blocks_done_181 .L_last_num_blocks_is_16_181: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_212 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_212 .L_16_blocks_overflow_212: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_212: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 
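# 16-block tail (.L_last_num_blocks_is_16_181): the vpternlogq 0x96 (3-way XOR) ops
# below fold the per-lane GHASH products into the running accumulators
# %zmm24/%zmm25/%zmm26 while the remaining AES rounds finish on the counter groups.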
vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_213: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_213: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_213: jmp .L_last_blocks_done_181 .L_last_num_blocks_is_0_181: vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq 
$0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_181: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_172 .L_encrypt_32_blocks_172: cmpb $240,%r15b jae .L_16_blocks_overflow_214 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_214 .L_16_blocks_overflow_214: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_214: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc 
%zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_215 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_215 .L_16_blocks_overflow_215: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_215: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 
64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq 
$0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 subq $512,%r8 addq $512,%r11 movl %r8d,%r10d andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_216 cmpl $8,%r10d je .L_last_num_blocks_is_8_216 jb .L_last_num_blocks_is_7_1_216 cmpl $12,%r10d je .L_last_num_blocks_is_12_216 jb .L_last_num_blocks_is_11_9_216 cmpl $15,%r10d je .L_last_num_blocks_is_15_216 ja .L_last_num_blocks_is_16_216 cmpl $14,%r10d je .L_last_num_blocks_is_14_216 jmp .L_last_num_blocks_is_13_216 .L_last_num_blocks_is_11_9_216: cmpl $10,%r10d je .L_last_num_blocks_is_10_216 ja .L_last_num_blocks_is_11_216 jmp .L_last_num_blocks_is_9_216 .L_last_num_blocks_is_7_1_216: cmpl $4,%r10d je .L_last_num_blocks_is_4_216 jb .L_last_num_blocks_is_3_1_216 cmpl $6,%r10d ja .L_last_num_blocks_is_7_216 je .L_last_num_blocks_is_6_216 jmp .L_last_num_blocks_is_5_216 .L_last_num_blocks_is_3_1_216: cmpl $2,%r10d ja .L_last_num_blocks_is_3_216 je .L_last_num_blocks_is_2_216 .L_last_num_blocks_is_1_216: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_217 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_217 .L_16_blocks_overflow_217: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_217: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc 
%xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_218 subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_218 .L_small_initial_partial_block_218: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_218 .L_small_initial_compute_done_218: .L_after_reduction_218: jmp .L_last_blocks_done_216 .L_last_num_blocks_is_2_216: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_219 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_219 .L_16_blocks_overflow_219: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_219: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 
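# 2-block tail (.L_last_num_blocks_is_2_216): only a YMM pair of counter blocks is
# encrypted here; GHASH of the previously buffered blocks at 0..192(%rsp,%rbx)
# against the key-power copies cached at 832..960(%rsp) proceeds in parallel.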
vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_220 subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_220 .L_small_initial_partial_block_220: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_220: orq %r8,%r8 je .L_after_reduction_220 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_220: jmp .L_last_blocks_done_216 .L_last_num_blocks_is_3_216: leaq 
byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_221 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_221 .L_16_blocks_overflow_221: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_221: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_222 subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_222 
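# Partial-block variant of the 3-block tail: record the residual byte count at
# (%rdx) and the last (possibly partial) output block %xmm11 at 16(%rsi), GHASH
# only the complete blocks against the key powers in the context, then reduce
# modulo the GCM polynomial (POLY2) into %xmm14.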
.L_small_initial_partial_block_222: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_222: orq %r8,%r8 je .L_after_reduction_222 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_222: jmp .L_last_blocks_done_216 .L_last_num_blocks_is_4_216: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_223 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_223 .L_16_blocks_overflow_223: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_223: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} 
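# 4-block tail: byte-swap the output blocks for GHASH using the shuffle mask in
# %zmm29, keep the last block in %xmm7, then branch to the full-block or
# partial-block hashing variant depending on the bytes left in %r8.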
vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_224 subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_224 .L_small_initial_partial_block_224: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_224: orq %r8,%r8 je .L_after_reduction_224 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_224: jmp .L_last_blocks_done_216 .L_last_num_blocks_is_5_216: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_225 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_225 .L_16_blocks_overflow_225: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_225: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc 
%xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_226 subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_226 .L_small_initial_partial_block_226: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 
$1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_226: orq %r8,%r8 je .L_after_reduction_226 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_226: jmp .L_last_blocks_done_216 .L_last_num_blocks_is_6_216: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_227 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_227 .L_16_blocks_overflow_227: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_227: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 
%ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_228 subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_228 .L_small_initial_partial_block_228: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_228: orq %r8,%r8 je .L_after_reduction_228 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_228: jmp .L_last_blocks_done_216 .L_last_num_blocks_is_7_216: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_229 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_229 .L_16_blocks_overflow_229: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_229: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq 
$0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_230 subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_230 
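# 7-block tail, partial last block: same bookkeeping as the shorter tails, saving
# the residual length and last output block, hashing the complete blocks, and
# performing the POLY2 reduction into %xmm14.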
.L_small_initial_partial_block_230: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_230: orq %r8,%r8 je .L_after_reduction_230 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_230: jmp .L_last_blocks_done_216 .L_last_num_blocks_is_8_216: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_231 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_231 .L_16_blocks_overflow_231: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_231: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 
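# 8-block tail, remaining AES rounds: this specialization broadcasts round keys
# from 0..192(%rdi), i.e. 11 vaesenc rounds plus vaesenclast, matching the
# 12-round AES-192 key schedule.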
vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_232 subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_232 .L_small_initial_partial_block_232: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_232: orq %r8,%r8 je .L_after_reduction_232 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_232: jmp .L_last_blocks_done_216 
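# 9-block tail: two full ZMM counter groups plus one extra XMM block; the mask in
# %k1 (looked up in byte64_len_to_mask_table from the residual length) gates the
# masked load and store of the trailing bytes past the first two 64-byte groups.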
.L_last_num_blocks_is_9_216: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_233 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_233 .L_16_blocks_overflow_233: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_233: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} 
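/*
 * Only the bytes selected by %k1 were written above; the {%k1}{z} register
 * copy also zeroes the unused lanes so the partial block enters GHASH with
 * clean padding.  The code that follows byte-reflects the ciphertext and then
 * picks one of two tails: .L_small_initial_partial_block_* records the number
 * of leftover bytes at (%rdx) and saves the last block at 16(%rsi), while the
 * full-block path stores 0 at (%rdx) and folds everything into the hash now.
 */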
vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_234 subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_234 .L_small_initial_partial_block_234: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_234: orq %r8,%r8 je .L_after_reduction_234 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_234: jmp .L_last_blocks_done_216 .L_last_num_blocks_is_10_216: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_235 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_235 .L_16_blocks_overflow_235: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_235: 
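/*
 * As in the other cases, the AES rounds for the fresh counter blocks are
 * interleaved with vpclmulqdq multiplies that hash the 16 blocks processed in
 * the previous iteration (saved byte-reflected on the stack) against
 * precomputed powers of H; the four partial products are accumulated
 * separately and only reduced once at the end.
 */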
vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_236 subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq 
$0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_236 .L_small_initial_partial_block_236: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_236: orq %r8,%r8 je .L_after_reduction_236 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_236: jmp .L_last_blocks_done_216 .L_last_num_blocks_is_11_216: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_237 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_237 .L_16_blocks_overflow_237: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_237: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq 
%zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_238 subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq 
$0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_238 .L_small_initial_partial_block_238: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_238: orq %r8,%r8 je .L_after_reduction_238 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_238: jmp .L_last_blocks_done_216 .L_last_num_blocks_is_12_216: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_239 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_239 .L_16_blocks_overflow_239: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_239: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 
vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_240 subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq 
%ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_240 .L_small_initial_partial_block_240: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_240: orq %r8,%r8 je .L_after_reduction_240 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_240: jmp .L_last_blocks_done_216 .L_last_num_blocks_is_13_216: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_241 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_241 .L_16_blocks_overflow_241: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_241: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_242 subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 
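/*
 * Each block-count path indexes the Htable at (%rsi) from a different offset
 * so that the oldest block is paired with the highest power of H it needs and
 * the newest complete block with H^1 at 336(%rsi).  The partial-block
 * variants start one 16-byte slot higher (one power lower) and only XOR the
 * trailing partial block into the accumulator, leaving its multiply for later.
 */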
vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_242 .L_small_initial_partial_block_242: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_242: orq %r8,%r8 je .L_after_reduction_242 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_242: jmp .L_last_blocks_done_216 .L_last_num_blocks_is_14_216: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_243 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_243 .L_16_blocks_overflow_243: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_243: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq 
%ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_244 subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 
192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_244 .L_small_initial_partial_block_244: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_244: orq %r8,%r8 je .L_after_reduction_244 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_244: jmp .L_last_blocks_done_216 .L_last_num_blocks_is_15_216: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_245 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp 
.L_16_blocks_ok_245 .L_16_blocks_overflow_245: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_245: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq 
%zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_246 subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_246 .L_small_initial_partial_block_246: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 
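/*
 * The 512-bit high/low partial products have just been folded down to a
 * single 128-bit pair; the POLY2 multiplies below perform the usual two-step
 * reduction of the 256-bit carry-less product modulo the GHASH polynomial,
 * leaving the updated hash in %xmm14.
 */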
vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_246: orq %r8,%r8 je .L_after_reduction_246 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_246: jmp .L_last_blocks_done_216 .L_last_num_blocks_is_16_216: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_247 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_247 .L_16_blocks_overflow_247: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_247: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc 
%zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_248: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_248: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_248: jmp .L_last_blocks_done_216 .L_last_num_blocks_is_0_216: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 
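/*
 * Blocks-is-0 tail: nothing new is encrypted, so the blocks saved at
 * 768..960(%rsp) from the previous iteration are simply multiplied by the
 * staged powers of H and the accumulators reduced with POLY2.
 * .L_last_blocks_done_216 then restores the counter byte order in %xmm2 and
 * jumps to the common .L_ghash_done_172 exit; .L_encrypt_16_blocks_172 further
 * down is the main body that encrypts a full 16-block stretch while hashing
 * the previous one.
 */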
vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_216: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_172 .L_encrypt_16_blocks_172: cmpb $240,%r15b jae .L_16_blocks_overflow_249 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_249 .L_16_blocks_overflow_249: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_249: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 256(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 320(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 384(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 448(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 subq $256,%r8 addq $256,%r11 movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_250 cmpl $8,%r10d je .L_last_num_blocks_is_8_250 jb .L_last_num_blocks_is_7_1_250 cmpl $12,%r10d je .L_last_num_blocks_is_12_250 jb .L_last_num_blocks_is_11_9_250 cmpl $15,%r10d je .L_last_num_blocks_is_15_250 ja .L_last_num_blocks_is_16_250 cmpl $14,%r10d je .L_last_num_blocks_is_14_250 jmp .L_last_num_blocks_is_13_250 .L_last_num_blocks_is_11_9_250: cmpl $10,%r10d je .L_last_num_blocks_is_10_250 ja .L_last_num_blocks_is_11_250 jmp .L_last_num_blocks_is_9_250 .L_last_num_blocks_is_7_1_250: cmpl $4,%r10d je .L_last_num_blocks_is_4_250 jb .L_last_num_blocks_is_3_1_250 cmpl $6,%r10d ja 
.L_last_num_blocks_is_7_250 je .L_last_num_blocks_is_6_250 jmp .L_last_num_blocks_is_5_250 .L_last_num_blocks_is_3_1_250: cmpl $2,%r10d ja .L_last_num_blocks_is_3_250 je .L_last_num_blocks_is_2_250 .L_last_num_blocks_is_1_250: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_251 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_251 .L_16_blocks_overflow_251: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_251: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %xmm31,%xmm0,%xmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_252 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq 
$0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_252 .L_small_initial_partial_block_252: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_252 .L_small_initial_compute_done_252: .L_after_reduction_252: jmp .L_last_blocks_done_250 .L_last_num_blocks_is_2_250: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_253 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_253 .L_16_blocks_overflow_253: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_253: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %ymm31,%ymm0,%ymm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq 
$8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_254 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_254 .L_small_initial_partial_block_254: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_254: orq %r8,%r8 je .L_after_reduction_254 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_254: jmp .L_last_blocks_done_250 .L_last_num_blocks_is_3_250: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_255 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_255 .L_16_blocks_overflow_255: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_255: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 
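/* 3-block tail (.L_last_num_blocks_is_3): keep running the AES rounds on %zmm0 while folding the previous 16 ciphertext blocks (cached at 1280..1472(%rsp)) against the hash key powers at 512..704(%rsp). */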
vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_256 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_256 .L_small_initial_partial_block_256: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 
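/* Finish the horizontal fold and POLY2 reduction for the 3-block tail; the 4-block tail handler (.L_last_num_blocks_is_4) follows. */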
vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_256: orq %r8,%r8 je .L_after_reduction_256 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_256: jmp .L_last_blocks_done_250 .L_last_num_blocks_is_4_250: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_257 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_257 .L_16_blocks_overflow_257: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_257: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq 
%zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_258 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_258 .L_small_initial_partial_block_258: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_258: orq %r8,%r8 je .L_after_reduction_258 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_258: jmp .L_last_blocks_done_250 .L_last_num_blocks_is_5_250: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_259 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_259 .L_16_blocks_overflow_259: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_259: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 
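/* 5-block tail: four counter blocks in %zmm0 plus one in %xmm3; the extra block uses the masked load/store path ({%k1}) while the GHASH of the cached ciphertext continues between AES rounds. */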
vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_260 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq 
$4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_260 .L_small_initial_partial_block_260: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_260: orq %r8,%r8 je .L_after_reduction_260 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_260: jmp .L_last_blocks_done_250 .L_last_num_blocks_is_6_250: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_261 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_261 .L_16_blocks_overflow_261: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_261: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc 
%ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_262 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_262 .L_small_initial_partial_block_262: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_262: orq %r8,%r8 je .L_after_reduction_262 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_262: jmp .L_last_blocks_done_250 .L_last_num_blocks_is_7_250: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_263 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_263 .L_16_blocks_overflow_263: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_263: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc 
%zmm31,%zmm3,%zmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_264 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_264 .L_small_initial_partial_block_264: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_264: orq %r8,%r8 je .L_after_reduction_264 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_264: jmp .L_last_blocks_done_250 .L_last_num_blocks_is_8_250: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_265 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_265 .L_16_blocks_overflow_265: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb 
%zmm29,%zmm3,%zmm3 .L_16_blocks_ok_265: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_266 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq 
$0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_266 .L_small_initial_partial_block_266: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_266: orq %r8,%r8 je .L_after_reduction_266 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_266: jmp .L_last_blocks_done_250 .L_last_num_blocks_is_9_250: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_267 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_267 .L_16_blocks_overflow_267: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_267: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 
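/* 9-block tail: eight counter blocks in %zmm0/%zmm3 plus one in %xmm4 (masked via %k1), with the deferred GHASH of the cached ciphertext interleaved between AES rounds. */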
vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_268 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq 
$0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_268 .L_small_initial_partial_block_268: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_268: orq %r8,%r8 je .L_after_reduction_268 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_268: jmp .L_last_blocks_done_250 .L_last_num_blocks_is_10_250: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_269 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_269 .L_16_blocks_overflow_269: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_269: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 
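/* 10-block tail: eight counter blocks in %zmm0/%zmm3 plus two in %ymm4 (masked via %k1). */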
vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_270 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 
vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_270 .L_small_initial_partial_block_270: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_270: orq %r8,%r8 je .L_after_reduction_270 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_270: jmp .L_last_blocks_done_250 .L_last_num_blocks_is_11_250: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_271 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_271 .L_16_blocks_overflow_271: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_271: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 
32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_272 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 
176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_272 .L_small_initial_partial_block_272: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_272: orq %r8,%r8 je .L_after_reduction_272 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_272: jmp .L_last_blocks_done_250 .L_last_num_blocks_is_12_250: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_273 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_273 .L_16_blocks_overflow_273: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_273: 
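/* Annotation (added): 12-block tail path — encrypt three counter vectors (zmm0/zmm3/zmm4) with the AES round keys broadcast from (%rdi); the final 64-byte chunk is loaded and stored through mask %k1. The interleaved vpclmulqdq/vpternlogq instructions accumulate GHASH over blocks and hash-key powers staged on the stack frame. */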
vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 
%zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_274 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_274 .L_small_initial_partial_block_274: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_274: orq %r8,%r8 je .L_after_reduction_274 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_274: jmp .L_last_blocks_done_250 .L_last_num_blocks_is_13_250: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_275 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd 
%xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_275 .L_16_blocks_overflow_275: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_275: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq 
$8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_276 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_276 .L_small_initial_partial_block_276: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 
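/* Annotation (added): fold the 512-bit GHASH accumulators down to a single 128-bit value (vpsrldq/vpslldq plus lane extractions), then reduce it modulo the GCM polynomial using the POLY2 constant. */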
vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_276: orq %r8,%r8 je .L_after_reduction_276 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_276: jmp .L_last_blocks_done_250 .L_last_num_blocks_is_14_250: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_277 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_277 .L_16_blocks_overflow_277: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_277: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 
128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_278 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_278 .L_small_initial_partial_block_278: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_278: orq %r8,%r8 je .L_after_reduction_278 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_278: jmp .L_last_blocks_done_250 .L_last_num_blocks_is_15_250: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_279 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_279 .L_16_blocks_overflow_279: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_279: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 
1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_280 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 
vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_280 .L_small_initial_partial_block_280: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_280: orq %r8,%r8 je .L_after_reduction_280 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_280: jmp .L_last_blocks_done_250 .L_last_num_blocks_is_16_250: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_281 vpaddd %zmm28,%zmm2,%zmm0 vpaddd 
%zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_281 .L_16_blocks_overflow_281: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_281: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 
176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_282: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_282: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_282: jmp .L_last_blocks_done_250 .L_last_num_blocks_is_0_250: vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 
640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_250: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_172 .L_message_below_32_blocks_172: subq $256,%r8 addq $256,%r11 movl %r8d,%r10d testq %r14,%r14 jnz .L_skip_hkeys_precomputation_283 vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq 
$0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .L_skip_hkeys_precomputation_283: movq $1,%r14 andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_284 cmpl $8,%r10d je .L_last_num_blocks_is_8_284 jb .L_last_num_blocks_is_7_1_284 cmpl $12,%r10d je .L_last_num_blocks_is_12_284 jb .L_last_num_blocks_is_11_9_284 cmpl $15,%r10d je .L_last_num_blocks_is_15_284 ja .L_last_num_blocks_is_16_284 cmpl $14,%r10d je .L_last_num_blocks_is_14_284 jmp .L_last_num_blocks_is_13_284 .L_last_num_blocks_is_11_9_284: cmpl $10,%r10d je .L_last_num_blocks_is_10_284 ja .L_last_num_blocks_is_11_284 jmp .L_last_num_blocks_is_9_284 .L_last_num_blocks_is_7_1_284: cmpl $4,%r10d je .L_last_num_blocks_is_4_284 jb .L_last_num_blocks_is_3_1_284 cmpl $6,%r10d ja .L_last_num_blocks_is_7_284 je .L_last_num_blocks_is_6_284 jmp .L_last_num_blocks_is_5_284 .L_last_num_blocks_is_3_1_284: cmpl $2,%r10d ja .L_last_num_blocks_is_3_284 je .L_last_num_blocks_is_2_284 .L_last_num_blocks_is_1_284: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_285 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_285 .L_16_blocks_overflow_285: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_285: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 
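/* Annotation (added): single-block tail — if a full 16-byte block was produced, the partial-block length at (%rdx) is cleared and the block is folded into the GHASH state; otherwise the remaining length is stored at (%rdx) and the partial ciphertext block is kept at 16(%rsi). */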
vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_286 subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_286 .L_small_initial_partial_block_286: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_286 .L_small_initial_compute_done_286: .L_after_reduction_286: jmp .L_last_blocks_done_284 .L_last_num_blocks_is_2_284: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_287 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_287 .L_16_blocks_overflow_287: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_287: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq 
$0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_288 subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_288 .L_small_initial_partial_block_288: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_288: orq %r8,%r8 je .L_after_reduction_288 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_288: jmp .L_last_blocks_done_284 .L_last_num_blocks_is_3_284: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_289 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_289 .L_16_blocks_overflow_289: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_289: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 
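# Three-block tail: AES rounds on the counter blocks in %zmm0 are
# interleaved with VPCLMULQDQ partial products that multiply the blocks
# buffered on the stack by the precomputed hash-key powers.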
vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_290 subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_290 .L_small_initial_partial_block_290: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 
$1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_290: orq %r8,%r8 je .L_after_reduction_290 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_290: jmp .L_last_blocks_done_284 .L_last_num_blocks_is_4_284: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_291 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_291 .L_16_blocks_overflow_291: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_291: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_292 subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq 
%xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_292 .L_small_initial_partial_block_292: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_292: orq %r8,%r8 je .L_after_reduction_292 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_292: jmp .L_last_blocks_done_284 .L_last_num_blocks_is_5_284: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_293 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_293 .L_16_blocks_overflow_293: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_293: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 
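# Five-block tail: four full input blocks are loaded into %zmm17 and the
# fifth, possibly partial, block into %xmm19 under mask %k1 while the
# counter blocks in %zmm0/%xmm3 continue through the AES rounds.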
vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_294 subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_294 .L_small_initial_partial_block_294: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_294: orq %r8,%r8 je .L_after_reduction_294 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_294: jmp .L_last_blocks_done_284 .L_last_num_blocks_is_6_284: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax 
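# Six-block tail: the remaining length beyond the first 64 bytes indexes
# byte64_len_to_mask_table to form the %k1 byte mask used for the final
# masked loads and stores.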
subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_295 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_295 .L_16_blocks_overflow_295: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_295: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_296 subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq 
$0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_296 .L_small_initial_partial_block_296: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_296: orq %r8,%r8 je .L_after_reduction_296 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_296: jmp .L_last_blocks_done_284 .L_last_num_blocks_is_7_284: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_297 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_297 .L_16_blocks_overflow_297: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_297: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq 
$0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_298 subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_298 .L_small_initial_partial_block_298: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq 
%zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_298: orq %r8,%r8 je .L_after_reduction_298 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_298: jmp .L_last_blocks_done_284 .L_last_num_blocks_is_8_284: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_299 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_299 .L_16_blocks_overflow_299: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_299: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 
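# Eight-block tail: the keystream has been XORed with the input; the last
# output block is kept in %xmm11 for partial-block bookkeeping and the two
# 64-byte halves are stored, the second one under mask %k1.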
vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_300 subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_300 .L_small_initial_partial_block_300: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_300: orq %r8,%r8 je .L_after_reduction_300 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_300: jmp .L_last_blocks_done_284 .L_last_num_blocks_is_9_284: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_301 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_301 .L_16_blocks_overflow_301: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_301: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 
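# Nine-block tail: the running GHASH value in %zmm14 is XORed into the
# first buffered block from 768(%rsp), and the last counter block is
# extracted from %zmm4 and broadcast into %zmm2 to carry the counter
# state forward.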
vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_302 subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 
336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_302 .L_small_initial_partial_block_302: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_302: orq %r8,%r8 je .L_after_reduction_302 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_302: jmp .L_last_blocks_done_284 .L_last_num_blocks_is_10_284: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_303 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_303 .L_16_blocks_overflow_303: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_303: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 
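# Ten-block tail: AES rounds over %zmm0, %zmm3 and the two counter blocks
# in %ymm4 proceed in parallel with the GHASH carry-less multiplies of the
# buffered data against the stacked hash-key powers.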
vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_304 subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 
$1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_304 .L_small_initial_partial_block_304: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_304: orq %r8,%r8 je .L_after_reduction_304 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_304: jmp .L_last_blocks_done_284 .L_last_num_blocks_is_11_284: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_305 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_305 .L_16_blocks_overflow_305: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_305: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 
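# Eleven-block tail: the second batch of GHASH partial products is formed
# from the data loaded at 128(%rsp,%rbx,1) and 896(%rsp) while the AES
# rounds keep walking the key schedule broadcast from (%rdi).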
vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_306 subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq 
$0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_306 .L_small_initial_partial_block_306: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_306: orq %r8,%r8 je .L_after_reduction_306 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_306: jmp .L_last_blocks_done_284 .L_last_num_blocks_is_12_284: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_307 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_307 .L_16_blocks_overflow_307: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_307: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq 
$0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_308 subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_308 .L_small_initial_partial_block_308: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq 
$0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_308: orq %r8,%r8 je .L_after_reduction_308 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_308: jmp .L_last_blocks_done_284 .L_last_num_blocks_is_13_284: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_309 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_309 .L_16_blocks_overflow_309: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_309: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 
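/* Descriptive note (inferred from the surrounding instructions): the vpternlogq
   $0x96 (three-way XOR) operations around this point fold the high, low and middle
   GHASH partial products into running accumulators while the AES rounds for the
   13-block tail proceed in parallel on %zmm0/%zmm3/%zmm4/%xmm5. */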
vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_310 subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 
POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_310 .L_small_initial_partial_block_310: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_310: orq %r8,%r8 je .L_after_reduction_310 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_310: jmp .L_last_blocks_done_284 .L_last_num_blocks_is_14_284: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_311 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_311 .L_16_blocks_overflow_311: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_311: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc 
%ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_312 subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq 
$0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_312 .L_small_initial_partial_block_312: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_312: orq %r8,%r8 je .L_after_reduction_312 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_312: jmp .L_last_blocks_done_284 .L_last_num_blocks_is_15_284: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_313 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_313 .L_16_blocks_overflow_313: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_313: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 
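/* Descriptive note: round key 0 (broadcast from 0(%rdi)) has just been XORed into
   the four counter-block vectors of the 15-block tail. The instructions that follow
   interleave the remaining AES rounds with vpclmulqdq GHASH multiplies of
   stack-resident blocks against what are presumably the precomputed hash-key powers,
   folding partial products with vpternlogq. */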
vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_314 subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_314 .L_small_initial_partial_block_314: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_314: orq %r8,%r8 je .L_after_reduction_314 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_314: jmp .L_last_blocks_done_284 .L_last_num_blocks_is_16_284: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_315 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp 
.L_16_blocks_ok_315 .L_16_blocks_overflow_315: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_315: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq 
%zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_316: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_316: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_316: jmp .L_last_blocks_done_284 .L_last_num_blocks_is_0_284: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 
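/* Descriptive note: horizontal reduction of the GHASH state. The 512-bit
   accumulators in %zmm24/%zmm25 are folded down to 128 bits via vextracti64x4,
   vextracti32x4 and vpxorq, then reduced modulo the GCM polynomial using the POLY2
   constant, leaving the updated hash value in %xmm14. */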
vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_284: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_172 .L_message_below_equal_16_blocks_172: movl %r8d,%r12d addl $15,%r12d shrl $4,%r12d cmpq $8,%r12 je .L_small_initial_num_blocks_is_8_317 jl .L_small_initial_num_blocks_is_7_1_317 cmpq $12,%r12 je .L_small_initial_num_blocks_is_12_317 jl .L_small_initial_num_blocks_is_11_9_317 cmpq $16,%r12 je .L_small_initial_num_blocks_is_16_317 cmpq $15,%r12 je .L_small_initial_num_blocks_is_15_317 cmpq $14,%r12 je .L_small_initial_num_blocks_is_14_317 jmp .L_small_initial_num_blocks_is_13_317 .L_small_initial_num_blocks_is_11_9_317: cmpq $11,%r12 je .L_small_initial_num_blocks_is_11_317 cmpq $10,%r12 je .L_small_initial_num_blocks_is_10_317 jmp .L_small_initial_num_blocks_is_9_317 .L_small_initial_num_blocks_is_7_1_317: cmpq $4,%r12 je .L_small_initial_num_blocks_is_4_317 jl .L_small_initial_num_blocks_is_3_1_317 cmpq $7,%r12 je .L_small_initial_num_blocks_is_7_317 cmpq $6,%r12 je .L_small_initial_num_blocks_is_6_317 jmp .L_small_initial_num_blocks_is_5_317 .L_small_initial_num_blocks_is_3_1_317: cmpq $3,%r12 je .L_small_initial_num_blocks_is_3_317 cmpq $2,%r12 je .L_small_initial_num_blocks_is_2_317 .L_small_initial_num_blocks_is_1_317: vmovdqa64 SHUF_MASK(%rip),%xmm29 vpaddd ONE(%rip),%xmm2,%xmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm0,%xmm2 vpshufb %xmm29,%xmm0,%xmm0 vmovdqu8 0(%rcx,%r11,1),%xmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %xmm15,%xmm0,%xmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %xmm15,%xmm0,%xmm0 vpxorq %xmm6,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm6 vextracti32x4 $0,%zmm6,%xmm13 cmpq $16,%r8 jl .L_small_initial_partial_block_318 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq 
$8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_318 .L_small_initial_partial_block_318: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %xmm13,%xmm14,%xmm14 jmp .L_after_reduction_318 .L_small_initial_compute_done_318: .L_after_reduction_318: jmp .L_small_initial_blocks_encrypted_317 .L_small_initial_num_blocks_is_2_317: vmovdqa64 SHUF_MASK(%rip),%ymm29 vshufi64x2 $0,%ymm2,%ymm2,%ymm0 vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm0,%xmm2 vpshufb %ymm29,%ymm0,%ymm0 vmovdqu8 0(%rcx,%r11,1),%ymm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %ymm15,%ymm0,%ymm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %ymm15,%ymm0,%ymm0 vpxorq %ymm6,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm6 vextracti32x4 $1,%zmm6,%xmm13 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_319 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_319 .L_small_initial_partial_block_319: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq 
$4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_319: orq %r8,%r8 je .L_after_reduction_319 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_319: jmp .L_small_initial_blocks_encrypted_317 .L_small_initial_num_blocks_is_3_317: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vextracti32x4 $2,%zmm6,%xmm13 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_320 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_320 .L_small_initial_partial_block_320: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_320: orq %r8,%r8 je .L_after_reduction_320 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_320: jmp 
.L_small_initial_blocks_encrypted_317 .L_small_initial_num_blocks_is_4_317: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vextracti32x4 $3,%zmm6,%xmm13 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_321 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_321 .L_small_initial_partial_block_321: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_321: orq %r8,%r8 je .L_after_reduction_321 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_321: jmp .L_small_initial_blocks_encrypted_317 .L_small_initial_num_blocks_is_5_317: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd 
ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%xmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %xmm15,%xmm3,%xmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %xmm15,%xmm3,%xmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %xmm7,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %xmm29,%xmm3,%xmm7 vextracti32x4 $0,%zmm7,%xmm13 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_322 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_322 .L_small_initial_partial_block_322: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 
POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_322: orq %r8,%r8 je .L_after_reduction_322 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_322: jmp .L_small_initial_blocks_encrypted_317 .L_small_initial_num_blocks_is_6_317: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%ymm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %ymm15,%ymm3,%ymm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %ymm15,%ymm3,%ymm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %ymm7,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %ymm29,%ymm3,%ymm7 vextracti32x4 $1,%zmm7,%xmm13 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_323 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_323 .L_small_initial_partial_block_323: movq %r8,(%rdx) vmovdqu64 
%xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_323: orq %r8,%r8 je .L_after_reduction_323 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_323: jmp .L_small_initial_blocks_encrypted_317 .L_small_initial_num_blocks_is_7_317: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vextracti32x4 $2,%zmm7,%xmm13 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_324 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 
vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_324 .L_small_initial_partial_block_324: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_324: orq %r8,%r8 je .L_after_reduction_324 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_324: jmp .L_small_initial_blocks_encrypted_317 .L_small_initial_num_blocks_is_8_317: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 
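/* Descriptive note: eight counter blocks (two ZMM registers) are carried through
   the AES rounds with each 128-bit round key broadcast from the schedule at (%rdi);
   the last round key is taken from 192(%rdi), which appears to make this the
   12-round (AES-192) specialization. The vaesenclast results are XORed with the
   loaded input to produce the CTR output written back under mask %k1. */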
vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vextracti32x4 $3,%zmm7,%xmm13 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_325 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_325 .L_small_initial_partial_block_325: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_325: orq %r8,%r8 je .L_after_reduction_325 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_325: jmp .L_small_initial_blocks_encrypted_317 .L_small_initial_num_blocks_is_9_317: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 
64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%xmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %xmm15,%xmm4,%xmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %xmm15,%xmm4,%xmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %xmm10,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %xmm29,%xmm4,%xmm10 vextracti32x4 $0,%zmm10,%xmm13 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_326 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_326 .L_small_initial_partial_block_326: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 
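/* Hash-key powers come from the precomputed table in the context at
   (%rsi); each 64-byte vmovdqu64 load covers four consecutive 16-byte
   powers of H. */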
vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_326: orq %r8,%r8 je .L_after_reduction_326 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_326: jmp .L_small_initial_blocks_encrypted_317 .L_small_initial_num_blocks_is_10_317: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%ymm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %ymm15,%ymm4,%ymm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %ymm15,%ymm4,%ymm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %ymm10,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %ymm29,%ymm4,%ymm10 
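/* Keep the last byte-reflected ciphertext block in %xmm13; it is XORed
   into the GHASH accumulator after reduction when a partial block is
   left over. */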
vextracti32x4 $1,%zmm10,%xmm13 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_327 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_327 .L_small_initial_partial_block_327: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_327: orq %r8,%r8 je .L_after_reduction_327 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_327: jmp .L_small_initial_blocks_encrypted_317 .L_small_initial_num_blocks_is_11_317: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb 
%zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vextracti32x4 $2,%zmm10,%xmm13 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_328 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_328 
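/* Partial final block: the leftover length is stored at (%rdx) and the
   last ciphertext block at 16(%rsi) so a later call can complete it;
   the hash below uses key powers shifted down by one 16-byte slot. */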
.L_small_initial_partial_block_328: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_328: orq %r8,%r8 je .L_after_reduction_328 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_328: jmp .L_small_initial_blocks_encrypted_317 .L_small_initial_num_blocks_is_12_317: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast 
%zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vextracti32x4 $3,%zmm10,%xmm13 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_329 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_329 .L_small_initial_partial_block_329: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_329: orq %r8,%r8 je .L_after_reduction_329 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_329: jmp 
.L_small_initial_blocks_encrypted_317 .L_small_initial_num_blocks_is_13_317: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%xmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %xmm15,%xmm5,%xmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %xmm15,%xmm5,%xmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %xmm11,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %xmm29,%xmm5,%xmm11 vextracti32x4 $0,%zmm11,%xmm13 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_330 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 
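/* vpternlogq with immediate 0x96 is a three-input XOR; it folds the
   high/low/middle partial products into the running accumulators in a
   single instruction. */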
vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_330 .L_small_initial_partial_block_330: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_330: orq %r8,%r8 je .L_after_reduction_330 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_330: jmp .L_small_initial_blocks_encrypted_317 .L_small_initial_num_blocks_is_14_317: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%ymm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %ymm15,%ymm5,%ymm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc 
%ymm15,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %ymm15,%ymm5,%ymm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %ymm11,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %ymm29,%ymm5,%ymm11 vextracti32x4 $1,%zmm11,%xmm13 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_331 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_331 .L_small_initial_partial_block_331: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_331: orq %r8,%r8 je .L_after_reduction_331 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_331: jmp .L_small_initial_blocks_encrypted_317 .L_small_initial_num_blocks_is_15_317: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc 
%zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %zmm29,%zmm5,%zmm11 vextracti32x4 $2,%zmm11,%xmm13 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_332 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_332 .L_small_initial_partial_block_332: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 
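/* The summed partial products are folded from 512 down to 128 bits
   (vextracti64x4/vextracti32x4 plus XOR) and reduced with the
   POLY2(%rip) constants, leaving the updated GHASH value in %xmm14. */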
vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_332: orq %r8,%r8 je .L_after_reduction_332 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_332: jmp .L_small_initial_blocks_encrypted_317 .L_small_initial_num_blocks_is_16_317: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc 
%zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %zmm29,%zmm5,%zmm11 vextracti32x4 $3,%zmm11,%xmm13 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_333: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_333: vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_333: .L_small_initial_blocks_encrypted_317: .L_ghash_done_172: vmovdqu64 %xmm2,0(%rsi) vmovdqu64 %xmm14,64(%rsi) .L_enc_dec_done_172: jmp .Lexit_gcm_encrypt .align 32 .Laes_gcm_encrypt_256_avx512: orq %r8,%r8 je .L_enc_dec_done_334 xorq %r14,%r14 vmovdqu64 64(%rsi),%xmm14 movq (%rdx),%r11 orq %r11,%r11 je .L_partial_block_done_335 movl $16,%r10d leaq byte_len_to_mask_table(%rip),%r12 cmpq %r10,%r8 cmovcq %r8,%r10 kmovw (%r12,%r10,2),%k1 vmovdqu8 (%rcx),%xmm0{%k1}{z} vmovdqu64 16(%rsi),%xmm3 vmovdqu64 336(%rsi),%xmm4 leaq SHIFT_MASK(%rip),%r12 addq %r11,%r12 vmovdqu64 (%r12),%xmm5 vpshufb %xmm5,%xmm3,%xmm3 vpxorq %xmm0,%xmm3,%xmm3 leaq (%r8,%r11,1),%r13 subq $16,%r13 jge .L_no_extra_mask_335 subq %r13,%r12 .L_no_extra_mask_335: vmovdqu64 16(%r12),%xmm0 vpand %xmm0,%xmm3,%xmm3 vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 vpshufb %xmm5,%xmm3,%xmm3 vpxorq %xmm3,%xmm14,%xmm14 cmpq $0,%r13 jl .L_partial_incomplete_335 
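/* AES-256 path (15 round keys, vaesenclast with 224(%rdi)): if the
   buffered partial block is now complete it is folded into the hash
   with one multiply and a POLY2 reduction; otherwise only the stored
   length at (%rdx) is advanced. */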
vpclmulqdq $0x11,%xmm4,%xmm14,%xmm7 vpclmulqdq $0x00,%xmm4,%xmm14,%xmm10 vpclmulqdq $0x01,%xmm4,%xmm14,%xmm11 vpclmulqdq $0x10,%xmm4,%xmm14,%xmm14 vpxorq %xmm11,%xmm14,%xmm14 vpsrldq $8,%xmm14,%xmm11 vpslldq $8,%xmm14,%xmm14 vpxorq %xmm11,%xmm7,%xmm7 vpxorq %xmm10,%xmm14,%xmm14 vmovdqu64 POLY2(%rip),%xmm11 vpclmulqdq $0x01,%xmm14,%xmm11,%xmm10 vpslldq $8,%xmm10,%xmm10 vpxorq %xmm10,%xmm14,%xmm14 vpclmulqdq $0x00,%xmm14,%xmm11,%xmm10 vpsrldq $4,%xmm10,%xmm10 vpclmulqdq $0x10,%xmm14,%xmm11,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm10,%xmm7,%xmm14 movq $0,(%rdx) movq %r11,%r12 movq $16,%r11 subq %r12,%r11 jmp .L_enc_dec_done_335 .L_partial_incomplete_335: addq %r8,(%rdx) movq %r8,%r11 .L_enc_dec_done_335: leaq byte_len_to_mask_table(%rip),%r12 kmovw (%r12,%r11,2),%k1 vmovdqu64 %xmm14,64(%rsi) vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 vpshufb %xmm5,%xmm3,%xmm3 movq %r9,%r12 vmovdqu8 %xmm3,(%r12){%k1} .L_partial_block_done_335: vmovdqu64 0(%rsi),%xmm2 subq %r11,%r8 je .L_enc_dec_done_334 cmpq $256,%r8 jbe .L_message_below_equal_16_blocks_334 vmovdqa64 SHUF_MASK(%rip),%zmm29 vmovdqa64 ddq_addbe_4444(%rip),%zmm27 vmovdqa64 ddq_addbe_1234(%rip),%zmm28 vmovd %xmm2,%r15d andl $255,%r15d vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpshufb %zmm29,%zmm2,%zmm2 cmpb $240,%r15b jae .L_next_16_overflow_336 vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_336 .L_next_16_overflow_336: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_336: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 0(%rcx,%r11,1),%zmm0 vmovdqu8 64(%rcx,%r11,1),%zmm3 vmovdqu8 128(%rcx,%r11,1),%zmm4 vmovdqu8 192(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 176(%rdi),%zmm6 vaesenc 
%zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 192(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 208(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 224(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,0(%r10,%r11,1) vmovdqu8 %zmm10,64(%r10,%r11,1) vmovdqu8 %zmm11,128(%r10,%r11,1) vmovdqu8 %zmm12,192(%r10,%r11,1) vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 vmovdqa64 %zmm7,768(%rsp) vmovdqa64 %zmm10,832(%rsp) vmovdqa64 %zmm11,896(%rsp) vmovdqa64 %zmm12,960(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_337 vmovdqu64 288(%rsi),%zmm0 vmovdqu64 %zmm0,704(%rsp) vmovdqu64 224(%rsi),%zmm3 vmovdqu64 %zmm3,640(%rsp) vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 160(%rsi),%zmm4 vmovdqu64 %zmm4,576(%rsp) vmovdqu64 96(%rsi),%zmm5 vmovdqu64 %zmm5,512(%rsp) .L_skip_hkeys_precomputation_337: cmpq $512,%r8 jb .L_message_below_32_blocks_334 cmpb $240,%r15b jae .L_next_16_overflow_338 vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_338 .L_next_16_overflow_338: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_338: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 256(%rcx,%r11,1),%zmm0 vmovdqu8 320(%rcx,%r11,1),%zmm3 vmovdqu8 384(%rcx,%r11,1),%zmm4 vmovdqu8 448(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 
vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 176(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 192(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 208(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 224(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,256(%r10,%r11,1) vmovdqu8 %zmm10,320(%r10,%r11,1) vmovdqu8 %zmm11,384(%r10,%r11,1) vmovdqu8 %zmm12,448(%r10,%r11,1) vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 vmovdqa64 %zmm7,1024(%rsp) vmovdqa64 %zmm10,1088(%rsp) vmovdqa64 %zmm11,1152(%rsp) vmovdqa64 %zmm12,1216(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_339 vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) vpclmulqdq 
$0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,192(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,128(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,64(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,0(%rsp) .L_skip_hkeys_precomputation_339: movq $1,%r14 addq $512,%r11 subq $512,%r8 cmpq $768,%r8 jb .L_no_more_big_nblocks_334 .L_encrypt_big_nblocks_334: cmpb $240,%r15b jae .L_16_blocks_overflow_340 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_340 .L_16_blocks_overflow_340: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_340: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 
48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_341 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_341 .L_16_blocks_overflow_341: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb 
%zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_341: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 
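#
# .L_encrypt_big_nblocks_334 above appears to be the steady-state loop,
# handling 48 blocks per iteration as three 16-block batches. Within each
# batch the AES round chain is stitched with vpclmulqdq GHASH of the previous
# iteration's ciphertext (read back from its stack save area) against the
# hash-key powers cached at 0-704(%rsp); partial products accumulate in
# %zmm24/%zmm25/%zmm26 and are reduced once per iteration, in the third batch,
# using the POLY2 constant.
#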
vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_342 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_342 .L_16_blocks_overflow_342: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_342: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 512(%rcx,%r11,1),%zmm17 vmovdqu8 576(%rcx,%r11,1),%zmm19 vmovdqu8 640(%rcx,%r11,1),%zmm20 vmovdqu8 704(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpternlogq $0x96,%zmm15,%zmm12,%zmm6 vpxorq %zmm24,%zmm6,%zmm6 vpternlogq $0x96,%zmm10,%zmm13,%zmm7 vpxorq %zmm25,%zmm7,%zmm7 vaesenc 
%zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vextracti64x4 $1,%zmm6,%ymm12 vpxorq %ymm12,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm12 vpxorq %xmm12,%xmm6,%xmm6 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm6 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,512(%r10,%r11,1) vmovdqu8 %zmm3,576(%r10,%r11,1) vmovdqu8 %zmm4,640(%r10,%r11,1) vmovdqu8 %zmm5,704(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1024(%rsp) vmovdqa64 %zmm3,1088(%rsp) vmovdqa64 %zmm4,1152(%rsp) vmovdqa64 %zmm5,1216(%rsp) vmovdqa64 %zmm6,%zmm14 addq $768,%r11 subq $768,%r8 cmpq $768,%r8 jae .L_encrypt_big_nblocks_334 .L_no_more_big_nblocks_334: cmpq $512,%r8 jae .L_encrypt_32_blocks_334 cmpq $256,%r8 jae .L_encrypt_16_blocks_334 .L_encrypt_0_blocks_ghash_32_334: movl %r8d,%r10d andl $~15,%r10d movl $256,%ebx subl %r10d,%ebx vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 addl $256,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_343 cmpl $8,%r10d je .L_last_num_blocks_is_8_343 jb .L_last_num_blocks_is_7_1_343 cmpl $12,%r10d je .L_last_num_blocks_is_12_343 jb .L_last_num_blocks_is_11_9_343 
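#
# Once fewer than 768 bytes remain the loop above falls through; this path
# GHASHes the ciphertext blocks still parked on the stack and then uses
# %r10d = ceil(remaining bytes / 16) to drive the compare/branch tree that
# selects one of the .L_last_num_blocks_is_N_343 arms. GHASH updates
# Y <- (Y xor C_i) * H over GF(2^128), so folding N trailing blocks at once
# multiplies them by H^N..H^1, which is presumably why each arm below loads a
# different slice of the hash-key power table held in the context at (%rsi).
#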
cmpl $15,%r10d je .L_last_num_blocks_is_15_343 ja .L_last_num_blocks_is_16_343 cmpl $14,%r10d je .L_last_num_blocks_is_14_343 jmp .L_last_num_blocks_is_13_343 .L_last_num_blocks_is_11_9_343: cmpl $10,%r10d je .L_last_num_blocks_is_10_343 ja .L_last_num_blocks_is_11_343 jmp .L_last_num_blocks_is_9_343 .L_last_num_blocks_is_7_1_343: cmpl $4,%r10d je .L_last_num_blocks_is_4_343 jb .L_last_num_blocks_is_3_1_343 cmpl $6,%r10d ja .L_last_num_blocks_is_7_343 je .L_last_num_blocks_is_6_343 jmp .L_last_num_blocks_is_5_343 .L_last_num_blocks_is_3_1_343: cmpl $2,%r10d ja .L_last_num_blocks_is_3_343 je .L_last_num_blocks_is_2_343 .L_last_num_blocks_is_1_343: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_344 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_344 .L_16_blocks_overflow_344: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_344: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_345 subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq 
%zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_345 .L_small_initial_partial_block_345: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_345 .L_small_initial_compute_done_345: .L_after_reduction_345: jmp .L_last_blocks_done_343 .L_last_num_blocks_is_2_343: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_346 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_346 .L_16_blocks_overflow_346: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_346: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq 
$0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_347 subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_347 .L_small_initial_partial_block_347: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_347: orq %r8,%r8 je .L_after_reduction_347 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_347: jmp .L_last_blocks_done_343 .L_last_num_blocks_is_3_343: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_348 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_348 .L_16_blocks_overflow_348: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_348: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq 
$0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_349 subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_349 .L_small_initial_partial_block_349: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq 
%ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_349: orq %r8,%r8 je .L_after_reduction_349 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_349: jmp .L_last_blocks_done_343 .L_last_num_blocks_is_4_343: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_350 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_350 .L_16_blocks_overflow_350: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_350: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_351 subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq 
$8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_351 .L_small_initial_partial_block_351: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_351: orq %r8,%r8 je .L_after_reduction_351 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_351: jmp .L_last_blocks_done_343 .L_last_num_blocks_is_5_343: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_352 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_352 .L_16_blocks_overflow_352: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_352: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc 
%xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_353 subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_353 .L_small_initial_partial_block_353: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq 
$0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_353: orq %r8,%r8 je .L_after_reduction_353 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_353: jmp .L_last_blocks_done_343 .L_last_num_blocks_is_6_343: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_354 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_354 .L_16_blocks_overflow_354: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_354: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 
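#
# The .L_last_num_blocks_is_N_343 arms look like instances of one template:
# they differ only in the vector width (xmm/ymm/zmm) carrying the final
# counter blocks, in the byte64_len_to_mask_table mask used for the trailing
# masked load/store, and in which hash-key powers from (%rsi) enter the
# vpclmulqdq chain. When the message ends in a partial block, the
# .L_small_initial_partial_block_* path appears to record the leftover byte
# count through (%rdx) and stash the final output block at 16(%rsi) so a later
# call can complete that block's GHASH.
#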
vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_355 subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_355 .L_small_initial_partial_block_355: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_355: orq %r8,%r8 je .L_after_reduction_355 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_355: jmp .L_last_blocks_done_343 .L_last_num_blocks_is_7_343: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_356 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_356 .L_16_blocks_overflow_356: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_356: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq 
$0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_357 subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq 
$0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_357 .L_small_initial_partial_block_357: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_357: orq %r8,%r8 je .L_after_reduction_357 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_357: jmp .L_last_blocks_done_343 .L_last_num_blocks_is_8_343: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_358 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_358 .L_16_blocks_overflow_358: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_358: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq 
$0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_359 subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_359 .L_small_initial_partial_block_359: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_359: orq %r8,%r8 je .L_after_reduction_359 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_359: jmp .L_last_blocks_done_343 .L_last_num_blocks_is_9_343: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_360 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_360 .L_16_blocks_overflow_360: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_360: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%xmm31,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_361 subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_361 .L_small_initial_partial_block_361: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_361: orq %r8,%r8 je .L_after_reduction_361 vpxorq %xmm7,%xmm14,%xmm14 
.L_after_reduction_361: jmp .L_last_blocks_done_343 .L_last_num_blocks_is_10_343: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_362 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_362 .L_16_blocks_overflow_362: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_362: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 
vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_363 subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_363 .L_small_initial_partial_block_363: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_363: orq %r8,%r8 je .L_after_reduction_363 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_363: jmp .L_last_blocks_done_343 
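/*
 * Tail handling: each .L_last_num_blocks_is_N_343 stub below encrypts the
 * final N (< 16) counter blocks and folds them into the GHASH state.  The
 * common shape, as far as it can be read from the code:
 *   - %r8 appears to hold the remaining byte count; byte64_len_to_mask_table
 *     indexed by (%r8 - 64/128/192) yields the %k1 byte mask for the last,
 *     possibly partial, 64-byte vector load/store.
 *   - the cmpl $(256 - N),%r15d checks are consistent with %r15d tracking the
 *     low byte of the big-endian counter: if adding N would wrap that byte,
 *     the slower path byte-swaps via %zmm29, adds ddq_add_1234/ddq_add_4444
 *     and swaps back; otherwise the increment vectors already resident in
 *     %zmm28/%zmm27 are added directly.
 *   - AES rounds on the new counter blocks (round keys broadcast from
 *     (%rdi)) are interleaved with carry-less multiplies that accumulate
 *     GHASH over 16 earlier blocks, combining data and hash-key material
 *     that both live on the stack (0..192(%rsp,%rbx,1) and 1024..1216(%rsp)).
 *   - the POLY2 constant then reduces the 256-bit GHASH product back to a
 *     single 128-bit accumulator in %xmm14.
 */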
.L_last_num_blocks_is_11_343: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_364 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_364 .L_16_blocks_overflow_364: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_364: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast 
%zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_365 subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_365 .L_small_initial_partial_block_365: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_365: orq %r8,%r8 je .L_after_reduction_365 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_365: jmp .L_last_blocks_done_343 
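/*
 * Bookkeeping around a partial last block (same pattern in every tail stub):
 * the full-block path stores 0 at (%rdx), while
 * .L_small_initial_partial_block_* stores the leftover byte count at (%rdx)
 * and stashes the last output block (%xmm11) at 16(%rsi), presumably so a
 * later call can complete that block (assumption about intent).  In that
 * case the GHASH multiplies cover only the preceding full blocks, and just
 * before .L_after_reduction_* the reflected last block (%xmm7) is merely
 * XORed into the accumulator %xmm14 whenever bytes remain.
 */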
.L_last_num_blocks_is_12_343: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_366 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_366 .L_16_blocks_overflow_366: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_366: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast 
%zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_367 subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_367 .L_small_initial_partial_block_367: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_367: orq %r8,%r8 je .L_after_reduction_367 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_367: jmp .L_last_blocks_done_343 .L_last_num_blocks_is_13_343: 
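/*
 * Residues of 13..16 blocks use a fourth counter vector (%xmm5/%ymm5/%zmm5,
 * sized to the block count) alongside %zmm0/%zmm3/%zmm4, and index
 * byte64_len_to_mask_table with the remaining length minus 192 so that %k1
 * masks only the final 64-byte group at offset 192.
 */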
leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_368 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_368 .L_16_blocks_overflow_368: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_368: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_369 subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_369 .L_small_initial_partial_block_369: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq 
$8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_369: orq %r8,%r8 je .L_after_reduction_369 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_369: jmp .L_last_blocks_done_343 .L_last_num_blocks_is_14_343: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_370 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_370 .L_16_blocks_overflow_370: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_370: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 
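/*
 * Each 16-byte round key from the schedule at (%rdi) is broadcast across a
 * 512-bit register (vbroadcastf64x2) so that a single vaesenc advances four
 * counter blocks at once; the narrower %ymm/%xmm forms of the same round
 * handle the shorter trailing vector.
 */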
vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_371 subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_371 .L_small_initial_partial_block_371: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_371: orq %r8,%r8 je .L_after_reduction_371 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_371: jmp .L_last_blocks_done_343 .L_last_num_blocks_is_15_343: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_372 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_372 .L_16_blocks_overflow_372: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_372: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 
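/*
 * The vpclmulqdq groups above and the vaesenc chain below appear to be
 * interleaved on purpose: what looks like GHASH over 16 earlier blocks
 * (operands at 0..192(%rsp,%rbx,1) and 1024..1216(%rsp)) runs on one
 * dependency chain while the AES rounds of the new counter blocks run on
 * another, presumably so both execution pipelines stay busy (assumption
 * about intent).
 */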
vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_373 subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 
vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_373 .L_small_initial_partial_block_373: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_373: orq %r8,%r8 je .L_after_reduction_373 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_373: jmp .L_last_blocks_done_343 .L_last_num_blocks_is_16_343: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_374 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_374 .L_16_blocks_overflow_374: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_374: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 
64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 
%zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_375: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_375: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_375: jmp .L_last_blocks_done_343 .L_last_num_blocks_is_0_343: vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq 
$0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_343: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_334 .L_encrypt_32_blocks_334: cmpb $240,%r15b jae .L_16_blocks_overflow_376 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_376 .L_16_blocks_overflow_376: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_376: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 
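# Annotation (added, a rough reading of the generated code): .L_encrypt_32_blocks
# runs two passes of 16 counter blocks each, interleaving the AES rounds with the
# GHASH multiplies. %r15b tracks the low counter byte; "cmpb $240,%r15b /
# jae .L_16_blocks_overflow_*" catches a carry out of that byte. The fast path
# adds the increments kept in %zmm27/%zmm28 to the counters in their stored
# (byte-reflected) form, which appears valid only while that low byte does not
# wrap; the overflow path byte-swaps with %zmm29, adds ddq_add_1234 /
# ddq_add_4444 as ordinary integers, and swaps back. Round keys are
# re-broadcast from 0(%rdi)..224(%rdi) with vbroadcastf64x2 (this unrolled
# variant walks the full 0..224 schedule, i.e. the 14-round AES-256 case),
# while vpclmulqdq folds the previously stored, byte-reflected ciphertext
# blocks against the H^i powers cached on the stack, accumulating the high,
# low and middle partial products in %zmm24/%zmm25/%zmm26.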
vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_377 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_377 .L_16_blocks_overflow_377: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_377: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq 
$0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 subq $512,%r8 addq $512,%r11 movl %r8d,%r10d andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_378 cmpl $8,%r10d je .L_last_num_blocks_is_8_378 jb .L_last_num_blocks_is_7_1_378 cmpl $12,%r10d je .L_last_num_blocks_is_12_378 jb .L_last_num_blocks_is_11_9_378 cmpl $15,%r10d je .L_last_num_blocks_is_15_378 ja .L_last_num_blocks_is_16_378 cmpl $14,%r10d je .L_last_num_blocks_is_14_378 jmp .L_last_num_blocks_is_13_378 .L_last_num_blocks_is_11_9_378: cmpl $10,%r10d je .L_last_num_blocks_is_10_378 ja .L_last_num_blocks_is_11_378 jmp .L_last_num_blocks_is_9_378 .L_last_num_blocks_is_7_1_378: cmpl $4,%r10d je .L_last_num_blocks_is_4_378 jb .L_last_num_blocks_is_3_1_378 cmpl $6,%r10d ja .L_last_num_blocks_is_7_378 je .L_last_num_blocks_is_6_378 jmp .L_last_num_blocks_is_5_378 .L_last_num_blocks_is_3_1_378: cmpl $2,%r10d ja .L_last_num_blocks_is_3_378 je .L_last_num_blocks_is_2_378 .L_last_num_blocks_is_1_378: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_379 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_379 .L_16_blocks_overflow_379: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_379: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 
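# Annotation (added): tail dispatch. %r8 holds the bytes still to process and
# %r10d is set to ceil(%r8/16) via "addl $15 / shrl $4", i.e. the number of
# remaining 16-byte blocks (0..16). The compare/branch ladder above selects the
# matching .L_last_num_blocks_is_N_* specialization. Each of those starts by
# indexing byte64_len_to_mask_table with the remaining byte count (8 bytes per
# entry) and loading the result into %k1, so the final vmovdqu8 load and store
# of the partial vector touch exactly the bytes that are left.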
vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_380 subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_380 .L_small_initial_partial_block_380: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_380 .L_small_initial_compute_done_380: .L_after_reduction_380: jmp .L_last_blocks_done_378 .L_last_num_blocks_is_2_378: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_381 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_381 .L_16_blocks_overflow_381: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_381: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} 
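# Annotation (added, partly inferred from the surrounding stores): each tail
# variant ends in one of two ways. On the .L_small_initial_compute_done path
# (last block complete) the byte-reflected ciphertext is multiplied by the low
# powers of H from the table at (%rsi), merged with the carried accumulators
# %zmm24/%zmm25/%zmm26, reduced into the GHASH state %xmm14, and 0 is written
# to (%rdx). On the .L_small_initial_partial_block path the leftover byte
# count goes to (%rdx) and the last counter block (%xmm11) to 16(%rsi), the
# products gathered so far are reduced, and the reflected partial ciphertext
# in %xmm7 is only XORed into %xmm14, apparently deferring its multiplication
# by H until the block is completed by a later call.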
vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_382 subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_382 .L_small_initial_partial_block_382: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_382: orq %r8,%r8 je .L_after_reduction_382 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_382: jmp .L_last_blocks_done_378 .L_last_num_blocks_is_3_378: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_383 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_383 .L_16_blocks_overflow_383: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_383: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 
$2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_384 subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_384 .L_small_initial_partial_block_384: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 
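# Annotation (added): the .L_last_num_blocks_is_2/3/4 variants are one template
# specialized per block count N: a masked 16*N-byte load, a single pass of AES
# rounds over the partial counter vector, then a GHASH fold that reads the N
# lowest powers of H from the end of the precomputed table (336(%rsi) holds H
# itself, with higher powers at decreasing offsets, so 320/304/288(%rsi) give
# the wider groups) before the common POLY2 reduction. The
# "subq $16 * (N - 1),%r8" in each multi-block variant discounts the N-1
# guaranteed-full blocks so %r8 is left holding only the final block's length.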
vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_384: orq %r8,%r8 je .L_after_reduction_384 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_384: jmp .L_last_blocks_done_378 .L_last_num_blocks_is_4_378: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_385 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_385 .L_16_blocks_overflow_385: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_385: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_386 subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_386 .L_small_initial_partial_block_386: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_386: orq %r8,%r8 je .L_after_reduction_386 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_386: jmp .L_last_blocks_done_378 .L_last_num_blocks_is_5_378: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_387 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_387 .L_16_blocks_overflow_387: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_387: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_388 subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_388 .L_small_initial_partial_block_388: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 
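# Annotation (added): from five blocks upward the tail spans more than one
# vector group. The first four blocks travel in a full %zmm register while the
# remaining one to four blocks use an xmm, ymm or zmm register as needed, and
# only that last group is loaded and stored through the %k1 mask (hence the
# "subq $64,%rax" before the byte64_len_to_mask_table lookup, discounting the
# 64 bytes handled by the unmasked group). %rcx plus the running offset %r11
# addresses the input; %r9, copied to %r10, addresses the output.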
vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_388: orq %r8,%r8 je .L_after_reduction_388 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_388: jmp .L_last_blocks_done_378 .L_last_num_blocks_is_6_378: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_389 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_389 .L_16_blocks_overflow_389: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_389: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq 
%zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_390 subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_390 .L_small_initial_partial_block_390: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_390: orq %r8,%r8 je .L_after_reduction_390 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_390: jmp .L_last_blocks_done_378 .L_last_num_blocks_is_7_378: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_391 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_391 .L_16_blocks_overflow_391: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_391: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq 
%zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_392 subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 
POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_392 .L_small_initial_partial_block_392: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_392: orq %r8,%r8 je .L_after_reduction_392 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_392: jmp .L_last_blocks_done_378 .L_last_num_blocks_is_8_378: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_393 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_393 .L_16_blocks_overflow_393: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_393: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_394 subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_394 .L_small_initial_partial_block_394: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 
$1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_394: orq %r8,%r8 je .L_after_reduction_394 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_394: jmp .L_last_blocks_done_378 .L_last_num_blocks_is_9_378: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_395 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_395 .L_16_blocks_overflow_395: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_395: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 
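# Annotation (added): nine blocks and beyond add a third counter/data group,
# so each round key is issued three times (two zmm groups plus the partial
# xmm/ymm/zmm group), and the mask-table index is biased by "subq $128,%rax"
# because the first 128 bytes (eight blocks) are always full. The structure is
# otherwise identical to the smaller variants.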
vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_396 subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_396 .L_small_initial_partial_block_396: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 
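# Annotation (added): GHASH finish, common to every variant. The middle partial
# products are split with vpsrldq/vpslldq $8 and XORed into the high and low
# halves, the 512/256-bit lanes are folded down to one xmm with
# vextracti64x4/vextracti32x4 plus vpxorq, and POLY2 drives the two-stage
# reduction (carry-less multiplies by the reduction constant interleaved with
# byte shifts, finished by a three-way XOR) that leaves the updated hash in
# %xmm14. Throughout this file vpternlogq with immediate 0x96 is the 3-way XOR,
# dst = dst ^ src1 ^ src2, used to merge partial products in a single op, e.g.
#   vpternlogq $0x96,%zmm19,%zmm13,%zmm7   # %zmm7 ^= %zmm19 ^ %zmm13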
.L_small_initial_compute_done_396: orq %r8,%r8 je .L_after_reduction_396 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_396: jmp .L_last_blocks_done_378 .L_last_num_blocks_is_10_378: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_397 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_397 .L_16_blocks_overflow_397: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_397: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc 
%zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_398 subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_398 .L_small_initial_partial_block_398: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_398: orq %r8,%r8 je .L_after_reduction_398 vpxorq 
%xmm7,%xmm14,%xmm14 .L_after_reduction_398: jmp .L_last_blocks_done_378 .L_last_num_blocks_is_11_378: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_399 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_399 .L_16_blocks_overflow_399: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_399: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast 
%zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_400 subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_400 .L_small_initial_partial_block_400: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_400: orq %r8,%r8 je .L_after_reduction_400 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_400: jmp 
.L_last_blocks_done_378 .L_last_num_blocks_is_12_378: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_401 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_401 .L_16_blocks_overflow_401: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_401: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 
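# Tail of 12 blocks: XOR the keystream with the loaded input, store the output
# (final 64-byte chunk through mask %k1), byte-reflect the ciphertext and multiply
# it by the matching precomputed powers of H before the POLY2 reduction.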
vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_402 subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_402 .L_small_initial_partial_block_402: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_402: orq %r8,%r8 je .L_after_reduction_402 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_402: jmp .L_last_blocks_done_378 .L_last_num_blocks_is_13_378: leaq 
byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_403 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_403 .L_16_blocks_overflow_403: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_403: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 
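# Tail of 13 blocks, continued: remaining AES rounds for the counter vectors
# %zmm0/%zmm3/%zmm4 plus the single extra counter block kept in %xmm5.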
vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_404 subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_404 .L_small_initial_partial_block_404: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq 
%zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_404: orq %r8,%r8 je .L_after_reduction_404 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_404: jmp .L_last_blocks_done_378 .L_last_num_blocks_is_14_378: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_405 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_405 .L_16_blocks_overflow_405: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_405: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq 
$0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_406 subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_406 .L_small_initial_partial_block_406: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq 
$0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_406: orq %r8,%r8 je .L_after_reduction_406 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_406: jmp .L_last_blocks_done_378 .L_last_num_blocks_is_15_378: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_407 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_407 .L_16_blocks_overflow_407: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_407: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_408 subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq 
$0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_408 .L_small_initial_partial_block_408: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_408: orq %r8,%r8 je .L_after_reduction_408 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_408: jmp .L_last_blocks_done_378 .L_last_num_blocks_is_16_378: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_409 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_409 .L_16_blocks_overflow_409: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_409: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 
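# Tail of 16 blocks: all four counter vectors are whitened with round key 0;
# the GHASH multiplications for data staged on the stack are interleaved with
# the AES rounds that follow (the stitched AES+GHASH pattern used throughout).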
vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 
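# All 16 tail blocks are encrypted, stored and byte-reflected into
# %zmm17/%zmm19/%zmm20/%zmm21; they are multiplied below by the corresponding
# precomputed powers of H from (%rsi) and reduced via POLY2 into %xmm14.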
vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_410: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_410: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_410: jmp .L_last_blocks_done_378 .L_last_num_blocks_is_0_378: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq 
$0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_378: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_334 .L_encrypt_16_blocks_334: cmpb $240,%r15b jae .L_16_blocks_overflow_411 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_411 .L_16_blocks_overflow_411: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_411: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 
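# .L_encrypt_16_blocks_334, continued: finish the AES rounds, XOR with 256 input
# bytes and store, keep the byte-reflected ciphertext at 1280..1472(%rsp) for the
# next GHASH pass, accumulate further partial products from the stack, then
# advance %r11/%r8 by 256 and branch on the number of blocks left.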
vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 256(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 320(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 384(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 448(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 subq $256,%r8 addq $256,%r11 movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_412 cmpl $8,%r10d je .L_last_num_blocks_is_8_412 jb .L_last_num_blocks_is_7_1_412 cmpl $12,%r10d je .L_last_num_blocks_is_12_412 jb .L_last_num_blocks_is_11_9_412 cmpl $15,%r10d je .L_last_num_blocks_is_15_412 ja .L_last_num_blocks_is_16_412 cmpl $14,%r10d je .L_last_num_blocks_is_14_412 jmp .L_last_num_blocks_is_13_412 .L_last_num_blocks_is_11_9_412: cmpl $10,%r10d je .L_last_num_blocks_is_10_412 ja .L_last_num_blocks_is_11_412 jmp .L_last_num_blocks_is_9_412 .L_last_num_blocks_is_7_1_412: cmpl $4,%r10d je .L_last_num_blocks_is_4_412 jb .L_last_num_blocks_is_3_1_412 cmpl $6,%r10d ja .L_last_num_blocks_is_7_412 je .L_last_num_blocks_is_6_412 jmp .L_last_num_blocks_is_5_412 .L_last_num_blocks_is_3_1_412: cmpl $2,%r10d ja .L_last_num_blocks_is_3_412 je .L_last_num_blocks_is_2_412 .L_last_num_blocks_is_1_412: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_413 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_413 .L_16_blocks_overflow_413: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_413: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq 
%xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %xmm31,%xmm0,%xmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_414 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_414 .L_small_initial_partial_block_414: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_414 .L_small_initial_compute_done_414: .L_after_reduction_414: jmp .L_last_blocks_done_412 .L_last_num_blocks_is_2_412: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_415 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_415 .L_16_blocks_overflow_415: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_415: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %ymm31,%ymm0,%ymm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 
1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_416 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_416 .L_small_initial_partial_block_416: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_416: orq %r8,%r8 je .L_after_reduction_416 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_416: jmp .L_last_blocks_done_412 .L_last_num_blocks_is_3_412: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_417 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_417 .L_16_blocks_overflow_417: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_417: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 
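/* Annotation (added): masked load for the 3-block tail.  %k1 was built from
   byte64_len_to_mask_table using the leftover byte count in %r8, so the
   zero-masking vmovdqu8 below reads only the bytes that actually remain,
   while the vaesenc rounds stay interleaved with the GHASH multiplies of the
   previous 16 byte-reflected ciphertext blocks. */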
vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_418 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_418 .L_small_initial_partial_block_418: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_418: orq %r8,%r8 je .L_after_reduction_418 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_418: jmp .L_last_blocks_done_412 .L_last_num_blocks_is_4_412: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_419 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_419 .L_16_blocks_overflow_419: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_419: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_420 subq $16,%r8 movq $0,(%rdx) vpxorq 
%zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_420 .L_small_initial_partial_block_420: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_420: orq %r8,%r8 je .L_after_reduction_420 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_420: jmp .L_last_blocks_done_412 .L_last_num_blocks_is_5_412: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_421 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_421 .L_16_blocks_overflow_421: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_421: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq 
$0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_422 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_422 
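/* Annotation (added): .L_small_initial_partial_block_* is taken when the very
   last block is shorter than 16 bytes.  Its length (%r8) is recorded through
   (%rdx), the last output block (%xmm11) is saved at 16(%rsi), and the
   byte-reflected partial block (%xmm7) is only XORed into the GHASH
   accumulator after the reduction, leaving its multiply by H for later. */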
.L_small_initial_partial_block_422: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_422: orq %r8,%r8 je .L_after_reduction_422 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_422: jmp .L_last_blocks_done_412 .L_last_num_blocks_is_6_412: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_423 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_423 .L_16_blocks_overflow_423: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_423: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq 
$8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_424 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_424 .L_small_initial_partial_block_424: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 
POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_424: orq %r8,%r8 je .L_after_reduction_424 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_424: jmp .L_last_blocks_done_412 .L_last_num_blocks_is_7_412: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_425 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_425 .L_16_blocks_overflow_425: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_425: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 
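/* Annotation (added): 7-block tail, final rounds.  The remaining round keys
   (176..224(%rdi), a 15-key AES-256 schedule) finish %zmm0 and %zmm3 while the
   GHASH reduction via the POLY2 constant (%xmm16) completes in parallel; the
   keystream is then XORed with the input blocks held in %zmm17/%zmm19. */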
vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_426 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_426 .L_small_initial_partial_block_426: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_426: orq %r8,%r8 je .L_after_reduction_426 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_426: jmp .L_last_blocks_done_412 .L_last_num_blocks_is_8_412: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_427 vpaddd %zmm28,%zmm2,%zmm0 vpaddd 
%zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_427 .L_16_blocks_overflow_427: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_427: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 
%zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_428 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_428 .L_small_initial_partial_block_428: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_428: orq %r8,%r8 je .L_after_reduction_428 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_428: jmp .L_last_blocks_done_412 .L_last_num_blocks_is_9_412: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_429 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_429 .L_16_blocks_overflow_429: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_429: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq 
%zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 
%xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_430 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_430 .L_small_initial_partial_block_430: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_430: orq %r8,%r8 je .L_after_reduction_430 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_430: jmp .L_last_blocks_done_412 .L_last_num_blocks_is_10_412: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_431 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_431 .L_16_blocks_overflow_431: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_431: 
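/* Annotation (added): 10-block tail (.L_last_num_blocks_is_10_412).  Counter
   blocks 1-4 are in %zmm0, 5-8 in %zmm3 and 9-10 in %ymm4.  Below, the AES
   rounds are interleaved with GHASH of the previous 16 byte-reflected
   ciphertext blocks (stashed at 1280..1472(%rsp)) against the hash-key powers
   cached at 512..704(%rsp). */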
vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast 
%zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_432 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_432 .L_small_initial_partial_block_432: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_432: orq %r8,%r8 je .L_after_reduction_432 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_432: jmp .L_last_blocks_done_412 .L_last_num_blocks_is_11_412: leaq 
byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_433 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_433 .L_16_blocks_overflow_433: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_433: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 
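# (11-block tail, continued) Throughout these tail cases the AES-CTR rounds (round keys
# broadcast from (%rdi)) are interleaved with VPCLMULQDQ folds of the 16 blocks buffered
# at 1280..1472(%rsp) against the hash-key powers cached at 512..704(%rsp); the fold is
# reduced with POLY2 and the keystream is XORed with the input and stored, %k1 masking
# the final partial 64-byte chunk.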
vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_434 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_434 .L_small_initial_partial_block_434: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 
$1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_434: orq %r8,%r8 je .L_after_reduction_434 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_434: jmp .L_last_blocks_done_412 .L_last_num_blocks_is_12_412: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_435 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_435 .L_16_blocks_overflow_435: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_435: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 
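# (12-block tail, continued) The remaining AES rounds finish the keystream while the
# folded GHASH halves are collapsed to 128 bits and reduced via POLY2; the 12 output
# blocks are then stored (mask %k1 on the last ZMM) and byte-reflected with %zmm29 for
# the GHASH update.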
vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_436 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_436 .L_small_initial_partial_block_436: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 
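# Each tail case ends in one of two GHASH sub-paths: the full-block path clears the
# partial-block count at (%rdx), while .L_small_initial_partial_block_* records the
# remaining byte count at (%rdx) and stashes the last output block at 16(%rsi) so its
# GHASH contribution can be folded in later.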
vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_436: orq %r8,%r8 je .L_after_reduction_436 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_436: jmp .L_last_blocks_done_412 .L_last_num_blocks_is_13_412: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_437 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_437 .L_16_blocks_overflow_437: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_437: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 
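# 13-block tail: 4+4+4+1 blocks, so a fourth counter register (%xmm5) carries the last
# block and the trailing input chunk was loaded into %xmm21 under mask %k1; the AES
# rounds for all four registers continue below.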
vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_438 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 
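# Full-block branch of the 13-block case: the 13 byte-reflected output blocks have been
# multiplied by H^13..H^1 (hash-key table offsets 144..336 off %rsi); their products are
# now combined and reduced with POLY2 into the updated GHASH value in %xmm14.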
vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_438 .L_small_initial_partial_block_438: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_438: orq %r8,%r8 je .L_after_reduction_438 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_438: jmp .L_last_blocks_done_412 .L_last_num_blocks_is_14_412: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_439 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_439 .L_16_blocks_overflow_439: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_439: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq 
%r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_440 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_440 .L_small_initial_partial_block_440: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq 
$0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_440: orq %r8,%r8 je .L_after_reduction_440 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_440: jmp .L_last_blocks_done_412 .L_last_num_blocks_is_15_412: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_441 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_441 .L_16_blocks_overflow_441: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_441: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc 
%zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_442 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 
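# The final reduction step of the 15-block case follows. The 16-block case below
# (.L_last_num_blocks_is_16_412) has no full-block shortcut: it falls straight into
# .L_small_initial_partial_block_444, recording the remaining length at (%rdx) and
# saving the last output block at 16(%rsi) so its GHASH is completed later.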
vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_442 .L_small_initial_partial_block_442: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_442: orq %r8,%r8 je .L_after_reduction_442 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_442: jmp .L_last_blocks_done_412 .L_last_num_blocks_is_16_412: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_443 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_443 .L_16_blocks_overflow_443: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_443: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 
64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_444: movq %r8,(%rdx) vmovdqu64 
%xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_444: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_444: jmp .L_last_blocks_done_412 .L_last_num_blocks_is_0_412: vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_412: vpshufb %xmm29,%xmm2,%xmm2 jmp 
.L_ghash_done_334 .L_message_below_32_blocks_334: subq $256,%r8 addq $256,%r11 movl %r8d,%r10d testq %r14,%r14 jnz .L_skip_hkeys_precomputation_445 vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .L_skip_hkeys_precomputation_445: movq $1,%r14 andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_446 cmpl $8,%r10d je .L_last_num_blocks_is_8_446 jb .L_last_num_blocks_is_7_1_446 cmpl $12,%r10d je .L_last_num_blocks_is_12_446 jb .L_last_num_blocks_is_11_9_446 cmpl $15,%r10d je .L_last_num_blocks_is_15_446 ja .L_last_num_blocks_is_16_446 cmpl $14,%r10d je .L_last_num_blocks_is_14_446 jmp .L_last_num_blocks_is_13_446 .L_last_num_blocks_is_11_9_446: cmpl $10,%r10d je .L_last_num_blocks_is_10_446 ja .L_last_num_blocks_is_11_446 jmp .L_last_num_blocks_is_9_446 .L_last_num_blocks_is_7_1_446: cmpl $4,%r10d je .L_last_num_blocks_is_4_446 jb .L_last_num_blocks_is_3_1_446 cmpl $6,%r10d ja .L_last_num_blocks_is_7_446 je .L_last_num_blocks_is_6_446 jmp .L_last_num_blocks_is_5_446 .L_last_num_blocks_is_3_1_446: cmpl $2,%r10d ja .L_last_num_blocks_is_3_446 je .L_last_num_blocks_is_2_446 .L_last_num_blocks_is_1_446: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 
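# Tail dispatch of the below-32-blocks path: each case first checks the counter byte
# tracked in %r15d (e.g. cmpl $255 for one block). If adding the block count would wrap
# it, the counters are byte-swapped, incremented with ddq_add_1234/ddq_add_4444 and
# swapped back; otherwise they are bumped directly with the constants in %zmm27/%zmm28.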
cmpl $255,%r15d jae .L_16_blocks_overflow_447 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_447 .L_16_blocks_overflow_447: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_447: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_448 subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_448 .L_small_initial_partial_block_448: movq 
%r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_448 .L_small_initial_compute_done_448: .L_after_reduction_448: jmp .L_last_blocks_done_446 .L_last_num_blocks_is_2_446: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_449 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_449 .L_16_blocks_overflow_449: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_449: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_450 subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq 
$0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_450 .L_small_initial_partial_block_450: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_450: orq %r8,%r8 je .L_after_reduction_450 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_450: jmp .L_last_blocks_done_446 .L_last_num_blocks_is_3_446: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_451 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_451 .L_16_blocks_overflow_451: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_451: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 
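/* 3-block tail (.L_last_num_blocks_is_3_446): the remaining input bytes are loaded under mask %k1
   (built from byte64_len_to_mask_table) while the counter blocks in %zmm0 run through the AES rounds;
   the deferred GHASH of the previous 16 blocks proceeds in parallel from stack-cached operands. */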
vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_452 subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_452 .L_small_initial_partial_block_452: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_452: orq %r8,%r8 je .L_after_reduction_452 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_452: jmp .L_last_blocks_done_446 .L_last_num_blocks_is_4_446: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_453 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_453 .L_16_blocks_overflow_453: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_453: vbroadcastf64x2 0(%rdi),%zmm30 
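/* 4-block tail (.L_last_num_blocks_is_4_446): same interleaving pattern - the four counter blocks in
   %zmm0 are encrypted while the deferred GHASH products accumulate into %zmm24/%zmm25/%zmm26. */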
vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_454 subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_454 .L_small_initial_partial_block_454: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq 
$0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_454: orq %r8,%r8 je .L_after_reduction_454 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_454: jmp .L_last_blocks_done_446 .L_last_num_blocks_is_5_446: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_455 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_455 .L_16_blocks_overflow_455: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_455: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc 
%xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %xmm29,%xmm3,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_456 subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_456 .L_small_initial_partial_block_456: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_456: orq %r8,%r8 je .L_after_reduction_456 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_456: jmp .L_last_blocks_done_446 .L_last_num_blocks_is_6_446: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_457 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_457 .L_16_blocks_overflow_457: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_457: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 
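/* Extract the last of the six counter blocks and broadcast it into %zmm2, which carries the
   running counter state for the rest of the routine. */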
vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %ymm29,%ymm3,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_458 subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq 
%ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_458 .L_small_initial_partial_block_458: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_458: orq %r8,%r8 je .L_after_reduction_458 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_458: jmp .L_last_blocks_done_446 .L_last_num_blocks_is_7_446: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_459 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_459 .L_16_blocks_overflow_459: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_459: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 
96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_460 subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_460 .L_small_initial_partial_block_460: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq 
%ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_460: orq %r8,%r8 je .L_after_reduction_460 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_460: jmp .L_last_blocks_done_446 .L_last_num_blocks_is_8_446: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_461 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_461 .L_16_blocks_overflow_461: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_461: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 
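/* 8-block tail: keystream XORed with the input; the last output block is kept in %xmm11 (stored at
   16(%rsi) on the partial-block path), the result is masked-stored to the output, and the
   ciphertext is byte-reflected for GHASH. */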
vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_462 subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_462 .L_small_initial_partial_block_462: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_462: orq %r8,%r8 je .L_after_reduction_462 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_462: jmp .L_last_blocks_done_446 .L_last_num_blocks_is_9_446: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_463 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_463 .L_16_blocks_overflow_463: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_463: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 
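/* 9-block tail: three counter vectors (%zmm0, %zmm3, %xmm4) are encrypted; the last block of
   %zmm4 becomes the new running counter in %zmm2. */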
vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %xmm29,%xmm4,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_464 subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_464 .L_small_initial_partial_block_464: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_464: orq %r8,%r8 je .L_after_reduction_464 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_464: jmp .L_last_blocks_done_446 .L_last_num_blocks_is_10_446: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_465 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_465 .L_16_blocks_overflow_465: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_465: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq 
$0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %ymm29,%ymm4,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_466 subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 
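/* No partial block left: GHASH the ten byte-reflected output blocks against the hash-key powers
   loaded from 192/256/320(%rsi), fold in the deferred products (%zmm24-%zmm26), and reduce modulo
   the GCM polynomial (POLY2) into the accumulator %xmm14. */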
vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_466 .L_small_initial_partial_block_466: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_466: orq %r8,%r8 je .L_after_reduction_466 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_466: jmp .L_last_blocks_done_446 .L_last_num_blocks_is_11_446: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_467 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_467 .L_16_blocks_overflow_467: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_467: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 
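/* 11-block tail: the deferred GHASH of the previous 16 blocks (operands cached on the stack at
   (%rsp,%rbx) and 768..960(%rsp)) is interleaved with the AES rounds of the three counter vectors
   %zmm0, %zmm3 and %zmm4. */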
vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_468 subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq 
$0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_468 .L_small_initial_partial_block_468: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_468: orq %r8,%r8 je .L_after_reduction_468 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_468: jmp .L_last_blocks_done_446 .L_last_num_blocks_is_12_446: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_469 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_469 .L_16_blocks_overflow_469: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_469: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 
896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_470 subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq 
$0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_470 .L_small_initial_partial_block_470: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_470: orq %r8,%r8 je .L_after_reduction_470 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_470: jmp .L_last_blocks_done_446 .L_last_num_blocks_is_13_446: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_471 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_471 .L_16_blocks_overflow_471: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_471: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 
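/* .L_last_num_blocks_is_13 tail (register roles inferred from the surrounding
   code): the instructions below keep interleaving AES-CTR rounds on the counter
   blocks (zmm0/zmm3/zmm4, plus xmm5 for the odd 13th block) with GHASH
   multiplies of the previously buffered ciphertext at 768..960(%rsp) by the
   hash-key powers staged at 0..192(%rsp,%rbx,1). */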
vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %xmm29,%xmm5,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_472 subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 
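/* Full-final-block GHASH for the 13-block case: (%rdx), which appears to track
   leftover partial-block bytes, was just cleared, and the 13 byte-reflected
   ciphertext blocks in zmm17/zmm19/zmm20/xmm21 are multiplied by the matching
   hash-key powers H^13..H^1 (H^1 sits at 336(%rsi)), then reduced below via
   POLY2 into the accumulator xmm14. */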
vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_472 .L_small_initial_partial_block_472: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_472: orq %r8,%r8 je .L_after_reduction_472 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_472: jmp .L_last_blocks_done_446 .L_last_num_blocks_is_14_446: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_473 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_473 .L_16_blocks_overflow_473: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd 
%zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_473: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast 
%zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %ymm29,%ymm5,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_474 subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_474 .L_small_initial_partial_block_474: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 
$1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_474: orq %r8,%r8 je .L_after_reduction_474 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_474: jmp .L_last_blocks_done_446 .L_last_num_blocks_is_15_446: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_475 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_475 .L_16_blocks_overflow_475: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_475: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_476 subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_476 .L_small_initial_partial_block_476: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq 
$0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_476: orq %r8,%r8 je .L_after_reduction_476 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_476: jmp .L_last_blocks_done_446 .L_last_num_blocks_is_16_446: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_477 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_477 .L_16_blocks_overflow_477: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_477: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq 
$0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm17 vpshufb %zmm29,%zmm3,%zmm19 vpshufb %zmm29,%zmm4,%zmm20 vpshufb %zmm29,%zmm5,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_478: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq 
$0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_478: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_478: jmp .L_last_blocks_done_446 .L_last_num_blocks_is_0_446: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_446: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_334 .L_message_below_equal_16_blocks_334: movl %r8d,%r12d addl $15,%r12d shrl $4,%r12d cmpq $8,%r12 je .L_small_initial_num_blocks_is_8_479 jl .L_small_initial_num_blocks_is_7_1_479 cmpq $12,%r12 je .L_small_initial_num_blocks_is_12_479 jl .L_small_initial_num_blocks_is_11_9_479 cmpq $16,%r12 je .L_small_initial_num_blocks_is_16_479 cmpq $15,%r12 je .L_small_initial_num_blocks_is_15_479 cmpq $14,%r12 je .L_small_initial_num_blocks_is_14_479 jmp .L_small_initial_num_blocks_is_13_479 .L_small_initial_num_blocks_is_11_9_479: cmpq $11,%r12 je .L_small_initial_num_blocks_is_11_479 cmpq $10,%r12 je .L_small_initial_num_blocks_is_10_479 jmp .L_small_initial_num_blocks_is_9_479 .L_small_initial_num_blocks_is_7_1_479: cmpq $4,%r12 je .L_small_initial_num_blocks_is_4_479 jl .L_small_initial_num_blocks_is_3_1_479 cmpq $7,%r12 je .L_small_initial_num_blocks_is_7_479 cmpq $6,%r12 je .L_small_initial_num_blocks_is_6_479 jmp .L_small_initial_num_blocks_is_5_479 
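/* Small-message path (16 blocks or fewer in total): the compare ladder above
   dispatches on the block count in %r12; each .L_small_initial_num_blocks_is_N
   handler below encrypts N counter blocks, writes the byte-masked output, folds
   the shuffled ciphertext into the GHASH accumulator xmm14, and on a partial
   final block appears to stash its state via (%rdx) and 16(%rsi). */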
.L_small_initial_num_blocks_is_3_1_479: cmpq $3,%r12 je .L_small_initial_num_blocks_is_3_479 cmpq $2,%r12 je .L_small_initial_num_blocks_is_2_479 .L_small_initial_num_blocks_is_1_479: vmovdqa64 SHUF_MASK(%rip),%xmm29 vpaddd ONE(%rip),%xmm2,%xmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm0,%xmm2 vpshufb %xmm29,%xmm0,%xmm0 vmovdqu8 0(%rcx,%r11,1),%xmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %xmm15,%xmm0,%xmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %xmm15,%xmm0,%xmm0 vpxorq %xmm6,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm0,%xmm6 vextracti32x4 $0,%zmm6,%xmm13 cmpq $16,%r8 jl .L_small_initial_partial_block_480 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_480 .L_small_initial_partial_block_480: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %xmm13,%xmm14,%xmm14 jmp .L_after_reduction_480 .L_small_initial_compute_done_480: .L_after_reduction_480: jmp .L_small_initial_blocks_encrypted_479 .L_small_initial_num_blocks_is_2_479: vmovdqa64 SHUF_MASK(%rip),%ymm29 vshufi64x2 $0,%ymm2,%ymm2,%ymm0 vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm0,%xmm2 vpshufb %ymm29,%ymm0,%ymm0 vmovdqu8 0(%rcx,%r11,1),%ymm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %ymm15,%ymm0,%ymm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm15 
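/* Two-block case, remaining AES rounds: round keys at 144..224(%rdi) are
   applied to ymm0, then the result is XORed with the masked input in ymm6 and
   folded into the GHASH state. */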
vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %ymm15,%ymm0,%ymm0 vpxorq %ymm6,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm0,%ymm6 vextracti32x4 $1,%zmm6,%xmm13 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_481 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_481 .L_small_initial_partial_block_481: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_481: orq %r8,%r8 je .L_after_reduction_481 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_481: jmp .L_small_initial_blocks_encrypted_479 .L_small_initial_num_blocks_is_3_479: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc 
%zmm15,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vextracti32x4 $2,%zmm6,%xmm13 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_482 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_482 .L_small_initial_partial_block_482: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_482: orq %r8,%r8 je .L_after_reduction_482 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_482: jmp .L_small_initial_blocks_encrypted_479 .L_small_initial_num_blocks_is_4_479: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 
208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vextracti32x4 $3,%zmm6,%xmm13 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_483 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_483 .L_small_initial_partial_block_483: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_483: orq %r8,%r8 je .L_after_reduction_483 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_483: jmp .L_small_initial_blocks_encrypted_479 .L_small_initial_num_blocks_is_5_479: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%xmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %xmm15,%xmm3,%xmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc 
%xmm15,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %xmm15,%xmm3,%xmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %xmm7,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %xmm29,%xmm3,%xmm7 vextracti32x4 $0,%zmm7,%xmm13 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_484 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_484 .L_small_initial_partial_block_484: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_484: orq %r8,%r8 je .L_after_reduction_484 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_484: jmp .L_small_initial_blocks_encrypted_479 .L_small_initial_num_blocks_is_6_479: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%ymm7{%k1}{z} 
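/* Six-block case: zmm0 holds counter blocks 1-4 and ymm3 blocks 5-6; both run
   through the full round-key schedule below and are XORed with the input just
   loaded into zmm6/ymm7 (ymm7 masked by k1 to cover a short trailing block). */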
vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %ymm15,%ymm3,%ymm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %ymm15,%ymm3,%ymm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %ymm7,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %ymm29,%ymm3,%ymm7 vextracti32x4 $1,%zmm7,%xmm13 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_485 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_485 .L_small_initial_partial_block_485: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 
vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_485: orq %r8,%r8 je .L_after_reduction_485 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_485: jmp .L_small_initial_blocks_encrypted_479 .L_small_initial_num_blocks_is_7_479: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vextracti32x4 $2,%zmm7,%xmm13 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_486 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq 
%xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_486 .L_small_initial_partial_block_486: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_486: orq %r8,%r8 je .L_after_reduction_486 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_486: jmp .L_small_initial_blocks_encrypted_479 .L_small_initial_num_blocks_is_8_479: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} 
vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vextracti32x4 $3,%zmm7,%xmm13 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_487 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_487 .L_small_initial_partial_block_487: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_487: orq %r8,%r8 je .L_after_reduction_487 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_487: jmp .L_small_initial_blocks_encrypted_479 .L_small_initial_num_blocks_is_9_479: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%xmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %xmm15,%xmm4,%xmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc 
%zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %xmm15,%xmm4,%xmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %xmm10,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %xmm29,%xmm4,%xmm10 vextracti32x4 $0,%zmm10,%xmm13 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_488 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_488 .L_small_initial_partial_block_488: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq 
$0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_488: orq %r8,%r8 je .L_after_reduction_488 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_488: jmp .L_small_initial_blocks_encrypted_479 .L_small_initial_num_blocks_is_10_479: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%ymm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %ymm15,%ymm4,%ymm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %ymm15,%ymm4,%ymm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %ymm10,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} 
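/*
 * The last, possibly partial, vector is stored through byte mask %k1 (taken
 * from byte64_len_to_mask_table for the remaining length); the zero-masked
 * register copy that follows clears the unused keystream bytes so that only
 * real ciphertext is fed into GHASH.
 */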
vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %ymm29,%ymm4,%ymm10 vextracti32x4 $1,%zmm10,%xmm13 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_489 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_489 .L_small_initial_partial_block_489: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_489: orq %r8,%r8 je .L_after_reduction_489 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_489: jmp .L_small_initial_blocks_encrypted_479 .L_small_initial_num_blocks_is_11_479: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq 
%r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vextracti32x4 $2,%zmm10,%xmm13 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_490 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 
$1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_490 .L_small_initial_partial_block_490: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_490: orq %r8,%r8 je .L_after_reduction_490 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_490: jmp .L_small_initial_blocks_encrypted_479 .L_small_initial_num_blocks_is_12_479: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc 
%zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vextracti32x4 $3,%zmm10,%xmm13 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_491 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_491 .L_small_initial_partial_block_491: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 
$1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_491: orq %r8,%r8 je .L_after_reduction_491 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_491: jmp .L_small_initial_blocks_encrypted_479 .L_small_initial_num_blocks_is_13_479: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%xmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %xmm15,%xmm5,%xmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %xmm15,%xmm5,%xmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %xmm11,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 
%zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %xmm29,%xmm5,%xmm11 vextracti32x4 $0,%zmm11,%xmm13 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_492 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_492 .L_small_initial_partial_block_492: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_492: orq %r8,%r8 je .L_after_reduction_492 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_492: jmp .L_small_initial_blocks_encrypted_479 
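/*
 * Each .L_small_initial_num_blocks_is_N_479 arm above and below follows the
 * same pattern: build N big-endian counter blocks from the counter in %xmm2
 * (ddq_add_1234 / ddq_add_5678 / ddq_add_8888 increments, byte-reflected
 * through SHUF_MASK), run them through the AES-256 key schedule at (%rdi),
 * XOR with the input at (%rcx,%r11) and store to the output at (%r9,%r11),
 * masking the final partial vector with %k1, then fold the byte-reflected
 * ciphertext into the GHASH accumulator %xmm14 using the precomputed powers
 * of the hash key kept in the context at (%rsi).  The _partial_block_ branch
 * is taken when the last 16-byte block is incomplete and records the
 * residual byte count through (%rdx).
 */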
.L_small_initial_num_blocks_is_14_479: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%ymm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %ymm15,%ymm5,%ymm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %ymm15,%ymm5,%ymm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %ymm11,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %ymm29,%ymm5,%ymm11 vextracti32x4 $1,%zmm11,%xmm13 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_493 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 
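/*
 * Full-block GHASH path: each 4-block group of byte-reflected ciphertext is
 * multiplied by the matching group of hash-key powers from the context
 * (vpclmulqdq immediates 0x11, 0x00, 0x01 and 0x10 produce the high, low and
 * two cross products), and the partial products are accumulated across all
 * groups before a single 128-bit reduction.
 */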
vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_493 .L_small_initial_partial_block_493: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_493: orq %r8,%r8 je .L_after_reduction_493 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_493: jmp .L_small_initial_blocks_encrypted_479 .L_small_initial_num_blocks_is_15_479: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 
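/*
 * %r15 now holds the byte count of the fourth 64-byte vector (the remaining
 * length minus the 192 bytes carried by the three full ZMM vectors); the
 * table lookup above converts it into the load/store mask %k1 for that
 * final, possibly partial, vector.
 */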
vextracti32x4 $2,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %zmm29,%zmm5,%zmm11 vextracti32x4 $2,%zmm11,%xmm13 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_494 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq 
$0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_494 .L_small_initial_partial_block_494: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_494: orq %r8,%r8 je .L_after_reduction_494 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_494: jmp .L_small_initial_blocks_encrypted_479 .L_small_initial_num_blocks_is_16_479: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 
0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm0,%zmm6 vpshufb %zmm29,%zmm3,%zmm7 vpshufb %zmm29,%zmm4,%zmm10 vpshufb %zmm29,%zmm5,%zmm11 vextracti32x4 $3,%zmm11,%xmm13 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_495: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq 
$0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_495: vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_495: .L_small_initial_blocks_encrypted_479: .L_ghash_done_334: vmovdqu64 %xmm2,0(%rsi) vmovdqu64 %xmm14,64(%rsi) .L_enc_dec_done_334: jmp .Lexit_gcm_encrypt .Lexit_gcm_encrypt: cmpq $256,%r8 jbe .Lskip_hkeys_cleanup_496 vpxor %xmm0,%xmm0,%xmm0 vmovdqa64 %zmm0,0(%rsp) vmovdqa64 %zmm0,64(%rsp) vmovdqa64 %zmm0,128(%rsp) vmovdqa64 %zmm0,192(%rsp) vmovdqa64 %zmm0,256(%rsp) vmovdqa64 %zmm0,320(%rsp) vmovdqa64 %zmm0,384(%rsp) vmovdqa64 %zmm0,448(%rsp) vmovdqa64 %zmm0,512(%rsp) vmovdqa64 %zmm0,576(%rsp) vmovdqa64 %zmm0,640(%rsp) vmovdqa64 %zmm0,704(%rsp) .Lskip_hkeys_cleanup_496: vzeroupper leaq (%rbp),%rsp .cfi_def_cfa_register %rsp popq %r15 .cfi_adjust_cfa_offset -8 .cfi_restore %r15 popq %r14 .cfi_adjust_cfa_offset -8 .cfi_restore %r14 popq %r13 .cfi_adjust_cfa_offset -8 .cfi_restore %r13 popq %r12 .cfi_adjust_cfa_offset -8 .cfi_restore %r12 popq %rbp .cfi_adjust_cfa_offset -8 .cfi_restore %rbp popq %rbx .cfi_adjust_cfa_offset -8 .cfi_restore %rbx .byte 0xf3,0xc3 .Lencrypt_seh_end: .cfi_endproc .size ossl_aes_gcm_encrypt_avx512, .-ossl_aes_gcm_encrypt_avx512 .globl ossl_aes_gcm_decrypt_avx512 .type ossl_aes_gcm_decrypt_avx512,@function .align 32 ossl_aes_gcm_decrypt_avx512: .cfi_startproc .Ldecrypt_seh_begin: .byte 243,15,30,250 pushq %rbx .cfi_adjust_cfa_offset 8 .cfi_offset %rbx,-16 .Ldecrypt_seh_push_rbx: pushq %rbp .cfi_adjust_cfa_offset 8 .cfi_offset %rbp,-24 .Ldecrypt_seh_push_rbp: pushq %r12 .cfi_adjust_cfa_offset 8 .cfi_offset %r12,-32 .Ldecrypt_seh_push_r12: pushq %r13 .cfi_adjust_cfa_offset 8 .cfi_offset %r13,-40 .Ldecrypt_seh_push_r13: pushq %r14 .cfi_adjust_cfa_offset 8 .cfi_offset %r14,-48 .Ldecrypt_seh_push_r14: pushq %r15 .cfi_adjust_cfa_offset 8 .cfi_offset %r15,-56 .Ldecrypt_seh_push_r15: leaq 0(%rsp),%rbp .cfi_def_cfa_register %rbp .Ldecrypt_seh_setfp: .Ldecrypt_seh_prolog_end: subq $1588,%rsp andq $(-64),%rsp movl 240(%rdi),%eax cmpl $9,%eax je .Laes_gcm_decrypt_128_avx512 cmpl $11,%eax je .Laes_gcm_decrypt_192_avx512 cmpl $13,%eax je .Laes_gcm_decrypt_256_avx512 xorl %eax,%eax jmp .Lexit_gcm_decrypt .align 32 .Laes_gcm_decrypt_128_avx512: orq %r8,%r8 je .L_enc_dec_done_497 xorq %r14,%r14 vmovdqu64 64(%rsi),%xmm14 movq (%rdx),%r11 orq %r11,%r11 je .L_partial_block_done_498 movl $16,%r10d leaq byte_len_to_mask_table(%rip),%r12 cmpq %r10,%r8 cmovcq %r8,%r10 kmovw (%r12,%r10,2),%k1 vmovdqu8 (%rcx),%xmm0{%k1}{z} vmovdqu64 16(%rsi),%xmm3 vmovdqu64 336(%rsi),%xmm4 leaq SHIFT_MASK(%rip),%r12 addq %r11,%r12 vmovdqu64 (%r12),%xmm5 vpshufb %xmm5,%xmm3,%xmm3 vmovdqa64 %xmm0,%xmm6 vpxorq %xmm0,%xmm3,%xmm3 leaq (%r8,%r11,1),%r13 subq $16,%r13 jge .L_no_extra_mask_498 subq %r13,%r12 .L_no_extra_mask_498: vmovdqu64 16(%r12),%xmm0 vpand %xmm0,%xmm3,%xmm3 vpand 
%xmm0,%xmm6,%xmm6 vpshufb SHUF_MASK(%rip),%xmm6,%xmm6 vpshufb %xmm5,%xmm6,%xmm6 vpxorq %xmm6,%xmm14,%xmm14 cmpq $0,%r13 jl .L_partial_incomplete_498 vpclmulqdq $0x11,%xmm4,%xmm14,%xmm7 vpclmulqdq $0x00,%xmm4,%xmm14,%xmm10 vpclmulqdq $0x01,%xmm4,%xmm14,%xmm11 vpclmulqdq $0x10,%xmm4,%xmm14,%xmm14 vpxorq %xmm11,%xmm14,%xmm14 vpsrldq $8,%xmm14,%xmm11 vpslldq $8,%xmm14,%xmm14 vpxorq %xmm11,%xmm7,%xmm7 vpxorq %xmm10,%xmm14,%xmm14 vmovdqu64 POLY2(%rip),%xmm11 vpclmulqdq $0x01,%xmm14,%xmm11,%xmm10 vpslldq $8,%xmm10,%xmm10 vpxorq %xmm10,%xmm14,%xmm14 vpclmulqdq $0x00,%xmm14,%xmm11,%xmm10 vpsrldq $4,%xmm10,%xmm10 vpclmulqdq $0x10,%xmm14,%xmm11,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm10,%xmm7,%xmm14 movq $0,(%rdx) movq %r11,%r12 movq $16,%r11 subq %r12,%r11 jmp .L_enc_dec_done_498 .L_partial_incomplete_498: addq %r8,(%rdx) movq %r8,%r11 .L_enc_dec_done_498: leaq byte_len_to_mask_table(%rip),%r12 kmovw (%r12,%r11,2),%k1 vmovdqu64 %xmm14,64(%rsi) movq %r9,%r12 vmovdqu8 %xmm3,(%r12){%k1} .L_partial_block_done_498: vmovdqu64 0(%rsi),%xmm2 subq %r11,%r8 je .L_enc_dec_done_497 cmpq $256,%r8 jbe .L_message_below_equal_16_blocks_497 vmovdqa64 SHUF_MASK(%rip),%zmm29 vmovdqa64 ddq_addbe_4444(%rip),%zmm27 vmovdqa64 ddq_addbe_1234(%rip),%zmm28 vmovd %xmm2,%r15d andl $255,%r15d vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpshufb %zmm29,%zmm2,%zmm2 cmpb $240,%r15b jae .L_next_16_overflow_499 vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_499 .L_next_16_overflow_499: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_499: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 0(%rcx,%r11,1),%zmm0 vmovdqu8 64(%rcx,%r11,1),%zmm3 vmovdqu8 128(%rcx,%r11,1),%zmm4 vmovdqu8 192(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast 
%zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,0(%r10,%r11,1) vmovdqu8 %zmm10,64(%r10,%r11,1) vmovdqu8 %zmm11,128(%r10,%r11,1) vmovdqu8 %zmm12,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm7 vpshufb %zmm29,%zmm3,%zmm10 vpshufb %zmm29,%zmm4,%zmm11 vpshufb %zmm29,%zmm5,%zmm12 vmovdqa64 %zmm7,768(%rsp) vmovdqa64 %zmm10,832(%rsp) vmovdqa64 %zmm11,896(%rsp) vmovdqa64 %zmm12,960(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_500 vmovdqu64 288(%rsi),%zmm0 vmovdqu64 %zmm0,704(%rsp) vmovdqu64 224(%rsi),%zmm3 vmovdqu64 %zmm3,640(%rsp) vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 160(%rsi),%zmm4 vmovdqu64 %zmm4,576(%rsp) vmovdqu64 96(%rsi),%zmm5 vmovdqu64 %zmm5,512(%rsp) .L_skip_hkeys_precomputation_500: cmpq $512,%r8 jb .L_message_below_32_blocks_497 cmpb $240,%r15b jae .L_next_16_overflow_501 vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_501 .L_next_16_overflow_501: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_501: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 256(%rcx,%r11,1),%zmm0 vmovdqu8 320(%rcx,%r11,1),%zmm3 vmovdqu8 384(%rcx,%r11,1),%zmm4 vmovdqu8 448(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,256(%r10,%r11,1) vmovdqu8 %zmm10,320(%r10,%r11,1) vmovdqu8 %zmm11,384(%r10,%r11,1) vmovdqu8 %zmm12,448(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm7 vpshufb %zmm29,%zmm3,%zmm10 vpshufb %zmm29,%zmm4,%zmm11 vpshufb %zmm29,%zmm5,%zmm12 vmovdqa64 %zmm7,1024(%rsp) 
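/*
 * The 16 data blocks just consumed are byte-reflected (vpshufb with the
 * SHUF_MASK image kept in %zmm29) and parked in the stack frame at
 * 1024(%rsp)..1216(%rsp); their GHASH contribution is folded in on a later
 * pass so the carry-less multiplies can overlap with fresh AES rounds.
 * Unless %r14 signals it was already done, the code that follows extends
 * the on-stack table of hash-key powers down to 0(%rsp) using the usual
 * GHASH multiply (four vpclmulqdq partial products) followed by the
 * two-step POLY2 reduction merged with vpternlogq $0x96.
 */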
vmovdqa64 %zmm10,1088(%rsp) vmovdqa64 %zmm11,1152(%rsp) vmovdqa64 %zmm12,1216(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_502 vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,192(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq 
$0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,128(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,64(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,0(%rsp) .L_skip_hkeys_precomputation_502: movq $1,%r14 addq $512,%r11 subq $512,%r8 cmpq $768,%r8 jb .L_no_more_big_nblocks_497 .L_encrypt_big_nblocks_497: cmpb $240,%r15b jae .L_16_blocks_overflow_503 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_503 .L_16_blocks_overflow_503: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_503: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 
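/*
 * Main loop (.L_encrypt_big_nblocks_*): each iteration processes 48 blocks
 * as three batches of 16.  For every batch the 16 counter blocks travel
 * through the AES rounds in %zmm0/%zmm3/%zmm4/%zmm5 while the GHASH of the
 * 16 blocks saved from the previous batch is accumulated with vpclmulqdq
 * into %zmm24/%zmm25/%zmm26; interleaving the vaesenc and vpclmulqdq
 * streams hides their latencies.  %r15b tracks the least-significant byte
 * of the 32-bit block counter: the fast path adds the ddq_addbe_* constants
 * directly, and the cmpb $240 checks divert to the .L_16_blocks_overflow_*
 * path (byte-shuffle, add the plain ddq_add_1234/ddq_add_4444 constants,
 * shuffle back) whenever the next 16 increments would carry out of that
 * low byte.
 */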
vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_504 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_504 .L_16_blocks_overflow_504: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_504: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc 
%zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_505 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_505 .L_16_blocks_overflow_505: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_505: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 512(%rcx,%r11,1),%zmm17 vmovdqu8 576(%rcx,%r11,1),%zmm19 vmovdqu8 640(%rcx,%r11,1),%zmm20 vmovdqu8 704(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpternlogq $0x96,%zmm15,%zmm12,%zmm6 vpxorq %zmm24,%zmm6,%zmm6 vpternlogq $0x96,%zmm10,%zmm13,%zmm7 vpxorq %zmm25,%zmm7,%zmm7 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vextracti64x4 $1,%zmm6,%ymm12 vpxorq %ymm12,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm12 vpxorq %xmm12,%xmm6,%xmm6 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm6 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,512(%r10,%r11,1) vmovdqu8 %zmm3,576(%r10,%r11,1) vmovdqu8 %zmm4,640(%r10,%r11,1) vmovdqu8 %zmm5,704(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1024(%rsp) vmovdqa64 %zmm3,1088(%rsp) vmovdqa64 %zmm4,1152(%rsp) vmovdqa64 %zmm5,1216(%rsp) vmovdqa64 %zmm6,%zmm14 addq $768,%r11 subq $768,%r8 cmpq $768,%r8 jae .L_encrypt_big_nblocks_497 .L_no_more_big_nblocks_497: cmpq $512,%r8 jae .L_encrypt_32_blocks_497 cmpq $256,%r8 jae .L_encrypt_16_blocks_497 .L_encrypt_0_blocks_ghash_32_497: movl %r8d,%r10d andl $~15,%r10d movl $256,%ebx subl %r10d,%ebx vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 
896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 addl $256,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_506 cmpl $8,%r10d je .L_last_num_blocks_is_8_506 jb .L_last_num_blocks_is_7_1_506 cmpl $12,%r10d je .L_last_num_blocks_is_12_506 jb .L_last_num_blocks_is_11_9_506 cmpl $15,%r10d je .L_last_num_blocks_is_15_506 ja .L_last_num_blocks_is_16_506 cmpl $14,%r10d je .L_last_num_blocks_is_14_506 jmp .L_last_num_blocks_is_13_506 .L_last_num_blocks_is_11_9_506: cmpl $10,%r10d je .L_last_num_blocks_is_10_506 ja .L_last_num_blocks_is_11_506 jmp .L_last_num_blocks_is_9_506 .L_last_num_blocks_is_7_1_506: cmpl $4,%r10d je .L_last_num_blocks_is_4_506 jb .L_last_num_blocks_is_3_1_506 cmpl $6,%r10d ja .L_last_num_blocks_is_7_506 je .L_last_num_blocks_is_6_506 jmp .L_last_num_blocks_is_5_506 .L_last_num_blocks_is_3_1_506: cmpl $2,%r10d ja .L_last_num_blocks_is_3_506 je .L_last_num_blocks_is_2_506 .L_last_num_blocks_is_1_506: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_507 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_507 .L_16_blocks_overflow_507: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_507: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc 
%xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_508 subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_508 .L_small_initial_partial_block_508: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_508 .L_small_initial_compute_done_508: .L_after_reduction_508: jmp .L_last_blocks_done_506 .L_last_num_blocks_is_2_506: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_509 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_509 .L_16_blocks_overflow_509: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_509: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq 
$0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_510 subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_510 .L_small_initial_partial_block_510: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_510: orq %r8,%r8 je .L_after_reduction_510 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_510: jmp .L_last_blocks_done_506 .L_last_num_blocks_is_3_506: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_511 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_511 .L_16_blocks_overflow_511: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_511: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 
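/*
 * Tail dispatch: %r10d holds ceil(remaining_bytes / 16) and the compare
 * ladder above selects one .L_last_num_blocks_is_N_* handler per possible
 * count (1..16).  Each handler generates N counter blocks, encrypts them
 * with a masked load/store for the final register (%k1 taken from
 * byte64_len_to_mask_table), folds the N data blocks into the GHASH
 * accumulators against the matching slice of precomputed hash-key powers,
 * and performs the final reduction into %xmm14.  When the last block is
 * shorter than 16 bytes its byte count is recorded through %rdx and the
 * in-progress block state is saved at 16(%rsi), apparently so a later call
 * can complete the partial block.
 */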
vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_512 subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_512 .L_small_initial_partial_block_512: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq 
%xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_512: orq %r8,%r8 je .L_after_reduction_512 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_512: jmp .L_last_blocks_done_506 .L_last_num_blocks_is_4_506: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_513 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_513 .L_16_blocks_overflow_513: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_513: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_514 subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq 
%xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_514 .L_small_initial_partial_block_514: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_514: orq %r8,%r8 je .L_after_reduction_514 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_514: jmp .L_last_blocks_done_506 .L_last_num_blocks_is_5_506: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_515 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_515 .L_16_blocks_overflow_515: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_515: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 
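/*
 * For tail counts above four blocks the data is split into full 64-byte
 * ZMM registers plus one narrower masked register for the remainder (an
 * XMM here in the 5-block case, a YMM or ZMM for larger remainders).
 * Note the subq $64 (or $128 further down) applied to %rax before the
 * byte64_len_to_mask_table lookup: it makes %k1 describe only the bytes
 * carried by that final register.
 */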
vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_516 subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_516 .L_small_initial_partial_block_516: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_516: orq %r8,%r8 je .L_after_reduction_516 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_516: jmp .L_last_blocks_done_506 .L_last_num_blocks_is_6_506: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_517 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp 
.L_16_blocks_ok_517 .L_16_blocks_overflow_517: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_517: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_518 subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq 
%xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_518 .L_small_initial_partial_block_518: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_518: orq %r8,%r8 je .L_after_reduction_518 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_518: jmp .L_last_blocks_done_506 .L_last_num_blocks_is_7_506: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_519 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_519 .L_16_blocks_overflow_519: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_519: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 
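/*
 * The reduction pattern repeated in these handlers: the four vpclmulqdq
 * partial products are merged with vpternlogq $0x96 (a three-way XOR),
 * the 512-bit accumulators are folded to 128 bits by XOR-ing the upper
 * lanes down (vextracti64x4 / vextracti32x4 + vpxorq), and the resulting
 * 256-bit product is reduced modulo the GHASH polynomial with the two
 * POLY2 multiplies (vpclmulqdq $0x01, then $0x00 and $0x10 with the 8- and
 * 4-byte shifts), leaving the updated hash value in %xmm14.
 */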
vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_520 subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_520 .L_small_initial_partial_block_520: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq 
$0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_520: orq %r8,%r8 je .L_after_reduction_520 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_520: jmp .L_last_blocks_done_506 .L_last_num_blocks_is_8_506: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_521 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_521 .L_16_blocks_overflow_521: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_521: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_522 subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 
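/*
 * Hash-key selection for the tail: the context at %rsi appears to hold
 * H^k at offset 352 - 16*k, so an N-block tail multiplies against the
 * slice starting at 352 - 16*N(%rsi) (336(%rsi) for one block, 320(%rsi)
 * for two, ..., 224(%rsi) here for eight).  That way the oldest block
 * meets the highest power of H and the per-block products can simply be
 * XOR-ed together before the single final reduction.
 */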
vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_522 .L_small_initial_partial_block_522: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_522: orq %r8,%r8 je .L_after_reduction_522 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_522: jmp .L_last_blocks_done_506 .L_last_num_blocks_is_9_506: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_523 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_523 .L_16_blocks_overflow_523: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_523: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq 
$0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_524 subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq 
%xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_524 .L_small_initial_partial_block_524: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_524: orq %r8,%r8 je .L_after_reduction_524 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_524: jmp .L_last_blocks_done_506 .L_last_num_blocks_is_10_506: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_525 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_525 .L_16_blocks_overflow_525: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_525: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc 
%zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_526 subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_526 .L_small_initial_partial_block_526: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 
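/* Partial-block GHASH fold for the 10-block tail: the remaining blocks are multiplied by hash-key powers taken from the context at (%rsi) (a final 16-byte power is loaded from 336(%rsi) just below), the partial products are combined with vpternlogq, and the result is reduced with the POLY2 constant. */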
vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_526: orq %r8,%r8 je .L_after_reduction_526 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_526: jmp .L_last_blocks_done_506 .L_last_num_blocks_is_11_506: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_527 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_527 .L_16_blocks_overflow_527: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_527: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc 
%zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_528 subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_528 .L_small_initial_partial_block_528: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq 
%ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_528: orq %r8,%r8 je .L_after_reduction_528 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_528: jmp .L_last_blocks_done_506 .L_last_num_blocks_is_12_506: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_529 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_529 .L_16_blocks_overflow_529: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_529: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_530 subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_530 .L_small_initial_partial_block_530: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 
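/* .L_small_initial_compute_done_*: at this point %xmm14 holds the reduced GHASH accumulator; when %r8 is non-zero a trailing partial block remains, and its byte-shuffled copy kept in %xmm7 is XORed into %xmm14 before jumping to .L_last_blocks_done_506. */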
.L_small_initial_compute_done_530: orq %r8,%r8 je .L_after_reduction_530 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_530: jmp .L_last_blocks_done_506 .L_last_num_blocks_is_13_506: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_531 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_531 .L_16_blocks_overflow_531: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_531: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 
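/* 13-block tail, final AES round: vaesenclast below completes the counter-mode keystream, which is XORed with the blocks loaded from (%rcx,%r11); the last 16-byte store at 192(%r10,%r11,1) is masked by %k1 to cover only the partial block. */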
vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_532 subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_532 .L_small_initial_partial_block_532: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq 
$8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_532: orq %r8,%r8 je .L_after_reduction_532 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_532: jmp .L_last_blocks_done_506 .L_last_num_blocks_is_14_506: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_533 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_533 .L_16_blocks_overflow_533: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_533: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc 
%zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_534 subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_534 .L_small_initial_partial_block_534: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq 
%zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_534: orq %r8,%r8 je .L_after_reduction_534 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_534: jmp .L_last_blocks_done_506 .L_last_num_blocks_is_15_506: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_535 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_535 .L_16_blocks_overflow_535: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_535: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 
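/* 15-block tail, remaining AES rounds: round keys continue to be broadcast from 128(%rdi) upward while vpternlogq folds the GHASH partial products of the stack-cached blocks into the %zmm24/%zmm25/%zmm26 accumulators. */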
vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_536 subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_536 .L_small_initial_partial_block_536: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq 
$0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_536: orq %r8,%r8 je .L_after_reduction_536 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_536: jmp .L_last_blocks_done_506 .L_last_num_blocks_is_16_506: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_537 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_537 .L_16_blocks_overflow_537: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_537: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 
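/* 16-block tail: the fourth 64-byte input load below is zero-masked with %k1 (taken from byte64_len_to_mask_table), so bytes beyond the remaining message length are zeroed before the blocks are byte-shuffled and fed into GHASH. */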
vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_538: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_538: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_538: jmp .L_last_blocks_done_506 .L_last_num_blocks_is_0_506: vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 
0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_506: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_497 .L_encrypt_32_blocks_497: cmpb $240,%r15b jae .L_16_blocks_overflow_539 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_539 .L_16_blocks_overflow_539: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_539: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_540 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_540 .L_16_blocks_overflow_540: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_540: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 
64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 
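# Note (annotation, inferred from the surrounding code): the 4-way GHASH
# accumulators in %zmm24 (high), %zmm25 (low) and %zmm26 (middle) are folded
# down to a single 128-bit value and reduced with the POLY2 constant into
# %xmm14 just below; the code then takes the 512 bytes just processed off %r8,
# advances the data offset in %r11, and dispatches on ceil(%r8/16) to the
# matching .L_last_num_blocks_is_* tail handler via the compare/branch tree.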
vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 subq $512,%r8 addq $512,%r11 movl %r8d,%r10d andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_541 cmpl $8,%r10d je .L_last_num_blocks_is_8_541 jb .L_last_num_blocks_is_7_1_541 cmpl $12,%r10d je .L_last_num_blocks_is_12_541 jb .L_last_num_blocks_is_11_9_541 cmpl $15,%r10d je .L_last_num_blocks_is_15_541 ja .L_last_num_blocks_is_16_541 cmpl $14,%r10d je .L_last_num_blocks_is_14_541 jmp .L_last_num_blocks_is_13_541 .L_last_num_blocks_is_11_9_541: cmpl $10,%r10d je .L_last_num_blocks_is_10_541 ja .L_last_num_blocks_is_11_541 jmp .L_last_num_blocks_is_9_541 .L_last_num_blocks_is_7_1_541: cmpl $4,%r10d je .L_last_num_blocks_is_4_541 jb .L_last_num_blocks_is_3_1_541 cmpl $6,%r10d ja .L_last_num_blocks_is_7_541 je .L_last_num_blocks_is_6_541 jmp .L_last_num_blocks_is_5_541 .L_last_num_blocks_is_3_1_541: cmpl $2,%r10d ja .L_last_num_blocks_is_3_541 je .L_last_num_blocks_is_2_541 .L_last_num_blocks_is_1_541: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_542 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_542 .L_16_blocks_overflow_542: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_542: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc 
%xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_543 subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_543 .L_small_initial_partial_block_543: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_543 .L_small_initial_compute_done_543: .L_after_reduction_543: jmp .L_last_blocks_done_541 .L_last_num_blocks_is_2_541: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_544 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_544 .L_16_blocks_overflow_544: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_544: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq 
$0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_545 subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_545 .L_small_initial_partial_block_545: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_545: orq %r8,%r8 je .L_after_reduction_545 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_545: jmp .L_last_blocks_done_541 .L_last_num_blocks_is_3_541: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_546 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_546 .L_16_blocks_overflow_546: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_546: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 
vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_547 subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_547 .L_small_initial_partial_block_547: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 
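# Note (annotation): final reduction of this tail's GHASH product follows; the
# high half sits in %xmm0 and the low half in %xmm3, and both are reduced
# modulo the GCM polynomial via the POLY2 constant, leaving the result in %xmm14.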
vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_547: orq %r8,%r8 je .L_after_reduction_547 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_547: jmp .L_last_blocks_done_541 .L_last_num_blocks_is_4_541: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_548 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_548 .L_16_blocks_overflow_548: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_548: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_549 subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 
vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_549 .L_small_initial_partial_block_549: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_549: orq %r8,%r8 je .L_after_reduction_549 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_549: jmp .L_last_blocks_done_541 .L_last_num_blocks_is_5_541: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_550 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_550 .L_16_blocks_overflow_550: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_550: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq 
$0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_551 subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_551 .L_small_initial_partial_block_551: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_551: orq %r8,%r8 je .L_after_reduction_551 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_551: jmp .L_last_blocks_done_541 .L_last_num_blocks_is_6_541: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_552 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_552 .L_16_blocks_overflow_552: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb 
%zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_552: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_553 subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 
vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_553 .L_small_initial_partial_block_553: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_553: orq %r8,%r8 je .L_after_reduction_553 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_553: jmp .L_last_blocks_done_541 .L_last_num_blocks_is_7_541: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_554 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_554 .L_16_blocks_overflow_554: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_554: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 
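# Note (annotation): the carry-less multiplies interleaved with the AES rounds
# above and below hash the 16 blocks saved from the previous pass; %zmm18 holds
# hash-key powers spilled to the stack (indexed by %rbx) and %zmm22 the
# byte-reflected saved blocks, with the products collected into
# %zmm24/%zmm25/%zmm26 for the reduction that follows.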
vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_555 subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_555 .L_small_initial_partial_block_555: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_555: orq %r8,%r8 je .L_after_reduction_555 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_555: jmp .L_last_blocks_done_541 
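# Note (annotation): tail handler for exactly 8 remaining blocks. %k1 is loaded
# from byte64_len_to_mask_table using the leftover byte count, so the second
# 64-byte load/store is masked and a trailing partial block is handled in
# place; the compare against %r15d falls back to the byte-swapped add path when
# the low counter byte would wrap while generating the 8 counter blocks.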
.L_last_num_blocks_is_8_541: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_556 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_556 .L_16_blocks_overflow_556: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_556: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_557 subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq 
%zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_557 .L_small_initial_partial_block_557: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_557: orq %r8,%r8 je .L_after_reduction_557 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_557: jmp .L_last_blocks_done_541 .L_last_num_blocks_is_9_541: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_558 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_558 .L_16_blocks_overflow_558: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_558: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 
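# Note (annotation): nine-block tail; the first eight counter blocks travel as
# two full zmm vectors (%zmm0, %zmm3) while the ninth stays in %xmm4, so each
# broadcast round key feeds zmm-width and xmm-width vaesenc in lockstep.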
vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_559 subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_559 .L_small_initial_partial_block_559: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_559: orq %r8,%r8 je .L_after_reduction_559 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_559: jmp .L_last_blocks_done_541 .L_last_num_blocks_is_10_541: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_560 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_560 .L_16_blocks_overflow_560: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_560: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 
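# Note (annotation, partly an assumption): further down, the masked tail vector
# is re-masked in place with {%k1}{z} so bytes past the message end are zeroed
# before the byte-reflect for GHASH; for a partial final block the leftover
# length is stored at (%rdx) and the last block at 16(%rsi), apparently so a
# later call can finish hashing it.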
vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_561 subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_561 .L_small_initial_partial_block_561: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 
vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_561: orq %r8,%r8 je .L_after_reduction_561 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_561: jmp .L_last_blocks_done_541 .L_last_num_blocks_is_11_541: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_562 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_562 .L_16_blocks_overflow_562: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_562: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc 
%zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_563 subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_563 .L_small_initial_partial_block_563: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq 
$0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_563: orq %r8,%r8 je .L_after_reduction_563 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_563: jmp .L_last_blocks_done_541 .L_last_num_blocks_is_12_541: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_564 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_564 .L_16_blocks_overflow_564: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_564: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 
%zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_565 subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_565 .L_small_initial_partial_block_565: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_565: orq %r8,%r8 je .L_after_reduction_565 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_565: jmp .L_last_blocks_done_541 .L_last_num_blocks_is_13_541: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_566 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 
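# Plain 32-bit counter increments for the 13-block tail; the .L_16_blocks_overflow_566
# path below instead byte-swaps, adds ddq_add_1234/ddq_add_4444 and swaps back, used
# when the low counter byte would wrap.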
jmp .L_16_blocks_ok_566 .L_16_blocks_overflow_566: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_566: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 
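# Byte-reflect the processed blocks with %zmm29 (presumably SHUF_MASK) so they can be
# multiplied against the hash key powers stored at the (%rsi) offsets used below.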
vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_567 subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_567 .L_small_initial_partial_block_567: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_567: orq %r8,%r8 je .L_after_reduction_567 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_567: jmp .L_last_blocks_done_541 .L_last_num_blocks_is_14_541: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq 
(%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_568 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_568 .L_16_blocks_overflow_568: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_568: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 
%zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_569 subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_569 .L_small_initial_partial_block_569: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq 
$0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_569: orq %r8,%r8 je .L_after_reduction_569 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_569: jmp .L_last_blocks_done_541 .L_last_num_blocks_is_15_541: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_570 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_570 .L_16_blocks_overflow_570: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_570: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 
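# 160(%rdi) is the final round key on this code path (an AES-128 schedule); the
# vaesenclast below completes the keystream for the 15-block tail.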
vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_571 subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_571 .L_small_initial_partial_block_571: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 
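# Fold the wide GHASH accumulators down to 128 bits via extract/XOR, then reduce with
# the POLY2 constant into %xmm14.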
vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_571: orq %r8,%r8 je .L_after_reduction_571 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_571: jmp .L_last_blocks_done_541 .L_last_num_blocks_is_16_541: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_572 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_572 .L_16_blocks_overflow_572: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_572: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 
128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_573: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_573: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_573: jmp .L_last_blocks_done_541 .L_last_num_blocks_is_0_541: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 
896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_541: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_497 .L_encrypt_16_blocks_497: cmpb $240,%r15b jae .L_16_blocks_overflow_574 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_574 .L_16_blocks_overflow_574: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_574: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 
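# Main 16-blocks-per-iteration body: the next 256 input bytes were just loaded into
# %zmm17/%zmm19/%zmm20/%zmm21 while the four counter-block groups continue through the
# remaining AES rounds.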
vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 256(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 320(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 384(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 448(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 subq $256,%r8 addq $256,%r11 movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_575 cmpl $8,%r10d je .L_last_num_blocks_is_8_575 jb .L_last_num_blocks_is_7_1_575 cmpl $12,%r10d je .L_last_num_blocks_is_12_575 jb .L_last_num_blocks_is_11_9_575 cmpl $15,%r10d je .L_last_num_blocks_is_15_575 ja .L_last_num_blocks_is_16_575 cmpl $14,%r10d je .L_last_num_blocks_is_14_575 jmp .L_last_num_blocks_is_13_575 .L_last_num_blocks_is_11_9_575: cmpl $10,%r10d je .L_last_num_blocks_is_10_575 ja .L_last_num_blocks_is_11_575 jmp .L_last_num_blocks_is_9_575 .L_last_num_blocks_is_7_1_575: cmpl $4,%r10d je .L_last_num_blocks_is_4_575 jb .L_last_num_blocks_is_3_1_575 cmpl $6,%r10d ja .L_last_num_blocks_is_7_575 je .L_last_num_blocks_is_6_575 jmp .L_last_num_blocks_is_5_575 .L_last_num_blocks_is_3_1_575: cmpl $2,%r10d ja .L_last_num_blocks_is_3_575 je .L_last_num_blocks_is_2_575 .L_last_num_blocks_is_1_575: leaq byte64_len_to_mask_table(%rip),%r10 movq 
%r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_576 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_576 .L_16_blocks_overflow_576: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_576: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %xmm31,%xmm0,%xmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_577 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq 
%xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_577 .L_small_initial_partial_block_577: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_577 .L_small_initial_compute_done_577: .L_after_reduction_577: jmp .L_last_blocks_done_575 .L_last_num_blocks_is_2_575: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_578 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_578 .L_16_blocks_overflow_578: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_578: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %ymm31,%ymm0,%ymm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 
$1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_579 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_579 .L_small_initial_partial_block_579: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_579: orq %r8,%r8 je .L_after_reduction_579 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_579: jmp .L_last_blocks_done_575 .L_last_num_blocks_is_3_575: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_580 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_580 .L_16_blocks_overflow_580: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_580: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 
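# Short tails are read and written through an opmask (%k1, looked up in
# byte64_len_to_mask_table) so only the valid bytes of the final chunk are touched.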
vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_581 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_581 .L_small_initial_partial_block_581: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_581: orq %r8,%r8 je .L_after_reduction_581 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_581: jmp .L_last_blocks_done_575 .L_last_num_blocks_is_4_575: leaq 
byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_582 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_582 .L_16_blocks_overflow_582: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_582: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_583 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 
$1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_583 .L_small_initial_partial_block_583: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_583: orq %r8,%r8 je .L_after_reduction_583 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_583: jmp .L_last_blocks_done_575 .L_last_num_blocks_is_5_575: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_584 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_584 .L_16_blocks_overflow_584: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_584: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 
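/* 5-block remainder: four counter blocks run in %zmm0 and the fifth in
   %xmm3; the first 64 source bytes were loaded unmasked into %zmm17 and
   the tail into %xmm19 under %k1, interleaved with the carry-less
   multiplies hashing the blocks saved from the preceding 16-block pass. */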
vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_585 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_585 .L_small_initial_partial_block_585: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq 
$0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_585: orq %r8,%r8 je .L_after_reduction_585 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_585: jmp .L_last_blocks_done_575 .L_last_num_blocks_is_6_575: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_586 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_586 .L_16_blocks_overflow_586: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_586: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq 
%ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_587 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_587 .L_small_initial_partial_block_587: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_587: orq %r8,%r8 je .L_after_reduction_587 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_587: jmp .L_last_blocks_done_575 .L_last_num_blocks_is_7_575: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_588 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_588 .L_16_blocks_overflow_588: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_588: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 
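/* vbroadcastf64x2 replicates one 128-bit round key into every lane of a
   zmm register, so each vaesenc below advances four counter blocks at
   once; the XORs just above applied the round-0 (whitening) key. */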
vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_589 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 
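/* Horizontal fold of the GHASH accumulators: the four 128-bit lanes of
   the high (%zmm0) and low (%zmm3) halves are XORed down to %xmm0/%xmm3,
   then reduced with the POLY2 constant and further carry-less multiplies,
   leaving the updated hash value in %xmm14. */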
vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_589 .L_small_initial_partial_block_589: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_589: orq %r8,%r8 je .L_after_reduction_589 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_589: jmp .L_last_blocks_done_575 .L_last_num_blocks_is_8_575: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_590 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_590 .L_16_blocks_overflow_590: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_590: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 
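/* vpternlogq with immediate 0x96 is a three-way XOR (A ^ B ^ C); each one
   folds a fresh pair of carry-less partial products into a running GHASH
   accumulator in a single instruction. */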
vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_591 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_591 .L_small_initial_partial_block_591: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq 
$0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_591: orq %r8,%r8 je .L_after_reduction_591 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_591: jmp .L_last_blocks_done_575 .L_last_num_blocks_is_9_575: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_592 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_592 .L_16_blocks_overflow_592: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_592: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq 
$0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_593 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_593 .L_small_initial_partial_block_593: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 
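/* The combined middle (cross) terms in %zmm30 were split at the 64-bit
   boundary by the 8-byte shifts above; here they are XORed into the high
   (%zmm8) and low (%zmm22) product halves before the fold and reduction. */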
vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_593: orq %r8,%r8 je .L_after_reduction_593 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_593: jmp .L_last_blocks_done_575 .L_last_num_blocks_is_10_575: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_594 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_594 .L_16_blocks_overflow_594: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_594: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 
POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_595 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_595 .L_small_initial_partial_block_595: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq 
%zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_595: orq %r8,%r8 je .L_after_reduction_595 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_595: jmp .L_last_blocks_done_575 .L_last_num_blocks_is_11_575: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_596 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_596 .L_16_blocks_overflow_596: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_596: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 
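/* Still inside the AES round interleave for the 11-block remainder: the
   middle GHASH term in %zmm10 is split by the byte shifts below and the
   reduction constant POLY2 is loaded, so the fold to 128 bits can overlap
   the remaining vaesenc rounds on %zmm0, %zmm3 and %zmm4. */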
vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_597 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_597 .L_small_initial_partial_block_597: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq 
%zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_597: orq %r8,%r8 je .L_after_reduction_597 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_597: jmp .L_last_blocks_done_575 .L_last_num_blocks_is_12_575: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_598 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_598 .L_16_blocks_overflow_598: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_598: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_599 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_599 .L_small_initial_partial_block_599: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 
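/* The vpclmulqdq immediate selects which 64-bit halves are multiplied:
   0x00 and 0x11 give the low*low and high*high products, 0x01 and 0x10
   the two cross terms of each 128-bit carry-less multiplication. */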
vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_599: orq %r8,%r8 je .L_after_reduction_599 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_599: jmp .L_last_blocks_done_575 .L_last_num_blocks_is_13_575: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_600 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_600 .L_16_blocks_overflow_600: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_600: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 
vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_601 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq 
$4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_601 .L_small_initial_partial_block_601: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_601: orq %r8,%r8 je .L_after_reduction_601 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_601: jmp .L_last_blocks_done_575 .L_last_num_blocks_is_14_575: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_602 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_602 .L_16_blocks_overflow_602: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_602: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc 
%ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_603 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq 
$0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_603 .L_small_initial_partial_block_603: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_603: orq %r8,%r8 je .L_after_reduction_603 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_603: jmp .L_last_blocks_done_575 .L_last_num_blocks_is_15_575: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_604 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_604 .L_16_blocks_overflow_604: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_604: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq 
%zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 
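# GHASH of the blocks just processed: each group of four blocks is multiplied
# by the matching hash-key powers from the table at (%rsi) via vpclmulqdq with
# immediates 0x11/0x00/0x01/0x10 (high, low and the two cross products), the
# partial products are merged with vpternlogq $0x96 (three-way XOR), and the
# 256-bit result is reduced with the POLY2 constant back to 128 bits in %xmm14.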
vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_605 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_605 .L_small_initial_partial_block_605: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_605: orq %r8,%r8 je 
.L_after_reduction_605 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_605: jmp .L_last_blocks_done_575 .L_last_num_blocks_is_16_575: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_606 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_606 .L_16_blocks_overflow_606: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_606: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 
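# In the 16-block path, as in the others, AES rounds and GHASH are interleaved:
# vbroadcastf64x2 N(%rdi) replicates each 128-bit round key across the lanes
# for vaesenc on the four counter registers, while the vpclmulqdq/vpternlogq
# instructions in between fold the ciphertext buffered at 1280..1472(%rsp),
# against the key powers at 512..704(%rsp), into the hash accumulators, so the
# two instruction streams can overlap in the pipeline.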
vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_607: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_607: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_607: jmp .L_last_blocks_done_575 .L_last_num_blocks_is_0_575: vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 
1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_575: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_497 .L_message_below_32_blocks_497: subq $256,%r8 addq $256,%r11 movl %r8d,%r10d testq %r14,%r14 jnz .L_skip_hkeys_precomputation_608 vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq 
$4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .L_skip_hkeys_precomputation_608: movq $1,%r14 andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_609 cmpl $8,%r10d je .L_last_num_blocks_is_8_609 jb .L_last_num_blocks_is_7_1_609 cmpl $12,%r10d je .L_last_num_blocks_is_12_609 jb .L_last_num_blocks_is_11_9_609 cmpl $15,%r10d je .L_last_num_blocks_is_15_609 ja .L_last_num_blocks_is_16_609 cmpl $14,%r10d je .L_last_num_blocks_is_14_609 jmp .L_last_num_blocks_is_13_609 .L_last_num_blocks_is_11_9_609: cmpl $10,%r10d je .L_last_num_blocks_is_10_609 ja .L_last_num_blocks_is_11_609 jmp .L_last_num_blocks_is_9_609 .L_last_num_blocks_is_7_1_609: cmpl $4,%r10d je .L_last_num_blocks_is_4_609 jb .L_last_num_blocks_is_3_1_609 cmpl $6,%r10d ja .L_last_num_blocks_is_7_609 je .L_last_num_blocks_is_6_609 jmp .L_last_num_blocks_is_5_609 .L_last_num_blocks_is_3_1_609: cmpl $2,%r10d ja .L_last_num_blocks_is_3_609 je .L_last_num_blocks_is_2_609 .L_last_num_blocks_is_1_609: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_610 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_610 .L_16_blocks_overflow_610: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_610: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_611 subq $16,%r8 
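# Partial-length loads and stores go through an opmask: the residual byte
# count, minus any full 64-byte groups already covered, indexes
# byte64_len_to_mask_table to build %k1, and vmovdqu8 ...{%k1}{z} then touches
# only the valid bytes of the final (partial) vector on both the load and the
# store side.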
movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_611 .L_small_initial_partial_block_611: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_611 .L_small_initial_compute_done_611: .L_after_reduction_611: jmp .L_last_blocks_done_609 .L_last_num_blocks_is_2_609: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_612 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_612 .L_16_blocks_overflow_612: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_612: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq 
$0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_613 subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_613 .L_small_initial_partial_block_613: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_613: orq %r8,%r8 je .L_after_reduction_613 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_613: jmp .L_last_blocks_done_609 .L_last_num_blocks_is_3_609: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_614 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_614 .L_16_blocks_overflow_614: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_614: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 
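# The *_609 tails serve the .L_message_below_32_blocks_497 case: the running
# hash in %zmm14 is XORed into the oldest buffered block at 768(%rsp), and the
# hash-key powers come from the stack copy indexed as (%rsp,%rbx,1), %rbx
# having been set above to 512 minus the 16-byte-aligned tail length,
# selecting the matching span of powers for the buffered blocks.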
vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_615 subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_615 .L_small_initial_partial_block_615: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_615: orq %r8,%r8 je 
.L_after_reduction_615 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_615: jmp .L_last_blocks_done_609 .L_last_num_blocks_is_4_609: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_616 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_616 .L_16_blocks_overflow_616: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_616: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_617 subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_617 .L_small_initial_partial_block_617: movq 
%r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_617: orq %r8,%r8 je .L_after_reduction_617 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_617: jmp .L_last_blocks_done_609 .L_last_num_blocks_is_5_609: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_618 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_618 .L_16_blocks_overflow_618: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_618: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 
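# Counter handling: what appears to be the low byte of the counter is tracked
# in %r15d; each path compares it against 256 minus the blocks it is about to
# consume, and on a possible carry takes the *_overflow_* branch, which
# byte-swaps the counter through %zmm29, adds the ddq_add_1234 / ddq_add_4444
# increments so the carry propagates through the full 32-bit word, and swaps
# back; otherwise the %zmm27 / %zmm28 increments are added directly.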
vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_619 subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_619 .L_small_initial_partial_block_619: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_619: orq %r8,%r8 je .L_after_reduction_619 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_619: jmp .L_last_blocks_done_609 .L_last_num_blocks_is_6_609: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_620 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_620 .L_16_blocks_overflow_620: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_620: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq 
%zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_621 subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_621 .L_small_initial_partial_block_621: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 
272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_621: orq %r8,%r8 je .L_after_reduction_621 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_621: jmp .L_last_blocks_done_609 .L_last_num_blocks_is_7_609: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_622 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_622 .L_16_blocks_overflow_622: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_622: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 
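/*
 * Here the GHASH partial products are being gathered into three
 * accumulators: zmm24 (high halves, 0x11 products), zmm25 (low halves,
 * 0x00 products) and zmm26 (the combined 0x01/0x10 middle products).
 * They are folded into the final reduction further below.  (Descriptive
 * comment; accumulator roles inferred from the vpclmulqdq selectors.)
 */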
vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_623 subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_623 .L_small_initial_partial_block_623: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_623: orq %r8,%r8 je .L_after_reduction_623 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_623: jmp .L_last_blocks_done_609 .L_last_num_blocks_is_8_609: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_624 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_624 .L_16_blocks_overflow_624: vpshufb %zmm29,%zmm2,%zmm2 vpaddd 
ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_624: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_625 subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq 
%ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_625 .L_small_initial_partial_block_625: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_625: orq %r8,%r8 je .L_after_reduction_625 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_625: jmp .L_last_blocks_done_609 .L_last_num_blocks_is_9_609: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_626 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_626 .L_16_blocks_overflow_626: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_626: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq 
$0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_627 subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_627 .L_small_initial_partial_block_627: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq 
$0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_627: orq %r8,%r8 je .L_after_reduction_627 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_627: jmp .L_last_blocks_done_609 .L_last_num_blocks_is_10_609: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_628 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_628 .L_16_blocks_overflow_628: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_628: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq 
$0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_629 subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_629 .L_small_initial_partial_block_629: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq 
%xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_629: orq %r8,%r8 je .L_after_reduction_629 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_629: jmp .L_last_blocks_done_609 .L_last_num_blocks_is_11_609: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_630 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_630 .L_16_blocks_overflow_630: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_630: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq 
%zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_631 subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_631 .L_small_initial_partial_block_631: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_631: orq %r8,%r8 je .L_after_reduction_631 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_631: jmp .L_last_blocks_done_609 .L_last_num_blocks_is_12_609: leaq byte64_len_to_mask_table(%rip),%r10 
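/*
 * Tail-block dispatch: each .L_last_num_blocks_is_<N>_609 label handles a
 * remainder of exactly N 16-byte blocks.  byte64_len_to_mask_table yields
 * the k1 load/store mask for the final partial 64-byte lane, the counter
 * blocks are generated (with a byte-swapped fallback path when the low
 * counter byte is about to wrap), AES-CTR processed with the key schedule
 * at (%rdi), XORed with the masked input, and the processed data is then
 * multiplied into the GHASH state using the hash-key powers stored at
 * (%rsi).  (Descriptive comment summarising the surrounding blocks,
 * inferred from the code.)
 */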
movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_632 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_632 .L_16_blocks_overflow_632: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_632: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_633 subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq 
$0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_633 .L_small_initial_partial_block_633: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_633: orq %r8,%r8 je .L_after_reduction_633 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_633: jmp .L_last_blocks_done_609 .L_last_num_blocks_is_13_609: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_634 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_634 .L_16_blocks_overflow_634: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb 
%xmm29,%xmm5,%xmm5 .L_16_blocks_ok_634: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_635 subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 
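/*
 * GHASH over the blocks just handled: each group of four blocks is
 * carry-less multiplied (vpclmulqdq with selectors 0x11/0x00/0x01/0x10) by
 * the matching pre-computed hash-key power loaded from the table at
 * (%rsi); the partial products are XOR-folded and finally reduced modulo
 * the GCM polynomial using the POLY2 constant.  (Descriptive comment.)
 */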
vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_635 .L_small_initial_partial_block_635: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_635: orq %r8,%r8 je .L_after_reduction_635 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_635: jmp .L_last_blocks_done_609 .L_last_num_blocks_is_14_609: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_636 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_636 .L_16_blocks_overflow_636: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd 
%zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_636: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl 
.L_small_initial_partial_block_637 subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_637 .L_small_initial_partial_block_637: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_637: orq %r8,%r8 je .L_after_reduction_637 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_637: jmp .L_last_blocks_done_609 .L_last_num_blocks_is_15_609: leaq 
byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_638 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_638 .L_16_blocks_overflow_638: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_638: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 
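/*
 * The encrypted counter blocks in zmm0/zmm3/zmm4/zmm5 have now been XORed
 * with the (partially masked) input; below, the result is stored through
 * the k1 mask, the final block is extracted into xmm11 for the
 * partial-block bookkeeping, and the input data is byte-reflected with the
 * shuffle mask in zmm29 before being folded into the GHASH accumulator.
 * (Descriptive comment; register roles inferred from the surrounding code.)
 */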
vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_639 subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_639 .L_small_initial_partial_block_639: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq 
$0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_639: orq %r8,%r8 je .L_after_reduction_639 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_639: jmp .L_last_blocks_done_609 .L_last_num_blocks_is_16_609: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_640 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_640 .L_16_blocks_overflow_640: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_640: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc 
%zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_641: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_641: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_641: jmp .L_last_blocks_done_609 .L_last_num_blocks_is_0_609: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq 
$0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_609: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_497 .L_message_below_equal_16_blocks_497: movl %r8d,%r12d addl $15,%r12d shrl $4,%r12d cmpq $8,%r12 je .L_small_initial_num_blocks_is_8_642 jl .L_small_initial_num_blocks_is_7_1_642 cmpq $12,%r12 je .L_small_initial_num_blocks_is_12_642 jl .L_small_initial_num_blocks_is_11_9_642 cmpq $16,%r12 je .L_small_initial_num_blocks_is_16_642 cmpq $15,%r12 je .L_small_initial_num_blocks_is_15_642 cmpq $14,%r12 je .L_small_initial_num_blocks_is_14_642 jmp .L_small_initial_num_blocks_is_13_642 .L_small_initial_num_blocks_is_11_9_642: cmpq $11,%r12 je .L_small_initial_num_blocks_is_11_642 cmpq $10,%r12 je .L_small_initial_num_blocks_is_10_642 jmp .L_small_initial_num_blocks_is_9_642 .L_small_initial_num_blocks_is_7_1_642: cmpq $4,%r12 je .L_small_initial_num_blocks_is_4_642 jl .L_small_initial_num_blocks_is_3_1_642 cmpq $7,%r12 je .L_small_initial_num_blocks_is_7_642 cmpq $6,%r12 je .L_small_initial_num_blocks_is_6_642 jmp .L_small_initial_num_blocks_is_5_642 .L_small_initial_num_blocks_is_3_1_642: cmpq $3,%r12 je .L_small_initial_num_blocks_is_3_642 cmpq $2,%r12 je .L_small_initial_num_blocks_is_2_642 .L_small_initial_num_blocks_is_1_642: vmovdqa64 SHUF_MASK(%rip),%xmm29 vpaddd ONE(%rip),%xmm2,%xmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm0,%xmm2 vpshufb %xmm29,%xmm0,%xmm0 vmovdqu8 0(%rcx,%r11,1),%xmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %xmm15,%xmm0,%xmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %xmm15,%xmm0,%xmm0 vpxorq %xmm6,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm6,%xmm6 vextracti32x4 $0,%zmm6,%xmm13 cmpq $16,%r8 jl .L_small_initial_partial_block_643 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq 
%ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_643 .L_small_initial_partial_block_643: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %xmm13,%xmm14,%xmm14 jmp .L_after_reduction_643 .L_small_initial_compute_done_643: .L_after_reduction_643: jmp .L_small_initial_blocks_encrypted_642 .L_small_initial_num_blocks_is_2_642: vmovdqa64 SHUF_MASK(%rip),%ymm29 vshufi64x2 $0,%ymm2,%ymm2,%ymm0 vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm0,%xmm2 vpshufb %ymm29,%ymm0,%ymm0 vmovdqu8 0(%rcx,%r11,1),%ymm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %ymm15,%ymm0,%ymm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %ymm15,%ymm0,%ymm0 vpxorq %ymm6,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm6,%ymm6 vextracti32x4 $1,%zmm6,%xmm13 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_644 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_644 .L_small_initial_partial_block_644: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq 
$8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_644: orq %r8,%r8 je .L_after_reduction_644 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_644: jmp .L_small_initial_blocks_encrypted_642 .L_small_initial_num_blocks_is_3_642: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vextracti32x4 $2,%zmm6,%xmm13 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_645 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_645 .L_small_initial_partial_block_645: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_645: orq %r8,%r8 je .L_after_reduction_645 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_645: jmp 
.L_small_initial_blocks_encrypted_642 .L_small_initial_num_blocks_is_4_642: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vextracti32x4 $3,%zmm6,%xmm13 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_646 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_646 .L_small_initial_partial_block_646: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_646: orq %r8,%r8 je .L_after_reduction_646 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_646: jmp .L_small_initial_blocks_encrypted_642 .L_small_initial_num_blocks_is_5_642: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 
$0,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%xmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %xmm15,%xmm3,%xmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %xmm15,%xmm3,%xmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %xmm7,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %xmm29,%xmm7,%xmm7 vextracti32x4 $0,%zmm7,%xmm13 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_647 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_647 .L_small_initial_partial_block_647: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_647: orq %r8,%r8 je 
.L_after_reduction_647 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_647: jmp .L_small_initial_blocks_encrypted_642 .L_small_initial_num_blocks_is_6_642: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%ymm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %ymm15,%ymm3,%ymm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %ymm15,%ymm3,%ymm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %ymm7,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %ymm29,%ymm7,%ymm7 vextracti32x4 $1,%zmm7,%xmm13 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_648 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_648 .L_small_initial_partial_block_648: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq 
%zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_648: orq %r8,%r8 je .L_after_reduction_648 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_648: jmp .L_small_initial_blocks_encrypted_642 .L_small_initial_num_blocks_is_7_642: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vextracti32x4 $2,%zmm7,%xmm13 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_649 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq 
$0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_649 .L_small_initial_partial_block_649: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_649: orq %r8,%r8 je .L_after_reduction_649 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_649: jmp .L_small_initial_blocks_encrypted_642 .L_small_initial_num_blocks_is_8_642: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vextracti32x4 $3,%zmm7,%xmm13 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_650 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq 
$0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_650 .L_small_initial_partial_block_650: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_650: orq %r8,%r8 je .L_after_reduction_650 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_650: jmp .L_small_initial_blocks_encrypted_642 .L_small_initial_num_blocks_is_9_642: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%xmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %xmm15,%xmm4,%xmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc 
%xmm15,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %xmm15,%xmm4,%xmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %xmm10,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %xmm29,%xmm10,%xmm10 vextracti32x4 $0,%zmm10,%xmm13 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_651 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_651 .L_small_initial_partial_block_651: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 
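/* The fold-and-reduce sequence above collapses the four 128-bit lanes of the
   accumulated carry-less products into one 128-bit value and reduces it modulo
   the GCM polynomial via the POLY2 constant, leaving the updated hash in %xmm14. */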
.L_small_initial_compute_done_651: orq %r8,%r8 je .L_after_reduction_651 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_651: jmp .L_small_initial_blocks_encrypted_642 .L_small_initial_num_blocks_is_10_642: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%ymm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %ymm15,%ymm4,%ymm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %ymm15,%ymm4,%ymm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %ymm10,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %ymm29,%ymm10,%ymm10 vextracti32x4 $1,%zmm10,%xmm13 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_652 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 
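/* %xmm20 now holds POLY2; the multiplies below perform the usual two-step
   reduction of the 256-bit product held in %xmm0 (high half) and %xmm3 (low half)
   back to a 128-bit value, which ends up in %xmm14 as the new hash state. */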
vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_652 .L_small_initial_partial_block_652: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_652: orq %r8,%r8 je .L_after_reduction_652 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_652: jmp .L_small_initial_blocks_encrypted_642 .L_small_initial_num_blocks_is_11_642: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 
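/* The 160(%rdi) round key broadcast next is used with vaesenclast, i.e. it is the
   eleventh and final round key, which is consistent with the AES-128 key schedule
   used by this code path; the finished keystream is then XORed with the data
   loaded earlier. */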
vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vextracti32x4 $2,%zmm10,%xmm13 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_653 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_653 .L_small_initial_partial_block_653: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_653: orq %r8,%r8 je 
.L_after_reduction_653 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_653: jmp .L_small_initial_blocks_encrypted_642 .L_small_initial_num_blocks_is_12_642: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vextracti32x4 $3,%zmm10,%xmm13 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_654 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 
vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_654 .L_small_initial_partial_block_654: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_654: orq %r8,%r8 je .L_after_reduction_654 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_654: jmp .L_small_initial_blocks_encrypted_642 .L_small_initial_num_blocks_is_13_642: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%xmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %xmm15,%xmm5,%xmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc 
%xmm15,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %xmm15,%xmm5,%xmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %xmm11,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %xmm29,%xmm11,%xmm11 vextracti32x4 $0,%zmm11,%xmm13 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_655 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_655 .L_small_initial_partial_block_655: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 
$1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_655: orq %r8,%r8 je .L_after_reduction_655 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_655: jmp .L_small_initial_blocks_encrypted_642 .L_small_initial_num_blocks_is_14_642: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%ymm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %ymm15,%ymm5,%ymm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %ymm15,%ymm5,%ymm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %ymm11,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %ymm29,%ymm11,%ymm11 vextracti32x4 $1,%zmm11,%xmm13 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_656 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq 
$0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_656 .L_small_initial_partial_block_656: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_656: orq %r8,%r8 je .L_after_reduction_656 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_656: jmp .L_small_initial_blocks_encrypted_642 .L_small_initial_num_blocks_is_15_642: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq 
byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vextracti32x4 $2,%zmm11,%xmm13 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_657 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 
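# The four partial products per block group (from the 0x00/0x11/0x01/0x10
# vpclmulqdq forms) are combined here: the cross terms in zmm4 are split with
# vpsrldq/vpslldq and XORed into the high (zmm0) and low (zmm3) halves, then
# each 512-bit accumulator is folded lane by lane (512 -> 256 -> 128 bits)
# ahead of the final POLY2 reduction.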
vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_657 .L_small_initial_partial_block_657: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_657: orq %r8,%r8 je .L_after_reduction_657 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_657: jmp .L_small_initial_blocks_encrypted_642 .L_small_initial_num_blocks_is_16_642: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc 
%zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vextracti32x4 $3,%zmm11,%xmm13 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_658: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_658: vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_658: .L_small_initial_blocks_encrypted_642: .L_ghash_done_497: vmovdqu64 %xmm2,0(%rsi) vmovdqu64 %xmm14,64(%rsi) .L_enc_dec_done_497: jmp .Lexit_gcm_decrypt .align 32 .Laes_gcm_decrypt_192_avx512: orq %r8,%r8 je .L_enc_dec_done_659 xorq %r14,%r14 
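# Entry of the AES-192 GCM decrypt path (.Laes_gcm_decrypt_192_avx512). The
# running GHASH value is reloaded from 64(%rsi) into xmm14, and any bytes left
# over from a previous partial block (count held at (%rdx)) are consumed first,
# using SHIFT_MASK alignment plus byte_len_to_mask_table-driven masked
# vmovdqu8 loads/stores so only the valid tail bytes are read and written.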
vmovdqu64 64(%rsi),%xmm14 movq (%rdx),%r11 orq %r11,%r11 je .L_partial_block_done_660 movl $16,%r10d leaq byte_len_to_mask_table(%rip),%r12 cmpq %r10,%r8 cmovcq %r8,%r10 kmovw (%r12,%r10,2),%k1 vmovdqu8 (%rcx),%xmm0{%k1}{z} vmovdqu64 16(%rsi),%xmm3 vmovdqu64 336(%rsi),%xmm4 leaq SHIFT_MASK(%rip),%r12 addq %r11,%r12 vmovdqu64 (%r12),%xmm5 vpshufb %xmm5,%xmm3,%xmm3 vmovdqa64 %xmm0,%xmm6 vpxorq %xmm0,%xmm3,%xmm3 leaq (%r8,%r11,1),%r13 subq $16,%r13 jge .L_no_extra_mask_660 subq %r13,%r12 .L_no_extra_mask_660: vmovdqu64 16(%r12),%xmm0 vpand %xmm0,%xmm3,%xmm3 vpand %xmm0,%xmm6,%xmm6 vpshufb SHUF_MASK(%rip),%xmm6,%xmm6 vpshufb %xmm5,%xmm6,%xmm6 vpxorq %xmm6,%xmm14,%xmm14 cmpq $0,%r13 jl .L_partial_incomplete_660 vpclmulqdq $0x11,%xmm4,%xmm14,%xmm7 vpclmulqdq $0x00,%xmm4,%xmm14,%xmm10 vpclmulqdq $0x01,%xmm4,%xmm14,%xmm11 vpclmulqdq $0x10,%xmm4,%xmm14,%xmm14 vpxorq %xmm11,%xmm14,%xmm14 vpsrldq $8,%xmm14,%xmm11 vpslldq $8,%xmm14,%xmm14 vpxorq %xmm11,%xmm7,%xmm7 vpxorq %xmm10,%xmm14,%xmm14 vmovdqu64 POLY2(%rip),%xmm11 vpclmulqdq $0x01,%xmm14,%xmm11,%xmm10 vpslldq $8,%xmm10,%xmm10 vpxorq %xmm10,%xmm14,%xmm14 vpclmulqdq $0x00,%xmm14,%xmm11,%xmm10 vpsrldq $4,%xmm10,%xmm10 vpclmulqdq $0x10,%xmm14,%xmm11,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm10,%xmm7,%xmm14 movq $0,(%rdx) movq %r11,%r12 movq $16,%r11 subq %r12,%r11 jmp .L_enc_dec_done_660 .L_partial_incomplete_660: addq %r8,(%rdx) movq %r8,%r11 .L_enc_dec_done_660: leaq byte_len_to_mask_table(%rip),%r12 kmovw (%r12,%r11,2),%k1 vmovdqu64 %xmm14,64(%rsi) movq %r9,%r12 vmovdqu8 %xmm3,(%r12){%k1} .L_partial_block_done_660: vmovdqu64 0(%rsi),%xmm2 subq %r11,%r8 je .L_enc_dec_done_659 cmpq $256,%r8 jbe .L_message_below_equal_16_blocks_659 vmovdqa64 SHUF_MASK(%rip),%zmm29 vmovdqa64 ddq_addbe_4444(%rip),%zmm27 vmovdqa64 ddq_addbe_1234(%rip),%zmm28 vmovd %xmm2,%r15d andl $255,%r15d vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpshufb %zmm29,%zmm2,%zmm2 cmpb $240,%r15b jae .L_next_16_overflow_661 vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_661 .L_next_16_overflow_661: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_661: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 0(%rcx,%r11,1),%zmm0 vmovdqu8 64(%rcx,%r11,1),%zmm3 vmovdqu8 128(%rcx,%r11,1),%zmm4 vmovdqu8 192(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc 
%zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 176(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 192(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,0(%r10,%r11,1) vmovdqu8 %zmm10,64(%r10,%r11,1) vmovdqu8 %zmm11,128(%r10,%r11,1) vmovdqu8 %zmm12,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm7 vpshufb %zmm29,%zmm3,%zmm10 vpshufb %zmm29,%zmm4,%zmm11 vpshufb %zmm29,%zmm5,%zmm12 vmovdqa64 %zmm7,768(%rsp) vmovdqa64 %zmm10,832(%rsp) vmovdqa64 %zmm11,896(%rsp) vmovdqa64 %zmm12,960(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_662 vmovdqu64 288(%rsi),%zmm0 vmovdqu64 %zmm0,704(%rsp) vmovdqu64 224(%rsi),%zmm3 vmovdqu64 %zmm3,640(%rsp) vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 160(%rsi),%zmm4 vmovdqu64 %zmm4,576(%rsp) vmovdqu64 96(%rsi),%zmm5 vmovdqu64 %zmm5,512(%rsp) .L_skip_hkeys_precomputation_662: cmpq $512,%r8 jb .L_message_below_32_blocks_659 cmpb $240,%r15b jae .L_next_16_overflow_663 vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_663 .L_next_16_overflow_663: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_663: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 256(%rcx,%r11,1),%zmm0 vmovdqu8 320(%rcx,%r11,1),%zmm3 vmovdqu8 384(%rcx,%r11,1),%zmm4 vmovdqu8 448(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 
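# By this point the first 16 counter blocks have been decrypted and their
# byte-reflected ciphertext spilled to 768..960(%rsp) for deferred GHASH. The
# surrounding code pushes the second batch of 16 blocks (input offsets
# 256..448) through the remaining AES-192 rounds, and on the first pass
# (%r14 == 0) it appears to extend the on-stack table of hash-key powers
# (up to H^48) used by the 48-blocks-per-iteration main loop below.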
vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 176(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 192(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,256(%r10,%r11,1) vmovdqu8 %zmm10,320(%r10,%r11,1) vmovdqu8 %zmm11,384(%r10,%r11,1) vmovdqu8 %zmm12,448(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm7 vpshufb %zmm29,%zmm3,%zmm10 vpshufb %zmm29,%zmm4,%zmm11 vpshufb %zmm29,%zmm5,%zmm12 vmovdqa64 %zmm7,1024(%rsp) vmovdqa64 %zmm10,1088(%rsp) vmovdqa64 %zmm11,1152(%rsp) vmovdqa64 %zmm12,1216(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_664 vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq 
$4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,192(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,128(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,64(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,0(%rsp) .L_skip_hkeys_precomputation_664: movq $1,%r14 addq $512,%r11 subq $512,%r8 cmpq $768,%r8 jb .L_no_more_big_nblocks_659 .L_encrypt_big_nblocks_659: cmpb $240,%r15b jae .L_16_blocks_overflow_665 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_665 .L_16_blocks_overflow_665: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_665: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc 
%zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_666 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_666 .L_16_blocks_overflow_666: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_666: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 
256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) cmpb $240,%r15b jae 
.L_16_blocks_overflow_667 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_667 .L_16_blocks_overflow_667: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_667: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 512(%rcx,%r11,1),%zmm17 vmovdqu8 576(%rcx,%r11,1),%zmm19 vmovdqu8 640(%rcx,%r11,1),%zmm20 vmovdqu8 704(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpternlogq $0x96,%zmm15,%zmm12,%zmm6 vpxorq %zmm24,%zmm6,%zmm6 vpternlogq $0x96,%zmm10,%zmm13,%zmm7 vpxorq %zmm25,%zmm7,%zmm7 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vextracti64x4 $1,%zmm6,%ymm12 vpxorq %ymm12,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm12 vpxorq %xmm12,%xmm6,%xmm6 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc 
%zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm6 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,512(%r10,%r11,1) vmovdqu8 %zmm3,576(%r10,%r11,1) vmovdqu8 %zmm4,640(%r10,%r11,1) vmovdqu8 %zmm5,704(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1024(%rsp) vmovdqa64 %zmm3,1088(%rsp) vmovdqa64 %zmm4,1152(%rsp) vmovdqa64 %zmm5,1216(%rsp) vmovdqa64 %zmm6,%zmm14 addq $768,%r11 subq $768,%r8 cmpq $768,%r8 jae .L_encrypt_big_nblocks_659 .L_no_more_big_nblocks_659: cmpq $512,%r8 jae .L_encrypt_32_blocks_659 cmpq $256,%r8 jae .L_encrypt_16_blocks_659 .L_encrypt_0_blocks_ghash_32_659: movl %r8d,%r10d andl $~15,%r10d movl $256,%ebx subl %r10d,%ebx vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 addl $256,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_668 cmpl $8,%r10d je .L_last_num_blocks_is_8_668 jb .L_last_num_blocks_is_7_1_668 cmpl $12,%r10d je .L_last_num_blocks_is_12_668 jb .L_last_num_blocks_is_11_9_668 cmpl $15,%r10d je .L_last_num_blocks_is_15_668 ja .L_last_num_blocks_is_16_668 cmpl $14,%r10d je .L_last_num_blocks_is_14_668 jmp .L_last_num_blocks_is_13_668 .L_last_num_blocks_is_11_9_668: cmpl $10,%r10d je .L_last_num_blocks_is_10_668 ja .L_last_num_blocks_is_11_668 jmp .L_last_num_blocks_is_9_668 .L_last_num_blocks_is_7_1_668: cmpl $4,%r10d je .L_last_num_blocks_is_4_668 jb .L_last_num_blocks_is_3_1_668 cmpl $6,%r10d ja .L_last_num_blocks_is_7_668 je .L_last_num_blocks_is_6_668 jmp .L_last_num_blocks_is_5_668 .L_last_num_blocks_is_3_1_668: cmpl $2,%r10d ja .L_last_num_blocks_is_3_668 je .L_last_num_blocks_is_2_668 .L_last_num_blocks_is_1_668: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq 
(%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_669 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_669 .L_16_blocks_overflow_669: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_669: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_670 subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_670 .L_small_initial_partial_block_670: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq 
$8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_670 .L_small_initial_compute_done_670: .L_after_reduction_670: jmp .L_last_blocks_done_668 .L_last_num_blocks_is_2_668: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_671 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_671 .L_16_blocks_overflow_671: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_671: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_672 subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 
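# Two-block tail: the last ciphertext blocks (ymm17) are multiplied by the
# H^2:H^1 pair loaded from 320(%rsi), merged with the running GHASH
# accumulators (zmm24 = high, zmm25 = low, zmm26 = middle products), folded
# down to 128 bits, and reduced with POLY2 into xmm14.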
vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_672 .L_small_initial_partial_block_672: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_672: orq %r8,%r8 je .L_after_reduction_672 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_672: jmp .L_last_blocks_done_668 .L_last_num_blocks_is_3_668: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_673 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_673 .L_16_blocks_overflow_673: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_673: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_674 subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_674 .L_small_initial_partial_block_674: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_674: orq %r8,%r8 je .L_after_reduction_674 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_674: jmp .L_last_blocks_done_668 .L_last_num_blocks_is_4_668: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_675 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_675 .L_16_blocks_overflow_675: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_675: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 
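# Counter-block setup for this tail path: %r15d appears to track the low byte
# of the big-endian counter, so the cmpl/jae above takes the fast path (one
# big-endian vpaddd with the increments in zmm28) only when adding the next
# block numbers cannot carry out of that byte; the overflow label instead
# byte-swaps with the shuffle mask in zmm29, adds ddq_add_1234 in
# little-endian form and swaps back.  The vpxorq with the broadcast key at
# 0(%rdi) is the initial AddRoundKey of the CTR encryption.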
vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_676 subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_676 .L_small_initial_partial_block_676: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 
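# Horizontal fold of the GHASH product: the 512-bit high and low halves are
# collapsed to 128 bits by repeated vextracti64x4/vextracti32x4 plus vpxorq
# (512 -> 256 -> 128) before the polynomial reduction with POLY2 that follows.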
vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_676: orq %r8,%r8 je .L_after_reduction_676 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_676: jmp .L_last_blocks_done_668 .L_last_num_blocks_is_5_668: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_677 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_677 .L_16_blocks_overflow_677: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_677: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 
%xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_678 subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_678 .L_small_initial_partial_block_678: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_678: orq %r8,%r8 je .L_after_reduction_678 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_678: jmp .L_last_blocks_done_668 .L_last_num_blocks_is_6_668: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_679 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_679 .L_16_blocks_overflow_679: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_679: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 
48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_680 subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_680 .L_small_initial_partial_block_680: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq 
$0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_680: orq %r8,%r8 je .L_after_reduction_680 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_680: jmp .L_last_blocks_done_668 .L_last_num_blocks_is_7_668: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_681 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_681 .L_16_blocks_overflow_681: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_681: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 
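# In these tail handlers the AES-CTR rounds on the counter blocks are
# interleaved with the GHASH of the previous 16 ciphertext blocks saved on
# the stack at (%rsp,%rbx), each round key being broadcast to every 128-bit
# lane with vbroadcastf64x2.  Key offsets 0..192(%rdi) with vaesenclast on
# the 192(%rdi) key suggest this copy of the tail code is the 12-round
# (AES-192) variant.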
vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_682 subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_682 .L_small_initial_partial_block_682: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_682: orq %r8,%r8 je .L_after_reduction_682 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_682: jmp .L_last_blocks_done_668 .L_last_num_blocks_is_8_668: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_683 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_683 
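# %r8 holds the number of message bytes left in this call; subtracting the
# bytes covered by the preceding full 64-byte groups leaves an index into
# byte64_len_to_mask_table, whose entry is loaded into k1 and used as a byte
# mask so only the valid tail bytes are read ({%k1}{z} loads) and written
# ({%k1} stores).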
.L_16_blocks_overflow_683: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_683: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_684 subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq 
%zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_684 .L_small_initial_partial_block_684: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_684: orq %r8,%r8 je .L_after_reduction_684 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_684: jmp .L_last_blocks_done_668 .L_last_num_blocks_is_9_668: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_685 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_685 .L_16_blocks_overflow_685: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_685: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 
64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_686 subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 
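# Second phase of the GHASH reduction: after the 0x01 product folded in the
# lowest 64 bits, the 0x00 and 0x10 products against the POLY2 constant
# (shifted with vpsrldq/vpslldq $4) complete the reduction modulo the GCM
# polynomial, and vpternlogq with immediate 0x96 (a three-way XOR) merges the
# result into the hash accumulator xmm14.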
vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_686 .L_small_initial_partial_block_686: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_686: orq %r8,%r8 je .L_after_reduction_686 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_686: jmp .L_last_blocks_done_668 .L_last_num_blocks_is_10_668: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_687 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_687 .L_16_blocks_overflow_687: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_687: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 
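# When the leftover block count is not a multiple of four, the last counter
# group runs at reduced width; in this handler ymm4 carries the final pair of
# counter blocks and goes through the same vpxorq/vaesenc sequence as the
# full zmm groups, just with the ymm forms of the instructions.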
vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_688 subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_688 .L_small_initial_partial_block_688: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq 
$0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_688: orq %r8,%r8 je .L_after_reduction_688 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_688: jmp .L_last_blocks_done_668 .L_last_num_blocks_is_11_668: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_689 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_689 .L_16_blocks_overflow_689: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_689: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq 
$0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_690 subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_690 .L_small_initial_partial_block_690: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq 
$0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_690: orq %r8,%r8 je .L_after_reduction_690 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_690: jmp .L_last_blocks_done_668 .L_last_num_blocks_is_12_668: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_691 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_691 .L_16_blocks_overflow_691: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_691: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 
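# Each .L_last_num_blocks_is_N handler multiplies its N byte-swapped
# ciphertext blocks by H^N..H^1.  The powers appear to be laid out in the
# context with H^1 at 336(%rsi) and higher powers at lower offsets, so the
# first key vector loaded below (160(%rsi) in this 12-block case) supplies
# the highest powers and 288(%rsi) the lowest full group.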
vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_692 subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_692 .L_small_initial_partial_block_692: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 
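# The combined middle term of the carry-less multiply (zmm4) is split with
# vpsrldq/vpslldq $8 and XORed into the high (zmm0) and low (zmm3) halves to
# form the full 256-bit product per lane before the final fold and the POLY2
# reduction.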
vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_692: orq %r8,%r8 je .L_after_reduction_692 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_692: jmp .L_last_blocks_done_668 .L_last_num_blocks_is_13_668: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_693 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_693 .L_16_blocks_overflow_693: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_693: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc 
%xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_694 subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_694 .L_small_initial_partial_block_694: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq 
$0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_694: orq %r8,%r8 je .L_after_reduction_694 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_694: jmp .L_last_blocks_done_668 .L_last_num_blocks_is_14_668: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_695 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_695 .L_16_blocks_overflow_695: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_695: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 
64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_696 subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq 
$0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_696 .L_small_initial_partial_block_696: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_696: orq %r8,%r8 je .L_after_reduction_696 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_696: jmp .L_last_blocks_done_668 .L_last_num_blocks_is_15_668: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_697 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_697 .L_16_blocks_overflow_697: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_697: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc 
%zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_698 subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq 
$0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_698 .L_small_initial_partial_block_698: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_698: orq %r8,%r8 je .L_after_reduction_698 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_698: jmp .L_last_blocks_done_668 .L_last_num_blocks_is_16_668: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_699 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_699 .L_16_blocks_overflow_699: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_699: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 
1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_700: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_700: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_700: jmp .L_last_blocks_done_668 .L_last_num_blocks_is_0_668: vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_668: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_659 
.L_encrypt_32_blocks_659: cmpb $240,%r15b jae .L_16_blocks_overflow_701 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_701 .L_16_blocks_overflow_701: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_701: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast 
%zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_702 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_702 .L_16_blocks_overflow_702: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_702: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc 
%zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 subq $512,%r8 addq $512,%r11 movl %r8d,%r10d andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_703 cmpl $8,%r10d je .L_last_num_blocks_is_8_703 jb .L_last_num_blocks_is_7_1_703 cmpl $12,%r10d je .L_last_num_blocks_is_12_703 jb .L_last_num_blocks_is_11_9_703 cmpl $15,%r10d je .L_last_num_blocks_is_15_703 ja .L_last_num_blocks_is_16_703 cmpl $14,%r10d je .L_last_num_blocks_is_14_703 jmp .L_last_num_blocks_is_13_703 .L_last_num_blocks_is_11_9_703: cmpl $10,%r10d je .L_last_num_blocks_is_10_703 ja .L_last_num_blocks_is_11_703 jmp .L_last_num_blocks_is_9_703 .L_last_num_blocks_is_7_1_703: cmpl $4,%r10d je .L_last_num_blocks_is_4_703 jb .L_last_num_blocks_is_3_1_703 cmpl $6,%r10d ja 
.L_last_num_blocks_is_7_703 je .L_last_num_blocks_is_6_703 jmp .L_last_num_blocks_is_5_703 .L_last_num_blocks_is_3_1_703: cmpl $2,%r10d ja .L_last_num_blocks_is_3_703 je .L_last_num_blocks_is_2_703 .L_last_num_blocks_is_1_703: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_704 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_704 .L_16_blocks_overflow_704: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_704: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_705 subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq 
$4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_705 .L_small_initial_partial_block_705: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_705 .L_small_initial_compute_done_705: .L_after_reduction_705: jmp .L_last_blocks_done_703 .L_last_num_blocks_is_2_703: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_706 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_706 .L_16_blocks_overflow_706: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_706: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl 
.L_small_initial_partial_block_707 subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_707 .L_small_initial_partial_block_707: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_707: orq %r8,%r8 je .L_after_reduction_707 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_707: jmp .L_last_blocks_done_703 .L_last_num_blocks_is_3_703: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_708 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_708 .L_16_blocks_overflow_708: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_708: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 
vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_709 subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_709 .L_small_initial_partial_block_709: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_709: orq %r8,%r8 je .L_after_reduction_709 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_709: jmp .L_last_blocks_done_703 .L_last_num_blocks_is_4_703: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_710 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_710 .L_16_blocks_overflow_710: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_710: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 
768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_711 subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_711 .L_small_initial_partial_block_711: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq 
$8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_711: orq %r8,%r8 je .L_after_reduction_711 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_711: jmp .L_last_blocks_done_703 .L_last_num_blocks_is_5_703: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_712 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_712 .L_16_blocks_overflow_712: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_712: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast 
%xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_713 subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_713 .L_small_initial_partial_block_713: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_713: orq %r8,%r8 je .L_after_reduction_713 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_713: jmp .L_last_blocks_done_703 .L_last_num_blocks_is_6_703: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_714 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_714 .L_16_blocks_overflow_714: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_714: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq 
$0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_715 subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_715 .L_small_initial_partial_block_715: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_715: orq %r8,%r8 je .L_after_reduction_715 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_715: jmp .L_last_blocks_done_703 .L_last_num_blocks_is_7_703: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_716 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_716 .L_16_blocks_overflow_716: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_716: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 
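/*
 * Note (register roles inferred from the surrounding code): the vpclmulqdq
 * immediates 0x00, 0x11, 0x01 and 0x10 select low*low, high*high and the two
 * cross 64-bit carry-less products of each block of deferred data (held on
 * the stack) against what appear to be stack-cached powers of the hash key,
 * and vpternlogq with immediate 0x96 is a three-way XOR that folds pairs of
 * those products in one instruction.  The running sums are gathered in
 * %zmm24 (high), %zmm25 (low) and %zmm26 (middle) and are reduced to a
 * single 128-bit value further down.
 */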
vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_717 subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_717 .L_small_initial_partial_block_717: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_717: orq %r8,%r8 je .L_after_reduction_717 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_717: jmp .L_last_blocks_done_703 .L_last_num_blocks_is_8_703: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_718 vpaddd 
%zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_718 .L_16_blocks_overflow_718: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_718: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_719 subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq 
%zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_719 .L_small_initial_partial_block_719: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_719: orq %r8,%r8 je .L_after_reduction_719 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_719: jmp .L_last_blocks_done_703 .L_last_num_blocks_is_9_703: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_720 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_720 .L_16_blocks_overflow_720: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_720: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 
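/*
 * Note: %rdi appears to point at the expanded AES key schedule; each round
 * key is broadcast with vbroadcastf64x2 into %zmm30/%zmm31 and applied with
 * vaesenc to the counter-block groups, here %zmm0, %zmm3 and the single
 * trailing block in %xmm4 for this 9-block tail.  The AES rounds are
 * interleaved with the GHASH carry-less multiplies so the two instruction
 * streams overlap instead of running back to back.
 */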
vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_721 subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq 
$0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_721 .L_small_initial_partial_block_721: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_721: orq %r8,%r8 je .L_after_reduction_721 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_721: jmp .L_last_blocks_done_703 .L_last_num_blocks_is_10_703: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_722 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_722 .L_16_blocks_overflow_722: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_722: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 
96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_723 subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_723 .L_small_initial_partial_block_723: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 
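/*
 * Note: this .L_small_initial_partial_block_* branch hashes only the
 * complete blocks, so its table reads start 16 bytes further into the
 * hash-key power table than the full-block branch above (the powers appear
 * to be stored contiguously in the context at %rsi, with H itself at
 * 336(%rsi) and higher powers at lower offsets).  The trailing, possibly
 * partial block is deferred: its remaining length and the state needed to
 * finish it were just written through %rdx and to 16(%rsi).
 */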
vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_723: orq %r8,%r8 je .L_after_reduction_723 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_723: jmp .L_last_blocks_done_703 .L_last_num_blocks_is_11_703: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_724 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_724 .L_16_blocks_overflow_724: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_724: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq 
$0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_725 subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_725 .L_small_initial_partial_block_725: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq 
%zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_725: orq %r8,%r8 je .L_after_reduction_725 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_725: jmp .L_last_blocks_done_703 .L_last_num_blocks_is_12_703: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_726 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_726 .L_16_blocks_overflow_726: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_726: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq 
%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_727 subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_727 .L_small_initial_partial_block_727: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 
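/*
 * Note: this is the common GHASH tail.  The middle-product sum is split with
 * vpsrldq/vpslldq by 8 and XORed into the high and low sums, the four
 * 128-bit lanes of each sum are then folded together with
 * vextracti64x4/vextracti32x4 plus XORs, and the result is reduced modulo
 * the GHASH polynomial by a short sequence of carry-less multiplies against
 * the POLY2 constant.  The reduced value lands in %xmm14, which appears to
 * serve as the running hash accumulator.
 */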
vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_727: orq %r8,%r8 je .L_after_reduction_727 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_727: jmp .L_last_blocks_done_703 .L_last_num_blocks_is_13_703: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_728 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_728 .L_16_blocks_overflow_728: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_728: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq 
%zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_729 subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_729 .L_small_initial_partial_block_729: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 
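/*
 * Note (inferred): each .L_last_num_blocks_is_N_* handler begins by turning
 * the residual byte count in %r8 into a lane mask; byte64_len_to_mask_table
 * appears to map a 0..64 byte length to a 64-bit mask which kmovq places in
 * %k1, so the final vmovdqu8 loads and stores only touch live bytes.  The
 * cmpl $(256 - N),%r15d / jae test next to it appears to guard the counter's
 * low byte: if adding N block increments would wrap it, the overflow path
 * byte-swaps the counter with %zmm29, adds ddq_add_1234 / ddq_add_4444 and
 * swaps back, otherwise the pre-swapped increments in %zmm27/%zmm28 are
 * added directly.
 */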
vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_729: orq %r8,%r8 je .L_after_reduction_729 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_729: jmp .L_last_blocks_done_703 .L_last_num_blocks_is_14_703: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_730 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_730 .L_16_blocks_overflow_730: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_730: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_731 subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_731 .L_small_initial_partial_block_731: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) 
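# Partial-block path: the remaining byte count was just stored at (%rdx) and what appears to be the last
# (partial) ciphertext block at 16(%rsi). The same folding is now repeated with the hash-key powers shifted
# by one 16-byte slot (144/208/272/336(%rsi) instead of 128/192/256/320), presumably because the buffered
# partial block will be hashed on a later call.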
vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_731: orq %r8,%r8 je .L_after_reduction_731 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_731: jmp .L_last_blocks_done_703 .L_last_num_blocks_is_15_703: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_732 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_732 .L_16_blocks_overflow_732: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_732: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 
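# The vaesenc rounds on the fresh counter blocks are interleaved with vpclmulqdq multiplies over data
# buffered on the stack from the preceding 16-block pass, presumably to overlap the latencies of the AES
# and carry-less-multiply units.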
vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_733 subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq 
$8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_733 .L_small_initial_partial_block_733: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_733: orq %r8,%r8 je .L_after_reduction_733 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_733: jmp .L_last_blocks_done_703 .L_last_num_blocks_is_16_703: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_734 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_734 .L_16_blocks_overflow_734: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_734: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 
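# Schoolbook GF(2^128) multiply: vpclmulqdq immediates 0x11 and 0x00 select the high*high and low*low
# 64-bit halves, 0x01 and 0x10 the two cross products; the four terms are later recombined with
# vpslldq/vpsrldq + XOR and reduced modulo the GCM polynomial.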
vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_735: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 
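# Note: the 16-block tail has no separate full-block branch; it always records the remainder at (%rdx) and
# what appears to be the last ciphertext block at 16(%rsi) before hashing. The 512-bit accumulators are then
# folded down to 128 bits (vextracti64x4/vextracti32x4 + XOR) and reduced via multiplies by the POLY2 constant.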
vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_735: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_735: jmp .L_last_blocks_done_703 .L_last_num_blocks_is_0_703: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_703: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_659 .L_encrypt_16_blocks_659: cmpb $240,%r15b jae .L_16_blocks_overflow_736 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_736 .L_16_blocks_overflow_736: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 
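# Counter handling: r15b appears to track the low byte of the 32-bit counter. If adding 16 would carry out
# of that byte (cmpb $240), the overflow path byte-swaps the counter with the mask in zmm29, performs the
# additions with ddq_add_1234 / ddq_add_4444 so the carry propagates, and swaps back; otherwise a plain
# vpaddd with the preloaded constants in zmm28/zmm27 suffices.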
vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_736: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb 
%zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 256(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 320(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 384(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 448(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 subq $256,%r8 addq $256,%r11 movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_737 cmpl $8,%r10d je .L_last_num_blocks_is_8_737 jb .L_last_num_blocks_is_7_1_737 cmpl $12,%r10d je .L_last_num_blocks_is_12_737 jb .L_last_num_blocks_is_11_9_737 cmpl $15,%r10d je .L_last_num_blocks_is_15_737 ja .L_last_num_blocks_is_16_737 cmpl $14,%r10d je .L_last_num_blocks_is_14_737 jmp .L_last_num_blocks_is_13_737 .L_last_num_blocks_is_11_9_737: cmpl $10,%r10d je .L_last_num_blocks_is_10_737 ja .L_last_num_blocks_is_11_737 jmp .L_last_num_blocks_is_9_737 .L_last_num_blocks_is_7_1_737: cmpl $4,%r10d je .L_last_num_blocks_is_4_737 jb .L_last_num_blocks_is_3_1_737 cmpl $6,%r10d ja .L_last_num_blocks_is_7_737 je .L_last_num_blocks_is_6_737 jmp .L_last_num_blocks_is_5_737 .L_last_num_blocks_is_3_1_737: cmpl $2,%r10d ja .L_last_num_blocks_is_3_737 je .L_last_num_blocks_is_2_737 .L_last_num_blocks_is_1_737: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_738 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_738 .L_16_blocks_overflow_738: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_738: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 
80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %xmm31,%xmm0,%xmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_739 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_739 .L_small_initial_partial_block_739: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_739 .L_small_initial_compute_done_739: .L_after_reduction_739: jmp .L_last_blocks_done_737 .L_last_num_blocks_is_2_737: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_740 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_740 .L_16_blocks_overflow_740: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_740: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 
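# Each 1..16-block tail handler widens the counter only as far as needed (xmm for one block, ymm for two,
# zmm above that) and whitens it with AES round key 0 before the vaesenc chain; the byte mask in k1, taken
# from byte64_len_to_mask_table, limits the final load and store to the valid bytes.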
vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %ymm31,%ymm0,%ymm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_741 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_741 .L_small_initial_partial_block_741: movq %r8,(%rdx) vmovdqu64 
%xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_741: orq %r8,%r8 je .L_after_reduction_741 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_741: jmp .L_last_blocks_done_737 .L_last_num_blocks_is_3_737: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_742 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_742 .L_16_blocks_overflow_742: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_742: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 
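# The final GHASH reduction is interleaved with the last AES rounds: xmm16 holds the POLY2 reduction
# constant loaded above, and the vpclmulqdq/shift steps below perform the usual two-stage reduction of the
# 256-bit product back to 128 bits.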
vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_743 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_743 .L_small_initial_partial_block_743: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_743: orq %r8,%r8 je .L_after_reduction_743 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_743: jmp .L_last_blocks_done_737 .L_last_num_blocks_is_4_737: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_744 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_744 .L_16_blocks_overflow_744: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_744: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_745 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_745 .L_small_initial_partial_block_745: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq 
%zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_745: orq %r8,%r8 je .L_after_reduction_745 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_745: jmp .L_last_blocks_done_737 .L_last_num_blocks_is_5_737: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_746 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_746 .L_16_blocks_overflow_746: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_746: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 
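# Handlers for 5..8 remaining blocks process a second counter vector (xmm3/ymm3/zmm3) alongside zmm0 and
# index byte64_len_to_mask_table with (length - 64), since the first 64 bytes are handled unmasked.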
vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_747 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_747 .L_small_initial_partial_block_747: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_747: orq %r8,%r8 je .L_after_reduction_747 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_747: jmp .L_last_blocks_done_737 .L_last_num_blocks_is_6_737: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_748 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_748 .L_16_blocks_overflow_748: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 
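# In these tail handlers, after the masked store the loaded input is re-masked with {%k1}{z}
# (vmovdqu8 reg,reg{k1}{z}) so that bytes beyond the message length are zeroed before the block is
# byte-reflected and fed into GHASH.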
vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_748: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_749 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq 
$0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_749 .L_small_initial_partial_block_749: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_749: orq %r8,%r8 je .L_after_reduction_749 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_749: jmp .L_last_blocks_done_737 .L_last_num_blocks_is_7_737: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_750 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_750 .L_16_blocks_overflow_750: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_750: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq 
$0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_751 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq 
$0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_751 .L_small_initial_partial_block_751: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_751: orq %r8,%r8 je .L_after_reduction_751 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_751: jmp .L_last_blocks_done_737 .L_last_num_blocks_is_8_737: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_752 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_752 .L_16_blocks_overflow_752: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_752: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 
112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_753 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_753 .L_small_initial_partial_block_753: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq 
%zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_753: orq %r8,%r8 je .L_after_reduction_753 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_753: jmp .L_last_blocks_done_737 .L_last_num_blocks_is_9_737: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_754 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_754 .L_16_blocks_overflow_754: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_754: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_755 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_755 .L_small_initial_partial_block_755: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq 
%zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_755: orq %r8,%r8 je .L_after_reduction_755 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_755: jmp .L_last_blocks_done_737 .L_last_num_blocks_is_10_737: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_756 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_756 .L_16_blocks_overflow_756: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_756: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 
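# Tail handling: each .L_last_num_blocks_is_N_737 branch (N = 1..16) follows the
# same shape. A byte mask for the final sub-64-byte chunk is looked up in
# byte64_len_to_mask_table into %k1, N counter blocks are built from %zmm2 (with a
# byte-swapped slow path taken when the low counter byte would wrap), and the AES
# rounds on those blocks are interleaved with the GHASH of the blocks saved at
# 1280..1472(%rsp) against the hash-key powers cached at 512..704(%rsp), ending in
# the POLY2 reduction that leaves the running hash in %xmm14.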
vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_757 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_757 .L_small_initial_partial_block_757: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq 
%zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_757: orq %r8,%r8 je .L_after_reduction_757 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_757: jmp .L_last_blocks_done_737 .L_last_num_blocks_is_11_737: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_758 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_758 .L_16_blocks_overflow_758: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_758: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 
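# The carry-less multiplies here accumulate the GHASH partial products with
# vpternlogq: high halves into %zmm14, low halves into %zmm7 and the two middle
# terms into %zmm11/%zmm10 (together with what appear to be running sums carried in
# %zmm24..%zmm26), while the vaesenc chain keeps advancing the counter groups
# %zmm0/%zmm3/%zmm4 in lock-step.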
vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_759 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_759 .L_small_initial_partial_block_759: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 
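# Two epilogues per tail size: the full-block path zeroes the stored byte count at
# (%rdx) and hashes every block just produced, whereas .L_small_initial_partial_block_*
# stores the residual length to (%rdx), saves the last counter block (%xmm11) to
# 16(%rsi) and hashes only the completed blocks; the zero-padded partial block is
# then only XORed into %xmm14 after the reduction (its multiply by H is left to the
# partial-block handling of a later call).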
vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_759: orq %r8,%r8 je .L_after_reduction_759 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_759: jmp .L_last_blocks_done_737 .L_last_num_blocks_is_12_737: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_760 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_760 .L_16_blocks_overflow_760: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_760: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 
64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_761 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq 
$0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_761 .L_small_initial_partial_block_761: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_761: orq %r8,%r8 je .L_after_reduction_761 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_761: jmp .L_last_blocks_done_737 .L_last_num_blocks_is_13_737: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_762 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_762 .L_16_blocks_overflow_762: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_762: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 
vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_763 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 
272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_763 .L_small_initial_partial_block_763: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_763: orq %r8,%r8 je .L_after_reduction_763 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_763: jmp .L_last_blocks_done_737 .L_last_num_blocks_is_14_737: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_764 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_764 .L_16_blocks_overflow_764: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_764: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 
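# 14-block tail: counters are ready in %zmm0/%zmm3/%zmm4/%ymm5 and the last counter
# value has been broadcast back into %zmm2 as the running CTR state; round keys are
# re-broadcast from (%rdi) as needed so all four groups are encrypted in lock-step
# with the GHASH multiplies.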
vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 
vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_765 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_765 .L_small_initial_partial_block_765: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq 
%ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_765: orq %r8,%r8 je .L_after_reduction_765 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_765: jmp .L_last_blocks_done_737 .L_last_num_blocks_is_15_737: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_766 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_766 .L_16_blocks_overflow_766: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_766: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 
vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_767 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_767 
.L_small_initial_partial_block_767: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_767: orq %r8,%r8 je .L_after_reduction_767 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_767: jmp .L_last_blocks_done_737 .L_last_num_blocks_is_16_737: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_768 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_768 .L_16_blocks_overflow_768: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_768: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_769: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 
vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_769: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_769: jmp .L_last_blocks_done_737 .L_last_num_blocks_is_0_737: vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_737: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_659 .L_message_below_32_blocks_659: subq $256,%r8 addq $256,%r11 movl %r8d,%r10d testq %r14,%r14 jnz .L_skip_hkeys_precomputation_770 vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq 
$0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .L_skip_hkeys_precomputation_770: movq $1,%r14 andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_771 cmpl $8,%r10d je .L_last_num_blocks_is_8_771 jb .L_last_num_blocks_is_7_1_771 cmpl $12,%r10d je .L_last_num_blocks_is_12_771 jb .L_last_num_blocks_is_11_9_771 cmpl $15,%r10d je .L_last_num_blocks_is_15_771 ja .L_last_num_blocks_is_16_771 cmpl $14,%r10d je .L_last_num_blocks_is_14_771 jmp .L_last_num_blocks_is_13_771 .L_last_num_blocks_is_11_9_771: cmpl $10,%r10d je .L_last_num_blocks_is_10_771 ja .L_last_num_blocks_is_11_771 jmp .L_last_num_blocks_is_9_771 .L_last_num_blocks_is_7_1_771: cmpl $4,%r10d je .L_last_num_blocks_is_4_771 jb .L_last_num_blocks_is_3_1_771 cmpl $6,%r10d ja .L_last_num_blocks_is_7_771 je .L_last_num_blocks_is_6_771 jmp .L_last_num_blocks_is_5_771 .L_last_num_blocks_is_3_1_771: cmpl $2,%r10d ja .L_last_num_blocks_is_3_771 je .L_last_num_blocks_is_2_771 .L_last_num_blocks_is_1_771: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_772 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_772 .L_16_blocks_overflow_772: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_772: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 
$0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_773 subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_773 .L_small_initial_partial_block_773: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq 
%xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_773 .L_small_initial_compute_done_773: .L_after_reduction_773: jmp .L_last_blocks_done_771 .L_last_num_blocks_is_2_771: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_774 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_774 .L_16_blocks_overflow_774: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_774: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_775 subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 
POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_775 .L_small_initial_partial_block_775: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_775: orq %r8,%r8 je .L_after_reduction_775 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_775: jmp .L_last_blocks_done_771 .L_last_num_blocks_is_3_771: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_776 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_776 .L_16_blocks_overflow_776: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_776: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc 
%zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_777 subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_777 .L_small_initial_partial_block_777: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_777: orq %r8,%r8 je .L_after_reduction_777 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_777: jmp .L_last_blocks_done_771 .L_last_num_blocks_is_4_771: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_778 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_778 .L_16_blocks_overflow_778: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_778: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq 
$0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_779 subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_779 .L_small_initial_partial_block_779: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_779: orq %r8,%r8 je .L_after_reduction_779 
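# A non-zero %r8 at this point means the tail ended in a partial block whose byte-reflected
# contents (%xmm7) still need to be XORed into the GHASH accumulator %xmm14; a complete
# final block was already hashed above.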
vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_779: jmp .L_last_blocks_done_771 .L_last_num_blocks_is_5_771: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_780 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_780 .L_16_blocks_overflow_780: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_780: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_781 subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq 
$0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_781 .L_small_initial_partial_block_781: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_781: orq %r8,%r8 je .L_after_reduction_781 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_781: jmp .L_last_blocks_done_771 .L_last_num_blocks_is_6_771: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_782 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_782 .L_16_blocks_overflow_782: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_782: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc 
%ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_783 subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_783 .L_small_initial_partial_block_783: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 
vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_783: orq %r8,%r8 je .L_after_reduction_783 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_783: jmp .L_last_blocks_done_771 .L_last_num_blocks_is_7_771: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_784 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_784 .L_16_blocks_overflow_784: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_784: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} 
vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_785 subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_785 .L_small_initial_partial_block_785: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_785: orq %r8,%r8 je .L_after_reduction_785 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_785: jmp .L_last_blocks_done_771 .L_last_num_blocks_is_8_771: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_786 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_786 .L_16_blocks_overflow_786: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_786: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 
vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_787 subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp 
.L_small_initial_compute_done_787 .L_small_initial_partial_block_787: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_787: orq %r8,%r8 je .L_after_reduction_787 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_787: jmp .L_last_blocks_done_771 .L_last_num_blocks_is_9_771: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_788 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_788 .L_16_blocks_overflow_788: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_788: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 
128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_789 subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_789 .L_small_initial_partial_block_789: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq 
%zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_789: orq %r8,%r8 je .L_after_reduction_789 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_789: jmp .L_last_blocks_done_771 .L_last_num_blocks_is_10_771: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_790 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_790 .L_16_blocks_overflow_790: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_790: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_791 subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_791 .L_small_initial_partial_block_791: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq 
%xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_791: orq %r8,%r8 je .L_after_reduction_791 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_791: jmp .L_last_blocks_done_771 .L_last_num_blocks_is_11_771: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_792 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_792 .L_16_blocks_overflow_792: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_792: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 
176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_793 subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_793 .L_small_initial_partial_block_793: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 
vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_793: orq %r8,%r8 je .L_after_reduction_793 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_793: jmp .L_last_blocks_done_771 .L_last_num_blocks_is_12_771: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_794 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_794 .L_16_blocks_overflow_794: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_794: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 
vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_795 subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_795 .L_small_initial_partial_block_795: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_795: orq %r8,%r8 je .L_after_reduction_795 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_795: jmp .L_last_blocks_done_771 
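/* Tail handlers for 13-16 remaining blocks follow. Each path derives its
   counter blocks (falling back to a byte-swapped add of ddq_add_1234/4444
   when the low counter byte would wrap), runs the AES rounds from the key
   schedule at (%rdi) interleaved with GHASH multiplies over data staged on
   the stack, masks the final partial 16-byte block with %k1 taken from
   byte64_len_to_mask_table, and reduces the GHASH state with POLY2. */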
.L_last_num_blocks_is_13_771: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_796 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_796 .L_16_blocks_overflow_796: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_796: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_797 subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_797 .L_small_initial_partial_block_797: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 
vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_797: orq %r8,%r8 je .L_after_reduction_797 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_797: jmp .L_last_blocks_done_771 .L_last_num_blocks_is_14_771: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_798 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_798 .L_16_blocks_overflow_798: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_798: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 
144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_799 subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_799 .L_small_initial_partial_block_799: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq 
$0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_799: orq %r8,%r8 je .L_after_reduction_799 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_799: jmp .L_last_blocks_done_771 .L_last_num_blocks_is_15_771: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_800 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_800 .L_16_blocks_overflow_800: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_800: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} 
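/* 15-block tail continues: the masked load above fetched the last
   (possibly partial) 64-byte group of input; the remaining AES rounds run
   below, still interleaved with the GHASH multiply/accumulate chain. */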
vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_801 subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp 
.L_small_initial_compute_done_801 .L_small_initial_partial_block_801: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_801: orq %r8,%r8 je .L_after_reduction_801 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_801: jmp .L_last_blocks_done_771 .L_last_num_blocks_is_16_771: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_802 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_802 .L_16_blocks_overflow_802: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_802: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 
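/* 16-block tail: same AES/GHASH interleave as the handlers above; only the
   final input load and output store are masked with %k1, and the combined
   GHASH reduction with POLY2 follows the last round. */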
vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_803: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 
vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_803: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_803: jmp .L_last_blocks_done_771 .L_last_num_blocks_is_0_771: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_771: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_659 .L_message_below_equal_16_blocks_659: movl %r8d,%r12d addl $15,%r12d shrl $4,%r12d cmpq $8,%r12 je .L_small_initial_num_blocks_is_8_804 jl .L_small_initial_num_blocks_is_7_1_804 cmpq $12,%r12 je .L_small_initial_num_blocks_is_12_804 jl .L_small_initial_num_blocks_is_11_9_804 cmpq $16,%r12 je .L_small_initial_num_blocks_is_16_804 cmpq $15,%r12 je .L_small_initial_num_blocks_is_15_804 cmpq $14,%r12 je .L_small_initial_num_blocks_is_14_804 jmp .L_small_initial_num_blocks_is_13_804 .L_small_initial_num_blocks_is_11_9_804: cmpq $11,%r12 je .L_small_initial_num_blocks_is_11_804 cmpq $10,%r12 je .L_small_initial_num_blocks_is_10_804 jmp .L_small_initial_num_blocks_is_9_804 .L_small_initial_num_blocks_is_7_1_804: cmpq $4,%r12 je .L_small_initial_num_blocks_is_4_804 jl .L_small_initial_num_blocks_is_3_1_804 cmpq $7,%r12 je .L_small_initial_num_blocks_is_7_804 cmpq $6,%r12 je .L_small_initial_num_blocks_is_6_804 jmp 
.L_small_initial_num_blocks_is_5_804 .L_small_initial_num_blocks_is_3_1_804: cmpq $3,%r12 je .L_small_initial_num_blocks_is_3_804 cmpq $2,%r12 je .L_small_initial_num_blocks_is_2_804 .L_small_initial_num_blocks_is_1_804: vmovdqa64 SHUF_MASK(%rip),%xmm29 vpaddd ONE(%rip),%xmm2,%xmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm0,%xmm2 vpshufb %xmm29,%xmm0,%xmm0 vmovdqu8 0(%rcx,%r11,1),%xmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %xmm15,%xmm0,%xmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %xmm15,%xmm0,%xmm0 vpxorq %xmm6,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm6,%xmm6 vextracti32x4 $0,%zmm6,%xmm13 cmpq $16,%r8 jl .L_small_initial_partial_block_805 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_805 .L_small_initial_partial_block_805: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %xmm13,%xmm14,%xmm14 jmp .L_after_reduction_805 .L_small_initial_compute_done_805: .L_after_reduction_805: jmp .L_small_initial_blocks_encrypted_804 .L_small_initial_num_blocks_is_2_804: vmovdqa64 SHUF_MASK(%rip),%ymm29 vshufi64x2 $0,%ymm2,%ymm2,%ymm0 vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm0,%xmm2 vpshufb %ymm29,%ymm0,%ymm0 vmovdqu8 0(%rcx,%r11,1),%ymm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %ymm15,%ymm0,%ymm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 
vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %ymm15,%ymm0,%ymm0 vpxorq %ymm6,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm6,%ymm6 vextracti32x4 $1,%zmm6,%xmm13 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_806 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_806 .L_small_initial_partial_block_806: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_806: orq %r8,%r8 je .L_after_reduction_806 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_806: jmp .L_small_initial_blocks_encrypted_804 .L_small_initial_num_blocks_is_3_804: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} 
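/* Byte-reflect the loaded blocks (via the SHUF_MASK value in %zmm29) and keep
   the last block's reflected value in %xmm13; the reflected data is hashed
   below against what appear to be precomputed powers of the hash key held
   relative to %rsi. */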
vpshufb %zmm29,%zmm6,%zmm6 vextracti32x4 $2,%zmm6,%xmm13 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_807 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_807 .L_small_initial_partial_block_807: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_807: orq %r8,%r8 je .L_after_reduction_807 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_807: jmp .L_small_initial_blocks_encrypted_804 .L_small_initial_num_blocks_is_4_804: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vextracti32x4 $3,%zmm6,%xmm13 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_808 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 
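/* Full-block path for this case: carry-less multiply the reflected blocks by
   the hash-key powers just loaded into %zmm20, split the 256-bit products into
   high/low halves, XOR the lanes together and reduce with POLY2 into %xmm14. */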
vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_808 .L_small_initial_partial_block_808: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_808: orq %r8,%r8 je .L_after_reduction_808 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_808: jmp .L_small_initial_blocks_encrypted_804 .L_small_initial_num_blocks_is_5_804: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%xmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %xmm15,%xmm3,%xmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %xmm15,%xmm3,%xmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %xmm7,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm12 movq 
%r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %xmm29,%xmm7,%xmm7 vextracti32x4 $0,%zmm7,%xmm13 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_809 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_809 .L_small_initial_partial_block_809: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_809: orq %r8,%r8 je .L_after_reduction_809 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_809: jmp .L_small_initial_blocks_encrypted_804 .L_small_initial_num_blocks_is_6_804: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%ymm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %ymm15,%ymm3,%ymm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc 
%zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %ymm15,%ymm3,%ymm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %ymm7,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %ymm29,%ymm7,%ymm7 vextracti32x4 $1,%zmm7,%xmm13 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_810 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_810 .L_small_initial_partial_block_810: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_810: orq %r8,%r8 je .L_after_reduction_810 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_810: jmp .L_small_initial_blocks_encrypted_804 .L_small_initial_num_blocks_is_7_804: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq 
byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vextracti32x4 $2,%zmm7,%xmm13 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_811 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_811 .L_small_initial_partial_block_811: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq 
%zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_811: orq %r8,%r8 je .L_after_reduction_811 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_811: jmp .L_small_initial_blocks_encrypted_804 .L_small_initial_num_blocks_is_8_804: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vextracti32x4 $3,%zmm7,%xmm13 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_812 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 
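/* Second phase of the POLY2 reduction: one more pair of carry-less multiplies
   folds the shifted low half back, and the vpternlogq ($0x96, a three-way XOR)
   leaves the reduced GHASH value in %xmm14. */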
vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_812 .L_small_initial_partial_block_812: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_812: orq %r8,%r8 je .L_after_reduction_812 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_812: jmp .L_small_initial_blocks_encrypted_804 .L_small_initial_num_blocks_is_9_804: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%xmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %xmm15,%xmm4,%xmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 
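/* Final AES round for the remaining counter vectors, then XOR with the loaded
   data; the tail vector is written back through the byte mask in %k1. */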
vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %xmm15,%xmm4,%xmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %xmm10,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %xmm29,%xmm10,%xmm10 vextracti32x4 $0,%zmm10,%xmm13 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_813 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_813 .L_small_initial_partial_block_813: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_813: orq %r8,%r8 je .L_after_reduction_813 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_813: jmp .L_small_initial_blocks_encrypted_804 .L_small_initial_num_blocks_is_10_804: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq 
%r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%ymm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %ymm15,%ymm4,%ymm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %ymm15,%ymm4,%ymm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %ymm10,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %ymm29,%ymm10,%ymm10 vextracti32x4 $1,%zmm10,%xmm13 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_814 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 
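/* The vpternlogq below is the last step of the reduction: a three-way XOR of
   the two partial terms into the GHASH accumulator %xmm14. */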
vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_814 .L_small_initial_partial_block_814: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_814: orq %r8,%r8 je .L_after_reduction_814 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_814: jmp .L_small_initial_blocks_encrypted_804 .L_small_initial_num_blocks_is_11_804: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 
vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vextracti32x4 $2,%zmm10,%xmm13 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_815 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_815 .L_small_initial_partial_block_815: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 
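/* Common epilogue of each small-block case: when %r8 is non-zero a partial
   block remains, so its reflected value in %xmm13 is XORed into %xmm14, and
   the remaining byte count and last output block were already stored above at
   (%rdx) and 16(%rsi), presumably so the hash of that block can be completed
   on a later call. */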
.L_small_initial_compute_done_815: orq %r8,%r8 je .L_after_reduction_815 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_815: jmp .L_small_initial_blocks_encrypted_804 .L_small_initial_num_blocks_is_12_804: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vextracti32x4 $3,%zmm10,%xmm13 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_816 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq 
%xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_816 .L_small_initial_partial_block_816: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_816: orq %r8,%r8 je .L_after_reduction_816 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_816: jmp .L_small_initial_blocks_encrypted_804 .L_small_initial_num_blocks_is_13_804: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%xmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %xmm15,%xmm5,%xmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc 
%xmm15,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %xmm15,%xmm5,%xmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %xmm11,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %xmm29,%xmm11,%xmm11 vextracti32x4 $0,%zmm11,%xmm13 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_817 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_817 .L_small_initial_partial_block_817: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq 
$0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_817: orq %r8,%r8 je .L_after_reduction_817 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_817: jmp .L_small_initial_blocks_encrypted_804 .L_small_initial_num_blocks_is_14_804: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%ymm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %ymm15,%ymm5,%ymm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast 
%zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %ymm15,%ymm5,%ymm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %ymm11,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %ymm29,%ymm11,%ymm11 vextracti32x4 $1,%zmm11,%xmm13 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_818 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_818 .L_small_initial_partial_block_818: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 
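/* Horizontal XOR: collapse the 512-bit GHASH product registers down to a
   single 128-bit value before the final POLY2 reduction into %xmm14. */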
vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_818: orq %r8,%r8 je .L_after_reduction_818 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_818: jmp .L_small_initial_blocks_encrypted_804 .L_small_initial_num_blocks_is_15_804: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vextracti32x4 $2,%zmm11,%xmm13 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl 
.L_small_initial_partial_block_819 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_819 .L_small_initial_partial_block_819: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_819: orq %r8,%r8 je .L_after_reduction_819 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_819: jmp 
.L_small_initial_blocks_encrypted_804 .L_small_initial_num_blocks_is_16_804: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vextracti32x4 $3,%zmm11,%xmm13 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_820: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq 
$0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_820: vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_820: .L_small_initial_blocks_encrypted_804: .L_ghash_done_659: vmovdqu64 %xmm2,0(%rsi) vmovdqu64 %xmm14,64(%rsi) .L_enc_dec_done_659: jmp .Lexit_gcm_decrypt .align 32 .Laes_gcm_decrypt_256_avx512: orq %r8,%r8 je .L_enc_dec_done_821 xorq %r14,%r14 vmovdqu64 64(%rsi),%xmm14 movq (%rdx),%r11 orq %r11,%r11 je .L_partial_block_done_822 movl $16,%r10d leaq byte_len_to_mask_table(%rip),%r12 cmpq %r10,%r8 cmovcq %r8,%r10 kmovw (%r12,%r10,2),%k1 vmovdqu8 (%rcx),%xmm0{%k1}{z} vmovdqu64 16(%rsi),%xmm3 vmovdqu64 336(%rsi),%xmm4 leaq SHIFT_MASK(%rip),%r12 addq %r11,%r12 vmovdqu64 (%r12),%xmm5 vpshufb %xmm5,%xmm3,%xmm3 vmovdqa64 %xmm0,%xmm6 vpxorq %xmm0,%xmm3,%xmm3 leaq (%r8,%r11,1),%r13 subq $16,%r13 jge .L_no_extra_mask_822 subq %r13,%r12 .L_no_extra_mask_822: vmovdqu64 16(%r12),%xmm0 vpand %xmm0,%xmm3,%xmm3 vpand %xmm0,%xmm6,%xmm6 vpshufb SHUF_MASK(%rip),%xmm6,%xmm6 vpshufb %xmm5,%xmm6,%xmm6 vpxorq %xmm6,%xmm14,%xmm14 cmpq $0,%r13 jl .L_partial_incomplete_822 vpclmulqdq $0x11,%xmm4,%xmm14,%xmm7 vpclmulqdq $0x00,%xmm4,%xmm14,%xmm10 vpclmulqdq $0x01,%xmm4,%xmm14,%xmm11 vpclmulqdq $0x10,%xmm4,%xmm14,%xmm14 vpxorq %xmm11,%xmm14,%xmm14 vpsrldq $8,%xmm14,%xmm11 vpslldq $8,%xmm14,%xmm14 vpxorq %xmm11,%xmm7,%xmm7 vpxorq %xmm10,%xmm14,%xmm14 vmovdqu64 POLY2(%rip),%xmm11 vpclmulqdq $0x01,%xmm14,%xmm11,%xmm10 vpslldq $8,%xmm10,%xmm10 vpxorq %xmm10,%xmm14,%xmm14 vpclmulqdq $0x00,%xmm14,%xmm11,%xmm10 vpsrldq $4,%xmm10,%xmm10 vpclmulqdq $0x10,%xmm14,%xmm11,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm10,%xmm7,%xmm14 movq $0,(%rdx) movq %r11,%r12 movq $16,%r11 subq %r12,%r11 jmp .L_enc_dec_done_822 .L_partial_incomplete_822: addq %r8,(%rdx) movq %r8,%r11 .L_enc_dec_done_822: leaq byte_len_to_mask_table(%rip),%r12 kmovw (%r12,%r11,2),%k1 vmovdqu64 %xmm14,64(%rsi) movq %r9,%r12 vmovdqu8 %xmm3,(%r12){%k1} .L_partial_block_done_822: vmovdqu64 0(%rsi),%xmm2 subq %r11,%r8 je .L_enc_dec_done_821 cmpq $256,%r8 jbe .L_message_below_equal_16_blocks_821 vmovdqa64 SHUF_MASK(%rip),%zmm29 vmovdqa64 ddq_addbe_4444(%rip),%zmm27 vmovdqa64 ddq_addbe_1234(%rip),%zmm28 vmovd %xmm2,%r15d andl $255,%r15d vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpshufb %zmm29,%zmm2,%zmm2 cmpb $240,%r15b jae .L_next_16_overflow_823 vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp 
.L_next_16_ok_823 .L_next_16_overflow_823: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_823: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 0(%rcx,%r11,1),%zmm0 vmovdqu8 64(%rcx,%r11,1),%zmm3 vmovdqu8 128(%rcx,%r11,1),%zmm4 vmovdqu8 192(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 176(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 192(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 208(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 224(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,0(%r10,%r11,1) vmovdqu8 %zmm10,64(%r10,%r11,1) vmovdqu8 %zmm11,128(%r10,%r11,1) vmovdqu8 %zmm12,192(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm7 vpshufb %zmm29,%zmm3,%zmm10 vpshufb %zmm29,%zmm4,%zmm11 vpshufb %zmm29,%zmm5,%zmm12 vmovdqa64 %zmm7,768(%rsp) vmovdqa64 %zmm10,832(%rsp) vmovdqa64 %zmm11,896(%rsp) vmovdqa64 %zmm12,960(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_824 vmovdqu64 288(%rsi),%zmm0 vmovdqu64 %zmm0,704(%rsp) vmovdqu64 224(%rsi),%zmm3 vmovdqu64 %zmm3,640(%rsp) vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 160(%rsi),%zmm4 vmovdqu64 %zmm4,576(%rsp) vmovdqu64 96(%rsi),%zmm5 vmovdqu64 %zmm5,512(%rsp) .L_skip_hkeys_precomputation_824: cmpq $512,%r8 jb .L_message_below_32_blocks_821 cmpb $240,%r15b jae 
.L_next_16_overflow_825 vpaddd %zmm28,%zmm2,%zmm7 vpaddd %zmm27,%zmm7,%zmm10 vpaddd %zmm27,%zmm10,%zmm11 vpaddd %zmm27,%zmm11,%zmm12 jmp .L_next_16_ok_825 .L_next_16_overflow_825: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm12 vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 vpaddd %zmm12,%zmm7,%zmm10 vpaddd %zmm12,%zmm10,%zmm11 vpaddd %zmm12,%zmm11,%zmm12 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vpshufb %zmm29,%zmm12,%zmm12 .L_next_16_ok_825: vshufi64x2 $255,%zmm12,%zmm12,%zmm2 addb $16,%r15b vmovdqu8 256(%rcx,%r11,1),%zmm0 vmovdqu8 320(%rcx,%r11,1),%zmm3 vmovdqu8 384(%rcx,%r11,1),%zmm4 vmovdqu8 448(%rcx,%r11,1),%zmm5 vbroadcastf64x2 0(%rdi),%zmm6 vpxorq %zmm6,%zmm7,%zmm7 vpxorq %zmm6,%zmm10,%zmm10 vpxorq %zmm6,%zmm11,%zmm11 vpxorq %zmm6,%zmm12,%zmm12 vbroadcastf64x2 16(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 32(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 48(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 64(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 80(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 96(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 112(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 128(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 144(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 160(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 176(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 192(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 208(%rdi),%zmm6 vaesenc %zmm6,%zmm7,%zmm7 vaesenc %zmm6,%zmm10,%zmm10 vaesenc %zmm6,%zmm11,%zmm11 vaesenc %zmm6,%zmm12,%zmm12 vbroadcastf64x2 224(%rdi),%zmm6 vaesenclast %zmm6,%zmm7,%zmm7 vaesenclast %zmm6,%zmm10,%zmm10 vaesenclast %zmm6,%zmm11,%zmm11 vaesenclast %zmm6,%zmm12,%zmm12 vpxorq %zmm0,%zmm7,%zmm7 vpxorq %zmm3,%zmm10,%zmm10 vpxorq %zmm4,%zmm11,%zmm11 vpxorq %zmm5,%zmm12,%zmm12 movq %r9,%r10 vmovdqu8 %zmm7,256(%r10,%r11,1) vmovdqu8 %zmm10,320(%r10,%r11,1) vmovdqu8 %zmm11,384(%r10,%r11,1) vmovdqu8 %zmm12,448(%r10,%r11,1) vpshufb %zmm29,%zmm0,%zmm7 vpshufb %zmm29,%zmm3,%zmm10 vpshufb %zmm29,%zmm4,%zmm11 vpshufb %zmm29,%zmm5,%zmm12 vmovdqa64 %zmm7,1024(%rsp) vmovdqa64 %zmm10,1088(%rsp) vmovdqa64 %zmm11,1152(%rsp) vmovdqa64 %zmm12,1216(%rsp) testq %r14,%r14 jnz .L_skip_hkeys_precomputation_826 vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 
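/* Each group of four vpclmulqdq results (selectors 0x11, 0x00, 0x01, 0x10)
   is one 128x128-bit carry-less multiply; the partial products are folded
   and reduced with POLY2, extending the table of hash-key powers kept in
   the stack frame. */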
vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,192(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,128(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq 
$0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,64(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,0(%rsp) .L_skip_hkeys_precomputation_826: movq $1,%r14 addq $512,%r11 subq $512,%r8 cmpq $768,%r8 jb .L_no_more_big_nblocks_821 .L_encrypt_big_nblocks_821: cmpb $240,%r15b jae .L_16_blocks_overflow_827 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_827 .L_16_blocks_overflow_827: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_827: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc 
%zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_828 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_828 .L_16_blocks_overflow_828: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_828: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 
80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_829 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_829 .L_16_blocks_overflow_829: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_829: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq 
$0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 512(%rcx,%r11,1),%zmm17 vmovdqu8 576(%rcx,%r11,1),%zmm19 vmovdqu8 640(%rcx,%r11,1),%zmm20 vmovdqu8 704(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpternlogq $0x96,%zmm15,%zmm12,%zmm6 vpxorq %zmm24,%zmm6,%zmm6 vpternlogq $0x96,%zmm10,%zmm13,%zmm7 vpxorq %zmm25,%zmm7,%zmm7 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vextracti64x4 $1,%zmm6,%ymm12 vpxorq %ymm12,%ymm6,%ymm6 vextracti32x4 $1,%ymm6,%xmm12 vpxorq %xmm12,%xmm6,%xmm6 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm6 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq 
%zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,512(%r10,%r11,1) vmovdqu8 %zmm3,576(%r10,%r11,1) vmovdqu8 %zmm4,640(%r10,%r11,1) vmovdqu8 %zmm5,704(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1024(%rsp) vmovdqa64 %zmm3,1088(%rsp) vmovdqa64 %zmm4,1152(%rsp) vmovdqa64 %zmm5,1216(%rsp) vmovdqa64 %zmm6,%zmm14 addq $768,%r11 subq $768,%r8 cmpq $768,%r8 jae .L_encrypt_big_nblocks_821 .L_no_more_big_nblocks_821: cmpq $512,%r8 jae .L_encrypt_32_blocks_821 cmpq $256,%r8 jae .L_encrypt_16_blocks_821 .L_encrypt_0_blocks_ghash_32_821: movl %r8d,%r10d andl $~15,%r10d movl $256,%ebx subl %r10d,%ebx vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 addl $256,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_830 cmpl $8,%r10d je .L_last_num_blocks_is_8_830 jb .L_last_num_blocks_is_7_1_830 cmpl $12,%r10d je .L_last_num_blocks_is_12_830 jb .L_last_num_blocks_is_11_9_830 cmpl $15,%r10d je .L_last_num_blocks_is_15_830 ja .L_last_num_blocks_is_16_830 cmpl $14,%r10d je .L_last_num_blocks_is_14_830 jmp .L_last_num_blocks_is_13_830 .L_last_num_blocks_is_11_9_830: cmpl $10,%r10d je .L_last_num_blocks_is_10_830 ja .L_last_num_blocks_is_11_830 jmp .L_last_num_blocks_is_9_830 .L_last_num_blocks_is_7_1_830: cmpl $4,%r10d je .L_last_num_blocks_is_4_830 jb .L_last_num_blocks_is_3_1_830 cmpl $6,%r10d ja .L_last_num_blocks_is_7_830 je .L_last_num_blocks_is_6_830 jmp .L_last_num_blocks_is_5_830 .L_last_num_blocks_is_3_1_830: cmpl $2,%r10d ja .L_last_num_blocks_is_3_830 je .L_last_num_blocks_is_2_830 .L_last_num_blocks_is_1_830: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_831 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_831 .L_16_blocks_overflow_831: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_831: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq 
$0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_832 subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_832 .L_small_initial_partial_block_832: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq 
$0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_832 .L_small_initial_compute_done_832: .L_after_reduction_832: jmp .L_last_blocks_done_830 .L_last_num_blocks_is_2_830: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_833 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_833 .L_16_blocks_overflow_833: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_833: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_834 subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 
POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_834 .L_small_initial_partial_block_834: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_834: orq %r8,%r8 je .L_after_reduction_834 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_834: jmp .L_last_blocks_done_830 .L_last_num_blocks_is_3_830: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_835 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_835 .L_16_blocks_overflow_835: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_835: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 
160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_836 subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_836 .L_small_initial_partial_block_836: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_836: orq %r8,%r8 je .L_after_reduction_836 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_836: jmp .L_last_blocks_done_830 .L_last_num_blocks_is_4_830: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_837 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_837 .L_16_blocks_overflow_837: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_837: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 
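/* AES-CTR rounds for the remaining counter blocks (round keys broadcast
   from the schedule at (%rdi)) are interleaved with GHASH multiplies of
   previously buffered ciphertext blocks by the precomputed hash-key
   powers held on the stack. */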
vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_838 subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_838 .L_small_initial_partial_block_838: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 
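/* Final POLY2 reduction folds the combined GHASH product into %xmm14. */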
vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_838: orq %r8,%r8 je .L_after_reduction_838 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_838: jmp .L_last_blocks_done_830 .L_last_num_blocks_is_5_830: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_839 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_839 .L_16_blocks_overflow_839: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_839: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 
%xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_840 subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_840 .L_small_initial_partial_block_840: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_840: orq %r8,%r8 je .L_after_reduction_840 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_840: jmp .L_last_blocks_done_830 .L_last_num_blocks_is_6_830: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_841 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_841 .L_16_blocks_overflow_841: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_841: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 
48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_842 subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_842 .L_small_initial_partial_block_842: movq %r8,(%rdx) vmovdqu64 
%xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_842: orq %r8,%r8 je .L_after_reduction_842 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_842: jmp .L_last_blocks_done_830 .L_last_num_blocks_is_7_830: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_843 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_843 .L_16_blocks_overflow_843: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_843: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 
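#
# Tail masking: byte64_len_to_mask_table maps a residual byte count (0..64)
# to a 64-bit byte mask, loaded into %k1 by the kmovq at the top of each
# handler.  The {%k1}{z} forms of vmovdqu8 then load only the valid bytes of
# the last partial 64-byte group (zeroing the rest), and the matching masked
# stores write only those bytes of the output, so nothing past the end of
# the buffers is read or written.
#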
vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_844 subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_844 .L_small_initial_partial_block_844: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_844: orq %r8,%r8 je .L_after_reduction_844 
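#
# Each handler ends in two variants.  If the last trailing block is complete,
# the length slot at (%rdx) is cleared and every block is folded through the
# hash-key powers.  Otherwise .L_small_initial_partial_block_* records the
# leftover byte count at (%rdx) and stashes %xmm11 (the last output block,
# whose masked-off tail is still raw key stream) at 16(%rsi), presumably so a
# later call can complete that partial block; it folds only the full blocks
# and, since %r8 is non-zero, XORs the zero-padded, byte-reflected partial
# data block (%xmm7) into the running hash in %xmm14 before
# .L_after_reduction_*.
#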
vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_844: jmp .L_last_blocks_done_830 .L_last_num_blocks_is_8_830: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_845 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_845 .L_16_blocks_overflow_845: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_845: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_846 subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_846 .L_small_initial_partial_block_846: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_846: orq %r8,%r8 je .L_after_reduction_846 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_846: jmp .L_last_blocks_done_830 .L_last_num_blocks_is_9_830: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_847 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_847 .L_16_blocks_overflow_847: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_847: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq 
$0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_848 subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq 
$0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_848 .L_small_initial_partial_block_848: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_848: orq %r8,%r8 je .L_after_reduction_848 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_848: jmp .L_last_blocks_done_830 .L_last_num_blocks_is_10_830: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_849 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_849 .L_16_blocks_overflow_849: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_849: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_850 subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq 
$8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_850 .L_small_initial_partial_block_850: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_850: orq %r8,%r8 je .L_after_reduction_850 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_850: jmp .L_last_blocks_done_830 .L_last_num_blocks_is_11_830: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_851 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_851 .L_16_blocks_overflow_851: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_851: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 
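#
# Hash-key powers: the table at (%rsi) stores H^1 at offset 336 and each
# higher power 16 bytes lower (down to H^16 at offset 96), so a handler that
# folds N trailing blocks starts loading powers at 336 - 16*(N-1); e.g. the
# 4-block case uses the zmm at 288(%rsi) = H^4..H^1, while the larger cases
# add further zmm/ymm/xmm loads plus vinserti64x2 to assemble exactly the
# run of powers they need.
#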
vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_852 subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq 
$8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_852 .L_small_initial_partial_block_852: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_852: orq %r8,%r8 je .L_after_reduction_852 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_852: jmp .L_last_blocks_done_830 .L_last_num_blocks_is_12_830: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_853 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_853 .L_16_blocks_overflow_853: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_853: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq 
$0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_854 subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq 
%zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_854 .L_small_initial_partial_block_854: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_854: orq %r8,%r8 je .L_after_reduction_854 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_854: jmp .L_last_blocks_done_830 .L_last_num_blocks_is_13_830: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_855 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_855 .L_16_blocks_overflow_855: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_855: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq 
$0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_856 subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 
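#
# The unreduced products of the 16 stack-saved blocks are carried in
# %zmm24 (high), %zmm25 (low) and %zmm26 (middle); they are merged into the
# products of the trailing blocks via vpternlogq $0x96, so the combined
# 16+N-block GHASH update pays for only one POLY2 reduction at the end.
#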
vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_856 .L_small_initial_partial_block_856: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_856: orq %r8,%r8 je .L_after_reduction_856 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_856: jmp .L_last_blocks_done_830 .L_last_num_blocks_is_14_830: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_857 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_857 .L_16_blocks_overflow_857: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 
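#
# Counter generation: the counter blocks are kept in the byte order expected
# by AESENC, so the fast path bumps them with plain vpaddd against the
# preloaded increments in %zmm27/%zmm28.  The comparison of %r15d (which
# appears to hold the low byte of the counter) against 256 minus the block
# count (here cmpl $242 for 14 blocks) decides whether that is safe or
# whether the carry would cross a byte boundary, in which case the
# .L_16_blocks_overflow_* path byte-swaps the blocks with the shuffle mask
# in %zmm29, adds ddq_add_1234/ddq_add_4444 so the carry propagates, and
# swaps back.
#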
.L_16_blocks_ok_857: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 
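# Below: the 14 processed blocks are written out (the last 32-byte store masked
# by %k1), and the blocks loaded from the source buffer are byte-reflected
# (vpshufb %zmm29) and folded into GHASH; hashing the loaded data rather than
# the freshly produced output suggests this copy of the tail belongs to the
# decrypt direction of the generated code.  Two variants follow: the
# full-final-block path zeroes the partial-block count at (%rdx) and multiplies
# by the H powers starting at 128(%rsi); .L_small_initial_partial_block_858
# instead records the leftover byte count at (%rdx), parks the pending block at
# 16(%rsi), and hashes one block less using the powers starting at 144(%rsi).
# Both variants end with the POLY2 reduction that leaves the hash in %xmm14.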
vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_858 subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_858 .L_small_initial_partial_block_858: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq 
$0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_858: orq %r8,%r8 je .L_after_reduction_858 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_858: jmp .L_last_blocks_done_830 .L_last_num_blocks_is_15_830: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_859 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_859 .L_16_blocks_overflow_859: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_859: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 
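# At this point all Karatsuba partial products of the previous 16-block batch
# have been folded into the zmm24/zmm25/zmm26 accumulators for the 15-block
# tail; what remains before the stores is only the rest of the AES ladder
# (the remaining vaesenc rounds and the vaesenclast with the key at 224(%rdi))
# on the 15 counter blocks of this path.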
vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_860 subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_860 .L_small_initial_partial_block_860: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 
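# (continued) Partial-block variant of the 15-block tail: the stores to (%rdx)
# and 16(%rsi) just above parked the leftover byte count and the pending block
# for a later call, and the incomplete last block is withheld from the hash, so
# only 14 blocks are multiplied here (H powers starting at 128(%rsi)).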
vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_860: orq %r8,%r8 je .L_after_reduction_860 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_860: jmp .L_last_blocks_done_830 .L_last_num_blocks_is_16_830: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_861 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_861 .L_16_blocks_overflow_861: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_861: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc 
%zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm14,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_862: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq 
$8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_862: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_862: jmp .L_last_blocks_done_830 .L_last_num_blocks_is_0_830: vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_830: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_821 .L_encrypt_32_blocks_821: cmpb $240,%r15b jae .L_16_blocks_overflow_863 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_863 .L_16_blocks_overflow_863: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_863: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 
128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) cmpb $240,%r15b jae .L_16_blocks_overflow_864 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_864 .L_16_blocks_overflow_864: vpshufb %zmm29,%zmm2,%zmm2 
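# Counter management for the second 16-block batch of the 32-block path: %r15b
# tracks the low byte of the big-endian counter (addb $16 per batch).  The fast
# path above adds the big-endian increment constants held in zmm27/zmm28
# directly; this overflow path byte-reflects the counter with the %zmm29
# shuffle mask, advances it as little-endian dwords with ddq_add_1234 /
# ddq_add_4444, and reflects the four counter vectors back.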
vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_864: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1024(%rsp),%zmm8 vmovdqu64 256(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 320(%rsp),%zmm18 vmovdqa64 1088(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 384(%rsp),%zmm1 vmovdqa64 1152(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 448(%rsp),%zmm18 vmovdqa64 1216(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 256(%rcx,%r11,1),%zmm17 vmovdqu8 320(%rcx,%r11,1),%zmm19 vmovdqu8 384(%rcx,%r11,1),%zmm20 vmovdqu8 448(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm15,%zmm10,%zmm26 vpternlogq $0x96,%zmm12,%zmm6,%zmm24 vpternlogq $0x96,%zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc 
%zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,256(%r10,%r11,1) vmovdqu8 %zmm3,320(%r10,%r11,1) vmovdqu8 %zmm4,384(%r10,%r11,1) vmovdqu8 %zmm5,448(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,768(%rsp) vmovdqa64 %zmm3,832(%rsp) vmovdqa64 %zmm4,896(%rsp) vmovdqa64 %zmm5,960(%rsp) vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 subq $512,%r8 addq $512,%r11 movl %r8d,%r10d andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_865 cmpl $8,%r10d je .L_last_num_blocks_is_8_865 jb .L_last_num_blocks_is_7_1_865 cmpl $12,%r10d je .L_last_num_blocks_is_12_865 jb .L_last_num_blocks_is_11_9_865 cmpl $15,%r10d je .L_last_num_blocks_is_15_865 ja .L_last_num_blocks_is_16_865 cmpl $14,%r10d je .L_last_num_blocks_is_14_865 jmp .L_last_num_blocks_is_13_865 .L_last_num_blocks_is_11_9_865: cmpl $10,%r10d je .L_last_num_blocks_is_10_865 ja .L_last_num_blocks_is_11_865 jmp .L_last_num_blocks_is_9_865 .L_last_num_blocks_is_7_1_865: cmpl $4,%r10d je .L_last_num_blocks_is_4_865 jb .L_last_num_blocks_is_3_1_865 cmpl $6,%r10d ja .L_last_num_blocks_is_7_865 je .L_last_num_blocks_is_6_865 jmp .L_last_num_blocks_is_5_865 .L_last_num_blocks_is_3_1_865: cmpl $2,%r10d ja .L_last_num_blocks_is_3_865 je .L_last_num_blocks_is_2_865 .L_last_num_blocks_is_1_865: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_866 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_866 .L_16_blocks_overflow_866: vpshufb %zmm29,%zmm2,%zmm2 vpaddd 
ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_866: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_867 subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_867 .L_small_initial_partial_block_867: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 
$1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_867 .L_small_initial_compute_done_867: .L_after_reduction_867: jmp .L_last_blocks_done_865 .L_last_num_blocks_is_2_865: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_868 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_868 .L_16_blocks_overflow_868: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_868: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_869 subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq 
%zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_869 .L_small_initial_partial_block_869: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_869: orq %r8,%r8 je .L_after_reduction_869 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_869: jmp .L_last_blocks_done_865 .L_last_num_blocks_is_3_865: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_870 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_870 .L_16_blocks_overflow_870: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_870: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 
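# Small tails (1..4 blocks; this is the 3-block case): the whole remainder fits
# in one masked zmm load/store under %k1, and only counter register zmm0 is run
# through the AES ladder.  The interleaved vpclmulqdq work still folds the full
# previous 16-block batch into zmm24/zmm25/zmm26, after which the tail blocks
# themselves are hashed against the low powers of H kept at 288..336(%rsi).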
vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_871 subq $16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_871 .L_small_initial_partial_block_871: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_871: orq %r8,%r8 je .L_after_reduction_871 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_871: jmp .L_last_blocks_done_865 .L_last_num_blocks_is_4_865: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_872 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_872 .L_16_blocks_overflow_872: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_872: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 
16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_873 subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_873 .L_small_initial_partial_block_873: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 
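# GHASH horizontal fold and reduction (the same pattern recurs throughout this
# file): the middle Karatsuba sum is split with vpsrldq/vpslldq and XORed into
# the high and low halves, each 512-bit sum is collapsed 512 -> 256 -> 128 bits
# via vextracti64x4/vextracti32x4 plus XOR, and the resulting 256-bit value is
# reduced modulo the GHASH polynomial x^128 + x^7 + x^2 + x + 1 by two-phase
# folding with vpclmulqdq against the POLY2 constant, leaving the updated hash
# in %xmm14.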
vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_873: orq %r8,%r8 je .L_after_reduction_873 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_873: jmp .L_last_blocks_done_865 .L_last_num_blocks_is_5_865: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_874 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_874 .L_16_blocks_overflow_874: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_874: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 
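# The 5..7-block tails split the counters across one full zmm0 (four blocks)
# plus a narrower register for the rest (here xmm3 for the single fifth
# block); AES rounds are issued at both widths, the sub-64-byte piece of the
# data goes through the %k1 byte mask, and the tail GHASH that follows pairs a
# zmm-wide multiply (H powers at 272(%rsi) or 288(%rsi)) with a narrow one for
# the odd blocks.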
vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_875 subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_875 .L_small_initial_partial_block_875: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_875: orq %r8,%r8 je .L_after_reduction_875 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_875: jmp .L_last_blocks_done_865 .L_last_num_blocks_is_6_865: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_876 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_876 .L_16_blocks_overflow_876: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_876: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq 
%ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_877 subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq 
$8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_877 .L_small_initial_partial_block_877: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_877: orq %r8,%r8 je .L_after_reduction_877 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_877: jmp .L_last_blocks_done_865 .L_last_num_blocks_is_7_865: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_878 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_878 .L_16_blocks_overflow_878: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_878: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 
112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_879 subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_879 .L_small_initial_partial_block_879: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq 
$8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_879: orq %r8,%r8 je .L_after_reduction_879 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_879: jmp .L_last_blocks_done_865 .L_last_num_blocks_is_8_865: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_880 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_880 .L_16_blocks_overflow_880: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_880: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} 
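/* The counter blocks have been encrypted, XORed with the (partially masked) input and stored
   through the {%k1} byte mask; the input blocks held in %zmm17/%zmm19 are now byte-reflected
   with the shuffle mask in %zmm29 and folded into the GHASH accumulator against precomputed
   hash-key powers in the context at (%rsi), ending in a reduction with the POLY2 constant. */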
vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_881 subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_881 .L_small_initial_partial_block_881: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_881: orq %r8,%r8 je .L_after_reduction_881 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_881: jmp .L_last_blocks_done_865 .L_last_num_blocks_is_9_865: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_882 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_882 .L_16_blocks_overflow_882: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_882: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq 
%zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_883 subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq 
%zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_883 .L_small_initial_partial_block_883: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_883: orq %r8,%r8 je .L_after_reduction_883 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_883: jmp .L_last_blocks_done_865 .L_last_num_blocks_is_10_865: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_884 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_884 .L_16_blocks_overflow_884: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_884: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_885 subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq 
$0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_885 .L_small_initial_partial_block_885: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_885: orq %r8,%r8 je .L_after_reduction_885 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_885: jmp .L_last_blocks_done_865 .L_last_num_blocks_is_11_865: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_886 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_886 .L_16_blocks_overflow_886: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_886: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 
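/* Throughout these .L_last_num_blocks_is_N tail variants, AES rounds on the fresh counter
   blocks are interleaved with VPCLMULQDQ multiplications that fold the previously buffered
   blocks into the GHASH partial products (%zmm24/%zmm25/%zmm26), using hash-key powers spilled
   to the stack frame; round keys are broadcast from the key schedule at (%rdi). */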
vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_887 subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq 
$0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_887 .L_small_initial_partial_block_887: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_887: orq %r8,%r8 je .L_after_reduction_887 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_887: jmp .L_last_blocks_done_865 .L_last_num_blocks_is_12_865: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_888 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_888 .L_16_blocks_overflow_888: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_888: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq 
$0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_889 subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq 
$8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_889 .L_small_initial_partial_block_889: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_889: orq %r8,%r8 je .L_after_reduction_889 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_889: jmp .L_last_blocks_done_865 .L_last_num_blocks_is_13_865: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_890 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_890 .L_16_blocks_overflow_890: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_890: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 
vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_891 subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 
vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_891 .L_small_initial_partial_block_891: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_891: orq %r8,%r8 je .L_after_reduction_891 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_891: jmp .L_last_blocks_done_865 .L_last_num_blocks_is_14_865: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_892 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_892 .L_16_blocks_overflow_892: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb 
%zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_892: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq 
%zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_893 subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_893 .L_small_initial_partial_block_893: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 
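/* Final reduction: the 256-bit GHASH product, split across %xmm0 (high half) and %xmm3 (low
   half), is reduced modulo the GHASH polynomial via carry-less multiplications with the POLY2
   constant, leaving the updated hash value in %xmm14. */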
vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_893: orq %r8,%r8 je .L_after_reduction_893 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_893: jmp .L_last_blocks_done_865 .L_last_num_blocks_is_15_865: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_894 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_894 .L_16_blocks_overflow_894: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_894: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc 
%zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_895 subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_895 .L_small_initial_partial_block_895: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 
256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_895: orq %r8,%r8 je .L_after_reduction_895 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_895: jmp .L_last_blocks_done_865 .L_last_num_blocks_is_16_865: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_896 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_896 .L_16_blocks_overflow_896: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_896: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 
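/*
 * 16-block tail: %k1 was derived from byte64_len_to_mask_table above, so the
 * final (possibly partial) 64-byte chunk is loaded with a zero-masked
 * vmovdqu8 and stored under the same mask, while the AES rounds on
 * %zmm0/%zmm3/%zmm4/%zmm5 stay interleaved with the vpclmulqdq partial
 * products of the GHASH update.
 */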
vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_897: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq 
%zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_897: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_897: jmp .L_last_blocks_done_865 .L_last_num_blocks_is_0_865: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_865: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_821 .L_encrypt_16_blocks_821: cmpb $240,%r15b jae .L_16_blocks_overflow_898 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_898 .L_16_blocks_overflow_898: vpshufb %zmm29,%zmm2,%zmm2 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_898: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp),%zmm1 vshufi64x2 $255,%zmm5,%zmm5,%zmm2 addb $16,%r15b vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc 
%zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm6 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm15,%zmm10,%zmm26 vpxorq %zmm12,%zmm6,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1) vpshufb %zmm29,%zmm17,%zmm0 vpshufb %zmm29,%zmm19,%zmm3 vpshufb %zmm29,%zmm20,%zmm4 vpshufb %zmm29,%zmm21,%zmm5 vmovdqa64 %zmm0,1280(%rsp) vmovdqa64 %zmm3,1344(%rsp) vmovdqa64 %zmm4,1408(%rsp) vmovdqa64 %zmm5,1472(%rsp) vmovdqa64 1024(%rsp),%zmm13 vmovdqu64 256(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1088(%rsp),%zmm13 vmovdqu64 320(%rsp),%zmm12 vpclmulqdq 
$0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1152(%rsp),%zmm13 vmovdqu64 384(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1216(%rsp),%zmm13 vmovdqu64 448(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 subq $256,%r8 addq $256,%r11 movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_899 cmpl $8,%r10d je .L_last_num_blocks_is_8_899 jb .L_last_num_blocks_is_7_1_899 cmpl $12,%r10d je .L_last_num_blocks_is_12_899 jb .L_last_num_blocks_is_11_9_899 cmpl $15,%r10d je .L_last_num_blocks_is_15_899 ja .L_last_num_blocks_is_16_899 cmpl $14,%r10d je .L_last_num_blocks_is_14_899 jmp .L_last_num_blocks_is_13_899 .L_last_num_blocks_is_11_9_899: cmpl $10,%r10d je .L_last_num_blocks_is_10_899 ja .L_last_num_blocks_is_11_899 jmp .L_last_num_blocks_is_9_899 .L_last_num_blocks_is_7_1_899: cmpl $4,%r10d je .L_last_num_blocks_is_4_899 jb .L_last_num_blocks_is_3_1_899 cmpl $6,%r10d ja .L_last_num_blocks_is_7_899 je .L_last_num_blocks_is_6_899 jmp .L_last_num_blocks_is_5_899 .L_last_num_blocks_is_3_1_899: cmpl $2,%r10d ja .L_last_num_blocks_is_3_899 je .L_last_num_blocks_is_2_899 .L_last_num_blocks_is_1_899: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_900 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_900 .L_16_blocks_overflow_900: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_900: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 
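/*
 * Single-block tail: only %xmm0 carries a counter block through the AES
 * rounds here, while the full-width vpclmulqdq operations fold the 16 blocks
 * stashed (byte-reflected) at 1280..1472(%rsp) against what appear to be the
 * cached hash-key powers at 512..704(%rsp).
 */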
vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %xmm31,%xmm0,%xmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_901 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_901 .L_small_initial_partial_block_901: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_901 .L_small_initial_compute_done_901: .L_after_reduction_901: jmp .L_last_blocks_done_899 .L_last_num_blocks_is_2_899: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_902 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_902 .L_16_blocks_overflow_902: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_902: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 
vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %ymm31,%ymm0,%ymm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_903 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_903 .L_small_initial_partial_block_903: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 
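/*
 * The cross-product terms collected in %zmm4 are split with vpsrldq/vpslldq
 * and XORed into the high (%zmm0) and low (%zmm3) halves; the 128-bit lanes
 * are then collapsed and reduced modulo the GCM polynomial via POLY2.
 */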
vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_903: orq %r8,%r8 je .L_after_reduction_903 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_903: jmp .L_last_blocks_done_899 .L_last_num_blocks_is_3_899: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_904 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_904 .L_16_blocks_overflow_904: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_904: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 
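/*
 * The key schedule is broadcast from (%rdi) up through offset 224 before
 * vaesenclast, which matches a 14-round expanded key, so this appears to be
 * the AES-256 specialisation of the tail code; the POLY2 reduction of the
 * previous GHASH state is interleaved with the same round sequence.
 */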
vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_905 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_905 .L_small_initial_partial_block_905: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_905: orq %r8,%r8 je .L_after_reduction_905 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_905: jmp .L_last_blocks_done_899 .L_last_num_blocks_is_4_899: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_906 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_906 .L_16_blocks_overflow_906: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_906: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 
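/*
 * GHASH operands for this pass come in pairs: byte-reflected blocks saved
 * earlier at 1280..1472(%rsp) are multiplied by the values cached at
 * 512..704(%rsp) (what look like the hash-key powers prepared during setup),
 * and the results are later folded into the running accumulators
 * %zmm24/%zmm25/%zmm26.
 */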
vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_907 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_907 .L_small_initial_partial_block_907: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq 
%zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_907: orq %r8,%r8 je .L_after_reduction_907 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_907: jmp .L_last_blocks_done_899 .L_last_num_blocks_is_5_899: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_908 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_908 .L_16_blocks_overflow_908: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_908: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 
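/*
 * Horizontal XOR fold: the four 128-bit lanes of the high accumulator
 * (%zmm14, folded above) and of the low accumulator (%zmm7, folded just
 * below) are collapsed into %xmm14/%xmm7 so the final polynomial reduction
 * can operate on 128-bit values.
 */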
vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_909 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_909 .L_small_initial_partial_block_909: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_909: orq %r8,%r8 je .L_after_reduction_909 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_909: jmp .L_last_blocks_done_899 .L_last_num_blocks_is_6_899: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_910 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 
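/*
 * Counter-block generation: when the low counter byte cannot wrap (the cmpl
 * against %r15d above), the increments held in %zmm28/%zmm27 are added
 * directly; otherwise the block is byte-swapped with %zmm29, incremented via
 * ddq_add_1234/ddq_add_4444, and swapped back so the 32-bit counter carries
 * propagate correctly.
 */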
jmp .L_16_blocks_ok_910 .L_16_blocks_overflow_910: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_910: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 
%ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_911 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_911 .L_small_initial_partial_block_911: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_911: orq %r8,%r8 je .L_after_reduction_911 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_911: jmp .L_last_blocks_done_899 .L_last_num_blocks_is_7_899: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_912 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_912 .L_16_blocks_overflow_912: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_912: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 
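/*
 * Each 512-bit GHASH multiply is assembled from four vpclmulqdq partial
 * products per lane (0x11 high*high, 0x00 low*low, 0x01/0x10 cross terms);
 * vpternlogq $0x96 then serves as a three-way XOR to accumulate them.
 */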
vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_913 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq 
$0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_913 .L_small_initial_partial_block_913: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_913: orq %r8,%r8 je .L_after_reduction_913 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_913: jmp .L_last_blocks_done_899 .L_last_num_blocks_is_8_899: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_914 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_914 .L_16_blocks_overflow_914: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_914: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq 
$0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_915 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq 
$4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_915 .L_small_initial_partial_block_915: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_915: orq %r8,%r8 je .L_after_reduction_915 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_915: jmp .L_last_blocks_done_899 .L_last_num_blocks_is_9_899: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_916 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_916 .L_16_blocks_overflow_916: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_916: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 
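/*
 * %k1 was loaded above from byte64_len_to_mask_table, indexed by the number
 * of bytes remaining in the last 64-byte chunk, so the masked vmovdqu8
 * accesses below touch only bytes that belong to the message; the
 * zero-masking form ({%k1}{z}) clears the rest of the register.
 */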
vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_917 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq 
%xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_917 .L_small_initial_partial_block_917: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_917: orq %r8,%r8 je .L_after_reduction_917 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_917: jmp .L_last_blocks_done_899 .L_last_num_blocks_is_10_899: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_918 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_918 .L_16_blocks_overflow_918: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_918: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq 
$0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_919 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq 
%zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_919 .L_small_initial_partial_block_919: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_919: orq %r8,%r8 je .L_after_reduction_919 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_919: jmp .L_last_blocks_done_899 .L_last_num_blocks_is_11_899: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_920 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_920 .L_16_blocks_overflow_920: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_920: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_921 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq 
$0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_921 .L_small_initial_partial_block_921: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_921: orq %r8,%r8 je .L_after_reduction_921 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_921: jmp .L_last_blocks_done_899 .L_last_num_blocks_is_12_899: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_922 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_922 .L_16_blocks_overflow_922: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_922: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 
$0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 
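/*
 * The keystream produced by vaesenclast has just been XORed with the loaded
 * input blocks.  The last output block is extracted into %xmm11; the
 * .L_small_initial_partial_block_* path below stores it to 16(%rsi) when the
 * tail ends in a partial block, and the final 64-byte chunk is written out
 * through the %k1 byte mask.
 */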
vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_923 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_923 .L_small_initial_partial_block_923: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_923: orq %r8,%r8 je .L_after_reduction_923 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_923: jmp .L_last_blocks_done_899 .L_last_num_blocks_is_13_899: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_924 vpaddd 
%zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_924 .L_16_blocks_overflow_924: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_924: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 
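/*
 * Here the 256-bit carry-less GHASH product has been folded down to two
 * 128-bit halves: the high half in %xmm14 and the low half in %xmm7.  The
 * vpclmulqdq sequence against POLY2 (%xmm16) that follows reduces it modulo
 * the GHASH polynomial while the remaining AES rounds continue.
 */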
vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_925 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_925 .L_small_initial_partial_block_925: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq 
$0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_925: orq %r8,%r8 je .L_after_reduction_925 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_925: jmp .L_last_blocks_done_899 .L_last_num_blocks_is_14_899: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_926 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_926 .L_16_blocks_overflow_926: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_926: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc 
%zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_927 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq 
$0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_927 .L_small_initial_partial_block_927: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_927: orq %r8,%r8 je .L_after_reduction_927 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_927: jmp .L_last_blocks_done_899 .L_last_num_blocks_is_15_899: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_928 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_928 .L_16_blocks_overflow_928: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_928: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 
1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq 
$0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_929 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_929 .L_small_initial_partial_block_929: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq 
%zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_929: orq %r8,%r8 je .L_after_reduction_929 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_929: jmp .L_last_blocks_done_899 .L_last_num_blocks_is_16_899: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_930 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_930 .L_16_blocks_overflow_930: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_930: vbroadcastf64x2 0(%rdi),%zmm30 vmovdqa64 1280(%rsp),%zmm8 vmovdqu64 512(%rsp),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 576(%rsp),%zmm18 vmovdqa64 1344(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 640(%rsp),%zmm1 vmovdqa64 1408(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 704(%rsp),%zmm18 vmovdqa64 1472(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 
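/* Annotation (inferred from the surrounding instructions): fold the GHASH accumulators carried in %zmm24/%zmm25/%zmm26 into this batch's partial products and run the POLY2 reduction, interleaved with the remaining AES rounds. */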
vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpternlogq $0x96,%zmm12,%zmm24,%zmm14 vpternlogq $0x96,%zmm13,%zmm25,%zmm7 vpternlogq $0x96,%zmm15,%zmm26,%zmm10 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vpsrldq $8,%zmm10,%zmm15 vpslldq $8,%zmm10,%zmm10 vmovdqa64 POLY2(%rip),%xmm16 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vpxorq %zmm15,%zmm14,%zmm14 vpxorq %zmm10,%zmm7,%zmm7 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vextracti64x4 $1,%zmm14,%ymm12 vpxorq %ymm12,%ymm14,%ymm14 vextracti32x4 $1,%ymm14,%xmm12 vpxorq %xmm12,%xmm14,%xmm14 vextracti64x4 $1,%zmm7,%ymm13 vpxorq %ymm13,%ymm7,%ymm7 vextracti32x4 $1,%ymm7,%xmm13 vpxorq %xmm13,%xmm7,%xmm7 vbroadcastf64x2 176(%rdi),%zmm31 vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 vpslldq $8,%xmm13,%xmm13 vpxorq %xmm13,%xmm7,%xmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 vpsrldq $4,%xmm12,%xmm12 vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 vpslldq $4,%xmm15,%xmm15 vpternlogq $0x96,%xmm12,%xmm15,%xmm14 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_931: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpxorq %zmm14,%zmm17,%zmm17 vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm31,%zmm5,%zmm5 vpxorq %zmm8,%zmm0,%zmm0 vpxorq %zmm22,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq 
%zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_931: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_931: jmp .L_last_blocks_done_899 .L_last_num_blocks_is_0_899: vmovdqa64 1280(%rsp),%zmm13 vmovdqu64 512(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1344(%rsp),%zmm13 vmovdqu64 576(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 1408(%rsp),%zmm13 vmovdqu64 640(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 1472(%rsp),%zmm13 vmovdqu64 704(%rsp),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_899: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_821 .L_message_below_32_blocks_821: subq $256,%r8 addq $256,%r11 movl %r8d,%r10d testq %r14,%r14 jnz .L_skip_hkeys_precomputation_932 vmovdqu64 640(%rsp),%zmm3 vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 vmovdqu64 576(%rsp),%zmm4 vmovdqu64 512(%rsp),%zmm5 vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,448(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq 
$0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,384(%rsp) vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 vpxorq %zmm10,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm10 vpslldq $8,%zmm4,%zmm4 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm4,%zmm4 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm4,%zmm4 vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 vpslldq $4,%zmm4,%zmm4 vpternlogq $0x96,%zmm7,%zmm6,%zmm4 vmovdqu64 %zmm4,320(%rsp) vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 vpxorq %zmm10,%zmm5,%zmm5 vpsrldq $8,%zmm5,%zmm10 vpslldq $8,%zmm5,%zmm5 vpxorq %zmm10,%zmm6,%zmm6 vpxorq %zmm7,%zmm5,%zmm5 vmovdqu64 POLY2(%rip),%zmm10 vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 vpslldq $8,%zmm7,%zmm7 vpxorq %zmm7,%zmm5,%zmm5 vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 vpsrldq $4,%zmm7,%zmm7 vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 vpslldq $4,%zmm5,%zmm5 vpternlogq $0x96,%zmm7,%zmm6,%zmm5 vmovdqu64 %zmm5,256(%rsp) .L_skip_hkeys_precomputation_932: movq $1,%r14 andl $~15,%r10d movl $512,%ebx subl %r10d,%ebx movl %r8d,%r10d addl $15,%r10d shrl $4,%r10d je .L_last_num_blocks_is_0_933 cmpl $8,%r10d je .L_last_num_blocks_is_8_933 jb .L_last_num_blocks_is_7_1_933 cmpl $12,%r10d je .L_last_num_blocks_is_12_933 jb .L_last_num_blocks_is_11_9_933 cmpl $15,%r10d je .L_last_num_blocks_is_15_933 ja .L_last_num_blocks_is_16_933 cmpl $14,%r10d je .L_last_num_blocks_is_14_933 jmp .L_last_num_blocks_is_13_933 .L_last_num_blocks_is_11_9_933: cmpl $10,%r10d je .L_last_num_blocks_is_10_933 ja .L_last_num_blocks_is_11_933 jmp .L_last_num_blocks_is_9_933 .L_last_num_blocks_is_7_1_933: cmpl $4,%r10d je .L_last_num_blocks_is_4_933 jb .L_last_num_blocks_is_3_1_933 cmpl $6,%r10d ja .L_last_num_blocks_is_7_933 je .L_last_num_blocks_is_6_933 jmp .L_last_num_blocks_is_5_933 .L_last_num_blocks_is_3_1_933: cmpl $2,%r10d ja .L_last_num_blocks_is_3_933 je .L_last_num_blocks_is_2_933 .L_last_num_blocks_is_1_933: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $255,%r15d jae .L_16_blocks_overflow_934 vpaddd %xmm28,%xmm2,%xmm0 jmp .L_16_blocks_ok_934 .L_16_blocks_overflow_934: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %xmm29,%xmm0,%xmm0 .L_16_blocks_ok_934: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %xmm30,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %xmm30,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %xmm31,%xmm0,%xmm0 vaesenclast %xmm30,%xmm0,%xmm0 vpxorq %xmm17,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %xmm29,%xmm17,%xmm17 vextracti32x4 $0,%zmm17,%xmm7 cmpq $16,%r8 jl .L_small_initial_partial_block_935 subq $16,%r8 movq $0,(%rdx) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_935 .L_small_initial_partial_block_935: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm0 vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 vpslldq $8,%xmm3,%xmm3 vpxorq %xmm3,%xmm25,%xmm3 vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 vpsrldq $4,%xmm4,%xmm4 vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm4,%xmm14 vpxorq %xmm7,%xmm14,%xmm14 jmp .L_after_reduction_935 .L_small_initial_compute_done_935: .L_after_reduction_935: jmp .L_last_blocks_done_933 .L_last_num_blocks_is_2_933: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $254,%r15d jae .L_16_blocks_overflow_936 vpaddd %ymm28,%ymm2,%ymm0 jmp .L_16_blocks_ok_936 .L_16_blocks_overflow_936: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %ymm29,%ymm0,%ymm0 .L_16_blocks_ok_936: 
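/* Annotation (inferred): 2-block tail of the below-32-blocks path — encrypt one YMM of counter blocks with the key schedule at (%rdi) while the GHASH of the blocks buffered at 768(%rsp) onward is accumulated against the hash-key powers kept on the stack (indexed by %rbx) into %zmm24/%zmm25/%zmm26. */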
vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %ymm30,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %ymm30,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %ymm31,%ymm0,%ymm0 vaesenclast %ymm30,%ymm0,%ymm0 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %ymm29,%ymm17,%ymm17 vextracti32x4 $1,%zmm17,%xmm7 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_937 subq $16,%r8 movq $0,(%rdx) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_937 .L_small_initial_partial_block_937: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 vpxorq 
%zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_937: orq %r8,%r8 je .L_after_reduction_937 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_937: jmp .L_last_blocks_done_933 .L_last_num_blocks_is_3_933: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $253,%r15d jae .L_16_blocks_overflow_938 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_938 .L_16_blocks_overflow_938: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_938: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $2,%zmm17,%xmm7 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_939 subq 
$16,%r8 movq $0,(%rdx) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_939 .L_small_initial_partial_block_939: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_939: orq %r8,%r8 je .L_after_reduction_939 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_939: jmp .L_last_blocks_done_933 .L_last_num_blocks_is_4_933: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax kmovq (%r10,%rax,8),%k1 cmpl $252,%r15d jae .L_16_blocks_overflow_940 vpaddd %zmm28,%zmm2,%zmm0 jmp .L_16_blocks_ok_940 .L_16_blocks_overflow_940: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpshufb %zmm29,%zmm0,%zmm0 .L_16_blocks_ok_940: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm0,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 
vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenclast %zmm30,%zmm0,%zmm0 vpxorq %zmm17,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm17,%zmm17{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vextracti32x4 $3,%zmm17,%xmm7 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_941 subq $16,%r8 movq $0,(%rdx) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_941 .L_small_initial_partial_block_941: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpxorq %zmm26,%zmm4,%zmm4 vpxorq %zmm24,%zmm0,%zmm0 vpxorq %zmm25,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_941: orq %r8,%r8 je .L_after_reduction_941 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_941: jmp .L_last_blocks_done_933 .L_last_num_blocks_is_5_933: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $251,%r15d jae .L_16_blocks_overflow_942 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %xmm27,%xmm0,%xmm3 jmp .L_16_blocks_ok_942 
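/* Annotation (inferred): counter-overflow path — when the low byte of the big-endian counter would wrap, byte-swap the counter block, add ddq_add_1234/ddq_add_4444, and swap back; otherwise the pre-shuffled increments in %zmm27/%zmm28 are added directly. */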
.L_16_blocks_overflow_942: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 .L_16_blocks_ok_942: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %xmm30,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %xmm30,%xmm3,%xmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %xmm31,%xmm3,%xmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %xmm30,%xmm3,%xmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %xmm19,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %xmm29,%xmm19,%xmm19 vextracti32x4 $0,%zmm19,%xmm7 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_943 subq $16,%r8 movq $0,(%rdx) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 
vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_943 .L_small_initial_partial_block_943: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_943: orq %r8,%r8 je .L_after_reduction_943 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_943: jmp .L_last_blocks_done_933 .L_last_num_blocks_is_6_933: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $250,%r15d jae .L_16_blocks_overflow_944 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %ymm27,%ymm0,%ymm3 jmp .L_16_blocks_ok_944 .L_16_blocks_overflow_944: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 .L_16_blocks_ok_944: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %ymm30,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq 
$0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %ymm30,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %ymm31,%ymm3,%ymm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %ymm30,%ymm3,%ymm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %ymm29,%ymm19,%ymm19 vextracti32x4 $1,%zmm19,%xmm7 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_945 subq $16,%r8 movq $0,(%rdx) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_945 .L_small_initial_partial_block_945: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq 
%ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_945: orq %r8,%r8 je .L_after_reduction_945 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_945: jmp .L_last_blocks_done_933 .L_last_num_blocks_is_7_933: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $249,%r15d jae .L_16_blocks_overflow_946 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_946 .L_16_blocks_overflow_946: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_946: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm3,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 
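/* Annotation (inferred): final AES round — vaesenclast, XOR the keystream with the loaded blocks, store (last vector under mask %k1), then byte-swap the processed blocks for the closing GHASH update. */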
vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $2,%zmm19,%xmm7 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_947 subq $16,%r8 movq $0,(%rdx) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_947 .L_small_initial_partial_block_947: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_947: orq %r8,%r8 je .L_after_reduction_947 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_947: jmp .L_last_blocks_done_933 .L_last_num_blocks_is_8_933: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $64,%rax kmovq (%r10,%rax,8),%k1 cmpl $248,%r15d jae .L_16_blocks_overflow_948 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 jmp .L_16_blocks_ok_948 .L_16_blocks_overflow_948: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 .L_16_blocks_ok_948: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm3,%xmm2 vshufi64x2 
$0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm19,%zmm19{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vextracti32x4 $3,%zmm19,%xmm7 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_949 subq $16,%r8 movq $0,(%rdx) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq 
%xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_949 .L_small_initial_partial_block_949: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_949: orq %r8,%r8 je .L_after_reduction_949 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_949: jmp .L_last_blocks_done_933 .L_last_num_blocks_is_9_933: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $247,%r15d jae .L_16_blocks_overflow_950 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %xmm27,%xmm3,%xmm4 jmp .L_16_blocks_ok_950 .L_16_blocks_overflow_950: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 .L_16_blocks_ok_950: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %xmm30,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc 
%xmm31,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %xmm30,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %xmm31,%xmm4,%xmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %xmm30,%xmm4,%xmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %xmm20,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %xmm29,%xmm20,%xmm20 vextracti32x4 $0,%zmm20,%xmm7 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_951 subq $16,%r8 movq $0,(%rdx) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq 
$0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_951 .L_small_initial_partial_block_951: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_951: orq %r8,%r8 je .L_after_reduction_951 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_951: jmp .L_last_blocks_done_933 .L_last_num_blocks_is_10_933: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $246,%r15d jae .L_16_blocks_overflow_952 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %ymm27,%ymm3,%ymm4 jmp .L_16_blocks_ok_952 .L_16_blocks_overflow_952: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 .L_16_blocks_ok_952: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %ymm30,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 
96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %ymm30,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %ymm31,%ymm4,%ymm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %ymm30,%ymm4,%ymm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %ymm20,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %ymm29,%ymm20,%ymm20 vextracti32x4 $1,%zmm20,%xmm7 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_953 subq $16,%r8 movq $0,(%rdx) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_953 .L_small_initial_partial_block_953: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq 
$0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_953: orq %r8,%r8 je .L_after_reduction_953 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_953: jmp .L_last_blocks_done_933 .L_last_num_blocks_is_11_933: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $245,%r15d jae .L_16_blocks_overflow_954 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_954 .L_16_blocks_overflow_954: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_954: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 
128(%rcx,%r11,1),%zmm20{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $2,%zmm20,%xmm7 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_955 subq $16,%r8 movq $0,(%rdx) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_955 .L_small_initial_partial_block_955: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq 
$0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_955: orq %r8,%r8 je .L_after_reduction_955 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_955: jmp .L_last_blocks_done_933 .L_last_num_blocks_is_12_933: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $128,%rax kmovq (%r10,%rax,8),%k1 cmpl $244,%r15d jae .L_16_blocks_overflow_956 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 jmp .L_16_blocks_ok_956 .L_16_blocks_overflow_956: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 .L_16_blocks_ok_956: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm4,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} 
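# The masked vmovdqu8 above pulls in the trailing partial 64-byte chunk under %k1 (mask taken from byte64_len_to_mask_table); the remaining AES rounds and the GHASH products for the previous chunk continue below.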
vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm20,%zmm20{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vextracti32x4 $3,%zmm20,%xmm7 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_957 subq $16,%r8 movq $0,(%rdx) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_957 .L_small_initial_partial_block_957: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq 
$0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vpxorq %zmm8,%zmm0,%zmm8 vpxorq %zmm22,%zmm3,%zmm22 vpxorq %zmm30,%zmm4,%zmm30 vpxorq %zmm31,%zmm5,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_957: orq %r8,%r8 je .L_after_reduction_957 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_957: jmp .L_last_blocks_done_933 .L_last_num_blocks_is_13_933: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $243,%r15d jae .L_16_blocks_overflow_958 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %xmm27,%xmm4,%xmm5 jmp .L_16_blocks_ok_958 .L_16_blocks_overflow_958: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %xmm29,%xmm5,%xmm5 .L_16_blocks_ok_958: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $0,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %xmm30,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc 
%xmm30,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %xmm30,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %xmm31,%xmm5,%xmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %xmm30,%xmm5,%xmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %xmm21,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %xmm29,%xmm21,%xmm21 vextracti32x4 $0,%zmm21,%xmm7 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_959 subq $16,%r8 movq $0,(%rdx) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 
$1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_959 .L_small_initial_partial_block_959: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 160(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 224(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 288(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vpxorq %zmm26,%zmm30,%zmm30 vpxorq %zmm24,%zmm8,%zmm8 vpxorq %zmm25,%zmm22,%zmm22 vpxorq %zmm31,%zmm30,%zmm30 vpsrldq $8,%zmm30,%zmm4 vpslldq $8,%zmm30,%zmm5 vpxorq %zmm4,%zmm8,%zmm0 vpxorq %zmm5,%zmm22,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_959: orq %r8,%r8 je .L_after_reduction_959 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_959: jmp .L_last_blocks_done_933 .L_last_num_blocks_is_14_933: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $242,%r15d jae .L_16_blocks_overflow_960 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %ymm27,%ymm4,%ymm5 jmp .L_16_blocks_ok_960 .L_16_blocks_overflow_960: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 .L_16_blocks_ok_960: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $1,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %ymm30,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 
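# GHASH is interleaved with the AES rounds: each data/hash-key pair produces four carry-less products (imm 0x11 = high, 0x00 = low, 0x01 and 0x10 = middle halves), which are merged further down with vpternlogq $0x96 (three-way XOR).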
vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %ymm30,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %ymm31,%ymm5,%ymm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %ymm30,%ymm5,%ymm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %ymm21,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %ymm29,%ymm21,%ymm21 vextracti32x4 $1,%zmm21,%xmm7 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_961 subq $16,%r8 movq $0,(%rdx) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 
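# Tail GHASH for the final blocks: they are multiplied by hash-key powers read straight from the context table at (%rsi), and the products, together with the sums carried in %zmm24-%zmm26 from the interleaved pass, are folded with vpternlogq before the final reduction.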
vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_961 .L_small_initial_partial_block_961: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 144(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 208(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 272(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 336(%rsi),%xmm1 vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_961: orq %r8,%r8 je .L_after_reduction_961 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_961: jmp .L_last_blocks_done_933 .L_last_num_blocks_is_15_933: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $241,%r15d jae .L_16_blocks_overflow_962 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_962 .L_16_blocks_overflow_962: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 
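# Overflow-safe counter update: when the cmpl/jae check at the top of this block indicates the counter's low byte could wrap, the counters are byte-reflected with SHUF_MASK (%zmm29), incremented with ordinary 32-bit adds from ddq_add_1234/ddq_add_4444, and reflected back by the surrounding vpshufb instructions.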
vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_962: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $2,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq 
%zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $2,%zmm21,%xmm7 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_963 subq $16,%r8 movq $0,(%rdx) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_963 .L_small_initial_partial_block_963: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 128(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 192(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 256(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 320(%rsi),%ymm1 vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq 
%xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_963: orq %r8,%r8 je .L_after_reduction_963 vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_963: jmp .L_last_blocks_done_933 .L_last_num_blocks_is_16_933: leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%rax subq $192,%rax kmovq (%r10,%rax,8),%k1 cmpl $240,%r15d jae .L_16_blocks_overflow_964 vpaddd %zmm28,%zmm2,%zmm0 vpaddd %zmm27,%zmm0,%zmm3 vpaddd %zmm27,%zmm3,%zmm4 vpaddd %zmm27,%zmm4,%zmm5 jmp .L_16_blocks_ok_964 .L_16_blocks_overflow_964: vpshufb %zmm29,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vmovdqa64 ddq_add_4444(%rip),%zmm5 vpaddd %zmm5,%zmm0,%zmm3 vpaddd %zmm5,%zmm3,%zmm4 vpaddd %zmm5,%zmm4,%zmm5 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 .L_16_blocks_ok_964: vbroadcastf64x2 0(%rdi),%zmm30 vpxorq 768(%rsp),%zmm14,%zmm8 vmovdqu64 0(%rsp,%rbx,1),%zmm1 vextracti32x4 $3,%zmm5,%xmm2 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vbroadcastf64x2 16(%rdi),%zmm31 vmovdqu64 64(%rsp,%rbx,1),%zmm18 vmovdqa64 832(%rsp),%zmm22 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm30,%zmm3,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpxorq %zmm30,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm30 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 vmovdqu64 128(%rsp,%rbx,1),%zmm1 vmovdqa64 896(%rsp),%zmm8 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm18 vmovdqa64 960(%rsp),%zmm22 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm30 vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm31 vpternlogq $0x96,%zmm17,%zmm12,%zmm14 vpternlogq $0x96,%zmm19,%zmm13,%zmm7 vpternlogq $0x96,%zmm21,%zmm16,%zmm11 vpternlogq $0x96,%zmm20,%zmm15,%zmm10 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm30 vmovdqu8 0(%rcx,%r11,1),%zmm17 vmovdqu8 64(%rcx,%r11,1),%zmm19 vmovdqu8 128(%rcx,%r11,1),%zmm20 vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm31 vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm30 vpternlogq $0x96,%zmm16,%zmm11,%zmm10 vpxorq %zmm12,%zmm14,%zmm24 vpxorq %zmm13,%zmm7,%zmm25 vpxorq %zmm15,%zmm10,%zmm26 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm31 
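# AES rounds: each 128-bit round key is broadcast to every lane with vbroadcastf64x2 from the schedule at (%rdi); this branch walks keys 0 through 224 and finishes with vaesenclast, i.e. the full 14-round (256-bit key) schedule.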
vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm31 vaesenc %zmm30,%zmm0,%zmm0 vaesenc %zmm30,%zmm3,%zmm3 vaesenc %zmm30,%zmm4,%zmm4 vaesenc %zmm30,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm30 vaesenc %zmm31,%zmm0,%zmm0 vaesenc %zmm31,%zmm3,%zmm3 vaesenc %zmm31,%zmm4,%zmm4 vaesenc %zmm31,%zmm5,%zmm5 vaesenclast %zmm30,%zmm0,%zmm0 vaesenclast %zmm30,%zmm3,%zmm3 vaesenclast %zmm30,%zmm4,%zmm4 vaesenclast %zmm30,%zmm5,%zmm5 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vpxorq %zmm20,%zmm4,%zmm4 vpxorq %zmm21,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm11 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm21,%zmm21{%k1}{z} vpshufb %zmm29,%zmm17,%zmm17 vpshufb %zmm29,%zmm19,%zmm19 vpshufb %zmm29,%zmm20,%zmm20 vpshufb %zmm29,%zmm21,%zmm21 vextracti32x4 $3,%zmm21,%xmm7 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_965: movq %r8,(%rdx) vmovdqu64 %xmm11,16(%rsi) vmovdqu64 112(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 vmovdqu64 176(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 vmovdqu64 240(%rsi),%zmm1 vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm0,%zmm17,%zmm8 vpternlogq $0x96,%zmm3,%zmm19,%zmm22 vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 vpternlogq $0x96,%zmm4,%zmm17,%zmm30 vpternlogq $0x96,%zmm5,%zmm19,%zmm31 vmovdqu64 304(%rsi),%ymm1 vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 vpxorq %zmm30,%zmm4,%zmm4 vpternlogq $0x96,%zmm31,%zmm26,%zmm5 vpternlogq $0x96,%zmm8,%zmm24,%zmm0 vpternlogq $0x96,%zmm22,%zmm25,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm30 vpslldq $8,%zmm4,%zmm31 vpxorq %zmm30,%zmm0,%zmm0 vpxorq %zmm31,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm30 vpxorq %ymm30,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm30 vpxorq %xmm30,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm31 vpxorq %ymm31,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm31 vpxorq %xmm31,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm1 vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_965: vpxorq %xmm7,%xmm14,%xmm14 .L_after_reduction_965: jmp .L_last_blocks_done_933 .L_last_num_blocks_is_0_933: vmovdqa64 768(%rsp),%zmm13 vpxorq %zmm14,%zmm13,%zmm13 vmovdqu64 0(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 832(%rsp),%zmm13 vmovdqu64 64(%rsp,%rbx,1),%zmm12 
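# .L_last_num_blocks_is_0 path: no residual counter blocks remain, so only GHASH work is left: the vectors saved at 768..960(%rsp), with the running hash %zmm14 XORed into the first one, are multiplied by the precomputed hash-key powers on the stack (indexed via %rbx) and reduced into %xmm14.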
vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpxorq %zmm10,%zmm4,%zmm26 vpxorq %zmm6,%zmm0,%zmm24 vpxorq %zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vmovdqa64 896(%rsp),%zmm13 vmovdqu64 128(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 vmovdqa64 960(%rsp),%zmm13 vmovdqu64 192(%rsp,%rbx,1),%zmm12 vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 vpternlogq $0x96,%zmm10,%zmm4,%zmm26 vpternlogq $0x96,%zmm6,%zmm0,%zmm24 vpternlogq $0x96,%zmm7,%zmm3,%zmm25 vpternlogq $0x96,%zmm11,%zmm5,%zmm26 vpsrldq $8,%zmm26,%zmm0 vpslldq $8,%zmm26,%zmm3 vpxorq %zmm0,%zmm24,%zmm24 vpxorq %zmm3,%zmm25,%zmm25 vextracti64x4 $1,%zmm24,%ymm0 vpxorq %ymm0,%ymm24,%ymm24 vextracti32x4 $1,%ymm24,%xmm0 vpxorq %xmm0,%xmm24,%xmm24 vextracti64x4 $1,%zmm25,%ymm3 vpxorq %ymm3,%ymm25,%ymm25 vextracti32x4 $1,%ymm25,%xmm3 vpxorq %xmm3,%xmm25,%xmm25 vmovdqa64 POLY2(%rip),%xmm4 vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 vpslldq $8,%xmm0,%xmm0 vpxorq %xmm0,%xmm25,%xmm0 vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 vpsrldq $4,%xmm3,%xmm3 vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm24,%xmm3,%xmm14 .L_last_blocks_done_933: vpshufb %xmm29,%xmm2,%xmm2 jmp .L_ghash_done_821 .L_message_below_equal_16_blocks_821: movl %r8d,%r12d addl $15,%r12d shrl $4,%r12d cmpq $8,%r12 je .L_small_initial_num_blocks_is_8_966 jl .L_small_initial_num_blocks_is_7_1_966 cmpq $12,%r12 je .L_small_initial_num_blocks_is_12_966 jl .L_small_initial_num_blocks_is_11_9_966 cmpq $16,%r12 je .L_small_initial_num_blocks_is_16_966 cmpq $15,%r12 je .L_small_initial_num_blocks_is_15_966 cmpq $14,%r12 je .L_small_initial_num_blocks_is_14_966 jmp .L_small_initial_num_blocks_is_13_966 .L_small_initial_num_blocks_is_11_9_966: cmpq $11,%r12 je .L_small_initial_num_blocks_is_11_966 cmpq $10,%r12 je .L_small_initial_num_blocks_is_10_966 jmp .L_small_initial_num_blocks_is_9_966 .L_small_initial_num_blocks_is_7_1_966: cmpq $4,%r12 je .L_small_initial_num_blocks_is_4_966 jl .L_small_initial_num_blocks_is_3_1_966 cmpq $7,%r12 je .L_small_initial_num_blocks_is_7_966 cmpq $6,%r12 je .L_small_initial_num_blocks_is_6_966 jmp .L_small_initial_num_blocks_is_5_966 .L_small_initial_num_blocks_is_3_1_966: cmpq $3,%r12 je .L_small_initial_num_blocks_is_3_966 cmpq $2,%r12 je .L_small_initial_num_blocks_is_2_966 .L_small_initial_num_blocks_is_1_966: vmovdqa64 SHUF_MASK(%rip),%xmm29 vpaddd ONE(%rip),%xmm2,%xmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm0,%xmm2 vpshufb %xmm29,%xmm0,%xmm0 vmovdqu8 0(%rcx,%r11,1),%xmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %xmm15,%xmm0,%xmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 
176(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %xmm15,%xmm0,%xmm0 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %xmm15,%xmm0,%xmm0 vpxorq %xmm6,%xmm0,%xmm0 vextracti32x4 $0,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %xmm29,%xmm6,%xmm6 vextracti32x4 $0,%zmm6,%xmm13 cmpq $16,%r8 jl .L_small_initial_partial_block_967 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_967 .L_small_initial_partial_block_967: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %xmm13,%xmm14,%xmm14 jmp .L_after_reduction_967 .L_small_initial_compute_done_967: .L_after_reduction_967: jmp .L_small_initial_blocks_encrypted_966 .L_small_initial_num_blocks_is_2_966: vmovdqa64 SHUF_MASK(%rip),%ymm29 vshufi64x2 $0,%ymm2,%ymm2,%ymm0 vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm0,%xmm2 vpshufb %ymm29,%ymm0,%ymm0 vmovdqu8 0(%rcx,%r11,1),%ymm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %ymm15,%ymm0,%ymm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %ymm15,%ymm0,%ymm0 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %ymm15,%ymm0,%ymm0 vpxorq %ymm6,%ymm0,%ymm0 vextracti32x4 $1,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %ymm29,%ymm6,%ymm6 vextracti32x4 $1,%zmm6,%xmm13 subq $16 * (2 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_968 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 
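/* Descriptive note (added): standard GHASH tail. The 512-bit hi/lo accumulators are
   XOR-folded down to 128 bits (vextracti64x4/vextracti32x4 + vpxorq), then reduced
   modulo the GF(2^128) polynomial using the POLY2 constant via two vpclmulqdq steps
   plus byte shifts, leaving the running hash in %xmm14. */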
vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_968 .L_small_initial_partial_block_968: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_968: orq %r8,%r8 je .L_after_reduction_968 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_968: jmp .L_small_initial_blocks_encrypted_966 .L_small_initial_num_blocks_is_3_966: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $2,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vextracti32x4 $2,%zmm6,%xmm13 subq $16 * (3 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_969 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 
POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_969 .L_small_initial_partial_block_969: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_969: orq %r8,%r8 je .L_after_reduction_969 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_969: jmp .L_small_initial_blocks_encrypted_966 .L_small_initial_num_blocks_is_4_966: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm0,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vpxorq %zmm6,%zmm0,%zmm0 vextracti32x4 $3,%zmm0,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} vmovdqu8 %zmm0,%zmm0{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vextracti32x4 $3,%zmm6,%xmm13 subq $16 * (4 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_970 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq 
$0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_970 .L_small_initial_partial_block_970: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_970: orq %r8,%r8 je .L_after_reduction_970 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_970: jmp .L_small_initial_blocks_encrypted_966 .L_small_initial_num_blocks_is_5_966: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %xmm29,%xmm3,%xmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%xmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %xmm15,%xmm3,%xmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %xmm15,%xmm3,%xmm3 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %xmm15,%xmm3,%xmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %xmm7,%xmm3,%xmm3 vextracti32x4 $0,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %xmm29,%xmm7,%xmm7 vextracti32x4 $0,%zmm7,%xmm13 subq $16 * (5 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_971 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 
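/* Descriptive note (added; key-table offsets inferred from their use in this file):
   5-block GHASH. The four full blocks in %zmm6 are multiplied by the matching hash-key
   powers loaded from 272(%rsi); the trailing block in %xmm7 is multiplied below by the
   single-block hash key at 336(%rsi), and all partial products are XOR-merged before
   the shared POLY2 reduction. */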
vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_971 .L_small_initial_partial_block_971: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_971: orq %r8,%r8 je .L_after_reduction_971 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_971: jmp .L_small_initial_blocks_encrypted_966 .L_small_initial_num_blocks_is_6_966: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %ymm29,%ymm3,%ymm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%ymm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %ymm15,%ymm3,%ymm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 
vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %ymm15,%ymm3,%ymm3 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %ymm15,%ymm3,%ymm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %ymm7,%ymm3,%ymm3 vextracti32x4 $1,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %ymm29,%ymm7,%ymm7 vextracti32x4 $1,%zmm7,%xmm13 subq $16 * (6 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_972 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_972 .L_small_initial_partial_block_972: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_972: orq %r8,%r8 je .L_after_reduction_972 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_972: jmp .L_small_initial_blocks_encrypted_966 .L_small_initial_num_blocks_is_7_966: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq 
%zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $2,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vextracti32x4 $2,%zmm7,%xmm13 subq $16 * (7 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_973 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_973 .L_small_initial_partial_block_973: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 
vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_973: orq %r8,%r8 je .L_after_reduction_973 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_973: jmp .L_small_initial_blocks_encrypted_966 .L_small_initial_num_blocks_is_8_966: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $64,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm3,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vextracti32x4 $3,%zmm3,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} vmovdqu8 %zmm3,%zmm3{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vextracti32x4 $3,%zmm7,%xmm13 subq $16 * (8 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_974 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq 
$0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_974 .L_small_initial_partial_block_974: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_974: orq %r8,%r8 je .L_after_reduction_974 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_974: jmp .L_small_initial_blocks_encrypted_966 .L_small_initial_num_blocks_is_9_966: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %xmm29,%xmm4,%xmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%xmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %xmm15,%xmm4,%xmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 
192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %xmm15,%xmm4,%xmm4 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %xmm15,%xmm4,%xmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %xmm10,%xmm4,%xmm4 vextracti32x4 $0,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %xmm29,%xmm10,%xmm10 vextracti32x4 $0,%zmm10,%xmm13 subq $16 * (9 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_975 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_975 .L_small_initial_partial_block_975: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_975: orq %r8,%r8 je .L_after_reduction_975 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_975: jmp 
.L_small_initial_blocks_encrypted_966 .L_small_initial_num_blocks_is_10_966: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %ymm29,%ymm4,%ymm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%ymm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %ymm15,%ymm4,%ymm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %ymm15,%ymm4,%ymm4 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %ymm15,%ymm4,%ymm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %ymm10,%ymm4,%ymm4 vextracti32x4 $1,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %ymm29,%ymm10,%ymm10 vextracti32x4 $1,%zmm10,%xmm13 subq $16 * (10 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_976 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq 
$8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_976 .L_small_initial_partial_block_976: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_976: orq %r8,%r8 je .L_after_reduction_976 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_976: jmp .L_small_initial_blocks_encrypted_966 .L_small_initial_num_blocks_is_11_966: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 
112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $2,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vextracti32x4 $2,%zmm10,%xmm13 subq $16 * (11 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_977 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_977 .L_small_initial_partial_block_977: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 vpclmulqdq 
$0x00,%ymm20,%ymm10,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_977: orq %r8,%r8 je .L_after_reduction_977 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_977: jmp .L_small_initial_blocks_encrypted_966 .L_small_initial_num_blocks_is_12_966: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $128,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm4,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vextracti32x4 $3,%zmm4,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} vmovdqu8 %zmm4,%zmm4{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vextracti32x4 $3,%zmm10,%xmm13 subq $16 * (12 - 1),%r8 cmpq $16,%r8 jl 
.L_small_initial_partial_block_978 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_978 .L_small_initial_partial_block_978: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vpxorq %zmm15,%zmm0,%zmm15 vpxorq %zmm16,%zmm3,%zmm16 vpxorq %zmm17,%zmm4,%zmm17 vpxorq %zmm19,%zmm5,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_978: orq %r8,%r8 je .L_after_reduction_978 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_978: jmp .L_small_initial_blocks_encrypted_966 .L_small_initial_num_blocks_is_13_966: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $0,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb 
%xmm29,%xmm5,%xmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%xmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %xmm15,%xmm5,%xmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %xmm15,%xmm5,%xmm5 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %xmm15,%xmm5,%xmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %xmm11,%xmm5,%xmm5 vextracti32x4 $0,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %xmm29,%xmm11,%xmm11 vextracti32x4 $0,%zmm11,%xmm13 subq $16 * (13 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_979 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq 
$0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_979 .L_small_initial_partial_block_979: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 160(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 224(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 288(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vpxorq %zmm19,%zmm17,%zmm17 vpsrldq $8,%zmm17,%zmm4 vpslldq $8,%zmm17,%zmm5 vpxorq %zmm4,%zmm15,%zmm0 vpxorq %zmm5,%zmm16,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_979: orq %r8,%r8 je .L_after_reduction_979 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_979: jmp .L_small_initial_blocks_encrypted_966 .L_small_initial_num_blocks_is_14_966: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $1,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %ymm29,%ymm5,%ymm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%ymm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %ymm15,%ymm5,%ymm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 
vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %ymm15,%ymm5,%ymm5 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %ymm15,%ymm5,%ymm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %ymm11,%ymm5,%ymm5 vextracti32x4 $1,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %ymm29,%ymm11,%ymm11 vextracti32x4 $1,%zmm11,%xmm13 subq $16 * (14 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_980 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq 
$0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_980 .L_small_initial_partial_block_980: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 144(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 208(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 272(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 336(%rsi),%xmm20 vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_980: orq %r8,%r8 je .L_after_reduction_980 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_980: jmp .L_small_initial_blocks_encrypted_966 .L_small_initial_num_blocks_is_15_966: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $2,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc 
%zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $2,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vextracti32x4 $2,%zmm11,%xmm13 subq $16 * (15 - 1),%r8 cmpq $16,%r8 jl .L_small_initial_partial_block_981 subq $16,%r8 movq $0,(%rdx) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 jmp .L_small_initial_compute_done_981 
.L_small_initial_partial_block_981: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 128(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 192(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 256(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 320(%rsi),%ymm20 vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_981: orq %r8,%r8 je .L_after_reduction_981 vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_981: jmp .L_small_initial_blocks_encrypted_966 .L_small_initial_num_blocks_is_16_966: vmovdqa64 SHUF_MASK(%rip),%zmm29 vshufi64x2 $0,%zmm2,%zmm2,%zmm2 vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 leaq byte64_len_to_mask_table(%rip),%r10 movq %r8,%r15 subq $192,%r15 kmovq (%r10,%r15,8),%k1 vextracti32x4 $3,%zmm5,%xmm2 vpshufb %zmm29,%zmm0,%zmm0 vpshufb %zmm29,%zmm3,%zmm3 vpshufb %zmm29,%zmm4,%zmm4 vpshufb %zmm29,%zmm5,%zmm5 vmovdqu8 0(%rcx,%r11,1),%zmm6 vmovdqu8 64(%rcx,%r11,1),%zmm7 vmovdqu8 128(%rcx,%r11,1),%zmm10 vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} vbroadcastf64x2 0(%rdi),%zmm15 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm15,%zmm3,%zmm3 vpxorq %zmm15,%zmm4,%zmm4 vpxorq %zmm15,%zmm5,%zmm5 vbroadcastf64x2 16(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 32(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 48(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 64(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 80(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 96(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 112(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc 
%zmm15,%zmm5,%zmm5 vbroadcastf64x2 128(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 144(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 160(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 176(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 192(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 208(%rdi),%zmm15 vaesenc %zmm15,%zmm0,%zmm0 vaesenc %zmm15,%zmm3,%zmm3 vaesenc %zmm15,%zmm4,%zmm4 vaesenc %zmm15,%zmm5,%zmm5 vbroadcastf64x2 224(%rdi),%zmm15 vaesenclast %zmm15,%zmm0,%zmm0 vaesenclast %zmm15,%zmm3,%zmm3 vaesenclast %zmm15,%zmm4,%zmm4 vaesenclast %zmm15,%zmm5,%zmm5 vpxorq %zmm6,%zmm0,%zmm0 vpxorq %zmm7,%zmm3,%zmm3 vpxorq %zmm10,%zmm4,%zmm4 vpxorq %zmm11,%zmm5,%zmm5 vextracti32x4 $3,%zmm5,%xmm12 movq %r9,%r10 vmovdqu8 %zmm0,0(%r10,%r11,1) vmovdqu8 %zmm3,64(%r10,%r11,1) vmovdqu8 %zmm4,128(%r10,%r11,1) vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} vmovdqu8 %zmm5,%zmm5{%k1}{z} vpshufb %zmm29,%zmm6,%zmm6 vpshufb %zmm29,%zmm7,%zmm7 vpshufb %zmm29,%zmm10,%zmm10 vpshufb %zmm29,%zmm11,%zmm11 vextracti32x4 $3,%zmm11,%xmm13 subq $16 * (16 - 1),%r8 .L_small_initial_partial_block_982: movq %r8,(%rdx) vmovdqu64 %xmm12,16(%rsi) vpxorq %zmm14,%zmm6,%zmm6 vmovdqu64 112(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 vmovdqu64 176(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 vmovdqu64 240(%rsi),%zmm20 vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm0,%zmm6,%zmm15 vpternlogq $0x96,%zmm3,%zmm7,%zmm16 vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 vpternlogq $0x96,%zmm4,%zmm6,%zmm17 vpternlogq $0x96,%zmm5,%zmm7,%zmm19 vmovdqu64 304(%rsi),%ymm20 vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 vpxorq %zmm17,%zmm4,%zmm4 vpxorq %zmm19,%zmm5,%zmm5 vpxorq %zmm15,%zmm0,%zmm0 vpxorq %zmm16,%zmm3,%zmm3 vpxorq %zmm5,%zmm4,%zmm4 vpsrldq $8,%zmm4,%zmm17 vpslldq $8,%zmm4,%zmm19 vpxorq %zmm17,%zmm0,%zmm0 vpxorq %zmm19,%zmm3,%zmm3 vextracti64x4 $1,%zmm0,%ymm17 vpxorq %ymm17,%ymm0,%ymm0 vextracti32x4 $1,%ymm0,%xmm17 vpxorq %xmm17,%xmm0,%xmm0 vextracti64x4 $1,%zmm3,%ymm19 vpxorq %ymm19,%ymm3,%ymm3 vextracti32x4 $1,%ymm3,%xmm19 vpxorq %xmm19,%xmm3,%xmm3 vmovdqa64 POLY2(%rip),%xmm20 vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 vpslldq $8,%xmm4,%xmm4 vpxorq %xmm4,%xmm3,%xmm4 vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 vpsrldq $4,%xmm5,%xmm5 vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 vpslldq $4,%xmm14,%xmm14 vpternlogq $0x96,%xmm0,%xmm5,%xmm14 .L_small_initial_compute_done_982: vpxorq %xmm13,%xmm14,%xmm14 .L_after_reduction_982: .L_small_initial_blocks_encrypted_966: .L_ghash_done_821: vmovdqu64 %xmm2,0(%rsi) vmovdqu64 %xmm14,64(%rsi) .L_enc_dec_done_821: jmp .Lexit_gcm_decrypt .Lexit_gcm_decrypt: cmpq $256,%r8 jbe .Lskip_hkeys_cleanup_983 vpxor %xmm0,%xmm0,%xmm0 vmovdqa64 %zmm0,0(%rsp) 
vmovdqa64 %zmm0,64(%rsp)
vmovdqa64 %zmm0,128(%rsp)
vmovdqa64 %zmm0,192(%rsp)
vmovdqa64 %zmm0,256(%rsp)
vmovdqa64 %zmm0,320(%rsp)
vmovdqa64 %zmm0,384(%rsp)
vmovdqa64 %zmm0,448(%rsp)
vmovdqa64 %zmm0,512(%rsp)
vmovdqa64 %zmm0,576(%rsp)
vmovdqa64 %zmm0,640(%rsp)
vmovdqa64 %zmm0,704(%rsp)
.Lskip_hkeys_cleanup_983:
# Common exit path: restore the stack pointer, pop the callee-saved
# registers pushed in the prologue and return.
vzeroupper
leaq (%rbp),%rsp
.cfi_def_cfa_register %rsp
popq %r15
.cfi_adjust_cfa_offset -8
.cfi_restore %r15
popq %r14
.cfi_adjust_cfa_offset -8
.cfi_restore %r14
popq %r13
.cfi_adjust_cfa_offset -8
.cfi_restore %r13
popq %r12
.cfi_adjust_cfa_offset -8
.cfi_restore %r12
popq %rbp
.cfi_adjust_cfa_offset -8
.cfi_restore %rbp
popq %rbx
.cfi_adjust_cfa_offset -8
.cfi_restore %rbx
.byte 0xf3,0xc3 # rep ret
.Ldecrypt_seh_end:
.cfi_endproc
.size ossl_aes_gcm_decrypt_avx512, .-ossl_aes_gcm_decrypt_avx512

# ossl_aes_gcm_finalize_avx512:
# Folds a pending partial block (when the second argument, %rsi, is non-zero)
# and the two stored 64-bit byte counts at 48(%rdi)/56(%rdi) (shifted left by
# 3 to give bit counts) into the GHASH accumulator at 64(%rdi), byte-swaps
# the result, XORs it with the block saved at 32(%rdi) and stores the tag
# material back at 64(%rdi).
.globl ossl_aes_gcm_finalize_avx512
.type ossl_aes_gcm_finalize_avx512,@function
.align 32
ossl_aes_gcm_finalize_avx512:
.cfi_startproc
.byte 243,15,30,250 # endbr64
vmovdqu 336(%rdi),%xmm2
vmovdqu 32(%rdi),%xmm3
vmovdqu 64(%rdi),%xmm4
cmpq $0,%rsi
je .L_partial_done_984
vpclmulqdq $0x11,%xmm2,%xmm4,%xmm0
vpclmulqdq $0x00,%xmm2,%xmm4,%xmm16
vpclmulqdq $0x01,%xmm2,%xmm4,%xmm17
vpclmulqdq $0x10,%xmm2,%xmm4,%xmm4
vpxorq %xmm17,%xmm4,%xmm4
vpsrldq $8,%xmm4,%xmm17
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm17,%xmm0,%xmm0
vpxorq %xmm16,%xmm4,%xmm4
vmovdqu64 POLY2(%rip),%xmm17
vpclmulqdq $0x01,%xmm4,%xmm17,%xmm16
vpslldq $8,%xmm16,%xmm16
vpxorq %xmm16,%xmm4,%xmm4
vpclmulqdq $0x00,%xmm4,%xmm17,%xmm16
vpsrldq $4,%xmm16,%xmm16
vpclmulqdq $0x10,%xmm4,%xmm17,%xmm4
vpslldq $4,%xmm4,%xmm4
vpternlogq $0x96,%xmm16,%xmm0,%xmm4
.L_partial_done_984:
vmovq 56(%rdi),%xmm5
vpinsrq $1,48(%rdi),%xmm5,%xmm5
vpsllq $3,%xmm5,%xmm5
vpxor %xmm5,%xmm4,%xmm4
vpclmulqdq $0x11,%xmm2,%xmm4,%xmm0
vpclmulqdq $0x00,%xmm2,%xmm4,%xmm16
vpclmulqdq $0x01,%xmm2,%xmm4,%xmm17
vpclmulqdq $0x10,%xmm2,%xmm4,%xmm4
vpxorq %xmm17,%xmm4,%xmm4
vpsrldq $8,%xmm4,%xmm17
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm17,%xmm0,%xmm0
vpxorq %xmm16,%xmm4,%xmm4
vmovdqu64 POLY2(%rip),%xmm17
vpclmulqdq $0x01,%xmm4,%xmm17,%xmm16
vpslldq $8,%xmm16,%xmm16
vpxorq %xmm16,%xmm4,%xmm4
vpclmulqdq $0x00,%xmm4,%xmm17,%xmm16
vpsrldq $4,%xmm16,%xmm16
vpclmulqdq $0x10,%xmm4,%xmm17,%xmm4
vpslldq $4,%xmm4,%xmm4
vpternlogq $0x96,%xmm16,%xmm0,%xmm4
vpshufb SHUF_MASK(%rip),%xmm4,%xmm4
vpxor %xmm4,%xmm3,%xmm3
.L_return_T_984:
vmovdqu %xmm3,64(%rdi)
.Labort_finalize:
.byte 0xf3,0xc3 # rep ret
.cfi_endproc
.size ossl_aes_gcm_finalize_avx512, .-ossl_aes_gcm_finalize_avx512

# ossl_gcm_gmult_avx512:
# Multiplies the 128-bit value at (%rdi) by the hash key stored at 336(%rsi)
# in GF(2^128), reduces the product modulo the GCM polynomial (POLY2) and
# writes the result back to (%rdi).
.globl ossl_gcm_gmult_avx512
.hidden ossl_gcm_gmult_avx512
.type ossl_gcm_gmult_avx512,@function
.align 32
ossl_gcm_gmult_avx512:
.cfi_startproc
.byte 243,15,30,250 # endbr64
vmovdqu64 (%rdi),%xmm1
vmovdqu64 336(%rsi),%xmm2
vpclmulqdq $0x11,%xmm2,%xmm1,%xmm3
vpclmulqdq $0x00,%xmm2,%xmm1,%xmm4
vpclmulqdq $0x01,%xmm2,%xmm1,%xmm5
vpclmulqdq $0x10,%xmm2,%xmm1,%xmm1
vpxorq %xmm5,%xmm1,%xmm1
vpsrldq $8,%xmm1,%xmm5
vpslldq $8,%xmm1,%xmm1
vpxorq %xmm5,%xmm3,%xmm3
vpxorq %xmm4,%xmm1,%xmm1
vmovdqu64 POLY2(%rip),%xmm5
vpclmulqdq $0x01,%xmm1,%xmm5,%xmm4
vpslldq $8,%xmm4,%xmm4
vpxorq %xmm4,%xmm1,%xmm1
vpclmulqdq $0x00,%xmm1,%xmm5,%xmm4
vpsrldq $4,%xmm4,%xmm4
vpclmulqdq $0x10,%xmm1,%xmm5,%xmm1
vpslldq $4,%xmm1,%xmm1
vpternlogq $0x96,%xmm4,%xmm3,%xmm1
vmovdqu64 %xmm1,(%rdi)
vzeroupper
.Labort_gmult:
.byte 0xf3,0xc3 # rep ret
.cfi_endproc
.size ossl_gcm_gmult_avx512, .-ossl_gcm_gmult_avx512

# Read-only constants used by the routines above: reduction polynomials,
# byte-swap shuffle masks, counter-increment vectors and tail-length mask
# tables.
.section .rodata
.align 16
POLY:.quad 0x0000000000000001, 0xC200000000000000
.align 64
POLY2:
.quad 0x00000001C2000000, 0xC200000000000000
.quad 0x00000001C2000000, 0xC200000000000000
.quad 0x00000001C2000000, 0xC200000000000000
.quad 0x00000001C2000000, 0xC200000000000000
.align 16
TWOONE:.quad 0x0000000000000001, 0x0000000100000000
.align 64
SHUF_MASK:
.quad 0x08090A0B0C0D0E0F, 0x0001020304050607
.quad 0x08090A0B0C0D0E0F, 0x0001020304050607
.quad 0x08090A0B0C0D0E0F, 0x0001020304050607
.quad 0x08090A0B0C0D0E0F, 0x0001020304050607
.align 16
SHIFT_MASK:
.quad 0x0706050403020100, 0x0f0e0d0c0b0a0908
ALL_F:
.quad 0xffffffffffffffff, 0xffffffffffffffff
ZERO:
.quad 0x0000000000000000, 0x0000000000000000
.align 16
ONE:
.quad 0x0000000000000001, 0x0000000000000000
.align 16
ONEf:
.quad 0x0000000000000000, 0x0100000000000000
.align 64
ddq_add_1234:
.quad 0x0000000000000001, 0x0000000000000000
.quad 0x0000000000000002, 0x0000000000000000
.quad 0x0000000000000003, 0x0000000000000000
.quad 0x0000000000000004, 0x0000000000000000
.align 64
ddq_add_5678:
.quad 0x0000000000000005, 0x0000000000000000
.quad 0x0000000000000006, 0x0000000000000000
.quad 0x0000000000000007, 0x0000000000000000
.quad 0x0000000000000008, 0x0000000000000000
.align 64
ddq_add_4444:
.quad 0x0000000000000004, 0x0000000000000000
.quad 0x0000000000000004, 0x0000000000000000
.quad 0x0000000000000004, 0x0000000000000000
.quad 0x0000000000000004, 0x0000000000000000
.align 64
ddq_add_8888:
.quad 0x0000000000000008, 0x0000000000000000
.quad 0x0000000000000008, 0x0000000000000000
.quad 0x0000000000000008, 0x0000000000000000
.quad 0x0000000000000008, 0x0000000000000000
.align 64
ddq_addbe_1234:
.quad 0x0000000000000000, 0x0100000000000000
.quad 0x0000000000000000, 0x0200000000000000
.quad 0x0000000000000000, 0x0300000000000000
.quad 0x0000000000000000, 0x0400000000000000
.align 64
ddq_addbe_4444:
.quad 0x0000000000000000, 0x0400000000000000
.quad 0x0000000000000000, 0x0400000000000000
.quad 0x0000000000000000, 0x0400000000000000
.quad 0x0000000000000000, 0x0400000000000000
.align 64
byte_len_to_mask_table:
.value 0x0000, 0x0001, 0x0003, 0x0007
.value 0x000f, 0x001f, 0x003f, 0x007f
.value 0x00ff, 0x01ff, 0x03ff, 0x07ff
.value 0x0fff, 0x1fff, 0x3fff, 0x7fff
.value 0xffff
.align 64
byte64_len_to_mask_table:
.quad 0x0000000000000000, 0x0000000000000001
.quad 0x0000000000000003, 0x0000000000000007
.quad 0x000000000000000f, 0x000000000000001f
.quad 0x000000000000003f, 0x000000000000007f
.quad 0x00000000000000ff, 0x00000000000001ff
.quad 0x00000000000003ff, 0x00000000000007ff
.quad 0x0000000000000fff, 0x0000000000001fff
.quad 0x0000000000003fff, 0x0000000000007fff
.quad 0x000000000000ffff, 0x000000000001ffff
.quad 0x000000000003ffff, 0x000000000007ffff
.quad 0x00000000000fffff, 0x00000000001fffff
.quad 0x00000000003fffff, 0x00000000007fffff
.quad 0x0000000000ffffff, 0x0000000001ffffff
.quad 0x0000000003ffffff, 0x0000000007ffffff
.quad 0x000000000fffffff, 0x000000001fffffff
.quad 0x000000003fffffff, 0x000000007fffffff
.quad 0x00000000ffffffff, 0x00000001ffffffff
.quad 0x00000003ffffffff, 0x00000007ffffffff
.quad 0x0000000fffffffff, 0x0000001fffffffff
.quad 0x0000003fffffffff, 0x0000007fffffffff
.quad 0x000000ffffffffff, 0x000001ffffffffff
.quad 0x000003ffffffffff, 0x000007ffffffffff
.quad 0x00000fffffffffff, 0x00001fffffffffff
.quad 0x00003fffffffffff, 0x00007fffffffffff
.quad 0x0000ffffffffffff, 0x0001ffffffffffff
.quad 0x0003ffffffffffff, 0x0007ffffffffffff
.quad 0x000fffffffffffff, 0x001fffffffffffff
.quad 0x003fffffffffffff, 0x007fffffffffffff
.quad 0x00ffffffffffffff, 0x01ffffffffffffff
.quad 0x03ffffffffffffff, 0x07ffffffffffffff
.quad 0x0fffffffffffffff, 0x1fffffffffffffff
.quad 0x3fffffffffffffff, 0x7fffffffffffffff
.quad 0xffffffffffffffff
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:
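# The .note.gnu.property section above carries the GNU x86 feature-1 property
# (type 0xc0000002) with value 3, i.e. the IBT and SHSTK (Intel CET) bits; the
# IBT bit matches the endbr64 markers (.byte 243,15,30,250) placed at the
# function entry points above.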