; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64

; Insertion/shuffles of all-zero/all-bits/constants into v8i32->v8f32 sitofp conversion.
;
; Every function below overwrites some lanes of the <8 x i32> argument with
; constants and then converts the whole vector with sitofp. The "insert"
; variants write lanes 0, 2, 4 and 5 through a chain of insertelement
; instructions; the "shuffle" variants replace the even lanes (0, 2, 4, 6)
; with a single shufflevector whose second operand is a constant vector.

; Zero inserted into lanes 0, 2, 4 and 5 before the conversion. The expected
; code for both targets materialises a zero register, blends it into the input
; and issues one vcvtdq2ps.
define <8 x float> @sitofp_insert_zero_v8i32(<8 x i32> %a0) {
; X86-LABEL: sitofp_insert_zero_v8i32:
; X86: # %bb.0:
; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_insert_zero_v8i32:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
; X64-NEXT: retq
  %1 = insertelement <8 x i32> %a0, i32 0, i32 0
  %2 = insertelement <8 x i32> %1, i32 0, i32 2
  %3 = insertelement <8 x i32> %2, i32 0, i32 4
  %4 = insertelement <8 x i32> %3, i32 0, i32 5
  %5 = sitofp <8 x i32> %4 to <8 x float>
  ret <8 x float> %5
}

; Zero placed in the even lanes with one shufflevector: mask indices 8, 10, 12
; and 14 pick the zero elements of the constant operand, while indices 1, 3, 5
; and 7 keep the matching lanes of %a0. The expected code is a zero register,
; an alternating blend and one vcvtdq2ps.
define <8 x float> @sitofp_shuffle_zero_v8i32(<8 x i32> %a0) {
; X86-LABEL: sitofp_shuffle_zero_v8i32:
; X86: # %bb.0:
; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_shuffle_zero_v8i32:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
; X64-NEXT: retq
  %1 = shufflevector <8 x i32> %a0, <8 x i32> <i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef, i32 0, i32 undef>, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  %2 = sitofp <8 x i32> %1 to <8 x float>
  ret <8 x float> %2
}

; All-ones (-1) inserted into lanes 0, 2, 4 and 5 before the conversion. The
; expected code builds the all-ones register with vxorps + vcmptrueps, blends
; it into the input and issues one vcvtdq2ps.
define <8 x float> @sitofp_insert_allbits_v8i32(<8 x i32> %a0) {
; X86-LABEL: sitofp_insert_allbits_v8i32:
; X86: # %bb.0:
; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_insert_allbits_v8i32:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
; X64-NEXT: retq
  %1 = insertelement <8 x i32> %a0, i32 -1, i32 0
  %2 = insertelement <8 x i32> %1, i32 -1, i32 2
  %3 = insertelement <8 x i32> %2, i32 -1, i32 4
  %4 = insertelement <8 x i32> %3, i32 -1, i32 5
  %5 = sitofp <8 x i32> %4 to <8 x float>
  ret <8 x float> %5
}

; All-ones (-1) placed in the even lanes with one shufflevector, using the same
; mask as the zero shuffle test. The expected code builds the all-ones register
; with vxorps + vcmptrueps, applies an alternating blend and issues one
; vcvtdq2ps.
define <8 x float> @sitofp_shuffle_allbits_v8i32(<8 x i32> %a0) {
; X86-LABEL: sitofp_shuffle_allbits_v8i32:
; X86: # %bb.0:
; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_shuffle_allbits_v8i32:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
; X64-NEXT: retq
  %1 = shufflevector <8 x i32> %a0, <8 x i32> <i32 -1, i32 undef, i32 -1, i32 undef, i32 -1, i32 undef, i32 -1, i32 undef>, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  %2 = sitofp <8 x i32> %1 to <8 x float>
  ret <8 x float> %2
}

; Mixed constants inserted before the sitofp conversion: 0 into lane 0, -1 into
; lane 2, 2 into lane 4 and -3 into lane 5. The expected code handles the two
; low-half lanes with zero / all-ones blends, then extracts the upper 128 bits,
; writes 2 and -3 with vpinsrd, reinserts that half and issues one vcvtdq2ps.
define <8 x float> @sitofp_insert_constants_v8i32(<8 x i32> %a0) {
; X86-LABEL: sitofp_insert_constants_v8i32:
; X86: # %bb.0:
; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7]
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-NEXT: movl $2, %eax
; X86-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
; X86-NEXT: movl $-3, %eax
; X86-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_insert_constants_v8i32:
; X64: # %bb.0:
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7]
; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-NEXT: movl $2, %eax
; X64-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
; X64-NEXT: movl $-3, %eax
; X64-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
; X64-NEXT: retq
  %1 = insertelement <8 x i32> %a0, i32 0, i32 0
  %2 = insertelement <8 x i32> %1, i32 -1, i32 2
  %3 = insertelement <8 x i32> %2, i32 2, i32 4
  %4 = insertelement <8 x i32> %3, i32 -3, i32 5
  %5 = sitofp <8 x i32> %4 to <8 x float>
  ret <8 x float> %5
}

; Mixed constants placed in the even lanes with one shufflevector: mask indices
; 8, 10, 12 and 14 pick 0, -1, 2 and -3 from the constant operand, while
; indices 1, 3, 5 and 7 keep the matching lanes of %a0. The expected code is a
; single vblendps with a memory operand followed by one vcvtdq2ps.
define <8 x float> @sitofp_shuffle_constants_v8i32(<8 x i32> %a0) {
; X86-LABEL: sitofp_shuffle_constants_v8i32:
; X86: # %bb.0:
; X86-NEXT: vblendps {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7]
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_shuffle_constants_v8i32:
; X64: # %bb.0:
; X64-NEXT: vblendps {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7]
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
; X64-NEXT: retq
  %1 = shufflevector <8 x i32> %a0, <8 x i32> <i32 0, i32 undef, i32 -1, i32 undef, i32 2, i32 undef, i32 -3, i32 undef>, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  %2 = sitofp <8 x i32> %1 to <8 x float>
  ret <8 x float> %2
}