; LLVM CodeGen regression test (X86 udiv combines); web-viewer navigation header removed.
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
      5 
      6 ; fold (udiv x, 1) -> x
; Identity fold: the divide disappears and codegen is a single register move
; (same output on all three RUN configurations, hence the shared CHECK prefix).
      7 define i32 @combine_udiv_by_one(i32 %x) {
      8 ; CHECK-LABEL: combine_udiv_by_one:
      9 ; CHECK:       # %bb.0:
     10 ; CHECK-NEXT:    movl %edi, %eax
     11 ; CHECK-NEXT:    retq
     12   %1 = udiv i32 %x, 1
     13   ret i32 %1
     14 }
     15 
; Vector form of (udiv x, 1) -> x: the fold removes the divide entirely,
; so the body is just retq (the input register is already the return value).
     16 define <4 x i32> @combine_vec_udiv_by_one(<4 x i32> %x) {
     17 ; CHECK-LABEL: combine_vec_udiv_by_one:
     18 ; CHECK:       # %bb.0:
     19 ; CHECK-NEXT:    retq
     20   %1 = udiv <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
     21   ret <4 x i32> %1
     22 }
     23 
     24 ; fold (udiv x, -1) -> select((icmp eq x, -1), 1, 0)
; x udiv UINT_MAX is 1 only when x == UINT_MAX; lowered branchlessly as a
; compare against -1 plus sete, with no divide instruction.
     25 define i32 @combine_udiv_by_negone(i32 %x) {
     26 ; CHECK-LABEL: combine_udiv_by_negone:
     27 ; CHECK:       # %bb.0:
     28 ; CHECK-NEXT:    xorl %eax, %eax
     29 ; CHECK-NEXT:    cmpl $-1, %edi
     30 ; CHECK-NEXT:    sete %al
     31 ; CHECK-NEXT:    retq
     32   %1 = udiv i32 %x, -1
     33   ret i32 %1
     34 }
     35 
; Vector form of the udiv-by-(-1) fold: pcmpeqd builds an all-ones vector,
; a second pcmpeqd compares x against it, and psrld $31 shifts each lane's
; sign bit down to yield 0 or 1 per lane.
     36 define <4 x i32> @combine_vec_udiv_by_negone(<4 x i32> %x) {
     37 ; SSE-LABEL: combine_vec_udiv_by_negone:
     38 ; SSE:       # %bb.0:
     39 ; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
     40 ; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
     41 ; SSE-NEXT:    psrld $31, %xmm0
     42 ; SSE-NEXT:    retq
     43 ;
     44 ; AVX-LABEL: combine_vec_udiv_by_negone:
     45 ; AVX:       # %bb.0:
     46 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
     47 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
     48 ; AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
     49 ; AVX-NEXT:    retq
     50   %1 = udiv <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
     51   ret <4 x i32> %1
     52 }
     53 
     54 ; fold (udiv x, INT_MIN) -> (srl x, 31)
; 0x80000000 is a power of two as an unsigned value, so the udiv becomes a
; single logical right shift by 31.
     55 define i32 @combine_udiv_by_minsigned(i32 %x) {
     56 ; CHECK-LABEL: combine_udiv_by_minsigned:
     57 ; CHECK:       # %bb.0:
     58 ; CHECK-NEXT:    shrl $31, %edi
     59 ; CHECK-NEXT:    movl %edi, %eax
     60 ; CHECK-NEXT:    retq
     61   %1 = udiv i32 %x, -2147483648
     62   ret i32 %1
     63 }
     64 
; Vector form of the INT_MIN fold: one psrld/vpsrld by 31 per register.
     65 define <4 x i32> @combine_vec_udiv_by_minsigned(<4 x i32> %x) {
     66 ; SSE-LABEL: combine_vec_udiv_by_minsigned:
     67 ; SSE:       # %bb.0:
     68 ; SSE-NEXT:    psrld $31, %xmm0
     69 ; SSE-NEXT:    retq
     70 ;
     71 ; AVX-LABEL: combine_vec_udiv_by_minsigned:
     72 ; AVX:       # %bb.0:
     73 ; AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
     74 ; AVX-NEXT:    retq
     75   %1 = udiv <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
     76   ret <4 x i32> %1
     77 }
     78 
     79 ; TODO fold (udiv x, x) -> 1
; Not yet combined: x/x still emits a real hardware divide (divl below).
; If the fold is ever implemented these CHECK lines must be regenerated.
     80 define i32 @combine_udiv_dupe(i32 %x) {
     81 ; CHECK-LABEL: combine_udiv_dupe:
     82 ; CHECK:       # %bb.0:
     83 ; CHECK-NEXT:    xorl %edx, %edx
     84 ; CHECK-NEXT:    movl %edi, %eax
     85 ; CHECK-NEXT:    divl %edi
     86 ; CHECK-NEXT:    retq
     87   %1 = udiv i32 %x, %x
     88   ret i32 %1
     89 }
     90 
; Vector x/x is likewise not folded: each of the four lanes is extracted,
; divided with a scalar divl, and re-inserted (pextrd/divl/pinsrd per lane).
     91 define <4 x i32> @combine_vec_udiv_dupe(<4 x i32> %x) {
     92 ; SSE-LABEL: combine_vec_udiv_dupe:
     93 ; SSE:       # %bb.0:
     94 ; SSE-NEXT:    pextrd $1, %xmm0, %eax
     95 ; SSE-NEXT:    xorl %edx, %edx
     96 ; SSE-NEXT:    divl %eax
     97 ; SSE-NEXT:    movl %eax, %ecx
     98 ; SSE-NEXT:    movd %xmm0, %eax
     99 ; SSE-NEXT:    xorl %edx, %edx
    100 ; SSE-NEXT:    divl %eax
    101 ; SSE-NEXT:    movd %eax, %xmm1
    102 ; SSE-NEXT:    pinsrd $1, %ecx, %xmm1
    103 ; SSE-NEXT:    pextrd $2, %xmm0, %eax
    104 ; SSE-NEXT:    xorl %edx, %edx
    105 ; SSE-NEXT:    divl %eax
    106 ; SSE-NEXT:    pinsrd $2, %eax, %xmm1
    107 ; SSE-NEXT:    pextrd $3, %xmm0, %eax
    108 ; SSE-NEXT:    xorl %edx, %edx
    109 ; SSE-NEXT:    divl %eax
    110 ; SSE-NEXT:    pinsrd $3, %eax, %xmm1
    111 ; SSE-NEXT:    movdqa %xmm1, %xmm0
    112 ; SSE-NEXT:    retq
    113 ;
    114 ; AVX-LABEL: combine_vec_udiv_dupe:
    115 ; AVX:       # %bb.0:
    116 ; AVX-NEXT:    vpextrd $1, %xmm0, %eax
    117 ; AVX-NEXT:    xorl %edx, %edx
    118 ; AVX-NEXT:    divl %eax
    119 ; AVX-NEXT:    movl %eax, %ecx
    120 ; AVX-NEXT:    vmovd %xmm0, %eax
    121 ; AVX-NEXT:    xorl %edx, %edx
    122 ; AVX-NEXT:    divl %eax
    123 ; AVX-NEXT:    vmovd %eax, %xmm1
    124 ; AVX-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
    125 ; AVX-NEXT:    vpextrd $2, %xmm0, %eax
    126 ; AVX-NEXT:    xorl %edx, %edx
    127 ; AVX-NEXT:    divl %eax
    128 ; AVX-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
    129 ; AVX-NEXT:    vpextrd $3, %xmm0, %eax
    130 ; AVX-NEXT:    xorl %edx, %edx
    131 ; AVX-NEXT:    divl %eax
    132 ; AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
    133 ; AVX-NEXT:    retq
    134   %1 = udiv <4 x i32> %x, %x
    135   ret <4 x i32> %1
    136 }
    137 
    138 ; fold (udiv x, (1 << c)) -> x >>u c
; Uniform power-of-two divisor (4) becomes one logical right shift by 2.
    139 define <4 x i32> @combine_vec_udiv_by_pow2a(<4 x i32> %x) {
    140 ; SSE-LABEL: combine_vec_udiv_by_pow2a:
    141 ; SSE:       # %bb.0:
    142 ; SSE-NEXT:    psrld $2, %xmm0
    143 ; SSE-NEXT:    retq
    144 ;
    145 ; AVX-LABEL: combine_vec_udiv_by_pow2a:
    146 ; AVX:       # %bb.0:
    147 ; AVX-NEXT:    vpsrld $2, %xmm0, %xmm0
    148 ; AVX-NEXT:    retq
    149   %1 = udiv <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
    150   ret <4 x i32> %1
    151 }
    152 
; Non-uniform power-of-two divisors <1,4,8,16>: SSE/AVX1 synthesize the
; per-lane shift counts with multiple psrld/vpsrld and pblendw merges;
; AVX2 does it in one variable shift (vpsrlvd) with a constant-pool operand.
    153 define <4 x i32> @combine_vec_udiv_by_pow2b(<4 x i32> %x) {
    154 ; SSE-LABEL: combine_vec_udiv_by_pow2b:
    155 ; SSE:       # %bb.0:
    156 ; SSE-NEXT:    movdqa %xmm0, %xmm2
    157 ; SSE-NEXT:    movdqa %xmm0, %xmm1
    158 ; SSE-NEXT:    psrld $3, %xmm1
    159 ; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
    160 ; SSE-NEXT:    psrld $4, %xmm0
    161 ; SSE-NEXT:    psrld $2, %xmm2
    162 ; SSE-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5,6,7]
    163 ; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
    164 ; SSE-NEXT:    movdqa %xmm1, %xmm0
    165 ; SSE-NEXT:    retq
    166 ;
    167 ; AVX1-LABEL: combine_vec_udiv_by_pow2b:
    168 ; AVX1:       # %bb.0:
    169 ; AVX1-NEXT:    vpsrld $4, %xmm0, %xmm1
    170 ; AVX1-NEXT:    vpsrld $2, %xmm0, %xmm2
    171 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
    172 ; AVX1-NEXT:    vpsrld $3, %xmm0, %xmm2
    173 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
    174 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
    175 ; AVX1-NEXT:    retq
    176 ;
    177 ; AVX2-LABEL: combine_vec_udiv_by_pow2b:
    178 ; AVX2:       # %bb.0:
    179 ; AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
    180 ; AVX2-NEXT:    retq
    181   %1 = udiv <4 x i32> %x, <i32 1, i32 4, i32 8, i32 16>
    182   ret <4 x i32> %1
    183 }
    184 
; udiv by (1 << y) with a runtime y: becomes a variable logical right shift.
; AVX2 maps this to a single vpsrlvd; SSE/AVX1 have no per-lane variable
; shift, so each lane count is isolated and used with psrld, then blended.
    185 define <4 x i32> @combine_vec_udiv_by_pow2c(<4 x i32> %x, <4 x i32> %y) {
    186 ; SSE-LABEL: combine_vec_udiv_by_pow2c:
    187 ; SSE:       # %bb.0:
    188 ; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
    189 ; SSE-NEXT:    movdqa %xmm0, %xmm3
    190 ; SSE-NEXT:    psrld %xmm2, %xmm3
    191 ; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
    192 ; SSE-NEXT:    pshuflw {{.*#+}} xmm4 = xmm2[2,3,3,3,4,5,6,7]
    193 ; SSE-NEXT:    movdqa %xmm0, %xmm5
    194 ; SSE-NEXT:    psrld %xmm4, %xmm5
    195 ; SSE-NEXT:    pblendw {{.*#+}} xmm5 = xmm3[0,1,2,3],xmm5[4,5,6,7]
    196 ; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
    197 ; SSE-NEXT:    movdqa %xmm0, %xmm3
    198 ; SSE-NEXT:    psrld %xmm1, %xmm3
    199 ; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,1,1,1,4,5,6,7]
    200 ; SSE-NEXT:    psrld %xmm1, %xmm0
    201 ; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
    202 ; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm5[2,3],xmm0[4,5],xmm5[6,7]
    203 ; SSE-NEXT:    retq
    204 ;
    205 ; AVX1-LABEL: combine_vec_udiv_by_pow2c:
    206 ; AVX1:       # %bb.0:
    207 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm2 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
    208 ; AVX1-NEXT:    vpsrld %xmm2, %xmm0, %xmm2
    209 ; AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm3
    210 ; AVX1-NEXT:    vpsrld %xmm3, %xmm0, %xmm3
    211 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
    212 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
    213 ; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm3 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
    214 ; AVX1-NEXT:    vpsrld %xmm3, %xmm0, %xmm3
    215 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
    216 ; AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
    217 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
    218 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
    219 ; AVX1-NEXT:    retq
    220 ;
    221 ; AVX2-LABEL: combine_vec_udiv_by_pow2c:
    222 ; AVX2:       # %bb.0:
    223 ; AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
    224 ; AVX2-NEXT:    retq
    225   %1 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
    226   %2 = udiv <4 x i32> %x, %1
    227   ret <4 x i32> %2
    228 }
    229 
    230 ; fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
; Here c = 4, so log2(c) = 2 is folded into the shift counts via the
; paddd/vpaddd of <2,2,2,2>, then lowered as a variable logical shift
; (vpsrlvd on AVX2; per-lane psrld + blends on SSE/AVX1).
    231 define <4 x i32> @combine_vec_udiv_by_shl_pow2a(<4 x i32> %x, <4 x i32> %y) {
    232 ; SSE-LABEL: combine_vec_udiv_by_shl_pow2a:
    233 ; SSE:       # %bb.0:
    234 ; SSE-NEXT:    paddd {{.*}}(%rip), %xmm1
    235 ; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
    236 ; SSE-NEXT:    movdqa %xmm0, %xmm3
    237 ; SSE-NEXT:    psrld %xmm2, %xmm3
    238 ; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
    239 ; SSE-NEXT:    pshuflw {{.*#+}} xmm4 = xmm2[2,3,3,3,4,5,6,7]
    240 ; SSE-NEXT:    movdqa %xmm0, %xmm5
    241 ; SSE-NEXT:    psrld %xmm4, %xmm5
    242 ; SSE-NEXT:    pblendw {{.*#+}} xmm5 = xmm3[0,1,2,3],xmm5[4,5,6,7]
    243 ; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
    244 ; SSE-NEXT:    movdqa %xmm0, %xmm3
    245 ; SSE-NEXT:    psrld %xmm1, %xmm3
    246 ; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,1,1,1,4,5,6,7]
    247 ; SSE-NEXT:    psrld %xmm1, %xmm0
    248 ; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
    249 ; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm5[2,3],xmm0[4,5],xmm5[6,7]
    250 ; SSE-NEXT:    retq
    251 ;
    252 ; AVX1-LABEL: combine_vec_udiv_by_shl_pow2a:
    253 ; AVX1:       # %bb.0:
    254 ; AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm1, %xmm1
    255 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm2 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
    256 ; AVX1-NEXT:    vpsrld %xmm2, %xmm0, %xmm2
    257 ; AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm3
    258 ; AVX1-NEXT:    vpsrld %xmm3, %xmm0, %xmm3
    259 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
    260 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
    261 ; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm3 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
    262 ; AVX1-NEXT:    vpsrld %xmm3, %xmm0, %xmm3
    263 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
    264 ; AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
    265 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
    266 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
    267 ; AVX1-NEXT:    retq
    268 ;
    269 ; AVX2-LABEL: combine_vec_udiv_by_shl_pow2a:
    270 ; AVX2:       # %bb.0:
    271 ; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2,2,2,2]
    272 ; AVX2-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
    273 ; AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
    274 ; AVX2-NEXT:    retq
    275   %1 = shl <4 x i32> <i32 4, i32 4, i32 4, i32 4>, %y
    276   %2 = udiv <4 x i32> %x, %1
    277   ret <4 x i32> %2
    278 }
    279 
; Same (shl c, y) fold with non-uniform c = <1,4,8,16>: the per-lane log2(c)
; values come from a constant-pool paddd/vpaddd, followed by the same
; variable-right-shift lowering as the uniform case above.
    280 define <4 x i32> @combine_vec_udiv_by_shl_pow2b(<4 x i32> %x, <4 x i32> %y) {
    281 ; SSE-LABEL: combine_vec_udiv_by_shl_pow2b:
    282 ; SSE:       # %bb.0:
    283 ; SSE-NEXT:    paddd {{.*}}(%rip), %xmm1
    284 ; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
    285 ; SSE-NEXT:    movdqa %xmm0, %xmm3
    286 ; SSE-NEXT:    psrld %xmm2, %xmm3
    287 ; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
    288 ; SSE-NEXT:    pshuflw {{.*#+}} xmm4 = xmm2[2,3,3,3,4,5,6,7]
    289 ; SSE-NEXT:    movdqa %xmm0, %xmm5
    290 ; SSE-NEXT:    psrld %xmm4, %xmm5
    291 ; SSE-NEXT:    pblendw {{.*#+}} xmm5 = xmm3[0,1,2,3],xmm5[4,5,6,7]
    292 ; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
    293 ; SSE-NEXT:    movdqa %xmm0, %xmm3
    294 ; SSE-NEXT:    psrld %xmm1, %xmm3
    295 ; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,1,1,1,4,5,6,7]
    296 ; SSE-NEXT:    psrld %xmm1, %xmm0
    297 ; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
    298 ; SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm5[2,3],xmm0[4,5],xmm5[6,7]
    299 ; SSE-NEXT:    retq
    300 ;
    301 ; AVX1-LABEL: combine_vec_udiv_by_shl_pow2b:
    302 ; AVX1:       # %bb.0:
    303 ; AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm1, %xmm1
    304 ; AVX1-NEXT:    vpsrldq {{.*#+}} xmm2 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
    305 ; AVX1-NEXT:    vpsrld %xmm2, %xmm0, %xmm2
    306 ; AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm3
    307 ; AVX1-NEXT:    vpsrld %xmm3, %xmm0, %xmm3
    308 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
    309 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
    310 ; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm3 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
    311 ; AVX1-NEXT:    vpsrld %xmm3, %xmm0, %xmm3
    312 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
    313 ; AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
    314 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
    315 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
    316 ; AVX1-NEXT:    retq
    317 ;
    318 ; AVX2-LABEL: combine_vec_udiv_by_shl_pow2b:
    319 ; AVX2:       # %bb.0:
    320 ; AVX2-NEXT:    vpaddd {{.*}}(%rip), %xmm1, %xmm1
    321 ; AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
    322 ; AVX2-NEXT:    retq
    323   %1 = shl <4 x i32> <i32 1, i32 4, i32 8, i32 16>, %y
    324   %2 = udiv <4 x i32> %x, %1
    325   ret <4 x i32> %2
    326 }
    327 
    328 ; fold (udiv x, c1)
; General constant divisor (23): lowered as a 64-bit magic-number multiply
; (0xB21642C9) followed by shrq $36 — no divide instruction is emitted.
    329 define i32 @combine_udiv_uniform(i32 %x) {
    330 ; CHECK-LABEL: combine_udiv_uniform:
    331 ; CHECK:       # %bb.0:
    332 ; CHECK-NEXT:    movl %edi, %ecx
    333 ; CHECK-NEXT:    movl $2987803337, %eax # imm = 0xB21642C9
    334 ; CHECK-NEXT:    imulq %rcx, %rax
    335 ; CHECK-NEXT:    shrq $36, %rax
    336 ; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
    337 ; CHECK-NEXT:    retq
    338   %1 = udiv i32 %x, 23
    339   ret i32 %1
    340 }
    341 
; Uniform i16 divisor (23): vectorized magic-number sequence — pmulhuw by
; 25645, then the subtract/shift/add fixup, then a final psrlw $4.
    342 define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
    343 ; SSE-LABEL: combine_vec_udiv_uniform:
    344 ; SSE:       # %bb.0:
    345 ; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [25645,25645,25645,25645,25645,25645,25645,25645]
    346 ; SSE-NEXT:    pmulhuw %xmm0, %xmm1
    347 ; SSE-NEXT:    psubw %xmm1, %xmm0
    348 ; SSE-NEXT:    psrlw $1, %xmm0
    349 ; SSE-NEXT:    paddw %xmm1, %xmm0
    350 ; SSE-NEXT:    psrlw $4, %xmm0
    351 ; SSE-NEXT:    retq
    352 ;
    353 ; AVX-LABEL: combine_vec_udiv_uniform:
    354 ; AVX:       # %bb.0:
    355 ; AVX-NEXT:    vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
    356 ; AVX-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
    357 ; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
    358 ; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
    359 ; AVX-NEXT:    vpsrlw $4, %xmm0, %xmm0
    360 ; AVX-NEXT:    retq
    361   %1 = udiv <8 x i16> %x, <i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23, i16 23>
    362   ret <8 x i16> %1
    363 }
    364 
; Non-uniform i16 divisors are fully scalarized: every lane is extracted
; (pextrw), divided via its own strategy — magic multiply (23, 34, -23, -256),
; pre-shift plus multiply (56), plain shift (128, -32768), or compare+sete
; (-1) — and re-inserted with pinsrw.
    365 define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
    366 ; SSE-LABEL: combine_vec_udiv_nonuniform:
    367 ; SSE:       # %bb.0:
    368 ; SSE-NEXT:    movd %xmm0, %eax
    369 ; SSE-NEXT:    movzwl %ax, %ecx
    370 ; SSE-NEXT:    imull $25645, %ecx, %ecx # imm = 0x642D
    371 ; SSE-NEXT:    shrl $16, %ecx
    372 ; SSE-NEXT:    subl %ecx, %eax
    373 ; SSE-NEXT:    movzwl %ax, %eax
    374 ; SSE-NEXT:    shrl %eax
    375 ; SSE-NEXT:    addl %ecx, %eax
    376 ; SSE-NEXT:    shrl $4, %eax
    377 ; SSE-NEXT:    movd %eax, %xmm1
    378 ; SSE-NEXT:    pextrw $1, %xmm0, %eax
    379 ; SSE-NEXT:    imull $61681, %eax, %eax # imm = 0xF0F1
    380 ; SSE-NEXT:    shrl $21, %eax
    381 ; SSE-NEXT:    pinsrw $1, %eax, %xmm1
    382 ; SSE-NEXT:    pextrw $2, %xmm0, %eax
    383 ; SSE-NEXT:    imull $8195, %eax, %eax # imm = 0x2003
    384 ; SSE-NEXT:    shrl $29, %eax
    385 ; SSE-NEXT:    pinsrw $2, %eax, %xmm1
    386 ; SSE-NEXT:    pextrw $3, %xmm0, %eax
    387 ; SSE-NEXT:    shrl $3, %eax
    388 ; SSE-NEXT:    imull $9363, %eax, %eax # imm = 0x2493
    389 ; SSE-NEXT:    shrl $16, %eax
    390 ; SSE-NEXT:    pinsrw $3, %eax, %xmm1
    391 ; SSE-NEXT:    pextrw $4, %xmm0, %eax
    392 ; SSE-NEXT:    shrl $7, %eax
    393 ; SSE-NEXT:    pinsrw $4, %eax, %xmm1
    394 ; SSE-NEXT:    pextrw $5, %xmm0, %eax
    395 ; SSE-NEXT:    xorl %ecx, %ecx
    396 ; SSE-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
    397 ; SSE-NEXT:    sete %cl
    398 ; SSE-NEXT:    pinsrw $5, %ecx, %xmm1
    399 ; SSE-NEXT:    pextrw $6, %xmm0, %eax
    400 ; SSE-NEXT:    imull $32897, %eax, %eax # imm = 0x8081
    401 ; SSE-NEXT:    shrl $31, %eax
    402 ; SSE-NEXT:    pinsrw $6, %eax, %xmm1
    403 ; SSE-NEXT:    pextrw $7, %xmm0, %eax
    404 ; SSE-NEXT:    shrl $15, %eax
    405 ; SSE-NEXT:    pinsrw $7, %eax, %xmm1
    406 ; SSE-NEXT:    movdqa %xmm1, %xmm0
    407 ; SSE-NEXT:    retq
    408 ;
    409 ; AVX-LABEL: combine_vec_udiv_nonuniform:
    410 ; AVX:       # %bb.0:
    411 ; AVX-NEXT:    vmovd %xmm0, %eax
    412 ; AVX-NEXT:    movzwl %ax, %ecx
    413 ; AVX-NEXT:    imull $25645, %ecx, %ecx # imm = 0x642D
    414 ; AVX-NEXT:    shrl $16, %ecx
    415 ; AVX-NEXT:    subl %ecx, %eax
    416 ; AVX-NEXT:    movzwl %ax, %eax
    417 ; AVX-NEXT:    shrl %eax
    418 ; AVX-NEXT:    addl %ecx, %eax
    419 ; AVX-NEXT:    shrl $4, %eax
    420 ; AVX-NEXT:    vmovd %eax, %xmm1
    421 ; AVX-NEXT:    vpextrw $1, %xmm0, %eax
    422 ; AVX-NEXT:    imull $61681, %eax, %eax # imm = 0xF0F1
    423 ; AVX-NEXT:    shrl $21, %eax
    424 ; AVX-NEXT:    vpinsrw $1, %eax, %xmm1, %xmm1
    425 ; AVX-NEXT:    vpextrw $2, %xmm0, %eax
    426 ; AVX-NEXT:    imull $8195, %eax, %eax # imm = 0x2003
    427 ; AVX-NEXT:    shrl $29, %eax
    428 ; AVX-NEXT:    vpinsrw $2, %eax, %xmm1, %xmm1
    429 ; AVX-NEXT:    vpextrw $3, %xmm0, %eax
    430 ; AVX-NEXT:    shrl $3, %eax
    431 ; AVX-NEXT:    imull $9363, %eax, %eax # imm = 0x2493
    432 ; AVX-NEXT:    shrl $16, %eax
    433 ; AVX-NEXT:    vpinsrw $3, %eax, %xmm1, %xmm1
    434 ; AVX-NEXT:    vpextrw $4, %xmm0, %eax
    435 ; AVX-NEXT:    shrl $7, %eax
    436 ; AVX-NEXT:    vpinsrw $4, %eax, %xmm1, %xmm1
    437 ; AVX-NEXT:    vpextrw $5, %xmm0, %eax
    438 ; AVX-NEXT:    xorl %ecx, %ecx
    439 ; AVX-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
    440 ; AVX-NEXT:    sete %cl
    441 ; AVX-NEXT:    vpinsrw $5, %ecx, %xmm1, %xmm1
    442 ; AVX-NEXT:    vpextrw $6, %xmm0, %eax
    443 ; AVX-NEXT:    imull $32897, %eax, %eax # imm = 0x8081
    444 ; AVX-NEXT:    shrl $31, %eax
    445 ; AVX-NEXT:    vpinsrw $6, %eax, %xmm1, %xmm1
    446 ; AVX-NEXT:    vpextrw $7, %xmm0, %eax
    447 ; AVX-NEXT:    shrl $15, %eax
    448 ; AVX-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm0
    449 ; AVX-NEXT:    retq
    450   %1 = udiv <8 x i16> %x, <i16 23, i16 34, i16 -23, i16 56, i16 128, i16 -1, i16 -256, i16 -32768>
    451   ret <8 x i16> %1
    452 }
    453