Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefixes=AVX,AVX1
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f  | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
      7 
      8 define i32 @sad8_32bit_icmp_sge(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i32 %stride) local_unnamed_addr #0 { ; sums |cur[i] - ref[i]| over 8 bytes; |d| is spelled select(d > -1, d, -d)
      9 ; SSE2-LABEL: sad8_32bit_icmp_sge:
     10 ; SSE2:       # %bb.0: # %entry
     11 ; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
     12 ; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
     13 ; SSE2-NEXT:    psadbw %xmm0, %xmm1
     14 ; SSE2-NEXT:    movd %xmm1, %eax
     15 ; SSE2-NEXT:    retq
     16 ;
     17 ; AVX-LABEL: sad8_32bit_icmp_sge:
     18 ; AVX:       # %bb.0: # %entry
     19 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
     20 ; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
     21 ; AVX-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
     22 ; AVX-NEXT:    vmovd %xmm0, %eax
     23 ; AVX-NEXT:    retq
     24 
     25 entry:  ; the assertions above expect the whole body to become one psadbw plus a move into eax
     26   %idx.ext = zext i32 %stride to i64  ; no later instruction in this function reads %idx.ext
     27   br label %for.body  ; for.body has no back edge, so it executes once
     28 
     29 for.body:                                         ; preds = %entry
     30   %0 = bitcast i8* %cur to <8 x i8>*  ; view %cur as a pointer to an 8-byte vector
     31   %1 = load <8 x i8>, <8 x i8>* %0, align 1  ; align 1, so no alignment is assumed
     32   %2 = zext <8 x i8> %1 to <8 x i32>  ; widen each byte to i32 with zero extension
     33   %3 = bitcast i8* %ref to <8 x i8>*  ; same view for %ref
     34   %4 = load <8 x i8>, <8 x i8>* %3, align 1
     35   %5 = zext <8 x i8> %4 to <8 x i32>
     36   %6 = sub nsw <8 x i32> %2, %5  ; d = cur - ref, per lane
     37   %7 = icmp sgt <8 x i32> %6, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>  ; mask = (d > -1), i.e. d >= 0
     38   %8 = sub nsw <8 x i32> zeroinitializer, %6  ; -d
     39   %9 = select <8 x i1> %7, <8 x i32> %6, <8 x i32> %8  ; |d|: d where the mask is set, else -d
     40   %rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
     41   %bin.rdx = add <8 x i32> %9, %rdx.shuf  ; lanes 0-3 += lanes 4-7
     42   %rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
     43   %bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229  ; lanes 0-1 += lanes 2-3
     44   %rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
     45   %bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231  ; lane 0 += lane 1
     46   %10 = extractelement <8 x i32> %bin.rdx232, i32 0  ; lane 0 now holds the sum of all eight |d| values
     47   ret i32 %10
     48 }
     49 
     50 define i32 @sad8_32bit_icmp_sgt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i32 %stride) local_unnamed_addr #1 { ; sums |cur[i] - ref[i]| over 8 bytes; |d| is spelled select(d > 0, d, -d)
     51 ; SSE2-LABEL: sad8_32bit_icmp_sgt:
     52 ; SSE2:       # %bb.0: # %entry
     53 ; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
     54 ; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
     55 ; SSE2-NEXT:    psadbw %xmm0, %xmm1
     56 ; SSE2-NEXT:    movd %xmm1, %eax
     57 ; SSE2-NEXT:    retq
     58 ;
     59 ; AVX-LABEL: sad8_32bit_icmp_sgt:
     60 ; AVX:       # %bb.0: # %entry
     61 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
     62 ; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
     63 ; AVX-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
     64 ; AVX-NEXT:    vmovd %xmm0, %eax
     65 ; AVX-NEXT:    retq
     66 entry:  ; the assertions above expect the whole body to become one psadbw plus a move into eax
     67   %idx.ext = zext i32 %stride to i64  ; no later instruction in this function reads %idx.ext
     68   br label %for.body  ; for.body has no back edge, so it executes once
     69 
     70 for.body:                                         ; preds = %entry
     71   %0 = bitcast i8* %cur to <8 x i8>*  ; view %cur as a pointer to an 8-byte vector
     72   %1 = load <8 x i8>, <8 x i8>* %0, align 1  ; align 1, so no alignment is assumed
     73   %2 = zext <8 x i8> %1 to <8 x i32>  ; widen each byte to i32 with zero extension
     74   %3 = bitcast i8* %ref to <8 x i8>*  ; same view for %ref
     75   %4 = load <8 x i8>, <8 x i8>* %3, align 1
     76   %5 = zext <8 x i8> %4 to <8 x i32>
     77   %6 = sub nsw <8 x i32> %2, %5  ; d = cur - ref, per lane
     78   %7 = icmp sgt <8 x i32> %6, zeroinitializer  ; mask = (d > 0)
     79   %8 = sub nsw <8 x i32> zeroinitializer, %6  ; -d
     80   %9 = select <8 x i1> %7, <8 x i32> %6, <8 x i32> %8  ; |d|: d where d > 0, else -d (both arms are 0 when d == 0)
     81   %rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
     82   %bin.rdx = add <8 x i32> %9, %rdx.shuf  ; lanes 0-3 += lanes 4-7
     83   %rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
     84   %bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229  ; lanes 0-1 += lanes 2-3
     85   %rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
     86   %bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231  ; lane 0 += lane 1
     87   %10 = extractelement <8 x i32> %bin.rdx232, i32 0  ; lane 0 now holds the sum of all eight |d| values
     88   ret i32 %10
     89 }
     90 
     91 define i32 @sad8_32bit_icmp_sle(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i32 %stride) local_unnamed_addr #2 { ; sums |cur[i] - ref[i]| over 8 bytes; |d| is spelled select(d < 1, -d, d)
     92 ; SSE2-LABEL: sad8_32bit_icmp_sle:
     93 ; SSE2:       # %bb.0: # %entry
     94 ; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
     95 ; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
     96 ; SSE2-NEXT:    psadbw %xmm0, %xmm1
     97 ; SSE2-NEXT:    movd %xmm1, %eax
     98 ; SSE2-NEXT:    retq
     99 ;
    100 ; AVX-LABEL: sad8_32bit_icmp_sle:
    101 ; AVX:       # %bb.0: # %entry
    102 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    103 ; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
    104 ; AVX-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
    105 ; AVX-NEXT:    vmovd %xmm0, %eax
    106 ; AVX-NEXT:    retq
    107 entry:  ; the assertions above expect the whole body to become one psadbw plus a move into eax
    108   %idx.ext = zext i32 %stride to i64  ; no later instruction in this function reads %idx.ext
    109   br label %for.body  ; for.body has no back edge, so it executes once
    110 
    111 for.body:                                         ; preds = %entry
    112   %0 = bitcast i8* %cur to <8 x i8>*  ; view %cur as a pointer to an 8-byte vector
    113   %1 = load <8 x i8>, <8 x i8>* %0, align 1  ; align 1, so no alignment is assumed
    114   %2 = zext <8 x i8> %1 to <8 x i32>  ; widen each byte to i32 with zero extension
    115   %3 = bitcast i8* %ref to <8 x i8>*  ; same view for %ref
    116   %4 = load <8 x i8>, <8 x i8>* %3, align 1
    117   %5 = zext <8 x i8> %4 to <8 x i32>
    118   %6 = sub nsw <8 x i32> %2, %5  ; d = cur - ref, per lane
    119   %7 = icmp slt <8 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>  ; mask = (d < 1), i.e. d <= 0
    120   %8 = sub nsw <8 x i32> zeroinitializer, %6  ; -d
    121   %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6  ; |d|: -d where the mask is set, else d (arms are swapped relative to the sgt forms)
    122   %rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
    123   %bin.rdx = add <8 x i32> %9, %rdx.shuf  ; lanes 0-3 += lanes 4-7
    124   %rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    125   %bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229  ; lanes 0-1 += lanes 2-3
    126   %rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    127   %bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231  ; lane 0 += lane 1
    128   %10 = extractelement <8 x i32> %bin.rdx232, i32 0  ; lane 0 now holds the sum of all eight |d| values
    129   ret i32 %10
    130 }
    131 
    132 define i32 @sad8_32bit_icmp_slt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i32 %stride) local_unnamed_addr #3 { ; sums |cur[i] - ref[i]| over 8 bytes; |d| is spelled select(d < 0, -d, d)
    133 ; SSE2-LABEL: sad8_32bit_icmp_slt:
    134 ; SSE2:       # %bb.0: # %entry
    135 ; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    136 ; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
    137 ; SSE2-NEXT:    psadbw %xmm0, %xmm1
    138 ; SSE2-NEXT:    movd %xmm1, %eax
    139 ; SSE2-NEXT:    retq
    140 ;
    141 ; AVX-LABEL: sad8_32bit_icmp_slt:
    142 ; AVX:       # %bb.0: # %entry
    143 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    144 ; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
    145 ; AVX-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
    146 ; AVX-NEXT:    vmovd %xmm0, %eax
    147 ; AVX-NEXT:    retq
    148 entry:  ; the assertions above expect the whole body to become one psadbw plus a move into eax
    149   %idx.ext = zext i32 %stride to i64  ; no later instruction in this function reads %idx.ext
    150   br label %for.body  ; for.body has no back edge, so it executes once
    151 
    152 for.body:                                         ; preds = %entry
    153   %0 = bitcast i8* %cur to <8 x i8>*  ; view %cur as a pointer to an 8-byte vector
    154   %1 = load <8 x i8>, <8 x i8>* %0, align 1  ; align 1, so no alignment is assumed
    155   %2 = zext <8 x i8> %1 to <8 x i32>  ; widen each byte to i32 with zero extension
    156   %3 = bitcast i8* %ref to <8 x i8>*  ; same view for %ref
    157   %4 = load <8 x i8>, <8 x i8>* %3, align 1
    158   %5 = zext <8 x i8> %4 to <8 x i32>
    159   %6 = sub nsw <8 x i32> %2, %5  ; d = cur - ref, per lane
    160   %7 = icmp slt <8 x i32> %6, zeroinitializer  ; mask = (d < 0)
    161   %8 = sub nsw <8 x i32> zeroinitializer, %6  ; -d
    162   %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6  ; |d|: -d where d < 0, else d
    163   %rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
    164   %bin.rdx = add <8 x i32> %9, %rdx.shuf  ; lanes 0-3 += lanes 4-7
    165   %rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    166   %bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229  ; lanes 0-1 += lanes 2-3
    167   %rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    168   %bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231  ; lane 0 += lane 1
    169   %10 = extractelement <8 x i32> %bin.rdx232, i32 0  ; lane 0 now holds the sum of all eight |d| values
    170   ret i32 %10
    171 }
    172 
    173 define i64 @sad8_64bit_icmp_sext_slt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i64 %stride) local_unnamed_addr #4 { ; sums |cur[i] - ref[i]| over 8 bytes as i64; |d| is formed in i32 and sign-extended before the reduction; %stride is not read
    174 ; SSE2-LABEL: sad8_64bit_icmp_sext_slt:
    175 ; SSE2:       # %bb.0: # %entry
    176 ; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    177 ; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
    178 ; SSE2-NEXT:    psadbw %xmm0, %xmm1
    179 ; SSE2-NEXT:    movq %xmm1, %rax
    180 ; SSE2-NEXT:    retq
    181 ;
    182 ; AVX-LABEL: sad8_64bit_icmp_sext_slt:
    183 ; AVX:       # %bb.0: # %entry
    184 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    185 ; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
    186 ; AVX-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
    187 ; AVX-NEXT:    vmovq %xmm0, %rax
    188 ; AVX-NEXT:    retq
    189 entry:  ; the assertions above expect the whole body to become one psadbw plus a 64-bit move into rax
    190   br label %for.body  ; for.body has no back edge, so it executes once
    191 
    192 for.body:                                         ; preds = %entry
    193   %0 = bitcast i8* %cur to <8 x i8>*  ; view %cur as a pointer to an 8-byte vector
    194   %1 = load <8 x i8>, <8 x i8>* %0, align 1  ; align 1, so no alignment is assumed
    195   %2 = zext <8 x i8> %1 to <8 x i32>  ; widen each byte to i32 with zero extension
    196   %3 = bitcast i8* %ref to <8 x i8>*  ; same view for %ref
    197   %4 = load <8 x i8>, <8 x i8>* %3, align 1
    198   %5 = zext <8 x i8> %4 to <8 x i32>
    199   %6 = sub nsw <8 x i32> %2, %5  ; d = cur - ref, per lane
    200   %7 = icmp slt <8 x i32> %6, zeroinitializer  ; mask = (d < 0)
    201   %8 = sub nsw <8 x i32> zeroinitializer, %6  ; -d
    202   %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6  ; |d|: -d where d < 0, else d
    203   %10 = sext <8 x i32> %9 to <8 x i64>  ; sign-extend each |d| to i64 before reducing
    204   %rdx.shuf = shufflevector <8 x i64> %10, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
    205   %bin.rdx = add <8 x i64> %rdx.shuf, %10  ; lanes 0-3 += lanes 4-7 (the shuffled value is the first operand here)
    206   %rdx.shuf236 = shufflevector <8 x i64> %bin.rdx, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    207   %bin.rdx237 = add <8 x i64> %bin.rdx, %rdx.shuf236  ; lanes 0-1 += lanes 2-3
    208   %rdx.shuf238 = shufflevector <8 x i64> %bin.rdx237, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    209   %bin.rdx239 = add <8 x i64> %bin.rdx237, %rdx.shuf238  ; lane 0 += lane 1
    210   %11 = extractelement <8 x i64> %bin.rdx239, i32 0  ; lane 0 now holds the sum of all eight |d| values
    211   ret i64 %11
    212 }
    213 
    214 define i64 @sad8_64bit_icmp_zext_slt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i64 %stride) local_unnamed_addr #4 { ; sums |cur[i] - ref[i]| over 8 bytes as i64; |d| is formed in i32 and zero-extended before the reduction; %stride is not read
    215 ; SSE2-LABEL: sad8_64bit_icmp_zext_slt:
    216 ; SSE2:       # %bb.0: # %entry
    217 ; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    218 ; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
    219 ; SSE2-NEXT:    psadbw %xmm0, %xmm1
    220 ; SSE2-NEXT:    movq %xmm1, %rax
    221 ; SSE2-NEXT:    retq
    222 ;
    223 ; AVX-LABEL: sad8_64bit_icmp_zext_slt:
    224 ; AVX:       # %bb.0: # %entry
    225 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    226 ; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
    227 ; AVX-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
    228 ; AVX-NEXT:    vmovq %xmm0, %rax
    229 ; AVX-NEXT:    retq
    230 entry:  ; the assertions above expect the whole body to become one psadbw plus a 64-bit move into rax
    231   br label %for.body  ; for.body has no back edge, so it executes once
    232 
    233 for.body:                                         ; preds = %entry
    234   %0 = bitcast i8* %cur to <8 x i8>*  ; view %cur as a pointer to an 8-byte vector
    235   %1 = load <8 x i8>, <8 x i8>* %0, align 1  ; align 1, so no alignment is assumed
    236   %2 = zext <8 x i8> %1 to <8 x i32>  ; widen each byte to i32 with zero extension
    237   %3 = bitcast i8* %ref to <8 x i8>*  ; same view for %ref
    238   %4 = load <8 x i8>, <8 x i8>* %3, align 1
    239   %5 = zext <8 x i8> %4 to <8 x i32>
    240   %6 = sub nsw <8 x i32> %2, %5  ; d = cur - ref, per lane
    241   %7 = icmp slt <8 x i32> %6, zeroinitializer  ; mask = (d < 0)
    242   %8 = sub nsw <8 x i32> zeroinitializer, %6  ; -d
    243   %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6  ; |d|: -d where d < 0, else d
    244   %10 = zext <8 x i32> %9 to <8 x i64>  ; zero-extend each |d| to i64 before reducing
    245   %rdx.shuf = shufflevector <8 x i64> %10, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
    246   %bin.rdx = add <8 x i64> %rdx.shuf, %10  ; lanes 0-3 += lanes 4-7 (the shuffled value is the first operand here)
    247   %rdx.shuf236 = shufflevector <8 x i64> %bin.rdx, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    248   %bin.rdx237 = add <8 x i64> %bin.rdx, %rdx.shuf236  ; lanes 0-1 += lanes 2-3
    249   %rdx.shuf238 = shufflevector <8 x i64> %bin.rdx237, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    250   %bin.rdx239 = add <8 x i64> %bin.rdx237, %rdx.shuf238  ; lane 0 += lane 1
    251   %11 = extractelement <8 x i64> %bin.rdx239, i32 0  ; lane 0 now holds the sum of all eight |d| values
    252   ret i64 %11
    253 }
    254 
    255 define i64 @sad8_early_64bit_icmp_zext_slt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i64 %stride) local_unnamed_addr #4 { ; sums |cur[i] - ref[i]| over 8 bytes as i64; the bytes are zero-extended straight to i64, so |d| is formed in i64; %stride is not read
    256 ; SSE2-LABEL: sad8_early_64bit_icmp_zext_slt:
    257 ; SSE2:       # %bb.0: # %entry
    258 ; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
    259 ; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
    260 ; SSE2-NEXT:    psadbw %xmm0, %xmm1
    261 ; SSE2-NEXT:    movq %xmm1, %rax
    262 ; SSE2-NEXT:    retq
    263 ;
    264 ; AVX-LABEL: sad8_early_64bit_icmp_zext_slt:
    265 ; AVX:       # %bb.0: # %entry
    266 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    267 ; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
    268 ; AVX-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
    269 ; AVX-NEXT:    vmovq %xmm0, %rax
    270 ; AVX-NEXT:    retq
    271 entry:  ; the assertions above expect the whole body to become one psadbw plus a 64-bit move into rax
    272   br label %for.body  ; for.body has no back edge, so it executes once
    273 
    274 for.body:                                         ; preds = %entry
    275   %0 = bitcast i8* %cur to <8 x i8>*  ; view %cur as a pointer to an 8-byte vector
    276   %1 = load <8 x i8>, <8 x i8>* %0, align 1  ; align 1, so no alignment is assumed
    277   %2 = zext <8 x i8> %1 to <8 x i64>  ; widen each byte directly to i64 with zero extension
    278   %3 = bitcast i8* %ref to <8 x i8>*  ; same view for %ref
    279   %4 = load <8 x i8>, <8 x i8>* %3, align 1
    280   %5 = zext <8 x i8> %4 to <8 x i64>
    281   %6 = sub nsw <8 x i64> %2, %5  ; d = cur - ref, per lane, in i64
    282   %7 = icmp slt <8 x i64> %6, zeroinitializer  ; mask = (d < 0)
    283   %8 = sub nsw <8 x i64> zeroinitializer, %6  ; -d
    284   %9 = select <8 x i1> %7, <8 x i64> %8, <8 x i64> %6  ; |d|: -d where d < 0, else d
    285   %rdx.shuf = shufflevector <8 x i64> %9, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
    286   %bin.rdx = add <8 x i64> %rdx.shuf, %9  ; lanes 0-3 += lanes 4-7 (the shuffled value is the first operand here)
    287   %rdx.shuf236 = shufflevector <8 x i64> %bin.rdx, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    288   %bin.rdx237 = add <8 x i64> %bin.rdx, %rdx.shuf236  ; lanes 0-1 += lanes 2-3
    289   %rdx.shuf238 = shufflevector <8 x i64> %bin.rdx237, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    290   %bin.rdx239 = add <8 x i64> %bin.rdx237, %rdx.shuf238  ; lane 0 += lane 1
    291   %10 = extractelement <8 x i64> %bin.rdx239, i32 0  ; lane 0 now holds the sum of all eight |d| values
    292   ret i64 %10
    293 }
    294