Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s
      3 
; Unmasked load of <64 x i8> through an align-1 pointer: with no mask there
; is no need for the element-sized vmovdqu8 encoding, so a single generic
; unaligned vmovups of the full zmm register is expected.
      4 define <64 x i8> @test1(i8 * %addr) {
      5 ; CHECK-LABEL: test1:
      6 ; CHECK:       ## %bb.0:
      7 ; CHECK-NEXT:    vmovups (%rdi), %zmm0
      8 ; CHECK-NEXT:    retq
      9   %vaddr = bitcast i8* %addr to <64 x i8>*
     10   %res = load <64 x i8>, <64 x i8>* %vaddr, align 1
     11   ret <64 x i8>%res
     12 }
     13 
; Unmasked store of <64 x i8> through an align-1 pointer: likewise expected
; to select the generic unaligned vmovups store rather than vmovdqu8.
     14 define void @test2(i8 * %addr, <64 x i8> %data) {
     15 ; CHECK-LABEL: test2:
     16 ; CHECK:       ## %bb.0:
     17 ; CHECK-NEXT:    vmovups %zmm0, (%rdi)
     18 ; CHECK-NEXT:    retq
     19   %vaddr = bitcast i8* %addr to <64 x i8>*
     20   store <64 x i8>%data, <64 x i8>* %vaddr, align 1
     21   ret void
     22 }
     23 
; Merging masked byte load: the icmp-ne mask is materialized into a mask
; register with vptestmb, and the select against the pass-through value %old
; folds into the merging form of the load ("{%k1}" with no "{z}").
     24 define <64 x i8> @test3(i8 * %addr, <64 x i8> %old, <64 x i8> %mask1) {
     25 ; CHECK-LABEL: test3:
     26 ; CHECK:       ## %bb.0:
     27 ; CHECK-NEXT:    vptestmb %zmm1, %zmm1, %k1
     28 ; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1}
     29 ; CHECK-NEXT:    retq
     30   %mask = icmp ne <64 x i8> %mask1, zeroinitializer
     31   %vaddr = bitcast i8* %addr to <64 x i8>*
     32   %r = load <64 x i8>, <64 x i8>* %vaddr, align 1
     33   %res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> %old
     34   ret <64 x i8>%res
     35 }
     36 
; Same as test3 but selecting against zeroinitializer, so the zero-masking
; form of the load ("{%k1} {z}") is expected instead of a merge.
     37 define <64 x i8> @test4(i8 * %addr, <64 x i8> %mask1) {
     38 ; CHECK-LABEL: test4:
     39 ; CHECK:       ## %bb.0:
     40 ; CHECK-NEXT:    vptestmb %zmm0, %zmm0, %k1
     41 ; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
     42 ; CHECK-NEXT:    retq
     43   %mask = icmp ne <64 x i8> %mask1, zeroinitializer
     44   %vaddr = bitcast i8* %addr to <64 x i8>*
     45   %r = load <64 x i8>, <64 x i8>* %vaddr, align 1
     46   %res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> zeroinitializer
     47   ret <64 x i8>%res
     48 }
     49 
; Word-element counterpart of test1: an unmasked align-1 <32 x i16> load is
; also expected to lower to the generic unaligned vmovups.
     50 define <32 x i16> @test5(i8 * %addr) {
     51 ; CHECK-LABEL: test5:
     52 ; CHECK:       ## %bb.0:
     53 ; CHECK-NEXT:    vmovups (%rdi), %zmm0
     54 ; CHECK-NEXT:    retq
     55   %vaddr = bitcast i8* %addr to <32 x i16>*
     56   %res = load <32 x i16>, <32 x i16>* %vaddr, align 1
     57   ret <32 x i16>%res
     58 }
     59 
; Word-element counterpart of test2: an unmasked align-1 <32 x i16> store is
; also expected to lower to the generic unaligned vmovups.
     60 define void @test6(i8 * %addr, <32 x i16> %data) {
     61 ; CHECK-LABEL: test6:
     62 ; CHECK:       ## %bb.0:
     63 ; CHECK-NEXT:    vmovups %zmm0, (%rdi)
     64 ; CHECK-NEXT:    retq
     65   %vaddr = bitcast i8* %addr to <32 x i16>*
     66   store <32 x i16>%data, <32 x i16>* %vaddr, align 1
     67   ret void
     68 }
     69 
; Merging masked word load: word-element counterpart of test3, using
; vptestmw to build the mask and the merging vmovdqu16 "{%k1}" form.
     70 define <32 x i16> @test7(i8 * %addr, <32 x i16> %old, <32 x i16> %mask1) {
     71 ; CHECK-LABEL: test7:
     72 ; CHECK:       ## %bb.0:
     73 ; CHECK-NEXT:    vptestmw %zmm1, %zmm1, %k1
     74 ; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1}
     75 ; CHECK-NEXT:    retq
     76   %mask = icmp ne <32 x i16> %mask1, zeroinitializer
     77   %vaddr = bitcast i8* %addr to <32 x i16>*
     78   %r = load <32 x i16>, <32 x i16>* %vaddr, align 1
     79   %res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> %old
     80   ret <32 x i16>%res
     81 }
     82 
; Zero-masked word load: word-element counterpart of test4, selecting
; against zeroinitializer so the "{%k1} {z}" zero-masking form is expected.
     83 define <32 x i16> @test8(i8 * %addr, <32 x i16> %mask1) {
     84 ; CHECK-LABEL: test8:
     85 ; CHECK:       ## %bb.0:
     86 ; CHECK-NEXT:    vptestmw %zmm0, %zmm0, %k1
     87 ; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
     88 ; CHECK-NEXT:    retq
     89   %mask = icmp ne <32 x i16> %mask1, zeroinitializer
     90   %vaddr = bitcast i8* %addr to <32 x i16>*
     91   %r = load <32 x i16>, <32 x i16>* %vaddr, align 1
     92   %res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> zeroinitializer
     93   ret <32 x i16>%res
     94 }
     95 
; llvm.masked.load with an explicit <16 x i1> mask argument (passed as bytes
; in xmm0) and undef pass-through.  The i1 bytes are converted to a mask
; register by moving each byte's LSB into the sign bit (vpsllw $7) and then
; vpmovb2m; kmovw narrows the result to the low 16 mask bits.  The load is
; performed as a 512-bit zero-masked vmovdqu8, with the kill comment
; recording the narrowing of the result back to xmm0.
     96 define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
     97 ; CHECK-LABEL: test_mask_load_16xi8:
     98 ; CHECK:       ## %bb.0:
     99 ; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
    100 ; CHECK-NEXT:    vpmovb2m %zmm0, %k0
    101 ; CHECK-NEXT:    kmovw %k0, %k1
    102 ; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
    103 ; CHECK-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
    104 ; CHECK-NEXT:    retq
    105   %res = call <16 x i8> @llvm.masked.load.v16i8(<16 x i8>* %addr, i32 4, <16 x i1>%mask, <16 x i8> undef)
    106   ret <16 x i8> %res
    107 }
    108 declare <16 x i8> @llvm.masked.load.v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
    109 
; 32-element version of test_mask_load_16xi8, with a zeroinitializer
; pass-through.  The <32 x i1> mask arrives as bytes in ymm0; kmovd keeps
; the low 32 mask bits and the load widens to a zero-masked 512-bit
; vmovdqu8, narrowed back to ymm0 afterwards (see the kill comment).
    110 define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
    111 ; CHECK-LABEL: test_mask_load_32xi8:
    112 ; CHECK:       ## %bb.0:
    113 ; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
    114 ; CHECK-NEXT:    vpmovb2m %zmm0, %k0
    115 ; CHECK-NEXT:    kmovd %k0, %k1
    116 ; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
    117 ; CHECK-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
    118 ; CHECK-NEXT:    retq
    119   %res = call <32 x i8> @llvm.masked.load.v32i8(<32 x i8>* %addr, i32 4, <32 x i1>%mask, <32 x i8> zeroinitializer)
    120   ret <32 x i8> %res
    121 }
    122 declare <32 x i8> @llvm.masked.load.v32i8(<32 x i8>*, i32, <32 x i1>, <32 x i8>)
    123 
; Word-element masked load with an 8-lane mask (passed as words in xmm0).
; vpsllw $15 + vpmovw2m extract the i1 bits; the kshiftld/kshiftrd $24 pair
; then clears the upper 24 bits of the 32-bit mask register so only the 8
; valid lanes remain before the zero-masked vmovdqu16.
    124 define <8 x i16> @test_mask_load_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
    125 ; CHECK-LABEL: test_mask_load_8xi16:
    126 ; CHECK:       ## %bb.0:
    127 ; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
    128 ; CHECK-NEXT:    vpmovw2m %zmm0, %k0
    129 ; CHECK-NEXT:    kshiftld $24, %k0, %k0
    130 ; CHECK-NEXT:    kshiftrd $24, %k0, %k1
    131 ; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
    132 ; CHECK-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
    133 ; CHECK-NEXT:    retq
    134   %res = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %addr, i32 4, <8 x i1>%mask, <8 x i16> undef)
    135   ret <8 x i16> %res
    136 }
    137 declare <8 x i16> @llvm.masked.load.v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
    138 
; 16-lane word-element masked load with zeroinitializer pass-through.  The
; <16 x i1> mask arrives as bytes in xmm0 (hence vpsllw $7 + vpmovb2m, the
; byte-mask extraction); kmovw keeps the low 16 mask bits for the
; zero-masked vmovdqu16.
    139 define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
    140 ; CHECK-LABEL: test_mask_load_16xi16:
    141 ; CHECK:       ## %bb.0:
    142 ; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
    143 ; CHECK-NEXT:    vpmovb2m %zmm0, %k0
    144 ; CHECK-NEXT:    kmovw %k0, %k1
    145 ; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
    146 ; CHECK-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
    147 ; CHECK-NEXT:    retq
    148   %res = call <16 x i16> @llvm.masked.load.v16i16(<16 x i16>* %addr, i32 4, <16 x i1>%mask, <16 x i16> zeroinitializer)
    149   ret <16 x i16> %res
    150 }
    151 declare <16 x i16> @llvm.masked.load.v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>)
    152 
; Masked byte store, 16 lanes.  The value in xmm1 is implicitly widened to
; zmm1 (first kill comment), the byte mask in xmm0 is extracted via
; vpsllw $7 + vpmovb2m and narrowed to 16 bits with kmovw, and the store
; uses the masked vmovdqu8 form (only selected lanes are written).
    153 define void @test_mask_store_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
    154 ; CHECK-LABEL: test_mask_store_16xi8:
    155 ; CHECK:       ## %bb.0:
    156 ; CHECK-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
    157 ; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
    158 ; CHECK-NEXT:    vpmovb2m %zmm0, %k0
    159 ; CHECK-NEXT:    kmovw %k0, %k1
    160 ; CHECK-NEXT:    vmovdqu8 %zmm1, (%rdi) {%k1}
    161 ; CHECK-NEXT:    retq
    162   call void @llvm.masked.store.v16i8(<16 x i8> %val, <16 x i8>* %addr, i32 4, <16 x i1>%mask)
    163   ret void
    164 }
    165 declare void @llvm.masked.store.v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
    166 
; Masked byte store, 32 lanes: as test_mask_store_16xi8 but the mask arrives
; as bytes in ymm0 and kmovd keeps the low 32 mask bits.
    167 define void @test_mask_store_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
    168 ; CHECK-LABEL: test_mask_store_32xi8:
    169 ; CHECK:       ## %bb.0:
    170 ; CHECK-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
    171 ; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
    172 ; CHECK-NEXT:    vpmovb2m %zmm0, %k0
    173 ; CHECK-NEXT:    kmovd %k0, %k1
    174 ; CHECK-NEXT:    vmovdqu8 %zmm1, (%rdi) {%k1}
    175 ; CHECK-NEXT:    retq
    176   call void @llvm.masked.store.v32i8(<32 x i8> %val, <32 x i8>* %addr, i32 4, <32 x i1>%mask)
    177   ret void
    178 }
    179 declare void @llvm.masked.store.v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)
    180 
; Masked word store, 8 lanes.  The 8-lane mask arrives as words in xmm0
; (vpsllw $15 + vpmovw2m); the kshiftld/kshiftrd $24 pair clears the upper
; 24 bits of the 32-bit mask register so only the 8 valid lanes drive the
; masked vmovdqu16 store.
    181 define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
    182 ; CHECK-LABEL: test_mask_store_8xi16:
    183 ; CHECK:       ## %bb.0:
    184 ; CHECK-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
    185 ; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
    186 ; CHECK-NEXT:    vpmovw2m %zmm0, %k0
    187 ; CHECK-NEXT:    kshiftld $24, %k0, %k0
    188 ; CHECK-NEXT:    kshiftrd $24, %k0, %k1
    189 ; CHECK-NEXT:    vmovdqu16 %zmm1, (%rdi) {%k1}
    190 ; CHECK-NEXT:    retq
    191   call void @llvm.masked.store.v8i16(<8 x i16> %val, <8 x i16>* %addr, i32 4, <8 x i1>%mask)
    192   ret void
    193 }
    194 declare void @llvm.masked.store.v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
    195 
; Masked word store, 16 lanes.  The <16 x i1> mask arrives as bytes in xmm0
; (hence the byte-mask extraction via vpsllw $7 + vpmovb2m); kmovw keeps the
; low 16 mask bits for the masked vmovdqu16 store of the widened value.
    196 define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
    197 ; CHECK-LABEL: test_mask_store_16xi16:
    198 ; CHECK:       ## %bb.0:
    199 ; CHECK-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
    200 ; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
    201 ; CHECK-NEXT:    vpmovb2m %zmm0, %k0
    202 ; CHECK-NEXT:    kmovw %k0, %k1
    203 ; CHECK-NEXT:    vmovdqu16 %zmm1, (%rdi) {%k1}
    204 ; CHECK-NEXT:    retq
    205   call void @llvm.masked.store.v16i16(<16 x i16> %val, <16 x i16>* %addr, i32 4, <16 x i1>%mask)
    206   ret void
    207 }
    208 declare void @llvm.masked.store.v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)
    209