; RUN: llc < %s -o - -march=x86-64 -mattr=+sse42 | FileCheck %s

; Test based on pr5626: loads, stores, and adds of vector types that do not
; map directly onto a single legal vector register must be widened for the
; arithmetic while the stores still write only the original elements.

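; <3 x i32>, 16-byte aligned: the load and add are widened to a full XMM
; register (movdqa/paddd); the 12-byte store is split into pextrd + movq so
; nothing past the third element is written.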
%i32vec3 = type <3 x i32>
define void @add3i32(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
; CHECK: movdqa
; CHECK: paddd
; CHECK: pextrd
; CHECK: movq
	%a = load %i32vec3* %ap, align 16
	%b = load %i32vec3* %bp, align 16
	%x = add %i32vec3 %a, %b
	store %i32vec3 %x, %i32vec3* %ret, align 16
	ret void
}

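; Same operation with only 8-byte alignment: each <3 x i32> operand is
; assembled via movq + pinsrd rather than a single 16-byte load, and the
; store is again split into pextrd + movq.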
define void @add3i32_2(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
; CHECK: movq
; CHECK: pinsrd
; CHECK: movq
; CHECK: pinsrd
; CHECK: paddd
; CHECK: pextrd
; CHECK: movq
	%a = load %i32vec3* %ap, align 8
	%b = load %i32vec3* %bp, align 8
	%x = add %i32vec3 %a, %b
	store %i32vec3 %x, %i32vec3* %ret, align 8
	ret void
}

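; <7 x i32> (28 bytes) needs two XMM adds; the result is stored with
; pextrd, movq, and movdqa.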
%i32vec7 = type <7 x i32>
define void @add7i32(%i32vec7*  sret %ret, %i32vec7* %ap, %i32vec7* %bp)  {
; CHECK: movdqa
; CHECK: movdqa
; CHECK: paddd
; CHECK: paddd
; CHECK: pextrd
; CHECK: movq
; CHECK: movdqa
	%a = load %i32vec7* %ap, align 16
	%b = load %i32vec7* %bp, align 16
	%x = add %i32vec7 %a, %b
	store %i32vec7 %x, %i32vec7* %ret, align 16
	ret void
}

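; <12 x i32> (48 bytes) is exactly three XMM registers, so only full-width
; movdqa/paddd/movdqa are expected; no splitting of the store is needed.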
%i32vec12 = type <12 x i32>
define void @add12i32(%i32vec12*  sret %ret, %i32vec12* %ap, %i32vec12* %bp)  {
; CHECK: movdqa
; CHECK: movdqa
; CHECK: movdqa
; CHECK: paddd
; CHECK: paddd
; CHECK: paddd
; CHECK: movdqa
; CHECK: movdqa
; CHECK: movdqa
	%a = load %i32vec12* %ap, align 16
	%b = load %i32vec12* %bp, align 16
	%x = add %i32vec12 %a, %b
	store %i32vec12 %x, %i32vec12* %ret, align 16
	ret void
}


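; <3 x i16>: the paddw is widened; the 6-byte store is done with
; movd + pextrw.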
%i16vec3 = type <3 x i16>
define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
; CHECK: movdqa
; CHECK: paddw
; CHECK: movd
; CHECK: pextrw
	%a = load %i16vec3* %ap, align 16
	%b = load %i16vec3* %bp, align 16
	%x = add %i16vec3 %a, %b
	store %i16vec3 %x, %i16vec3* %ret, align 16
	ret void
}

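; <4 x i16> (8 bytes): widened paddw; the result is stored with a single movq.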
%i16vec4 = type <4 x i16>
define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
; CHECK: movdqa
; CHECK: paddw
; CHECK: movq
	%a = load %i16vec4* %ap, align 16
	%b = load %i16vec4* %bp, align 16
	%x = add %i16vec4 %a, %b
	store %i16vec4 %x, %i16vec4* %ret, align 16
	ret void
}

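; <12 x i16> (24 bytes): two paddw; the store is movq + movdqa.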
%i16vec12 = type <12 x i16>
define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
; CHECK: movdqa
; CHECK: movdqa
; CHECK: paddw
; CHECK: paddw
; CHECK: movq
; CHECK: movdqa
	%a = load %i16vec12* %ap, align 16
	%b = load %i16vec12* %bp, align 16
	%x = add %i16vec12 %a, %b
	store %i16vec12 %x, %i16vec12* %ret, align 16
	ret void
}

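; <18 x i16> (36 bytes): three paddw; the store is movd plus two movdqa.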
%i16vec18 = type <18 x i16>
define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
; CHECK: movdqa
; CHECK: movdqa
; CHECK: movdqa
; CHECK: paddw
; CHECK: paddw
; CHECK: paddw
; CHECK: movd
; CHECK: movdqa
; CHECK: movdqa
	%a = load %i16vec18* %ap, align 16
	%b = load %i16vec18* %bp, align 16
	%x = add %i16vec18 %a, %b
	store %i16vec18 %x, %i16vec18* %ret, align 16
	ret void
}


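; <3 x i8>: widened paddb; the 3-byte store uses pextrb + movb.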
%i8vec3 = type <3 x i8>
define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
; CHECK: movdqa
; CHECK: paddb
; CHECK: pextrb
; CHECK: movb
	%a = load %i8vec3* %ap, align 16
	%b = load %i8vec3* %bp, align 16
	%x = add %i8vec3 %a, %b
	store %i8vec3 %x, %i8vec3* %ret, align 16
	ret void
}

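; <31 x i8>: two paddb; the 31-byte store is pieced together with (at least)
; movq, pextrb, and pextrw.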
%i8vec31 = type <31 x i8>
define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
; CHECK: movdqa
; CHECK: movdqa
; CHECK: paddb
; CHECK: paddb
; CHECK: movq
; CHECK: pextrb
; CHECK: pextrw
	%a = load %i8vec31* %ap, align 16
	%b = load %i8vec31* %bp, align 16
	%x = add %i8vec31 %a, %b
	store %i8vec31 %x, %i8vec31* %ret, align 16
	ret void
}


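; lshr of a <3 x i8> carried in a packed struct; the vector shift by 1 is
; expected to show up as a scalar shrb.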
%i8vec3pack = type { <3 x i8>, i8 }
define %i8vec3pack  @rot() nounwind {
; CHECK: shrb
entry:
  %X = alloca %i8vec3pack, align 4
  %rot = alloca %i8vec3pack, align 4
  %result = alloca %i8vec3pack, align 4
  %storetmp = bitcast %i8vec3pack* %X to <3 x i8>*
  store <3 x i8> <i8 -98, i8 -98, i8 -98>, <3 x i8>* %storetmp
  %storetmp1 = bitcast %i8vec3pack* %rot to <3 x i8>*
  store <3 x i8> <i8 1, i8 1, i8 1>, <3 x i8>* %storetmp1
  %tmp = load %i8vec3pack* %X
  %extractVec = extractvalue %i8vec3pack %tmp, 0
  %tmp2 = load %i8vec3pack* %rot
  %extractVec3 = extractvalue %i8vec3pack %tmp2, 0
  %shr = lshr <3 x i8> %extractVec, %extractVec3
  %storetmp4 = bitcast %i8vec3pack* %result to <3 x i8>*
  store <3 x i8> %shr, <3 x i8>* %storetmp4
  %tmp5 = load %i8vec3pack* %result
  ret %i8vec3pack %tmp5
}