; RUN: llc < %s -o - -march=x86-64 -mattr=+sse42 | FileCheck %s

; Test based on pr5626 to load/store illegal vector types, which must be
; widened to legal ones without writing past the end of the original value.

; <3 x i32> is widened to <4 x i32>. The aligned loads use full movdqa, but
; the 12-byte store is split into movq + pextrd so only 12 bytes are written.
%i32vec3 = type <3 x i32>
define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
; CHECK: movdqa
; CHECK: paddd
; CHECK: pextrd
; CHECK: movq
  %a = load %i32vec3* %ap, align 16
  %b = load %i32vec3* %bp, align 16
  %x = add %i32vec3 %a, %b
  store %i32vec3 %x, %i32vec3* %ret, align 16
  ret void
}

; Same as above, but with only 8-byte alignment the loads are also split
; (movq + pinsrd).
define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
; CHECK: movq
; CHECK: pinsrd
; CHECK: movq
; CHECK: pinsrd
; CHECK: paddd
; CHECK: pextrd
; CHECK: movq
  %a = load %i32vec3* %ap, align 8
  %b = load %i32vec3* %bp, align 8
  %x = add %i32vec3 %a, %b
  store %i32vec3 %x, %i32vec3* %ret, align 8
  ret void
}

; <7 x i32> is processed as two <4 x i32> halves; the trailing 12 bytes are
; stored with movq + pextrd.
%i32vec7 = type <7 x i32>
define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
; CHECK: movdqa
; CHECK: movdqa
; CHECK: paddd
; CHECK: paddd
; CHECK: pextrd
; CHECK: movq
; CHECK: movdqa
  %a = load %i32vec7* %ap, align 16
  %b = load %i32vec7* %bp, align 16
  %x = add %i32vec7 %a, %b
  store %i32vec7 %x, %i32vec7* %ret, align 16
  ret void
}

; <12 x i32> splits evenly into three <4 x i32> chunks.
%i32vec12 = type <12 x i32>
define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
; CHECK: movdqa
; CHECK: movdqa
; CHECK: movdqa
; CHECK: paddd
; CHECK: paddd
; CHECK: paddd
; CHECK: movdqa
; CHECK: movdqa
; CHECK: movdqa
  %a = load %i32vec12* %ap, align 16
  %b = load %i32vec12* %bp, align 16
  %x = add %i32vec12 %a, %b
  store %i32vec12 %x, %i32vec12* %ret, align 16
  ret void
}


; <3 x i16>: the 6-byte store is split into movd + pextrw.
%i16vec3 = type <3 x i16>
define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
; CHECK: movdqa
; CHECK: paddw
; CHECK: movd
; CHECK: pextrw
  %a = load %i16vec3* %ap, align 16
  %b = load %i16vec3* %bp, align 16
  %x = add %i16vec3 %a, %b
  store %i16vec3 %x, %i16vec3* %ret, align 16
  ret void
}

; <4 x i16> is widened to <8 x i16>; the 8-byte store is a single movq.
%i16vec4 = type <4 x i16>
define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
; CHECK: movdqa
; CHECK: paddw
; CHECK: movq
  %a = load %i16vec4* %ap, align 16
  %b = load %i16vec4* %bp, align 16
  %x = add %i16vec4 %a, %b
  store %i16vec4 %x, %i16vec4* %ret, align 16
  ret void
}

; <12 x i16> is processed as two chunks; the trailing 8 bytes are stored with movq.
%i16vec12 = type <12 x i16>
define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
; CHECK: movdqa
; CHECK: movdqa
; CHECK: paddw
; CHECK: paddw
; CHECK: movq
; CHECK: movdqa
  %a = load %i16vec12* %ap, align 16
  %b = load %i16vec12* %bp, align 16
  %x = add %i16vec12 %a, %b
  store %i16vec12 %x, %i16vec12* %ret, align 16
  ret void
}

; <18 x i16> is processed as three chunks; the trailing 4 bytes are stored with movd.
%i16vec18 = type <18 x i16>
define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
; CHECK: movdqa
; CHECK: movdqa
; CHECK: movdqa
; CHECK: paddw
; CHECK: paddw
; CHECK: paddw
; CHECK: movd
; CHECK: movdqa
; CHECK: movdqa
  %a = load %i16vec18* %ap, align 16
  %b = load %i16vec18* %bp, align 16
  %x = add %i16vec18 %a, %b
  store %i16vec18 %x, %i16vec18* %ret, align 16
  ret void
}


; <3 x i8>: the 3-byte store is assembled piecewise (pextrb, movb).
%i8vec3 = type <3 x i8>
define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
; CHECK: movdqa
; CHECK: paddb
; CHECK: pextrb
; CHECK: movb
  %a = load %i8vec3* %ap, align 16
  %b = load %i8vec3* %bp, align 16
  %x = add %i8vec3 %a, %b
  store %i8vec3 %x, %i8vec3* %ret, align 16
  ret void
}

; <31 x i8>: the add is done in two 16-byte chunks; the odd-sized tail is
; stored piecewise (movq, pextrb, pextrw).
%i8vec31 = type <31 x i8>
define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
; CHECK: movdqa
; CHECK: movdqa
; CHECK: paddb
; CHECK: paddb
; CHECK: movq
; CHECK: pextrb
; CHECK: pextrw
  %a = load %i8vec31* %ap, align 16
  %b = load %i8vec31* %bp, align 16
  %x = add %i8vec31 %a, %b
  store %i8vec31 %x, %i8vec31* %ret, align 16
  ret void
}


; lshr of a <3 x i8> held in a packed struct; the shift is scalarized to shrb.
%i8vec3pack = type { <3 x i8>, i8 }
define %i8vec3pack @rot() nounwind {
; CHECK: shrb
entry:
  %X = alloca %i8vec3pack, align 4
  %rot = alloca %i8vec3pack, align 4
  %result = alloca %i8vec3pack, align 4
  %storetmp = bitcast %i8vec3pack* %X to <3 x i8>*
  store <3 x i8> <i8 -98, i8 -98, i8 -98>, <3 x i8>* %storetmp
  %storetmp1 = bitcast %i8vec3pack* %rot to <3 x i8>*
  store <3 x i8> <i8 1, i8 1, i8 1>, <3 x i8>* %storetmp1
  %tmp = load %i8vec3pack* %X
  %extractVec = extractvalue %i8vec3pack %tmp, 0
  %tmp2 = load %i8vec3pack* %rot
  %extractVec3 = extractvalue %i8vec3pack %tmp2, 0
  %shr = lshr <3 x i8> %extractVec, %extractVec3
  %storetmp4 = bitcast %i8vec3pack* %result to <3 x i8>*
  store <3 x i8> %shr, <3 x i8>* %storetmp4
  %tmp5 = load %i8vec3pack* %result
  ret %i8vec3pack %tmp5
}