; RUN: llc < %s -march=x86 -mattr=+mmx

;; A basic sanity check to make sure that MMX arithmetic actually compiles.
;; First is a straight translation of the original with bitcasts as needed.
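
;; A minimal sketch of the bitcast pattern relied on below (an illustrative
;; helper under an assumed name, not one of the functions being tested):
define x86_mmx @sketch_add_v8i8(x86_mmx %a, x86_mmx %b) {
entry:
  ; x86_mmx itself has no arithmetic, so each value is bitcast to a sized
  ; vector type, operated on, and bitcast back to x86_mmx.
  %a8 = bitcast x86_mmx %a to <8 x i8>
  %b8 = bitcast x86_mmx %b to <8 x i8>
  %sum = add <8 x i8> %a8, %b8
  %res = bitcast <8 x i8> %sum to x86_mmx
  ret x86_mmx %res
}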

define void @foo(x86_mmx* %A, x86_mmx* %B) {
entry:
  %tmp1 = load x86_mmx* %A  ; <x86_mmx> [#uses=1]
  %tmp3 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp1a = bitcast x86_mmx %tmp1 to <8 x i8>
  %tmp3a = bitcast x86_mmx %tmp3 to <8 x i8>
  %tmp4 = add <8 x i8> %tmp1a, %tmp3a  ; <<8 x i8>> [#uses=2]
  %tmp4a = bitcast <8 x i8> %tmp4 to x86_mmx
  store x86_mmx %tmp4a, x86_mmx* %A
  %tmp7 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b( x86_mmx %tmp4a, x86_mmx %tmp7 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp12, x86_mmx* %A
  %tmp16 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b( x86_mmx %tmp12, x86_mmx %tmp16 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp21, x86_mmx* %A
  %tmp27 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp21a = bitcast x86_mmx %tmp21 to <8 x i8>
  %tmp27a = bitcast x86_mmx %tmp27 to <8 x i8>
  %tmp28 = sub <8 x i8> %tmp21a, %tmp27a  ; <<8 x i8>> [#uses=2]
  %tmp28a = bitcast <8 x i8> %tmp28 to x86_mmx
  store x86_mmx %tmp28a, x86_mmx* %A
  %tmp31 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b( x86_mmx %tmp28a, x86_mmx %tmp31 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp36, x86_mmx* %A
  %tmp40 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b( x86_mmx %tmp36, x86_mmx %tmp40 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp45, x86_mmx* %A
  %tmp51 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp45a = bitcast x86_mmx %tmp45 to <8 x i8>
  %tmp51a = bitcast x86_mmx %tmp51 to <8 x i8>
  %tmp52 = mul <8 x i8> %tmp45a, %tmp51a  ; <<8 x i8>> [#uses=2]
  %tmp52a = bitcast <8 x i8> %tmp52 to x86_mmx
  store x86_mmx %tmp52a, x86_mmx* %A
  %tmp57 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp57a = bitcast x86_mmx %tmp57 to <8 x i8>
  %tmp58 = and <8 x i8> %tmp52, %tmp57a  ; <<8 x i8>> [#uses=2]
  %tmp58a = bitcast <8 x i8> %tmp58 to x86_mmx
  store x86_mmx %tmp58a, x86_mmx* %A
  %tmp63 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp63a = bitcast x86_mmx %tmp63 to <8 x i8>
  %tmp64 = or <8 x i8> %tmp58, %tmp63a  ; <<8 x i8>> [#uses=2]
  %tmp64a = bitcast <8 x i8> %tmp64 to x86_mmx
  store x86_mmx %tmp64a, x86_mmx* %A
  %tmp69 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp69a = bitcast x86_mmx %tmp69 to <8 x i8>
  %tmp64b = bitcast x86_mmx %tmp64a to <8 x i8>
  %tmp70 = xor <8 x i8> %tmp64b, %tmp69a  ; <<8 x i8>> [#uses=1]
  %tmp70a = bitcast <8 x i8> %tmp70 to x86_mmx
  store x86_mmx %tmp70a, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}

define void @baz(x86_mmx* %A, x86_mmx* %B) {
entry:
  %tmp1 = load x86_mmx* %A  ; <x86_mmx> [#uses=1]
  %tmp3 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp1a = bitcast x86_mmx %tmp1 to <2 x i32>
  %tmp3a = bitcast x86_mmx %tmp3 to <2 x i32>
  %tmp4 = add <2 x i32> %tmp1a, %tmp3a  ; <<2 x i32>> [#uses=2]
  %tmp4a = bitcast <2 x i32> %tmp4 to x86_mmx
  store x86_mmx %tmp4a, x86_mmx* %A
  %tmp9 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp9a = bitcast x86_mmx %tmp9 to <2 x i32>
  %tmp10 = sub <2 x i32> %tmp4, %tmp9a  ; <<2 x i32>> [#uses=2]
  %tmp10a = bitcast <2 x i32> %tmp10 to x86_mmx
  store x86_mmx %tmp10a, x86_mmx* %A
  %tmp15 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp10b = bitcast x86_mmx %tmp10a to <2 x i32>
  %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32>
  %tmp16 = mul <2 x i32> %tmp10b, %tmp15a  ; <<2 x i32>> [#uses=2]
  %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx
  store x86_mmx %tmp16a, x86_mmx* %A
  %tmp21 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp16b = bitcast x86_mmx %tmp16a to <2 x i32>
  %tmp21a = bitcast x86_mmx %tmp21 to <2 x i32>
  %tmp22 = and <2 x i32> %tmp16b, %tmp21a  ; <<2 x i32>> [#uses=2]
  %tmp22a = bitcast <2 x i32> %tmp22 to x86_mmx
  store x86_mmx %tmp22a, x86_mmx* %A
  %tmp27 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp22b = bitcast x86_mmx %tmp22a to <2 x i32>
  %tmp27a = bitcast x86_mmx %tmp27 to <2 x i32>
  %tmp28 = or <2 x i32> %tmp22b, %tmp27a  ; <<2 x i32>> [#uses=2]
  %tmp28a = bitcast <2 x i32> %tmp28 to x86_mmx
  store x86_mmx %tmp28a, x86_mmx* %A
  %tmp33 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp28b = bitcast x86_mmx %tmp28a to <2 x i32>
  %tmp33a = bitcast x86_mmx %tmp33 to <2 x i32>
  %tmp34 = xor <2 x i32> %tmp28b, %tmp33a  ; <<2 x i32>> [#uses=1]
  %tmp34a = bitcast <2 x i32> %tmp34 to x86_mmx
  store x86_mmx %tmp34a, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}

define void @bar(x86_mmx* %A, x86_mmx* %B) {
entry:
  %tmp1 = load x86_mmx* %A  ; <x86_mmx> [#uses=1]
  %tmp3 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp1a = bitcast x86_mmx %tmp1 to <4 x i16>
  %tmp3a = bitcast x86_mmx %tmp3 to <4 x i16>
  %tmp4 = add <4 x i16> %tmp1a, %tmp3a  ; <<4 x i16>> [#uses=2]
  %tmp4a = bitcast <4 x i16> %tmp4 to x86_mmx
  store x86_mmx %tmp4a, x86_mmx* %A
  %tmp7 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w( x86_mmx %tmp4a, x86_mmx %tmp7 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp12, x86_mmx* %A
  %tmp16 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp12, x86_mmx %tmp16 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp21, x86_mmx* %A
  %tmp27 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp21a = bitcast x86_mmx %tmp21 to <4 x i16>
  %tmp27a = bitcast x86_mmx %tmp27 to <4 x i16>
  %tmp28 = sub <4 x i16> %tmp21a, %tmp27a  ; <<4 x i16>> [#uses=2]
  %tmp28a = bitcast <4 x i16> %tmp28 to x86_mmx
  store x86_mmx %tmp28a, x86_mmx* %A
  %tmp31 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w( x86_mmx %tmp28a, x86_mmx %tmp31 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp36, x86_mmx* %A
  %tmp40 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w( x86_mmx %tmp36, x86_mmx %tmp40 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp45, x86_mmx* %A
  %tmp51 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp45a = bitcast x86_mmx %tmp45 to <4 x i16>
  %tmp51a = bitcast x86_mmx %tmp51 to <4 x i16>
  %tmp52 = mul <4 x i16> %tmp45a, %tmp51a  ; <<4 x i16>> [#uses=2]
  %tmp52a = bitcast <4 x i16> %tmp52 to x86_mmx
  store x86_mmx %tmp52a, x86_mmx* %A
  %tmp55 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w( x86_mmx %tmp52a, x86_mmx %tmp55 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp60, x86_mmx* %A
  %tmp64 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd( x86_mmx %tmp60, x86_mmx %tmp64 )  ; <x86_mmx> [#uses=1]
  %tmp70 = bitcast x86_mmx %tmp69 to x86_mmx  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp70, x86_mmx* %A
  %tmp75 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp70a = bitcast x86_mmx %tmp70 to <4 x i16>
  %tmp75a = bitcast x86_mmx %tmp75 to <4 x i16>
  %tmp76 = and <4 x i16> %tmp70a, %tmp75a  ; <<4 x i16>> [#uses=2]
  %tmp76a = bitcast <4 x i16> %tmp76 to x86_mmx
  store x86_mmx %tmp76a, x86_mmx* %A
  %tmp81 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp76b = bitcast x86_mmx %tmp76a to <4 x i16>
  %tmp81a = bitcast x86_mmx %tmp81 to <4 x i16>
  %tmp82 = or <4 x i16> %tmp76b, %tmp81a  ; <<4 x i16>> [#uses=2]
  %tmp82a = bitcast <4 x i16> %tmp82 to x86_mmx
  store x86_mmx %tmp82a, x86_mmx* %A
  %tmp87 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp82b = bitcast x86_mmx %tmp82a to <4 x i16>
  %tmp87a = bitcast x86_mmx %tmp87 to <4 x i16>
  %tmp88 = xor <4 x i16> %tmp82b, %tmp87a  ; <<4 x i16>> [#uses=1]
  %tmp88a = bitcast <4 x i16> %tmp88 to x86_mmx
  store x86_mmx %tmp88a, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}

;; The following is modified to use MMX intrinsics everywhere they work.
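
;; A minimal sketch of the intrinsic form (an illustrative helper under an
;; assumed name; it relies only on the declarations at the end of this file):
define x86_mmx @sketch_padd_b(x86_mmx %a, x86_mmx %b) {
entry:
  ; The MMX intrinsics take and return x86_mmx directly, so no bitcasts are
  ; needed around the operation.
  %sum = tail call x86_mmx @llvm.x86.mmx.padd.b( x86_mmx %a, x86_mmx %b )
  ret x86_mmx %sum
}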

define void @fooa(x86_mmx* %A, x86_mmx* %B) {
entry:
  %tmp1 = load x86_mmx* %A  ; <x86_mmx> [#uses=1]
  %tmp3 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.b( x86_mmx %tmp1, x86_mmx %tmp3 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp4, x86_mmx* %A
  %tmp7 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b( x86_mmx %tmp4, x86_mmx %tmp7 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp12, x86_mmx* %A
  %tmp16 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b( x86_mmx %tmp12, x86_mmx %tmp16 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp21, x86_mmx* %A
  %tmp27 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp28 = tail call x86_mmx @llvm.x86.mmx.psub.b( x86_mmx %tmp21, x86_mmx %tmp27 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp28, x86_mmx* %A
  %tmp31 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b( x86_mmx %tmp28, x86_mmx %tmp31 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp36, x86_mmx* %A
  %tmp40 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b( x86_mmx %tmp36, x86_mmx %tmp40 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp45, x86_mmx* %A
  %tmp51 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp51a = bitcast x86_mmx %tmp51 to i64
  %tmp51aa = bitcast i64 %tmp51a to <8 x i8>
  %tmp51b = bitcast x86_mmx %tmp45 to <8 x i8>
  %tmp52 = mul <8 x i8> %tmp51b, %tmp51aa  ; <x86_mmx> [#uses=2]
  %tmp52a = bitcast <8 x i8> %tmp52 to i64
  %tmp52aa = bitcast i64 %tmp52a to x86_mmx
  store x86_mmx %tmp52aa, x86_mmx* %A
  %tmp57 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp58 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp51, x86_mmx %tmp57 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp58, x86_mmx* %A
  %tmp63 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp64 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp58, x86_mmx %tmp63 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp64, x86_mmx* %A
  %tmp69 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp70 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp64, x86_mmx %tmp69 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp70, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}

define void @baza(x86_mmx* %A, x86_mmx* %B) {
entry:
  %tmp1 = load x86_mmx* %A  ; <x86_mmx> [#uses=1]
  %tmp3 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.d( x86_mmx %tmp1, x86_mmx %tmp3 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp4, x86_mmx* %A
  %tmp9 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp10 = tail call x86_mmx @llvm.x86.mmx.psub.d( x86_mmx %tmp4, x86_mmx %tmp9 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp10, x86_mmx* %A
  %tmp15 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp10a = bitcast x86_mmx %tmp10 to <2 x i32>
  %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32>
  %tmp16 = mul <2 x i32> %tmp10a, %tmp15a  ; <x86_mmx> [#uses=2]
  %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx
  store x86_mmx %tmp16a, x86_mmx* %A
  %tmp21 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp22 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp16a, x86_mmx %tmp21 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp22, x86_mmx* %A
  %tmp27 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp28 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp22, x86_mmx %tmp27 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp28, x86_mmx* %A
  %tmp33 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp34 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp28, x86_mmx %tmp33 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp34, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}

define void @bara(x86_mmx* %A, x86_mmx* %B) {
entry:
  %tmp1 = load x86_mmx* %A  ; <x86_mmx> [#uses=1]
  %tmp3 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.w( x86_mmx %tmp1, x86_mmx %tmp3 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp4, x86_mmx* %A
  %tmp7 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w( x86_mmx %tmp4, x86_mmx %tmp7 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp12, x86_mmx* %A
  %tmp16 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp12, x86_mmx %tmp16 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp21, x86_mmx* %A
  %tmp27 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp28 = tail call x86_mmx @llvm.x86.mmx.psub.w( x86_mmx %tmp21, x86_mmx %tmp27 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp28, x86_mmx* %A
  %tmp31 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w( x86_mmx %tmp28, x86_mmx %tmp31 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp36, x86_mmx* %A
  %tmp40 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w( x86_mmx %tmp36, x86_mmx %tmp40 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp45, x86_mmx* %A
  %tmp51 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp52 = tail call x86_mmx @llvm.x86.mmx.pmull.w( x86_mmx %tmp45, x86_mmx %tmp51 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp52, x86_mmx* %A
  %tmp55 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w( x86_mmx %tmp52, x86_mmx %tmp55 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp60, x86_mmx* %A
  %tmp64 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd( x86_mmx %tmp60, x86_mmx %tmp64 )  ; <x86_mmx> [#uses=1]
  %tmp70 = bitcast x86_mmx %tmp69 to x86_mmx  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp70, x86_mmx* %A
  %tmp75 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp76 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp70, x86_mmx %tmp75 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp76, x86_mmx* %A
  %tmp81 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp82 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp76, x86_mmx %tmp81 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp82, x86_mmx* %A
  %tmp87 = load x86_mmx* %B  ; <x86_mmx> [#uses=1]
  %tmp88 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp82, x86_mmx %tmp87 )  ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp88, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}

declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx)

declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx)

declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)

declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx)

declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx)

declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx)

declare void @llvm.x86.mmx.emms()

declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padds.d(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psubs.d(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx)