; RUN: llc < %s -march armeb -mtriple arm-eabi -mattr v7,neon -float-abi soft -o - | FileCheck %s
; RUN: llc < %s -march armeb -mtriple arm-eabi -mattr v7,neon -float-abi hard -o - | FileCheck %s -check-prefix CHECK-HARD

; Scalar <-> NEON vector bitcasts on big-endian ARM (armeb, v7 + neon).
; The file is compiled twice, once per float ABI; the hard-float invocation
; uses its own FileCheck prefix.

; Integer vector addends, one global per vector type.
@v2i64 = global <2 x i64> zeroinitializer
@v2i32 = global <2 x i32> zeroinitializer
@v4i32 = global <4 x i32> zeroinitializer
@v4i16 = global <4 x i16> zeroinitializer
@v8i16 = global <8 x i16> zeroinitializer
@v8i8 = global <8 x i8> zeroinitializer
@v16i8 = global <16 x i8> zeroinitializer

; Floating-point vector addends.
@v2f32 = global <2 x float> zeroinitializer
@v2f64 = global <2 x double> zeroinitializer
@v4f32 = global <4 x float> zeroinitializer


; 64-bit conversions

; i64 argument viewed as <8 x i8>, added to @v8i8, result written to %store.
define void @conv_i64_to_v8i8( i64 %val, <8 x i8>* %store ) {
; CHECK-LABEL: conv_i64_to_v8i8:
; CHECK: vrev64.8
  %vec = bitcast i64 %val to <8 x i8>
  %addend = load <8 x i8>, <8 x i8>* @v8i8
  %sum = add <8 x i8> %vec, %addend
  store <8 x i8> %sum, <8 x i8>* %store
  ret void
}

; <8 x i8> loaded from %load, added to @v8i8, viewed as i64 and passed on to
; @conv_i64_to_v8i8.
define void @conv_v8i8_to_i64( <8 x i8>* %load, <8 x i8>* %store ) {
; CHECK-LABEL: conv_v8i8_to_i64:
; CHECK: vrev64.8
  %vec = load <8 x i8>, <8 x i8>* %load
  %addend = load <8 x i8>, <8 x i8>* @v8i8
  %sum = add <8 x i8> %vec, %addend
  %scalar = bitcast <8 x i8> %sum to i64
  call void @conv_i64_to_v8i8( i64 %scalar, <8 x i8>* %store )
  ret void
}

; i64 argument viewed as <4 x i16>, added to @v4i16, result written to %store.
define void @conv_i64_to_v4i16( i64 %val, <4 x i16>* %store ) {
; CHECK-LABEL: conv_i64_to_v4i16:
; CHECK: vrev64.16
  %vec = bitcast i64 %val to <4 x i16>
  %addend = load <4 x i16>, <4 x i16>* @v4i16
  %sum = add <4 x i16> %vec, %addend
  store <4 x i16> %sum, <4 x i16>* %store
  ret void
}

; <4 x i16> loaded from %load, added to @v4i16, viewed as i64 and passed on to
; @conv_i64_to_v4i16.
define void @conv_v4i16_to_i64( <4 x i16>* %load, <4 x i16>* %store ) {
; CHECK-LABEL: conv_v4i16_to_i64:
; CHECK: vrev64.16
  %vec = load <4 x i16>, <4 x i16>* %load
  %addend = load <4 x i16>, <4 x i16>* @v4i16
  %sum = add <4 x i16> %vec, %addend
  %scalar = bitcast <4 x i16> %sum to i64
  call void @conv_i64_to_v4i16( i64 %scalar, <4 x i16>* %store )
  ret void
}

; i64 argument viewed as <2 x i32>, added to @v2i32, result written to %store.
define void @conv_i64_to_v2i32( i64 %val, <2 x i32>* %store ) {
; CHECK-LABEL: conv_i64_to_v2i32:
; CHECK: vrev64.32
  %vec = bitcast i64 %val to <2 x i32>
  %addend = load <2 x i32>, <2 x i32>* @v2i32
  %sum = add <2 x i32> %vec, %addend
  store <2 x i32> %sum, <2 x i32>* %store
  ret void
}

; <2 x i32> loaded from %load, added to @v2i32, viewed as i64 and passed on to
; @conv_i64_to_v2i32.
define void @conv_v2i32_to_i64( <2 x i32>* %load, <2 x i32>* %store ) {
; CHECK-LABEL: conv_v2i32_to_i64:
; CHECK: vrev64.32
  %vec = load <2 x i32>, <2 x i32>* %load
  %addend = load <2 x i32>, <2 x i32>* @v2i32
  %sum = add <2 x i32> %vec, %addend
  %scalar = bitcast <2 x i32> %sum to i64
  call void @conv_i64_to_v2i32( i64 %scalar, <2 x i32>* %store )
  ret void
}

; i64 argument viewed as <2 x float>, fadd-ed to @v2f32, result written to
; %store.
define void @conv_i64_to_v2f32( i64 %val, <2 x float>* %store ) {
; CHECK-LABEL: conv_i64_to_v2f32:
; CHECK: vrev64.32
  %vec = bitcast i64 %val to <2 x float>
  %addend = load <2 x float>, <2 x float>* @v2f32
  %sum = fadd <2 x float> %vec, %addend
  store <2 x float> %sum, <2 x float>* %store
  ret void
}

; <2 x float> loaded from %load, fadd-ed to @v2f32, viewed as i64 and passed on
; to @conv_i64_to_v2f32.
define void @conv_v2f32_to_i64( <2 x float>* %load, <2 x float>* %store ) {
; CHECK-LABEL: conv_v2f32_to_i64:
; CHECK: vrev64.32
  %vec = load <2 x float>, <2 x float>* %load
  %addend = load <2 x float>, <2 x float>* @v2f32
  %sum = fadd <2 x float> %vec, %addend
  %scalar = bitcast <2 x float> %sum to i64
  call void @conv_i64_to_v2f32( i64 %scalar, <2 x float>* %store )
  ret void
}

; double argument viewed as <8 x i8>, added to @v8i8, result written to %store.
define void @conv_f64_to_v8i8( double %val, <8 x i8>* %store ) {
; CHECK-LABEL: conv_f64_to_v8i8:
; CHECK: vrev64.8
  %vec = bitcast double %val to <8 x i8>
  %addend = load <8 x i8>, <8 x i8>* @v8i8
  %sum = add <8 x i8> %vec, %addend
  store <8 x i8> %sum, <8 x i8>* %store
  ret void
}

; <8 x i8> loaded from %load, added to @v8i8, viewed as double and passed on to
; @conv_f64_to_v8i8.
define void @conv_v8i8_to_f64( <8 x i8>* %load, <8 x i8>* %store ) {
; CHECK-LABEL: conv_v8i8_to_f64:
; CHECK: vrev64.8
  %vec = load <8 x i8>, <8 x i8>* %load
  %addend = load <8 x i8>, <8 x i8>* @v8i8
  %sum = add <8 x i8> %vec, %addend
  %scalar = bitcast <8 x i8> %sum to double
  call void @conv_f64_to_v8i8( double %scalar, <8 x i8>* %store )
  ret void
}

; double argument viewed as <4 x i16>, added to @v4i16, result written to
; %store.
define void @conv_f64_to_v4i16( double %val, <4 x i16>* %store ) {
; CHECK-LABEL: conv_f64_to_v4i16:
; CHECK: vrev64.16
  %vec = bitcast double %val to <4 x i16>
  %addend = load <4 x i16>, <4 x i16>* @v4i16
  %sum = add <4 x i16> %vec, %addend
  store <4 x i16> %sum, <4 x i16>* %store
  ret void
}

; <4 x i16> loaded from %load, added to @v4i16, viewed as double and passed on
; to @conv_f64_to_v4i16.
define void @conv_v4i16_to_f64( <4 x i16>* %load, <4 x i16>* %store ) {
; CHECK-LABEL: conv_v4i16_to_f64:
; CHECK: vrev64.16
  %vec = load <4 x i16>, <4 x i16>* %load
  %addend = load <4 x i16>, <4 x i16>* @v4i16
  %sum = add <4 x i16> %vec, %addend
  %scalar = bitcast <4 x i16> %sum to double
  call void @conv_f64_to_v4i16( double %scalar, <4 x i16>* %store )
  ret void
}

; double argument viewed as <2 x i32>, added to @v2i32, result written to
; %store.
define void @conv_f64_to_v2i32( double %val, <2 x i32>* %store ) {
; CHECK-LABEL: conv_f64_to_v2i32:
; CHECK: vrev64.32
  %vec = bitcast double %val to <2 x i32>
  %addend = load <2 x i32>, <2 x i32>* @v2i32
  %sum = add <2 x i32> %vec, %addend
  store <2 x i32> %sum, <2 x i32>* %store
  ret void
}

; <2 x i32> loaded from %load, added to @v2i32, viewed as double and passed on
; to @conv_f64_to_v2i32.
define void @conv_v2i32_to_f64( <2 x i32>* %load, <2 x i32>* %store ) {
; CHECK-LABEL: conv_v2i32_to_f64:
; CHECK: vrev64.32
  %vec = load <2 x i32>, <2 x i32>* %load
  %addend = load <2 x i32>, <2 x i32>* @v2i32
  %sum = add <2 x i32> %vec, %addend
  %scalar = bitcast <2 x i32> %sum to double
  call void @conv_f64_to_v2i32( double %scalar, <2 x i32>* %store )
  ret void
}

; double argument viewed as <2 x float>, fadd-ed to @v2f32, result written to
; %store.
define void @conv_f64_to_v2f32( double %val, <2 x float>* %store ) {
; CHECK-LABEL: conv_f64_to_v2f32:
; CHECK: vrev64.32
  %vec = bitcast double %val to <2 x float>
  %addend = load <2 x float>, <2 x float>* @v2f32
  %sum = fadd <2 x float> %vec, %addend
  store <2 x float> %sum, <2 x float>* %store
  ret void
}

; <2 x float> loaded from %load, fadd-ed to @v2f32, viewed as double and passed
; on to @conv_f64_to_v2f32.
define void @conv_v2f32_to_f64( <2 x float>* %load, <2 x float>* %store ) {
; CHECK-LABEL: conv_v2f32_to_f64:
; CHECK: vrev64.32
  %vec = load <2 x float>, <2 x float>* %load
  %addend = load <2 x float>, <2 x float>* @v2f32
  %sum = fadd <2 x float> %vec, %addend
  %scalar = bitcast <2 x float> %sum to double
  call void @conv_f64_to_v2f32( double %scalar, <2 x float>* %store )
  ret void
}

; 128-bit conversions


; i128 argument viewed as <16 x i8>, added to @v16i8, result written to %store.
define void @conv_i128_to_v16i8( i128 %val, <16 x i8>* %store ) {
; CHECK-LABEL: conv_i128_to_v16i8:
; CHECK: vrev32.8
  %vec = bitcast i128 %val to <16 x i8>
  %addend = load <16 x i8>, <16 x i8>* @v16i8
  %sum = add <16 x i8> %vec, %addend
  store <16 x i8> %sum, <16 x i8>* %store
  ret void
}

; <16 x i8> loaded from %load, added to @v16i8, viewed as i128 and passed on to
; @conv_i128_to_v16i8.
define void @conv_v16i8_to_i128( <16 x i8>* %load, <16 x i8>* %store ) {
; CHECK-LABEL: conv_v16i8_to_i128:
; CHECK: vrev32.8
  %vec = load <16 x i8>, <16 x i8>* %load
  %addend = load <16 x i8>, <16 x i8>* @v16i8
  %sum = add <16 x i8> %vec, %addend
  %scalar = bitcast <16 x i8> %sum to i128
  call void @conv_i128_to_v16i8( i128 %scalar, <16 x i8>* %store )
  ret void
}

; i128 argument viewed as <8 x i16>, added to @v8i16, result written to %store.
define void @conv_i128_to_v8i16( i128 %val, <8 x i16>* %store ) {
; CHECK-LABEL: conv_i128_to_v8i16:
; CHECK: vrev32.16
  %vec = bitcast i128 %val to <8 x i16>
  %addend = load <8 x i16>, <8 x i16>* @v8i16
  %sum = add <8 x i16> %vec, %addend
  store <8 x i16> %sum, <8 x i16>* %store
  ret void
}

; <8 x i16> loaded from %load, added to @v8i16, viewed as i128 and passed on to
; @conv_i128_to_v8i16.
define void @conv_v8i16_to_i128( <8 x i16>* %load, <8 x i16>* %store ) {
; CHECK-LABEL: conv_v8i16_to_i128:
; CHECK: vrev32.16
  %vec = load <8 x i16>, <8 x i16>* %load
  %addend = load <8 x i16>, <8 x i16>* @v8i16
  %sum = add <8 x i16> %vec, %addend
  %scalar = bitcast <8 x i16> %sum to i128
  call void @conv_i128_to_v8i16( i128 %scalar, <8 x i16>* %store )
  ret void
}

; i128 argument viewed as <4 x i32>, added to @v4i32, result written to %store.
define void @conv_i128_to_v4i32( i128 %val, <4 x i32>* %store ) {
; CHECK-LABEL: conv_i128_to_v4i32:
; CHECK: vrev64.32
  %vec = bitcast i128 %val to <4 x i32>
  %addend = load <4 x i32>, <4 x i32>* @v4i32
  %sum = add <4 x i32> %vec, %addend
  store <4 x i32> %sum, <4 x i32>* %store
  ret void
}

; <4 x i32> loaded from %load, added to @v4i32, viewed as i128 and passed on to
; @conv_i128_to_v4i32.
define void @conv_v4i32_to_i128( <4 x i32>* %load, <4 x i32>* %store ) {
; CHECK-LABEL: conv_v4i32_to_i128:
; CHECK: vrev64.32
  %vec = load <4 x i32>, <4 x i32>* %load
  %addend = load <4 x i32>, <4 x i32>* @v4i32
  %sum = add <4 x i32> %vec, %addend
  %scalar = bitcast <4 x i32> %sum to i128
  call void @conv_i128_to_v4i32( i128 %scalar, <4 x i32>* %store )
  ret void
}

; i128 argument viewed as <4 x float>, fadd-ed to @v4f32, result written to
; %store.
define void @conv_i128_to_v4f32( i128 %val, <4 x float>* %store ) {
; CHECK-LABEL: conv_i128_to_v4f32:
; CHECK: vrev64.32
  %vec = bitcast i128 %val to <4 x float>
  %addend = load <4 x float>, <4 x float>* @v4f32
  %sum = fadd <4 x float> %vec, %addend
  store <4 x float> %sum, <4 x float>* %store
  ret void
}

; <4 x float> loaded from %load, fadd-ed to @v4f32, viewed as i128 and passed
; on to @conv_i128_to_v4f32.
define void @conv_v4f32_to_i128( <4 x float>* %load, <4 x float>* %store ) {
; CHECK-LABEL: conv_v4f32_to_i128:
; CHECK: vrev64.32
  %vec = load <4 x float>, <4 x float>* %load
  %addend = load <4 x float>, <4 x float>* @v4f32
  %sum = fadd <4 x float> %vec, %addend
  %scalar = bitcast <4 x float> %sum to i128
  call void @conv_i128_to_v4f32( i128 %scalar, <4 x float>* %store )
  ret void
}

; fp128 argument viewed as <2 x double>, fadd-ed to @v2f64, result written to
; %store.
define void @conv_f128_to_v2f64( fp128 %val, <2 x double>* %store ) {
; CHECK-LABEL: conv_f128_to_v2f64:
; CHECK: vrev64.32
  %vec = bitcast fp128 %val to <2 x double>
  %addend = load <2 x double>, <2 x double>* @v2f64
  %sum = fadd <2 x double> %vec, %addend
  store <2 x double> %sum, <2 x double>* %store
  ret void
}

; <2 x double> loaded from %load, fadd-ed to @v2f64, viewed as fp128 and passed
; on to @conv_f128_to_v2f64.
define void @conv_v2f64_to_f128( <2 x double>* %load, <2 x double>* %store ) {
; CHECK-LABEL: conv_v2f64_to_f128:
; CHECK: vrev64.32
  %vec = load <2 x double>, <2 x double>* %load
  %addend = load <2 x double>, <2 x double>* @v2f64
  %sum = fadd <2 x double> %vec, %addend
  %scalar = bitcast <2 x double> %sum to fp128
  call void @conv_f128_to_v2f64( fp128 %scalar, <2 x double>* %store )
  ret void
}

; fp128 argument viewed as <16 x i8>, added to @v16i8, result written to
; %store.
define void @conv_f128_to_v16i8( fp128 %val, <16 x i8>* %store ) {
; CHECK-LABEL: conv_f128_to_v16i8:
; CHECK: vrev32.8
  %vec = bitcast fp128 %val to <16 x i8>
  %addend = load <16 x i8>, <16 x i8>* @v16i8
  %sum = add <16 x i8> %vec, %addend
  store <16 x i8> %sum, <16 x i8>* %store
  ret void
}

; <16 x i8> loaded from %load, added to @v16i8, viewed as fp128 and passed on
; to @conv_f128_to_v16i8.
define void @conv_v16i8_to_f128( <16 x i8>* %load, <16 x i8>* %store ) {
; CHECK-LABEL: conv_v16i8_to_f128:
; CHECK: vrev32.8
  %vec = load <16 x i8>, <16 x i8>* %load
  %addend = load <16 x i8>, <16 x i8>* @v16i8
  %sum = add <16 x i8> %vec, %addend
  %scalar = bitcast <16 x i8> %sum to fp128
  call void @conv_f128_to_v16i8( fp128 %scalar, <16 x i8>* %store )
  ret void
}

; fp128 argument viewed as <8 x i16>, added to @v8i16, result written to
; %store.
define void @conv_f128_to_v8i16( fp128 %val, <8 x i16>* %store ) {
; CHECK-LABEL: conv_f128_to_v8i16:
; CHECK: vrev32.16
  %vec = bitcast fp128 %val to <8 x i16>
  %addend = load <8 x i16>, <8 x i16>* @v8i16
  %sum = add <8 x i16> %vec, %addend
  store <8 x i16> %sum, <8 x i16>* %store
  ret void
}

; <8 x i16> loaded from %load, added to @v8i16, viewed as fp128 and passed on
; to @conv_f128_to_v8i16.
define void @conv_v8i16_to_f128( <8 x i16>* %load, <8 x i16>* %store ) {
; CHECK-LABEL: conv_v8i16_to_f128:
; CHECK: vrev32.16
  %vec = load <8 x i16>, <8 x i16>* %load
  %addend = load <8 x i16>, <8 x i16>* @v8i16
  %sum = add <8 x i16> %vec, %addend
  %scalar = bitcast <8 x i16> %sum to fp128
  call void @conv_f128_to_v8i16( fp128 %scalar, <8 x i16>* %store )
  ret void
}

; fp128 argument viewed as <4 x float>, fadd-ed to @v4f32, result written to
; %store.
define void @conv_f128_to_v4f32( fp128 %val, <4 x float>* %store ) {
; CHECK-LABEL: conv_f128_to_v4f32:
; CHECK: vrev64.32
  %vec = bitcast fp128 %val to <4 x float>
  %addend = load <4 x float>, <4 x float>* @v4f32
  %sum = fadd <4 x float> %vec, %addend
  store <4 x float> %sum, <4 x float>* %store
  ret void
}

; <4 x float> loaded from %load, fadd-ed to @v4f32, viewed as fp128 and passed
; on to @conv_f128_to_v4f32.
define void @conv_v4f32_to_f128( <4 x float>* %load, <4 x float>* %store ) {
; CHECK-LABEL: conv_v4f32_to_f128:
; CHECK: vrev64.32
  %vec = load <4 x float>, <4 x float>* %load
  %addend = load <4 x float>, <4 x float>* @v4f32
  %sum = fadd <4 x float> %vec, %addend
  %scalar = bitcast <4 x float> %sum to fp128
  call void @conv_f128_to_v4f32( fp128 %scalar, <4 x float>* %store )
  ret void
}

; 128-bit vector arguments stored straight to memory.
; Soft-float expectations: two vmov instructions pairing r3,r2 and r1,r0 into
; D registers, then one vst1.64 of that register pair.
; Hard-float expectations: no vmov before a vst1.64 of {d0, d1}.

; <4 x i32> argument written unchanged to %store.
define void @arg_v4i32( <4 x i32> %var, <4 x i32>* %store ) {
; CHECK-LABEL: arg_v4i32:
; CHECK: vmov [[REG2:d[0-9]+]], r3, r2
; CHECK: vmov [[REG1:d[0-9]+]], r1, r0
; CHECK: vst1.64 {[[REG1]], [[REG2]]},
; CHECK-HARD-LABEL: arg_v4i32:
; CHECK-HARD-NOT: vmov
; CHECK-HARD: vst1.64 {d0, d1}
  store <4 x i32> %var, <4 x i32>* %store
  ret void
}

; <8 x i16> argument written unchanged to %store.
define void @arg_v8i16( <8 x i16> %var, <8 x i16>* %store ) {
; CHECK-LABEL: arg_v8i16:
; CHECK: vmov [[REG2:d[0-9]+]], r3, r2
; CHECK: vmov [[REG1:d[0-9]+]], r1, r0
; CHECK: vst1.64 {[[REG1]], [[REG2]]},
; CHECK-HARD-LABEL: arg_v8i16:
; CHECK-HARD-NOT: vmov
; CHECK-HARD: vst1.64 {d0, d1}
  store <8 x i16> %var, <8 x i16>* %store
  ret void
}

; <16 x i8> argument written unchanged to %store.
define void @arg_v16i8( <16 x i8> %var, <16 x i8>* %store ) {
; CHECK-LABEL: arg_v16i8:
; CHECK: vmov [[REG2:d[0-9]+]], r3, r2
; CHECK: vmov [[REG1:d[0-9]+]], r1, r0
; CHECK: vst1.64 {[[REG1]], [[REG2]]},
; CHECK-HARD-LABEL: arg_v16i8:
; CHECK-HARD-NOT: vmov
; CHECK-HARD: vst1.64 {d0, d1}
  store <16 x i8> %var, <16 x i8>* %store
  ret void
}