; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s

; Exercises <4 x float> loads and stores at alignments 1, 4, 8 and 16 and
; verifies, per alignment, which PTX ld/st forms appear in the llc output.
; A negative check placed before a positive one constrains the output between
; the function label and the first match of the positive pattern.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
target triple = "nvptx64-nvidia-cuda"

;------------------------------------------------------------------------------
; Loads
;------------------------------------------------------------------------------

; align 1: byte loads are expected; no ld.v4, ld.v2 or ld.f32 may precede the
; first ld.u8.
; CHECK-LABEL: t1
define <4 x float> @t1(i8* %p1) {
; CHECK-NOT: ld.v4
; CHECK-NOT: ld.v2
; CHECK-NOT: ld.f32
; CHECK: ld.u8
  %vec.addr = bitcast i8* %p1 to <4 x float>*
  %vec = load <4 x float>, <4 x float>* %vec.addr, align 1
  ret <4 x float> %vec
}

; align 4: scalar ld.f32 is expected; no vector load may precede it.
; CHECK-LABEL: t2
define <4 x float> @t2(i8* %p1) {
; CHECK-NOT: ld.v4
; CHECK-NOT: ld.v2
; CHECK: ld.f32
  %vec.addr = bitcast i8* %p1 to <4 x float>*
  %vec = load <4 x float>, <4 x float>* %vec.addr, align 4
  ret <4 x float> %vec
}

; align 8: ld.v2 is expected; no ld.v4 may precede it.
; CHECK-LABEL: t3
define <4 x float> @t3(i8* %p1) {
; CHECK-NOT: ld.v4
; CHECK: ld.v2
  %vec.addr = bitcast i8* %p1 to <4 x float>*
  %vec = load <4 x float>, <4 x float>* %vec.addr, align 8
  ret <4 x float> %vec
}

; align 16: a full ld.v4 is expected.
; CHECK-LABEL: t4
define <4 x float> @t4(i8* %p1) {
; CHECK: ld.v4
  %vec.addr = bitcast i8* %p1 to <4 x float>*
  %vec = load <4 x float>, <4 x float>* %vec.addr, align 16
  ret <4 x float> %vec
}

;------------------------------------------------------------------------------
; Stores
;------------------------------------------------------------------------------

; align 1: byte stores are expected; no st.v4, st.v2 or st.f32 may precede the
; first st.u8.
; CHECK-LABEL: s1
define void @s1(<4 x float>* %p1, <4 x float> %v) {
; CHECK-NOT: st.v4
; CHECK-NOT: st.v2
; CHECK-NOT: st.f32
; CHECK: st.u8
  store <4 x float> %v, <4 x float>* %p1, align 1
  ret void
}

; align 4: scalar st.f32 is expected; no vector store may precede it.
; CHECK-LABEL: s2
define void @s2(<4 x float>* %p1, <4 x float> %v) {
; CHECK-NOT: st.v4
; CHECK-NOT: st.v2
; CHECK: st.f32
  store <4 x float> %v, <4 x float>* %p1, align 4
  ret void
}

; align 8: only the absence of st.v4 is verified (up to the next label).
; NOTE(review): unlike t3 there is no positive match for st.v2 here; consider
; adding one after confirming the actual llc output.
; CHECK-LABEL: s3
define void @s3(<4 x float>* %p1, <4 x float> %v) {
; CHECK-NOT: st.v4
  store <4 x float> %v, <4 x float>* %p1, align 8
  ret void
}

; align 16: a full st.v4 is expected.
; CHECK-LABEL: s4
define void @s4(<4 x float>* %p1, <4 x float> %v) {
; CHECK: st.v4
  store <4 x float> %v, <4 x float>* %p1, align 16
  ret void
}