; Exercises NVPTX code generation for under-aligned vector loads and stores.
      1 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
      2 
      3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
      4 target triple = "nvptx64-nvidia-cuda"
      5 
      6 ; CHECK-LABEL: t1
      7 define <4 x float> @t1(i8* %p1) {
      8 ; CHECK-NOT: ld.v4
      9 ; CHECK-NOT: ld.v2
     10 ; CHECK-NOT: ld.f32
     11 ; CHECK: ld.u8
     12   %cast = bitcast i8* %p1 to <4 x float>*
     13   %r = load <4 x float>, <4 x float>* %cast, align 1
     14   ret <4 x float> %r
     15 }
     16 
     17 ; CHECK-LABEL: t2
     18 define <4 x float> @t2(i8* %p1) {
     19 ; CHECK-NOT: ld.v4
     20 ; CHECK-NOT: ld.v2
     21 ; CHECK: ld.f32
     22   %cast = bitcast i8* %p1 to <4 x float>*
     23   %r = load <4 x float>, <4 x float>* %cast, align 4
     24   ret <4 x float> %r
     25 }
     26 
     27 ; CHECK-LABEL: t3
     28 define <4 x float> @t3(i8* %p1) {
     29 ; CHECK-NOT: ld.v4
     30 ; CHECK: ld.v2
     31   %cast = bitcast i8* %p1 to <4 x float>*
     32   %r = load <4 x float>, <4 x float>* %cast, align 8
     33   ret <4 x float> %r
     34 }
     35 
     36 ; CHECK-LABEL: t4
     37 define <4 x float> @t4(i8* %p1) {
     38 ; CHECK: ld.v4
     39   %cast = bitcast i8* %p1 to <4 x float>*
     40   %r = load <4 x float>, <4 x float>* %cast, align 16
     41   ret <4 x float> %r
     42 }
     43 
     44 ; CHECK-LABEL: .visible .func test_v1halfp0a1(
     45 ; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_v1halfp0a1_param_0];
     46 ; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_v1halfp0a1_param_1];
     47 ; CHECK-DAG: ld.u8        [[B0:%r[sd]?[0-9]+]], [%[[FROM]]]
     48 ; CHECK-DAG: st.u8        [%[[TO]]], [[B0]]
     49 ; CHECK-DAG: ld.u8        [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
     50 ; CHECK-DAG: st.u8        [%[[TO]]+1], [[B1]]
     51 ; CHECK: ret
     52 define void @test_v1halfp0a1(<1 x half> * noalias readonly %from, <1 x half> * %to) {
     53   %1 = load <1 x half>, <1 x half> * %from , align 1
     54   store <1 x half> %1, <1 x half> * %to , align 1
     55   ret void
     56 }
     57 
     58 ; CHECK-LABEL: .visible .func test_v2halfp0a1(
     59 ; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_v2halfp0a1_param_0];
     60 ; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_v2halfp0a1_param_1];
     61 ; CHECK-DAG: ld.u8        [[B0:%r[sd]?[0-9]+]], [%[[FROM]]]
     62 ; CHECK-DAG: st.u8        [%[[TO]]],
     63 ; CHECK-DAG: ld.u8        [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
     64 ; CHECK-DAG: st.u8        [%[[TO]]+1],
     65 ; CHECK-DAG: ld.u8        [[B2:%r[sd]?[0-9]+]], [%[[FROM]]+2]
     66 ; CHECK-DAG: st.u8        [%[[TO]]+2],
     67 ; CHECK-DAG: ld.u8        [[B3:%r[sd]?[0-9]+]], [%[[FROM]]+3]
     68 ; CHECK-DAG: st.u8        [%[[TO]]+3],
     69 ; CHECK: ret
     70 define void @test_v2halfp0a1(<2 x half> * noalias readonly %from, <2 x half> * %to) {
     71   %1 = load <2 x half>, <2 x half> * %from , align 1
     72   store <2 x half> %1, <2 x half> * %to , align 1
     73   ret void
     74 }
     75 
     76 ; CHECK-LABEL: .visible .func test_v4halfp0a1(
     77 ; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_v4halfp0a1_param_0];
     78 ; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_v4halfp0a1_param_1];
     79 ; CHECK-DAG: ld.u8        [[B0:%r[sd]?[0-9]+]], [%[[FROM]]]
     80 ; CHECK-DAG: st.u8        [%[[TO]]], [[B0]]
     81 ; CHECK-DAG: ld.u8        [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
     82 ; CHECK-DAG: st.u8        [%[[TO]]+1], [[B1]]
     83 ; CHECK-DAG: ld.u8        [[B2:%r[sd]?[0-9]+]], [%[[FROM]]+2]
     84 ; CHECK-DAG: st.u8        [%[[TO]]+2], [[B2]]
     85 ; CHECK-DAG: ld.u8        [[B3:%r[sd]?[0-9]+]], [%[[FROM]]+3]
     86 ; CHECK-DAG: st.u8        [%[[TO]]+3], [[B3]]
     87 ; CHECK-DAG: ld.u8        [[B4:%r[sd]?[0-9]+]], [%[[FROM]]+4]
     88 ; CHECK-DAG: st.u8        [%[[TO]]+4], [[B4]]
     89 ; CHECK-DAG: ld.u8        [[B5:%r[sd]?[0-9]+]], [%[[FROM]]+5]
     90 ; CHECK-DAG: st.u8        [%[[TO]]+5], [[B5]]
     91 ; CHECK-DAG: ld.u8        [[B6:%r[sd]?[0-9]+]], [%[[FROM]]+6]
     92 ; CHECK-DAG: st.u8        [%[[TO]]+6], [[B6]]
     93 ; CHECK-DAG: ld.u8        [[B7:%r[sd]?[0-9]+]], [%[[FROM]]+7]
     94 ; CHECK-DAG: st.u8        [%[[TO]]+7], [[B7]]
     95 ; CHECK: ret
     96 define void @test_v4halfp0a1(<4 x half> * noalias readonly %from, <4 x half> * %to) {
     97   %1 = load <4 x half>, <4 x half> * %from , align 1
     98   store <4 x half> %1, <4 x half> * %to , align 1
     99   ret void
    100 }
    101 
    102 
    103 ; CHECK-LABEL: s1
    104 define void @s1(<4 x float>* %p1, <4 x float> %v) {
    105 ; CHECK-NOT: st.v4
    106 ; CHECK-NOT: st.v2
    107 ; CHECK-NOT: st.f32
    108 ; CHECK: st.u8
    109   store <4 x float> %v, <4 x float>* %p1, align 1
    110   ret void
    111 }
    112 
    113 ; CHECK-LABEL: s2
    114 define void @s2(<4 x float>* %p1, <4 x float> %v) {
    115 ; CHECK-NOT: st.v4
    116 ; CHECK-NOT: st.v2
    117 ; CHECK: st.f32
    118   store <4 x float> %v, <4 x float>* %p1, align 4
    119   ret void
    120 }
    121 
    122 ; CHECK-LABEL: s3
    123 define void @s3(<4 x float>* %p1, <4 x float> %v) {
    124 ; CHECK-NOT: st.v4
    125   store <4 x float> %v, <4 x float>* %p1, align 8
    126   ret void
    127 }
    128 
    129 ; CHECK-LABEL: s4
    130 define void @s4(<4 x float>* %p1, <4 x float> %v) {
    131 ; CHECK: st.v4
    132   store <4 x float> %v, <4 x float>* %p1, align 16
    133   ret void
    134 }
    135 
    136