Home | History | Annotate | Download | only in NVPTX
      1 ; Verifies correctness of load/store of parameters and return values.
      2 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s
      3 
      4 %s_i1 = type { i1 }
      5 %s_i8 = type { i8 }
      6 %s_i16 = type { i16 }
      7 %s_f16 = type { half }
      8 %s_i32 = type { i32 }
      9 %s_f32 = type { float }
     10 %s_i64 = type { i64 }
     11 %s_f64 = type { double }
     12 
     13 ; More complicated types. i64 is used to increase natural alignment
     14 ; requirement for the type.
     15 %s_i32x4 = type { i32, i32, i32, i32, i64}
     16 %s_i32f32 = type { i32, float, i32, float, i64}
     17 %s_i8i32x4 = type { i32, i32, i8, i32, i32, i64}
     18 %s_i8i32x4p = type <{ i32, i32, i8, i32, i32, i64}>
     19 %s_crossfield = type { i32, [2 x i32], <4 x i32>, [3 x {i32, i32, i32}]}
     20 ; All scalar parameters must be at least 32 bits in size.
     21 ; i1 is loaded/stored as i8.
     22 
     23 ; CHECK: .func  (.param .b32 func_retval0)
     24 ; CHECK-LABEL: test_i1(
     25 ; CHECK-NEXT: .param .b32 test_i1_param_0
     26 ; CHECK:      ld.param.u8 [[A8:%rs[0-9]+]], [test_i1_param_0];
     27 ; CHECK:      and.b16 [[A:%rs[0-9]+]], [[A8]], 1;
     28 ; CHECK:      setp.eq.b16 %p1, [[A]], 1
     29 ; CHECK:      cvt.u32.u16 [[B:%r[0-9]+]], [[A8]]
     30 ; CHECK:      and.b32 [[C:%r[0-9]+]], [[B]], 1;
     31 ; CHECK:      .param .b32 param0;
     32 ; CHECK:      st.param.b32    [param0+0], [[C]]
     33 ; CHECK:      .param .b32 retval0;
     34 ; CHECK:      call.uni
     35 ; CHECK-NEXT: test_i1,
     36 ; CHECK:      ld.param.b32    [[R8:%r[0-9]+]], [retval0+0];
     37 ; CHECK:      and.b32         [[R:%r[0-9]+]], [[R8]], 1;
     38 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
     39 ; CHECK:      ret;
     40 define i1 @test_i1(i1 %a) {
     41   %ret = tail call i1 @test_i1(i1 %a)  ; self-call: %a goes out as the argument, the result comes back
     42   ret i1 %ret
     43 }
     44 
     45 ; Signed i1 is a somewhat special case. We only care about one bit and
     46 ; then use neg.s32 to convert it to 32-bit -1 if it's set.
     47 ; CHECK: .func  (.param .b32 func_retval0)
     48 ; CHECK-LABEL: test_i1s(
     49 ; CHECK-NEXT: .param .b32 test_i1s_param_0
     50 ; CHECK:      ld.param.u8 [[A8:%rs[0-9]+]], [test_i1s_param_0];
     51 ; CHECK:      cvt.u32.u16     [[A32:%r[0-9]+]], [[A8]];
     52 ; CHECK:      and.b32         [[A1:%r[0-9]+]], [[A32]], 1;
     53 ; CHECK:      neg.s32         [[A:%r[0-9]+]], [[A1]];
     54 ; CHECK:      .param .b32 param0;
     55 ; CHECK:      st.param.b32    [param0+0], [[A]];
     56 ; CHECK:      .param .b32 retval0;
     57 ; CHECK:      call.uni
     58 ; CHECK:      ld.param.b32    [[R8:%r[0-9]+]], [retval0+0];
     59 ; CHECK:      and.b32         [[R1:%r[0-9]+]], [[R8]], 1;
     60 ; CHECK:      neg.s32         [[R:%r[0-9]+]], [[R1]];
     61 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
     62 ; CHECK-NEXT: ret;
     63 define signext i1 @test_i1s(i1 signext %a) {
     64   %ret = tail call signext i1 @test_i1s(i1 signext %a)  ; self-call with signext on both argument and result
     65   ret i1 %ret
     66 }
     67 
     68 ; Make sure that i1 loads are vectorized as i8 loads, respecting each element alignment.
     69 ; CHECK: .func  (.param .align 4 .b8 func_retval0[4])
     70 ; CHECK-LABEL: test_v3i1(
     71 ; CHECK-NEXT: .param .align 4 .b8 test_v3i1_param_0[4]
     72 ; CHECK-DAG:  ld.param.u8     [[E2:%rs[0-9]+]], [test_v3i1_param_0+2];
     73 ; CHECK-DAG:  ld.param.v2.u8  {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i1_param_0]
     74 ; CHECK:      .param .align 4 .b8 param0[4];
     75 ; CHECK-DAG:  st.param.v2.b8  [param0+0], {[[E0]], [[E1]]};
     76 ; CHECK-DAG:  st.param.b8     [param0+2], [[E2]];
     77 ; CHECK:      .param .align 4 .b8 retval0[4];
     78 ; CHECK:      call.uni (retval0),
     79 ; CHECK-NEXT: test_v3i1,
     80 ; CHECK-DAG:  ld.param.v2.b8  {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0];
     81 ; CHECK-DAG:  ld.param.b8     [[RE2:%rs[0-9]+]], [retval0+2];
     82 ; CHECK-DAG:  st.param.v2.b8  [func_retval0+0], {[[RE0]], [[RE1]]}
     83 ; CHECK-DAG:  st.param.b8     [func_retval0+2], [[RE2]];
     84 ; CHECK-NEXT: ret;
     85 define <3 x i1> @test_v3i1(<3 x i1> %a) {
     86   %ret = tail call <3 x i1> @test_v3i1(<3 x i1> %a)  ; self-call: vector goes out as the argument, result comes back
     87   ret <3 x i1> %ret
     88 }
     89 
     90 ; CHECK: .func  (.param .align 4 .b8 func_retval0[4])
     91 ; CHECK-LABEL: test_v4i1(
     92 ; CHECK-NEXT: .param .align 4 .b8 test_v4i1_param_0[4]
     93 ; CHECK:      ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i1_param_0]
     94 ; CHECK:      .param .align 4 .b8 param0[4];
     95 ; CHECK:      st.param.v4.b8  [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
     96 ; CHECK:      .param .align 4 .b8 retval0[4];
     97 ; CHECK:      call.uni (retval0),
     98 ; CHECK:      test_v4i1,
     99 ; CHECK:      ld.param.v4.b8  {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
    100 ; CHECK:      st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]};
    101 ; CHECK-NEXT: ret;
    102 define <4 x i1> @test_v4i1(<4 x i1> %a) {
    103   %ret = tail call <4 x i1> @test_v4i1(<4 x i1> %a)  ; self-call: vector goes out as the argument, result comes back
    104   ret <4 x i1> %ret
    105 }
    106 
    107 ; CHECK: .func  (.param .align 8 .b8 func_retval0[8])
    108 ; CHECK-LABEL: test_v5i1(
    109 ; CHECK-NEXT: .param .align 8 .b8 test_v5i1_param_0[8]
    110 ; CHECK-DAG:  ld.param.u8     [[E4:%rs[0-9]+]], [test_v5i1_param_0+4];
    111 ; CHECK-DAG:  ld.param.v4.u8  {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i1_param_0]
    112 ; CHECK:      .param .align 8 .b8 param0[8];
    113 ; CHECK-DAG:  st.param.v4.b8  [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
    114 ; CHECK-DAG:  st.param.b8     [param0+4], [[E4]];
    115 ; CHECK:      .param .align 8 .b8 retval0[8];
    116 ; CHECK:      call.uni (retval0),
    117 ; CHECK-NEXT: test_v5i1,
    118 ; CHECK-DAG:  ld.param.v4.b8  {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
    119 ; CHECK-DAG:  ld.param.b8     [[RE4:%rs[0-9]+]], [retval0+4];
    120 ; CHECK-DAG:  st.param.v4.b8  [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
    121 ; CHECK-DAG:  st.param.b8     [func_retval0+4], [[RE4]];
    122 ; CHECK-NEXT: ret;
    123 define <5 x i1> @test_v5i1(<5 x i1> %a) {
    124   %ret = tail call <5 x i1> @test_v5i1(<5 x i1> %a)  ; self-call: vector goes out as the argument, result comes back
    125   ret <5 x i1> %ret
    126 }
    127 
    128 ; Unsigned i8 is loaded into a 16-bit register, zero-extended to 32 bits, and masked to 8 bits.
    129 ; CHECK: .func  (.param .b32 func_retval0)
    130 ; CHECK-LABEL: test_i8(
    131 ; CHECK-NEXT: .param .b32 test_i8_param_0
    132 ; CHECK:      ld.param.u8 [[A8:%rs[0-9]+]], [test_i8_param_0];
    133 ; CHECK:      cvt.u32.u16     [[A32:%r[0-9]+]], [[A8]];
    134 ; CHECK:      and.b32         [[A:%r[0-9]+]], [[A32]], 255;
    135 ; CHECK:      .param .b32 param0;
    136 ; CHECK:      st.param.b32    [param0+0], [[A]];
    137 ; CHECK:      .param .b32 retval0;
    138 ; CHECK:      call.uni (retval0),
    139 ; CHECK:      test_i8,
    140 ; CHECK:      ld.param.b32    [[R32:%r[0-9]+]], [retval0+0];
    141 ; CHECK:      and.b32         [[R:%r[0-9]+]], [[R32]], 255;
    142 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
    143 ; CHECK-NEXT: ret;
    144 define i8 @test_i8(i8 %a) {
    145   %ret = tail call i8 @test_i8(i8 %a)  ; self-call: %a goes out as the argument, the result comes back
    146   ret i8 %ret
    147 }
    148 
    149 ; signed i8 is loaded into 16-bit register which is then sign-extended to i32.
    150 ; CHECK: .func  (.param .b32 func_retval0)
    151 ; CHECK-LABEL: test_i8s(
    152 ; CHECK-NEXT: .param .b32 test_i8s_param_0
    153 ; CHECK:      ld.param.s8 [[A8:%rs[0-9]+]], [test_i8s_param_0];
    154 ; CHECK:      cvt.s32.s16     [[A:%r[0-9]+]], [[A8]];
    155 ; CHECK:      .param .b32 param0;
    156 ; CHECK:      st.param.b32    [param0+0], [[A]];
    157 ; CHECK:      .param .b32 retval0;
    158 ; CHECK:      call.uni (retval0),
    159 ; CHECK:      test_i8s,
    160 ; CHECK:      ld.param.b32    [[R32:%r[0-9]+]], [retval0+0];
    161 ; -- This is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32 ?
    162 ; CHECK:      cvt.u16.u32     [[R16:%rs[0-9]+]], [[R32]];
    163 ; CHECK:      cvt.s32.s16     [[R:%r[0-9]+]], [[R16]];
    164 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
    165 ; CHECK-NEXT: ret;
    166 define signext i8 @test_i8s(i8 signext %a) {
    167   %ret = tail call signext i8 @test_i8s(i8 signext %a)  ; self-call with signext on both argument and result
    168   ret i8 %ret
    169 }
    170 
    171 ; CHECK: .func  (.param .align 4 .b8 func_retval0[4])
    172 ; CHECK-LABEL: test_v3i8(
    173 ; CHECK-NEXT: .param .align 4 .b8 test_v3i8_param_0[4]
    174 ; CHECK-DAG:  ld.param.u8     [[E2:%rs[0-9]+]], [test_v3i8_param_0+2];
    175 ; CHECK-DAG:  ld.param.v2.u8  {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i8_param_0];
    176 ; CHECK:      .param .align 4 .b8 param0[4];
    177 ; CHECK:      st.param.v2.b8  [param0+0], {[[E0]], [[E1]]};
    178 ; CHECK:      st.param.b8     [param0+2], [[E2]];
    179 ; CHECK:      .param .align 4 .b8 retval0[4];
    180 ; CHECK:      call.uni (retval0),
    181 ; CHECK-NEXT: test_v3i8,
    182 ; CHECK-DAG:  ld.param.v2.b8  {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0];
    183 ; CHECK-DAG:  ld.param.b8     [[RE2:%rs[0-9]+]], [retval0+2];
    184 ; CHECK-DAG:  st.param.v2.b8  [func_retval0+0], {[[RE0]], [[RE1]]};
    185 ; CHECK-DAG:  st.param.b8     [func_retval0+2], [[RE2]];
    186 ; CHECK-NEXT: ret;
    187 define <3 x i8> @test_v3i8(<3 x i8> %a) {
    188   %ret = tail call <3 x i8> @test_v3i8(<3 x i8> %a)  ; self-call: vector goes out as the argument, result comes back
    189   ret <3 x i8> %ret
    190 }
    191 
    192 ; CHECK: .func  (.param .align 4 .b8 func_retval0[4])
    193 ; CHECK-LABEL: test_v4i8(
    194 ; CHECK-NEXT: .param .align 4 .b8 test_v4i8_param_0[4]
    195 ; CHECK:      ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i8_param_0]
    196 ; CHECK:      .param .align 4 .b8 param0[4];
    197 ; CHECK:      st.param.v4.b8  [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
    198 ; CHECK:      .param .align 4 .b8 retval0[4];
    199 ; CHECK:      call.uni (retval0),
    200 ; CHECK-NEXT: test_v4i8,
    201 ; CHECK:      ld.param.v4.b8  {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
    202 ; CHECK:      st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
    203 ; CHECK-NEXT: ret;
    204 define <4 x i8> @test_v4i8(<4 x i8> %a) {
    205   %ret = tail call <4 x i8> @test_v4i8(<4 x i8> %a)  ; self-call: vector goes out as the argument, result comes back
    206   ret <4 x i8> %ret
    207 }
    208 
    209 ; CHECK: .func  (.param .align 8 .b8 func_retval0[8])
    210 ; CHECK-LABEL: test_v5i8(
    211 ; CHECK-NEXT: .param .align 8 .b8 test_v5i8_param_0[8]
    212 ; CHECK-DAG:  ld.param.u8     [[E4:%rs[0-9]+]], [test_v5i8_param_0+4];
    213 ; CHECK-DAG:  ld.param.v4.u8  {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i8_param_0]
    214 ; CHECK:      .param .align 8 .b8 param0[8];
    215 ; CHECK-DAG:  st.param.v4.b8  [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
    216 ; CHECK-DAG:  st.param.b8     [param0+4], [[E4]];
    217 ; CHECK:      .param .align 8 .b8 retval0[8];
    218 ; CHECK:      call.uni (retval0),
    219 ; CHECK-NEXT: test_v5i8,
    220 ; CHECK-DAG:  ld.param.v4.b8  {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
    221 ; CHECK-DAG:  ld.param.b8     [[RE4:%rs[0-9]+]], [retval0+4];
    222 ; CHECK-DAG:  st.param.v4.b8  [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
    223 ; CHECK-DAG:  st.param.b8     [func_retval0+4], [[RE4]];
    224 ; CHECK-NEXT: ret;
        ; <5 x i8> is passed as an 8-byte array aligned to 8: lanes 0-3 move as one v4 access, lane 4 as a scalar at offset 4.
    225 define <5 x i8> @test_v5i8(<5 x i8> %a) {
    226        %r = tail call <5 x i8> @test_v5i8(<5 x i8> %a);
    227        ret <5 x i8> %r;
    228 }
    229 
    230 ; CHECK: .func  (.param .b32 func_retval0)
    231 ; CHECK-LABEL: test_i16(
    232 ; CHECK-NEXT: .param .b32 test_i16_param_0
    233 ; CHECK:      ld.param.u16    [[E16:%rs[0-9]+]], [test_i16_param_0];
    234 ; CHECK:      cvt.u32.u16     [[E32:%r[0-9]+]], [[E16]];
    235 ; CHECK:      .param .b32 param0;
    236 ; CHECK:      st.param.b32    [param0+0], [[E32]];
    237 ; CHECK:      .param .b32 retval0;
    238 ; CHECK:      call.uni (retval0),
    239 ; CHECK-NEXT: test_i16,
    240 ; CHECK:      ld.param.b32    [[RE32:%r[0-9]+]], [retval0+0];
    241 ; CHECK:      and.b32         [[R:%r[0-9]+]], [[RE32]], 65535;
    242 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
    243 ; CHECK-NEXT: ret;
    244 define i16 @test_i16(i16 %a) {
    245   %ret = tail call i16 @test_i16(i16 %a)  ; self-call: %a goes out as the argument, the result comes back
    246   ret i16 %ret
    247 }
    248 
    249 ; CHECK: .func  (.param .b32 func_retval0)
    250 ; CHECK-LABEL: test_i16s(
    251 ; CHECK-NEXT: .param .b32 test_i16s_param_0
    252 ; CHECK:      ld.param.u16    [[E16:%rs[0-9]+]], [test_i16s_param_0];
    253 ; CHECK:      cvt.s32.s16     [[E32:%r[0-9]+]], [[E16]];
    254 ; CHECK:      .param .b32 param0;
    255 ; CHECK:      st.param.b32    [param0+0], [[E32]];
    256 ; CHECK:      .param .b32 retval0;
    257 ; CHECK:      call.uni (retval0),
    258 ; CHECK-NEXT: test_i16s,
    259 ; CHECK:      ld.param.b32    [[RE32:%r[0-9]+]], [retval0+0];
    260 ; CHECK:      cvt.s32.s16     [[R:%r[0-9]+]], [[RE32]];
    261 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
    262 ; CHECK-NEXT: ret;
    263 define signext i16 @test_i16s(i16 signext %a) {
    264   %ret = tail call signext i16 @test_i16s(i16 signext %a)  ; self-call with signext on both argument and result
    265   ret i16 %ret
    266 }
    267 
    268 ; CHECK: .func  (.param .align 8 .b8 func_retval0[8])
    269 ; CHECK-LABEL: test_v3i16(
    270 ; CHECK-NEXT: .param .align 8 .b8 test_v3i16_param_0[8]
    271 ; CHECK-DAG:  ld.param.u16    [[E2:%rs[0-9]+]], [test_v3i16_param_0+4];
    272 ; CHECK-DAG:  ld.param.v2.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i16_param_0];
    273 ; CHECK:      .param .align 8 .b8 param0[8];
    274 ; CHECK:      st.param.v2.b16 [param0+0], {[[E0]], [[E1]]};
    275 ; CHECK:      st.param.b16    [param0+4], [[E2]];
    276 ; CHECK:      .param .align 8 .b8 retval0[8];
    277 ; CHECK:      call.uni (retval0),
    278 ; CHECK-NEXT: test_v3i16,
    279 ; CHECK:      ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0];
    280 ; CHECK:      ld.param.b16    [[RE2:%rs[0-9]+]], [retval0+4];
    281 ; CHECK-DAG:  st.param.v2.b16 [func_retval0+0], {[[RE0]], [[RE1]]};
    282 ; CHECK-DAG:  st.param.b16    [func_retval0+4], [[RE2]];
    283 ; CHECK-NEXT: ret;
    284 define <3 x i16> @test_v3i16(<3 x i16> %a) {
    285   %ret = tail call <3 x i16> @test_v3i16(<3 x i16> %a)  ; self-call: vector goes out as the argument, result comes back
    286   ret <3 x i16> %ret
    287 }
    288 
    289 ; CHECK: .func  (.param .align 8 .b8 func_retval0[8])
    290 ; CHECK-LABEL: test_v4i16(
    291 ; CHECK-NEXT: .param .align 8 .b8 test_v4i16_param_0[8]
    292 ; CHECK:      ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i16_param_0]
    293 ; CHECK:      .param .align 8 .b8 param0[8];
    294 ; CHECK:      st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
    295 ; CHECK:      .param .align 8 .b8 retval0[8];
    296 ; CHECK:      call.uni (retval0),
    297 ; CHECK-NEXT: test_v4i16,
    298 ; CHECK:      ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
    299 ; CHECK:      st.param.v4.b16 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
    300 ; CHECK-NEXT: ret;
    301 define <4 x i16> @test_v4i16(<4 x i16> %a) {
    302   %ret = tail call <4 x i16> @test_v4i16(<4 x i16> %a)  ; self-call: vector goes out as the argument, result comes back
    303   ret <4 x i16> %ret
    304 }
    305 
    306 ; CHECK: .func  (.param .align 16 .b8 func_retval0[16])
    307 ; CHECK-LABEL: test_v5i16(
    308 ; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16]
    309 ; CHECK-DAG:  ld.param.u16    [[E4:%rs[0-9]+]], [test_v5i16_param_0+8];
    310 ; CHECK-DAG:  ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0]
    311 ; CHECK:      .param .align 16 .b8 param0[16];
    312 ; CHECK-DAG:  st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
    313 ; CHECK-DAG:  st.param.b16    [param0+8], [[E4]];
    314 ; CHECK:      .param .align 16 .b8 retval0[16];
    315 ; CHECK:      call.uni (retval0),
    316 ; CHECK-NEXT: test_v5i16,
    317 ; CHECK-DAG:  ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
    318 ; CHECK-DAG:  ld.param.b16    [[RE4:%rs[0-9]+]], [retval0+8];
    319 ; CHECK-DAG:  st.param.v4.b16 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
    320 ; CHECK-DAG:  st.param.b16    [func_retval0+8], [[RE4]];
    321 ; CHECK-NEXT: ret;
        ; <5 x i16> is passed as a 16-byte array aligned to 16: lanes 0-3 move as one v4 access, lane 4 as a scalar at offset 8.
    322 define <5 x i16> @test_v5i16(<5 x i16> %a) {
    323        %r = tail call <5 x i16> @test_v5i16(<5 x i16> %a);
    324        ret <5 x i16> %r;
    325 }
    326 
    327 ; CHECK: .func  (.param .b32 func_retval0)
    328 ; CHECK-LABEL: test_f16(
    329 ; CHECK-NEXT: .param .b32 test_f16_param_0
    330 ; CHECK:      ld.param.b16    [[E:%h[0-9]+]], [test_f16_param_0];
    331 ; CHECK:      .param .b32 param0;
    332 ; CHECK:      st.param.b16    [param0+0], [[E]];
    333 ; CHECK:      .param .b32 retval0;
    334 ; CHECK:      call.uni (retval0),
    335 ; CHECK-NEXT: test_f16,
    336 ; CHECK:      ld.param.b16    [[R:%h[0-9]+]], [retval0+0];
    337 ; CHECK:      st.param.b16    [func_retval0+0], [[R]]
    338 ; CHECK-NEXT: ret;
    339 define half @test_f16(half %a) {
    340   %ret = tail call half @test_f16(half %a)  ; self-call: %a goes out as the argument, the result comes back
    341   ret half %ret
    342 }
    343 
    344 ; CHECK: .func  (.param .align 4 .b8 func_retval0[4])
    345 ; CHECK-LABEL: test_v2f16(
    346 ; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4]
    347 ; CHECK:      ld.param.b32    [[E:%hh[0-9]+]], [test_v2f16_param_0];
    348 ; CHECK:      .param .align 4 .b8 param0[4];
    349 ; CHECK:      st.param.b32    [param0+0], [[E]];
    350 ; CHECK:      .param .align 4 .b8 retval0[4];
    351 ; CHECK:      call.uni (retval0),
    352 ; CHECK-NEXT: test_v2f16,
    353 ; CHECK:      ld.param.b32    [[R:%hh[0-9]+]], [retval0+0];
    354 ; CHECK:      st.param.b32    [func_retval0+0], [[R]]
    355 ; CHECK-NEXT: ret;
    356 define <2 x half> @test_v2f16(<2 x half> %a) {
    357   %ret = tail call <2 x half> @test_v2f16(<2 x half> %a)  ; self-call: vector goes out as the argument, result comes back
    358   ret <2 x half> %ret
    359 }
    360 
    361 ; CHECK:.func  (.param .align 8 .b8 func_retval0[8])
    362 ; CHECK-LABEL: test_v3f16(
    363 ; CHECK:      .param .align 8 .b8 test_v3f16_param_0[8]
    364 ; CHECK-DAG:  ld.param.b32    [[HH01:%hh[0-9]+]], [test_v3f16_param_0];
    365 ; CHECK-DAG:  mov.b32         {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]];
    366 ; CHECK-DAG:  ld.param.b16    [[E2:%h[0-9]+]], [test_v3f16_param_0+4];
    367 ; CHECK:      .param .align 8 .b8 param0[8];
    368 ; CHECK-DAG:  st.param.v2.b16 [param0+0], {[[E0]], [[E1]]};
    369 ; CHECK-DAG:  st.param.b16    [param0+4], [[E2]];
    370 ; CHECK:      .param .align 8 .b8 retval0[8];
    371 ; CHECK:      call.uni (retval0),
    372 ; CHECK:      test_v3f16,
    373 ; CHECK-DAG:  ld.param.v2.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]]}, [retval0+0];
    374 ; CHECK-DAG:  ld.param.b16    [[R2:%h[0-9]+]], [retval0+4];
    375 ; CHECK-DAG:  st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]};
    376 ; CHECK-DAG:  st.param.b16    [func_retval0+4], [[R2]];
    377 ; CHECK:      ret;
    378 define <3 x half> @test_v3f16(<3 x half> %a) {
    379   %ret = tail call <3 x half> @test_v3f16(<3 x half> %a)  ; self-call: vector goes out as the argument, result comes back
    380   ret <3 x half> %ret
    381 }
    382 
    383 ; CHECK:.func  (.param .align 8 .b8 func_retval0[8])
    384 ; CHECK-LABEL: test_v4f16(
    385 ; CHECK:      .param .align 8 .b8 test_v4f16_param_0[8]
    386 ; CHECK:      ld.param.v2.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0];
    387 ; CHECK-DAG:  mov.b32         [[HH01:%hh[0-9]+]], [[R01]];
    388 ; CHECK-DAG:  mov.b32         [[HH23:%hh[0-9]+]], [[R23]];
    389 ; CHECK:      .param .align 8 .b8 param0[8];
    390 ; CHECK:      st.param.v2.b32 [param0+0], {[[HH01]], [[HH23]]};
    391 ; CHECK:      .param .align 8 .b8 retval0[8];
    392 ; CHECK:      call.uni (retval0),
    393 ; CHECK:      test_v4f16,
    394 ; CHECK:      ld.param.v2.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]]}, [retval0+0];
    395 ; CHECK:      st.param.v2.b32 [func_retval0+0], {[[RH01]], [[RH23]]};
    396 ; CHECK:      ret;
    397 define <4 x half> @test_v4f16(<4 x half> %a) {
    398   %ret = tail call <4 x half> @test_v4f16(<4 x half> %a)  ; self-call: vector goes out as the argument, result comes back
    399   ret <4 x half> %ret
    400 }
    401 
    402 ; CHECK:.func  (.param .align 16 .b8 func_retval0[16])
    403 ; CHECK-LABEL: test_v5f16(
    404 ; CHECK:      .param .align 16 .b8 test_v5f16_param_0[16]
    405 ; CHECK-DAG:  ld.param.v4.b16  {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v5f16_param_0];
        ; NOTE(review): the mov.b32 pattern below uses [[HH01]], which nothing in this
        ; test captures; the RUN line does not enable variable scoping, so it presumably
        ; resolves to a capture left over from an earlier test (e.g. test_v4f16). It also
        ; re-captures E0/E1 from the line above. Confirm this is what was intended.
    406 ; CHECK-DAG:  mov.b32         {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]];
    407 ; CHECK-DAG:  ld.param.b16    [[E4:%h[0-9]+]], [test_v5f16_param_0+8];
    408 ; CHECK:      .param .align 16 .b8 param0[16];
    409 ; CHECK-DAG:  st.param.v4.b16 [param0+0],
    410 ; CHECK-DAG:  st.param.b16    [param0+8], [[E4]];
    411 ; CHECK:      .param .align 16 .b8 retval0[16];
    412 ; CHECK:      call.uni (retval0),
    413 ; CHECK:      test_v5f16,
    414 ; CHECK-DAG:  ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0];
    415 ; CHECK-DAG:  ld.param.b16    [[R4:%h[0-9]+]], [retval0+8];
    416 ; CHECK-DAG:  st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]};
    417 ; CHECK-DAG:  st.param.b16    [func_retval0+8], [[R4]];
    418 ; CHECK:      ret;
    419 define <5 x half> @test_v5f16(<5 x half> %a) {
    420        %r = tail call <5 x half> @test_v5f16(<5 x half> %a);
    421        ret <5 x half> %r;
    422 }
    423 
    424 ; CHECK:.func  (.param .align 16 .b8 func_retval0[16])
    425 ; CHECK-LABEL: test_v8f16(
    426 ; CHECK:      .param .align 16 .b8 test_v8f16_param_0[16]
    427 ; CHECK:      ld.param.v4.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0];
    428 ; CHECK-DAG:  mov.b32         [[HH01:%hh[0-9]+]], [[R01]];
    429 ; CHECK-DAG:  mov.b32         [[HH23:%hh[0-9]+]], [[R23]];
    430 ; CHECK-DAG:  mov.b32         [[HH45:%hh[0-9]+]], [[R45]];
    431 ; CHECK-DAG:  mov.b32         [[HH67:%hh[0-9]+]], [[R67]];
    432 ; CHECK:      .param .align 16 .b8 param0[16];
    433 ; CHECK:      st.param.v4.b32 [param0+0], {[[HH01]], [[HH23]], [[HH45]], [[HH67]]};
    434 ; CHECK:      .param .align 16 .b8 retval0[16];
    435 ; CHECK:      call.uni (retval0),
    436 ; CHECK:      test_v8f16,
    437 ; CHECK:      ld.param.v4.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]], [[RH45:%hh[0-9]+]], [[RH67:%hh[0-9]+]]}, [retval0+0];
    438 ; CHECK:      st.param.v4.b32 [func_retval0+0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]};
    439 ; CHECK:      ret;
    440 define <8 x half> @test_v8f16(<8 x half> %a) {
    441   %ret = tail call <8 x half> @test_v8f16(<8 x half> %a)  ; self-call: vector goes out as the argument, result comes back
    442   ret <8 x half> %ret
    443 }
    444 
    445 ; CHECK:.func  (.param .align 32 .b8 func_retval0[32])
    446 ; CHECK-LABEL: test_v9f16(
    447 ; CHECK:      .param .align 32 .b8 test_v9f16_param_0[32]
    448 ; CHECK-DAG:  ld.param.v4.b16  {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v9f16_param_0];
    449 ; CHECK-DAG:  ld.param.v4.b16  {[[E4:%h[0-9]+]], [[E5:%h[0-9]+]], [[E6:%h[0-9]+]], [[E7:%h[0-9]+]]}, [test_v9f16_param_0+8];
    450 ; CHECK-DAG:  ld.param.b16     [[E8:%h[0-9]+]], [test_v9f16_param_0+16];
    451 ; CHECK:      .param .align 32 .b8 param0[32];
    452 ; CHECK-DAG:  st.param.v4.b16 [param0+0],
    453 ; CHECK-DAG:  st.param.v4.b16 [param0+8],
    454 ; CHECK-DAG:  st.param.b16    [param0+16], [[E8]];
    455 ; CHECK:      .param .align 32 .b8 retval0[32];
    456 ; CHECK:      call.uni (retval0),
    457 ; CHECK:      test_v9f16,
    458 ; CHECK-DAG:  ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0];
    459 ; CHECK-DAG:  ld.param.v4.b16 {[[R4:%h[0-9]+]], [[R5:%h[0-9]+]], [[R6:%h[0-9]+]], [[R7:%h[0-9]+]]}, [retval0+8];
    460 ; CHECK-DAG:  ld.param.b16    [[R8:%h[0-9]+]], [retval0+16];
    461 ; CHECK-DAG:  st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]};
    462 ; CHECK-DAG:  st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]};
    463 ; CHECK-DAG:  st.param.b16    [func_retval0+16], [[R8]];
    464 ; CHECK:      ret;
    465 define <9 x half> @test_v9f16(<9 x half> %a) {
    466   %ret = tail call <9 x half> @test_v9f16(<9 x half> %a)  ; self-call: vector goes out as the argument, result comes back
    467   ret <9 x half> %ret
    468 }
    469 
    470 ; CHECK: .func  (.param .b32 func_retval0)
    471 ; CHECK-LABEL: test_i32(
    472 ; CHECK-NEXT: .param .b32 test_i32_param_0
    473 ; CHECK:      ld.param.u32    [[E:%r[0-9]+]], [test_i32_param_0];
    474 ; CHECK:      .param .b32 param0;
    475 ; CHECK:      st.param.b32    [param0+0], [[E]];
    476 ; CHECK:      .param .b32 retval0;
    477 ; CHECK:      call.uni (retval0),
    478 ; CHECK-NEXT: test_i32,
    479 ; CHECK:      ld.param.b32    [[R:%r[0-9]+]], [retval0+0];
    480 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
    481 ; CHECK-NEXT: ret;
    482 define i32 @test_i32(i32 %a) {
    483   %ret = tail call i32 @test_i32(i32 %a)  ; self-call: %a goes out as the argument, the result comes back
    484   ret i32 %ret
    485 }
    486 
    487 ; CHECK: .func  (.param .align 16 .b8 func_retval0[16])
    488 ; CHECK-LABEL: test_v3i32(
    489 ; CHECK-NEXT: .param .align 16 .b8 test_v3i32_param_0[16]
    490 ; CHECK-DAG:  ld.param.u32     [[E2:%r[0-9]+]], [test_v3i32_param_0+8];
    491 ; CHECK-DAG:  ld.param.v2.u32  {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v3i32_param_0];
    492 ; CHECK:      .param .align 16 .b8 param0[16];
    493 ; CHECK:      st.param.v2.b32  [param0+0], {[[E0]], [[E1]]};
    494 ; CHECK:      st.param.b32     [param0+8], [[E2]];
    495 ; CHECK:      .param .align 16 .b8 retval0[16];
    496 ; CHECK:      call.uni (retval0),
    497 ; CHECK-NEXT: test_v3i32,
    498 ; CHECK:      ld.param.v2.b32  {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
    499 ; CHECK:      ld.param.b32     [[RE2:%r[0-9]+]], [retval0+8];
    500 ; CHECK-DAG:  st.param.v2.b32  [func_retval0+0], {[[RE0]], [[RE1]]};
    501 ; CHECK-DAG:  st.param.b32     [func_retval0+8], [[RE2]];
    502 ; CHECK-NEXT: ret;
    503 define <3 x i32> @test_v3i32(<3 x i32> %a) {
    504   %ret = tail call <3 x i32> @test_v3i32(<3 x i32> %a)  ; self-call: vector goes out as the argument, result comes back
    505   ret <3 x i32> %ret
    506 }
    507 
    508 ; CHECK: .func  (.param .align 16 .b8 func_retval0[16])
    509 ; CHECK-LABEL: test_v4i32(
    510 ; CHECK-NEXT: .param .align 16 .b8 test_v4i32_param_0[16]
    511 ; CHECK:      ld.param.v4.u32  {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v4i32_param_0]
    512 ; CHECK:      .param .align 16 .b8 param0[16];
    513 ; CHECK:      st.param.v4.b32  [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
    514 ; CHECK:      .param .align 16 .b8 retval0[16];
    515 ; CHECK:      call.uni (retval0),
    516 ; CHECK-NEXT: test_v4i32,
    517 ; CHECK:      ld.param.v4.b32  {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0];
    518 ; CHECK:      st.param.v4.b32  [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
    519 ; CHECK-NEXT: ret;
        ; <4 x i32> is passed as a 16-byte array aligned to 16 and moved with single v4 accesses.
    520 define <4 x i32> @test_v4i32(<4 x i32> %a) {
    521        %r = tail call <4 x i32> @test_v4i32(<4 x i32> %a);
    522        ret <4 x i32> %r;
    523 }
    524 
    525 ; CHECK: .func  (.param .align 32 .b8 func_retval0[32])
    526 ; CHECK-LABEL: test_v5i32(
    527 ; CHECK-NEXT: .param .align 32 .b8 test_v5i32_param_0[32]
    528 ; CHECK-DAG:  ld.param.u32     [[E4:%r[0-9]+]], [test_v5i32_param_0+16];
    529 ; CHECK-DAG:  ld.param.v4.u32  {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v5i32_param_0]
    530 ; CHECK:      .param .align 32 .b8 param0[32];
    531 ; CHECK-DAG:  st.param.v4.b32  [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
    532 ; CHECK-DAG:  st.param.b32     [param0+16], [[E4]];
    533 ; CHECK:      .param .align 32 .b8 retval0[32];
    534 ; CHECK:      call.uni (retval0),
    535 ; CHECK-NEXT: test_v5i32,
    536 ; CHECK-DAG:  ld.param.v4.b32  {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0];
    537 ; CHECK-DAG:  ld.param.b32     [[RE4:%r[0-9]+]], [retval0+16];
    538 ; CHECK-DAG:  st.param.v4.b32  [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
    539 ; CHECK-DAG:  st.param.b32     [func_retval0+16], [[RE4]];
    540 ; CHECK-NEXT: ret;
        ; <5 x i32> is passed as a 32-byte array aligned to 32: lanes 0-3 move as one v4 access, lane 4 as a scalar at offset 16.
    541 define <5 x i32> @test_v5i32(<5 x i32> %a) {
    542        %r = tail call <5 x i32> @test_v5i32(<5 x i32> %a);
    543        ret <5 x i32> %r;
    544 }
    545 
    546 ; CHECK: .func  (.param .b32 func_retval0)
    547 ; CHECK-LABEL: test_f32(
    548 ; CHECK-NEXT: .param .b32 test_f32_param_0
    549 ; CHECK:      ld.param.f32    [[E:%f[0-9]+]], [test_f32_param_0];
    550 ; CHECK:      .param .b32 param0;
    551 ; CHECK:      st.param.f32    [param0+0], [[E]];
    552 ; CHECK:      .param .b32 retval0;
    553 ; CHECK:      call.uni (retval0),
    554 ; CHECK-NEXT: test_f32,
    555 ; CHECK:      ld.param.f32    [[R:%f[0-9]+]], [retval0+0];
    556 ; CHECK:      st.param.f32    [func_retval0+0], [[R]];
    557 ; CHECK-NEXT: ret;
    558 define float @test_f32(float %a) {
    559   %ret = tail call float @test_f32(float %a)  ; self-call: %a goes out as the argument, the result comes back
    560   ret float %ret
    561 }
    562 
    563 ; CHECK: .func  (.param .b64 func_retval0)
    564 ; CHECK-LABEL: test_i64(
    565 ; CHECK-NEXT: .param .b64 test_i64_param_0
    566 ; CHECK:      ld.param.u64    [[E:%rd[0-9]+]], [test_i64_param_0];
    567 ; CHECK:      .param .b64 param0;
    568 ; CHECK:      st.param.b64    [param0+0], [[E]];
    569 ; CHECK:      .param .b64 retval0;
    570 ; CHECK:      call.uni (retval0),
    571 ; CHECK-NEXT: test_i64,
    572 ; CHECK:      ld.param.b64    [[R:%rd[0-9]+]], [retval0+0];
    573 ; CHECK:      st.param.b64    [func_retval0+0], [[R]];
    574 ; CHECK-NEXT: ret;
    575 define i64 @test_i64(i64 %a) {
    576   %ret = tail call i64 @test_i64(i64 %a)  ; self-call: %a goes out as the argument, the result comes back
    577   ret i64 %ret
    578 }
    579 
    580 ; CHECK: .func  (.param .align 32 .b8 func_retval0[32])
    581 ; CHECK-LABEL: test_v3i64(
    582 ; CHECK-NEXT: .param .align 32 .b8 test_v3i64_param_0[32]
    583 ; CHECK-DAG:  ld.param.u64     [[E2:%rd[0-9]+]], [test_v3i64_param_0+16];
    584 ; CHECK-DAG:  ld.param.v2.u64  {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v3i64_param_0];
    585 ; CHECK:      .param .align 32 .b8 param0[32];
    586 ; CHECK:      st.param.v2.b64  [param0+0], {[[E0]], [[E1]]};
    587 ; CHECK:      st.param.b64     [param0+16], [[E2]];
    588 ; CHECK:      .param .align 32 .b8 retval0[32];
    589 ; CHECK:      call.uni (retval0),
    590 ; CHECK-NEXT: test_v3i64,
    591 ; CHECK:      ld.param.v2.b64  {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0];
    592 ; CHECK:      ld.param.b64     [[RE2:%rd[0-9]+]], [retval0+16];
    593 ; CHECK-DAG:  st.param.v2.b64  [func_retval0+0], {[[RE0]], [[RE1]]};
    594 ; CHECK-DAG:  st.param.b64     [func_retval0+16], [[RE2]];
    597 ; CHECK-NEXT: ret;
        ; <3 x i64> is passed as a 32-byte array aligned to 32: lanes 0-1 move as one v2 access, lane 2 as a scalar at offset 16.
    598 define <3 x i64> @test_v3i64(<3 x i64> %a) {
    599        %r = tail call <3 x i64> @test_v3i64(<3 x i64> %a);
    600        ret <3 x i64> %r;
    601 }
    602 
    603 ; For i64, vector loads are limited by PTX to 2 elements, so <4 x i64> is
        ; expected to be moved as two v2 64-bit accesses (offsets 0 and 16) within
        ; a 32-byte, 32-byte-aligned .b8 param array. The incoming loads and the
        ; final stores may be emitted in either order, so they are matched
        ; unordered; the call-argument stores and the retval loads are matched in
        ; order.
    604 ; CHECK: .func  (.param .align 32 .b8 func_retval0[32])
    605 ; CHECK-LABEL: test_v4i64(
    606 ; CHECK-NEXT: .param .align 32 .b8 test_v4i64_param_0[32]
    607 ; CHECK-DAG:  ld.param.v2.u64  {[[E2:%rd[0-9]+]], [[E3:%rd[0-9]+]]}, [test_v4i64_param_0+16];
    608 ; CHECK-DAG:  ld.param.v2.u64  {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v4i64_param_0];
    609 ; CHECK:      .param .align 32 .b8 param0[32];
    610 ; CHECK:      st.param.v2.b64  [param0+0], {[[E0]], [[E1]]};
    611 ; CHECK:      st.param.v2.b64  [param0+16], {[[E2]], [[E3]]};
    612 ; CHECK:      .param .align 32 .b8 retval0[32];
    613 ; CHECK:      call.uni (retval0),
    614 ; CHECK-NEXT: test_v4i64,
    615 ; CHECK:      ld.param.v2.b64  {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0];
    616 ; CHECK:      ld.param.v2.b64  {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16];
    617 ; CHECK-DAG:  st.param.v2.b64  [func_retval0+16], {[[RE2]], [[RE3]]};
    618 ; CHECK-DAG:  st.param.v2.b64  [func_retval0+0], {[[RE0]], [[RE1]]};
    619 ; CHECK-NEXT: ret;
    620 define <4 x i64> @test_v4i64(<4 x i64> %a) {
    621        %r = tail call <4 x i64> @test_v4i64(<4 x i64> %a);
    622        ret <4 x i64> %r;
    623 }
    624 
    625 ; Aggregates, on the other hand, do not get extended.
    626 
        ; { i1 }: unlike a scalar i1 (which is widened to a 32-bit param), the
        ; single-field struct is passed as a 1-byte, 1-byte-aligned .b8 array and
        ; moved with 8-bit param loads/stores in both directions.
    627 ; CHECK: .func  (.param .align 1 .b8 func_retval0[1])
    628 ; CHECK-LABEL: test_s_i1(
    629 ; CHECK-NEXT: .param .align 1 .b8 test_s_i1_param_0[1]
    630 ; CHECK:      ld.param.u8 [[A:%rs[0-9]+]], [test_s_i1_param_0];
    631 ; CHECK:      .param .align 1 .b8 param0[1];
    632 ; CHECK:      st.param.b8    [param0+0], [[A]]
    633 ; CHECK:      .param .align 1 .b8 retval0[1];
    634 ; CHECK:      call.uni
    635 ; CHECK-NEXT: test_s_i1,
    636 ; CHECK:      ld.param.b8    [[R:%rs[0-9]+]], [retval0+0];
    637 ; CHECK:      st.param.b8    [func_retval0+0], [[R]];
    638 ; CHECK-NEXT: ret;
    639 define %s_i1 @test_s_i1(%s_i1 %a) {
    640        %r = tail call %s_i1 @test_s_i1(%s_i1 %a);
    641        ret %s_i1 %r;
    642 }
    643 
        ; { i8 }: passed as a 1-byte, 1-byte-aligned .b8 array; the field is
        ; loaded with ld.param.u8, forwarded to the self-call with st.param.b8,
        ; and the call result is copied from retval0 to func_retval0 as 8 bits.
    644 ; CHECK: .func  (.param .align 1 .b8 func_retval0[1])
    645 ; CHECK-LABEL: test_s_i8(
    646 ; CHECK-NEXT: .param .align 1 .b8 test_s_i8_param_0[1]
    647 ; CHECK:      ld.param.u8 [[A:%rs[0-9]+]], [test_s_i8_param_0];
    648 ; CHECK:      .param .align 1 .b8 param0[1];
    649 ; CHECK:      st.param.b8    [param0+0], [[A]]
    650 ; CHECK:      .param .align 1 .b8 retval0[1];
    651 ; CHECK:      call.uni
    652 ; CHECK-NEXT: test_s_i8,
    653 ; CHECK:      ld.param.b8    [[R:%rs[0-9]+]], [retval0+0];
    654 ; CHECK:      st.param.b8    [func_retval0+0], [[R]];
    655 ; CHECK-NEXT: ret;
    656 define %s_i8 @test_s_i8(%s_i8 %a) {
    657        %r = tail call %s_i8 @test_s_i8(%s_i8 %a);
    658        ret %s_i8 %r;
    659 }
    660 
        ; { i16 }: passed as a 2-byte, 2-byte-aligned .b8 array; the field is
        ; loaded with ld.param.u16 into a 16-bit register and moved with 16-bit
        ; param stores/loads through param0, retval0 and func_retval0.
    661 ; CHECK: .func  (.param .align 2 .b8 func_retval0[2])
    662 ; CHECK-LABEL: test_s_i16(
    663 ; CHECK-NEXT: .param .align 2 .b8 test_s_i16_param_0[2]
    664 ; CHECK:      ld.param.u16 [[A:%rs[0-9]+]], [test_s_i16_param_0];
    665 ; CHECK:      .param .align 2 .b8 param0[2];
    666 ; CHECK:      st.param.b16    [param0+0], [[A]]
    667 ; CHECK:      .param .align 2 .b8 retval0[2];
    668 ; CHECK:      call.uni
    669 ; CHECK-NEXT: test_s_i16,
    670 ; CHECK:      ld.param.b16    [[R:%rs[0-9]+]], [retval0+0];
    671 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    672 ; CHECK-NEXT: ret;
    673 define %s_i16 @test_s_i16(%s_i16 %a) {
    674        %r = tail call %s_i16 @test_s_i16(%s_i16 %a);
    675        ret %s_i16 %r;
    676 }
    677 
        ; { half }: same 2-byte, 2-byte-aligned layout as the i16 struct, but the
        ; value is expected in a %h register and is moved with untyped 16-bit
        ; (b16) param loads/stores.
    678 ; CHECK: .func  (.param .align 2 .b8 func_retval0[2])
    679 ; CHECK-LABEL: test_s_f16(
    680 ; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2]
    681 ; CHECK:      ld.param.b16 [[A:%h[0-9]+]], [test_s_f16_param_0];
    682 ; CHECK:      .param .align 2 .b8 param0[2];
    683 ; CHECK:      st.param.b16    [param0+0], [[A]]
    684 ; CHECK:      .param .align 2 .b8 retval0[2];
    685 ; CHECK:      call.uni
    686 ; CHECK-NEXT: test_s_f16,
    687 ; CHECK:      ld.param.b16    [[R:%h[0-9]+]], [retval0+0];
    688 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    689 ; CHECK-NEXT: ret;
    690 define %s_f16 @test_s_f16(%s_f16 %a) {
    691        %r = tail call %s_f16 @test_s_f16(%s_f16 %a);
    692        ret %s_f16 %r;
    693 }
    694 
        ; { i32 }: passed as a 4-byte, 4-byte-aligned .b8 array; the field is
        ; loaded with ld.param.u32 and moved with 32-bit (b32) param stores/loads
        ; through param0, retval0 and func_retval0.
    695 ; CHECK: .func  (.param .align 4 .b8 func_retval0[4])
    696 ; CHECK-LABEL: test_s_i32(
    697 ; CHECK-NEXT: .param .align 4 .b8 test_s_i32_param_0[4]
    698 ; CHECK:      ld.param.u32    [[E:%r[0-9]+]], [test_s_i32_param_0];
    699 ; CHECK:      .param .align 4 .b8 param0[4]
    700 ; CHECK:      st.param.b32    [param0+0], [[E]];
    701 ; CHECK:      .param .align 4 .b8 retval0[4];
    702 ; CHECK:      call.uni (retval0),
    703 ; CHECK-NEXT: test_s_i32,
    704 ; CHECK:      ld.param.b32    [[R:%r[0-9]+]], [retval0+0];
    705 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
    706 ; CHECK-NEXT: ret;
    707 define %s_i32 @test_s_i32(%s_i32 %a) {
    708        %r = tail call %s_i32 @test_s_i32(%s_i32 %a);
    709        ret %s_i32 %r;
    710 }
    711 
        ; { float }: same 4-byte, 4-byte-aligned layout as the i32 struct, but the
        ; value is expected in an %f register and every param load/store uses the
        ; .f32 type.
    712 ; CHECK: .func  (.param .align 4 .b8 func_retval0[4])
    713 ; CHECK-LABEL: test_s_f32(
    714 ; CHECK-NEXT: .param .align 4 .b8 test_s_f32_param_0[4]
    715 ; CHECK:      ld.param.f32    [[E:%f[0-9]+]], [test_s_f32_param_0];
    716 ; CHECK:      .param .align 4 .b8 param0[4]
    717 ; CHECK:      st.param.f32    [param0+0], [[E]];
    718 ; CHECK:      .param .align 4 .b8 retval0[4];
    719 ; CHECK:      call.uni (retval0),
    720 ; CHECK-NEXT: test_s_f32,
    721 ; CHECK:      ld.param.f32    [[R:%f[0-9]+]], [retval0+0];
    722 ; CHECK:      st.param.f32    [func_retval0+0], [[R]];
    723 ; CHECK-NEXT: ret;
    724 define %s_f32 @test_s_f32(%s_f32 %a) {
    725        %r = tail call %s_f32 @test_s_f32(%s_f32 %a);
    726        ret %s_f32 %r;
    727 }
    728 
        ; { i64 }: passed as an 8-byte, 8-byte-aligned .b8 array; the field is
        ; loaded with ld.param.u64 and moved with 64-bit (b64) param stores/loads
        ; through param0, retval0 and func_retval0.
    729 ; CHECK: .func  (.param .align 8 .b8 func_retval0[8])
    730 ; CHECK-LABEL: test_s_i64(
    731 ; CHECK-NEXT: .param .align 8 .b8 test_s_i64_param_0[8]
    732 ; CHECK:      ld.param.u64    [[E:%rd[0-9]+]], [test_s_i64_param_0];
    733 ; CHECK:      .param .align 8 .b8 param0[8];
    734 ; CHECK:      st.param.b64    [param0+0], [[E]];
    735 ; CHECK:      .param .align 8 .b8 retval0[8];
    736 ; CHECK:      call.uni (retval0),
    737 ; CHECK-NEXT: test_s_i64,
    738 ; CHECK:      ld.param.b64    [[R:%rd[0-9]+]], [retval0+0];
    739 ; CHECK:      st.param.b64    [func_retval0+0], [[R]];
    740 ; CHECK-NEXT: ret;
    741 define %s_i64 @test_s_i64(%s_i64 %a) {
    742        %r = tail call %s_i64 @test_s_i64(%s_i64 %a);
    743        ret %s_i64 %r;
    744 }
    745 
    746 ; Fields that have different types, but identical sizes are not vectorized.
        ; The aggregate { i32, float, i32, float, i64 } occupies a 24-byte,
        ; 8-byte-aligned .b8 array. Each of the five fields is expected to be moved
        ; with its own scalar access (offsets 0, 4, 8, 12 and 16); no v2/v4 form is
        ; expected even though neighbouring fields are both 32 bits wide. Within
        ; each group the accesses may be emitted in any order, so they are matched
        ; unordered.
    747 ; CHECK: .func  (.param .align 8 .b8 func_retval0[24])
    748 ; CHECK-LABEL: test_s_i32f32(
    749 ; CHECK:        .param .align 8 .b8 test_s_i32f32_param_0[24]
    750 ; CHECK-DAG:    ld.param.u64    [[E4:%rd[0-9]+]], [test_s_i32f32_param_0+16];
    751 ; CHECK-DAG:    ld.param.f32    [[E3:%f[0-9]+]], [test_s_i32f32_param_0+12];
    752 ; CHECK-DAG:    ld.param.u32    [[E2:%r[0-9]+]], [test_s_i32f32_param_0+8];
    753 ; CHECK-DAG:    ld.param.f32    [[E1:%f[0-9]+]], [test_s_i32f32_param_0+4];
    754 ; CHECK-DAG:    ld.param.u32    [[E0:%r[0-9]+]], [test_s_i32f32_param_0];
    755 ; CHECK:        .param .align 8 .b8 param0[24];
    756 ; CHECK-DAG:    st.param.b32    [param0+0], [[E0]];
    757 ; CHECK-DAG:    st.param.f32    [param0+4], [[E1]];
    758 ; CHECK-DAG:    st.param.b32    [param0+8], [[E2]];
    759 ; CHECK-DAG:    st.param.f32    [param0+12], [[E3]];
    760 ; CHECK-DAG:    st.param.b64    [param0+16], [[E4]];
    761 ; CHECK:        .param .align 8 .b8 retval0[24];
    762 ; CHECK:        call.uni (retval0),
    763 ; CHECK-NEXT:   test_s_i32f32,
    764 ; CHECK-DAG:    ld.param.b32    [[RE0:%r[0-9]+]], [retval0+0];
    765 ; CHECK-DAG:    ld.param.f32    [[RE1:%f[0-9]+]], [retval0+4];
    766 ; CHECK-DAG:    ld.param.b32    [[RE2:%r[0-9]+]], [retval0+8];
    767 ; CHECK-DAG:    ld.param.f32    [[RE3:%f[0-9]+]], [retval0+12];
    768 ; CHECK-DAG:    ld.param.b64    [[RE4:%rd[0-9]+]], [retval0+16];
    769 ; CHECK-DAG:    st.param.b32    [func_retval0+0], [[RE0]];
    770 ; CHECK-DAG:    st.param.f32    [func_retval0+4], [[RE1]];
    771 ; CHECK-DAG:    st.param.b32    [func_retval0+8], [[RE2]];
    772 ; CHECK-DAG:    st.param.f32    [func_retval0+12], [[RE3]];
    773 ; CHECK-DAG:    st.param.b64    [func_retval0+16], [[RE4]];
    774 ; CHECK:        ret;
    775 define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) {
    776        %r = tail call %s_i32f32 @test_s_i32f32(%s_i32f32 %a);
    777        ret %s_i32f32 %r;
    778 }
    779 
    780 ; We do vectorize consecutive fields with matching types.
        ; The aggregate { i32, i32, i32, i32, i64 } occupies a 24-byte,
        ; 8-byte-aligned .b8 array. The four i32 fields are expected to be moved as
        ; two v2 32-bit accesses (offsets 0 and 8) and the trailing i64 as one
        ; 64-bit access (offset 16).
        ; The 64-bit argument load binds E4 so that the matching call-argument
        ; store refers to a value captured in this test rather than to a variable
        ; left over from an earlier test.
    781 ; CHECK:.visible .func  (.param .align 8 .b8 func_retval0[24])
    782 ; CHECK-LABEL: test_s_i32x4(
    783 ; CHECK:        .param .align 8 .b8 test_s_i32x4_param_0[24]
    784 ; CHECK-DAG:    ld.param.u64    [[E4:%rd[0-9]+]], [test_s_i32x4_param_0+16];
    785 ; CHECK-DAG:    ld.param.v2.u32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_s_i32x4_param_0+8];
    786 ; CHECK-DAG:    ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i32x4_param_0];
    787 ; CHECK:        .param .align 8 .b8 param0[24];
    788 ; CHECK:        st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
    789 ; CHECK:        st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
    790 ; CHECK:        st.param.b64    [param0+16], [[E4]];
    791 ; CHECK:        .param .align 8 .b8 retval0[24];
    792 ; CHECK:        call.uni (retval0),
    793 ; CHECK-NEXT:   test_s_i32x4,
    794 ; CHECK:        ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
    795 ; CHECK:        ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8];
    796 ; CHECK:        ld.param.b64    [[RE4:%rd[0-9]+]], [retval0+16];
    797 ; CHECK-DAG:    st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
    798 ; CHECK-DAG:    st.param.v2.b32 [func_retval0+8], {[[RE2]], [[RE3]]};
    799 ; CHECK-DAG:    st.param.b64    [func_retval0+16], [[RE4]];
    800 ; CHECK:        ret;
    801 
    802 define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) {
    803        %r = tail call %s_i32x4 @test_s_i32x4(%s_i32x4 %a);
    804        ret %s_i32x4 %r;
    805 }
    806 
        ; Aggregate with a narrow field between i32 fields. Note that, despite the
        ; "i1" in the function name, the type used is %s_i8i32x4, i.e.
        ; { i32, i32, i8, i32, i32, i64 } with an i8 third field. It occupies a
        ; 32-byte, 8-byte-aligned .b8 array. Expected accesses: one v2 32-bit
        ; access for the first two i32 fields (offset 0), an 8-bit access for the
        ; narrow field (offset 8), separate scalar 32-bit accesses for the i32
        ; fields at offsets 12 and 16, and a 64-bit access at offset 24. All
        ; directives in this test are matched in order.
    807 ; CHECK:.visible .func  (.param .align 8 .b8 func_retval0[32])
    808 ; CHECK-LABEL: test_s_i1i32x4(
    809 ; CHECK:        .param .align 8 .b8 test_s_i1i32x4_param_0[32]
    810 ; CHECK:        ld.param.u64    [[E5:%rd[0-9]+]], [test_s_i1i32x4_param_0+24];
    811 ; CHECK:        ld.param.u32    [[E4:%r[0-9]+]], [test_s_i1i32x4_param_0+16];
    812 ; CHECK:        ld.param.u32    [[E3:%r[0-9]+]], [test_s_i1i32x4_param_0+12];
    813 ; CHECK:        ld.param.u8     [[E2:%rs[0-9]+]], [test_s_i1i32x4_param_0+8];
    814 ; CHECK:        ld.param.v2.u32         {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i1i32x4_param_0];
    815 ; CHECK:        .param .align 8 .b8 param0[32];
    816 ; CHECK:        st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
    817 ; CHECK:        st.param.b8     [param0+8], [[E2]];
    818 ; CHECK:        st.param.b32    [param0+12], [[E3]];
    819 ; CHECK:        st.param.b32    [param0+16], [[E4]];
    820 ; CHECK:        st.param.b64    [param0+24], [[E5]];
    821 ; CHECK:        .param .align 8 .b8 retval0[32];
    822 ; CHECK:        call.uni (retval0),
    823 ; CHECK:        test_s_i1i32x4,
    824 ; CHECK:        (
    825 ; CHECK:        param0
    826 ; CHECK:        );
    827 ; CHECK:        ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
    828 ; CHECK:        ld.param.b8     [[RE2:%rs[0-9]+]], [retval0+8];
    829 ; CHECK:        ld.param.b32    [[RE3:%r[0-9]+]], [retval0+12];
    830 ; CHECK:        ld.param.b32    [[RE4:%r[0-9]+]], [retval0+16];
    831 ; CHECK:        ld.param.b64    [[RE5:%rd[0-9]+]], [retval0+24];
    832 ; CHECK:        st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
    833 ; CHECK:        st.param.b8     [func_retval0+8], [[RE2]];
    834 ; CHECK:        st.param.b32    [func_retval0+12], [[RE3]];
    835 ; CHECK:        st.param.b32    [func_retval0+16], [[RE4]];
    836 ; CHECK:        st.param.b64    [func_retval0+24], [[RE5]];
    837 ; CHECK:        ret;
    838 
    839 define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
    840        %r = tail call %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a);
    841        ret %s_i8i32x4 %r;
    842 }
    843 
    844 ; -- All loads/stores from parameters aligned by one must be done one
    845 ; -- byte at a time.
        ; -- The packed aggregate %s_i8i32x4p occupies a 25-byte, 1-byte-aligned
        ; -- .b8 array, so the incoming argument is expected to be read with 25
        ; -- individual 8-bit loads (offsets 0 through 24), in any order.
        ; -- Register numbers are matched by pattern rather than by literal value
        ; -- so the test does not depend on virtual register allocation.
    846 ; CHECK:.visible .func  (.param .align 1 .b8 func_retval0[25])
    847 ; CHECK-LABEL: test_s_i1i32x4p(
    848 ; CHECK-DAG:        .param .align 1 .b8 test_s_i1i32x4p_param_0[25]
    849 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+24];
    850 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+23];
    851 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+22];
    852 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+21];
    853 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+20];
    854 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+19];
    855 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+18];
    856 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+17];
    857 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+16];
    858 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+15];
    859 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+14];
    860 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+13];
    861 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+12];
    862 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+11];
    863 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+10];
    864 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+9];
    865 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+8];
    866 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+7];
    867 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+6];
    868 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+5];
    869 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+4];
    870 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+3];
    871 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+2];
    872 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+1];
    873 ; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0];
    874 ; --- TODO
    875 ; --- Unaligned parameter store/ return value load is broken in both nvcc
    876 ; --- and llvm and needs to be fixed.
        ; --- Until then, the directives below only pin down the access widths and
        ; --- offsets currently emitted; stored values are deliberately unchecked.
    877 ; CHECK:        .param .align 1 .b8 param0[25];
    878 ; CHECK-DAG:        st.param.b32    [param0+0],
    879 ; CHECK-DAG:        st.param.b32    [param0+4],
    880 ; CHECK-DAG:        st.param.b8     [param0+8],
    881 ; CHECK-DAG:        st.param.b32    [param0+9],
    882 ; CHECK-DAG:        st.param.b32    [param0+13],
    883 ; CHECK-DAG:        st.param.b64    [param0+17],
    884 ; CHECK:            .param .align 1 .b8 retval0[25];
    885 ; CHECK:            call.uni (retval0),
    886 ; CHECK-NEXT:       test_s_i1i32x4p,
    887 ; CHECK-DAG:        ld.param.b32    %r{{[0-9]+}}, [retval0+0];
    888 ; CHECK-DAG:        ld.param.b32    %r{{[0-9]+}}, [retval0+4];
    889 ; CHECK-DAG:        ld.param.b8     %rs{{[0-9]+}}, [retval0+8];
    890 ; CHECK-DAG:        ld.param.b32    %r{{[0-9]+}}, [retval0+9];
    891 ; CHECK-DAG:        ld.param.b32    %r{{[0-9]+}}, [retval0+13];
    892 ; CHECK-DAG:        ld.param.b64    %rd{{[0-9]+}}, [retval0+17];
    893 ; CHECK-DAG:        st.param.b32    [func_retval0+0],
    894 ; CHECK-DAG:        st.param.b32    [func_retval0+4],
    895 ; CHECK-DAG:        st.param.b8     [func_retval0+8],
    896 ; CHECK-DAG:        st.param.b32    [func_retval0+9],
    897 ; CHECK-DAG:        st.param.b32    [func_retval0+13],
    898 ; CHECK-DAG:        st.param.b64    [func_retval0+17],
        ; CHECK:            ret;
    899 
    900 define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) {
    901        %r = tail call %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a);
    902        ret %s_i8i32x4p %r;
    903 }
    904 
    905 ; Check that we can vectorize loads that span multiple aggregate fields.
        ; %s_crossfield is { i32, [2 x i32], <4 x i32>, [3 x {i32, i32, i32}] } and
        ; occupies an 80-byte, 16-byte-aligned .b8 array. Expected accesses: a v2
        ; 32-bit access at offset 0 (covering the leading i32 and the first array
        ; element), a scalar 32-bit access at offset 8, v4 32-bit accesses at
        ; offsets 16, 32 and 48, and a scalar 32-bit access at offset 64. All
        ; directives in this test are matched in order.
    906 ; CHECK:.visible .func  (.param .align 16 .b8 func_retval0[80])
    907 ; CHECK-LABEL: test_s_crossfield(
    908 ; CHECK:        .param .align 16 .b8 test_s_crossfield_param_0[80]
    909 ; CHECK:        ld.param.u32    [[E15:%r[0-9]+]], [test_s_crossfield_param_0+64];
    910 ; CHECK:        ld.param.v4.u32 {[[E11:%r[0-9]+]], [[E12:%r[0-9]+]], [[E13:%r[0-9]+]], [[E14:%r[0-9]+]]}, [test_s_crossfield_param_0+48];
    911 ; CHECK:        ld.param.v4.u32 {[[E7:%r[0-9]+]], [[E8:%r[0-9]+]], [[E9:%r[0-9]+]], [[E10:%r[0-9]+]]}, [test_s_crossfield_param_0+32];
    912 ; CHECK:        ld.param.v4.u32 {[[E3:%r[0-9]+]], [[E4:%r[0-9]+]], [[E5:%r[0-9]+]], [[E6:%r[0-9]+]]}, [test_s_crossfield_param_0+16];
    913 ; CHECK:        ld.param.u32    [[E2:%r[0-9]+]], [test_s_crossfield_param_0+8];
    914 ; CHECK:        ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_crossfield_param_0];
    915 ; CHECK:        .param .align 16 .b8 param0[80];
    916 ; CHECK:        st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
    917 ; CHECK:        st.param.b32    [param0+8], [[E2]];
    918 ; CHECK:        st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]};
    919 ; CHECK:        st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]};
    920 ; CHECK:        st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]};
    921 ; CHECK:        st.param.b32    [param0+64], [[E15]];
    922 ; CHECK:        .param .align 16 .b8 retval0[80];
    923 ; CHECK:        call.uni (retval0),
    924 ; CHECK:        test_s_crossfield,
    925 ; CHECK:        ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
    926 ; CHECK:        ld.param.b32    [[RE2:%r[0-9]+]], [retval0+8];
    927 ; CHECK:        ld.param.v4.b32 {[[RE3:%r[0-9]+]], [[RE4:%r[0-9]+]], [[RE5:%r[0-9]+]], [[RE6:%r[0-9]+]]}, [retval0+16];
    928 ; CHECK:        ld.param.v4.b32 {[[RE7:%r[0-9]+]], [[RE8:%r[0-9]+]], [[RE9:%r[0-9]+]], [[RE10:%r[0-9]+]]}, [retval0+32];
    929 ; CHECK:        ld.param.v4.b32 {[[RE11:%r[0-9]+]], [[RE12:%r[0-9]+]], [[RE13:%r[0-9]+]], [[RE14:%r[0-9]+]]}, [retval0+48];
    930 ; CHECK:        ld.param.b32    [[RE15:%r[0-9]+]], [retval0+64];
    931 ; CHECK:        st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
    932 ; CHECK:        st.param.b32    [func_retval0+8], [[RE2]];
    933 ; CHECK:        st.param.v4.b32 [func_retval0+16], {[[RE3]], [[RE4]], [[RE5]], [[RE6]]};
    934 ; CHECK:        st.param.v4.b32 [func_retval0+32], {[[RE7]], [[RE8]], [[RE9]], [[RE10]]};
    935 ; CHECK:        st.param.v4.b32 [func_retval0+48], {[[RE11]], [[RE12]], [[RE13]], [[RE14]]};
    936 ; CHECK:        st.param.b32    [func_retval0+64], [[RE15]];
    937 ; CHECK:        ret;
    938 
    939 define %s_crossfield @test_s_crossfield(%s_crossfield %a) {
    940        %r = tail call %s_crossfield @test_s_crossfield(%s_crossfield %a);
    941        ret %s_crossfield %r;
    942 }
    942 }
    943