Home | History | Annotate | Download | only in msa
      1 ; RUN: llc -march=mips -mattr=+msa,+fp64 -relocation-model=pic < %s | FileCheck -check-prefixes=ALL,O32 %s
      2 ; RUN: llc -march=mipsel -mattr=+msa,+fp64 -relocation-model=pic < %s | FileCheck -check-prefixes=ALL,O32 %s
      3 ; RUN: llc -march=mips64 -target-abi=n32 -mattr=+msa,+fp64 -relocation-model=pic < %s | FileCheck -check-prefixes=ALL,N32 %s
      4 ; RUN: llc -march=mips64el -target-abi=n32 -mattr=+msa,+fp64 -relocation-model=pic < %s | FileCheck -check-prefixes=ALL,N32 %s
      5 ; RUN: llc -march=mips64 -mattr=+msa,+fp64 -relocation-model=pic < %s | FileCheck -check-prefixes=ALL,N64 %s
      6 ; RUN: llc -march=mips64el -mattr=+msa,+fp64 -relocation-model=pic < %s | FileCheck -check-prefixes=ALL,N64 %s
      7 ; Globals shared by the tests below: two vector slots, a runtime lane index, and scalar sources for the non-constant splats.
      8 @v4f32 = global <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>
      9 @v2f64 = global <2 x double> <double 0.0, double 0.0>
     10 @i32 = global i32 0
     11 @f32 = global float 0.0
     12 @f64 = global double 0.0
     13 
     14 define void @const_v4f32() nounwind {
     15   ; ALL-LABEL: const_v4f32:
     16   ; Stores constant <4 x float> vectors to @v4f32 and checks how each is materialized; the all-zero vector is expected to use ldi.b with immediate 0.
     17   store volatile <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, <4 x float>*@v4f32
     18   ; ALL: ldi.b  [[R1:\$w[0-9]+]], 0
     19   ; Splat of 1.0 (bit pattern 0x3F800000): lui supplies the upper half 0x3F80 = 16256 and fill.w replicates the word.
     20   store volatile <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, <4 x float>*@v4f32
     21   ; ALL: lui     [[R1:\$[0-9]+]], 16256
     22   ; ALL: fill.w  [[R2:\$w[0-9]+]], [[R1]]
     23   ; Last lane differs, so this is not a splat; expected to be loaded with ld.w from a '$'-prefixed symbol (presumably a constant-pool label).
     24   store volatile <4 x float> <float 1.0, float 1.0, float 1.0, float 31.0>, <4 x float>*@v4f32
     25   ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
     26   ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
     27   ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
     28   ; ALL: ld.w  [[R1:\$w[0-9]+]], 0([[G_PTR]])
     29   ; Splat of 65537.0 (bit pattern 0x47800080): lui supplies 0x4780 = 18304, ori adds 0x0080 = 128, and fill.w replicates the word.
     30   store volatile <4 x float> <float 65537.0, float 65537.0, float 65537.0, float 65537.0>, <4 x float>*@v4f32
     31   ; ALL: lui     [[R1:\$[0-9]+]], 18304
     32   ; ALL: ori     [[R2:\$[0-9]+]], [[R1]], 128
     33   ; ALL: fill.w  [[R3:\$w[0-9]+]], [[R2]]
     34   ; Alternating 1.0/2.0 lanes: expected to be loaded from memory with ld.w, like the other non-splat vectors here.
     35   store volatile <4 x float> <float 1.0, float 2.0, float 1.0, float 2.0>, <4 x float>*@v4f32
     36   ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
     37   ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
     38   ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
     39   ; ALL: ld.w  [[R1:\$w[0-9]+]], 0([[G_PTR]])
     40   ; Four distinct lanes: expected to be loaded from memory with ld.w.
     41   store volatile <4 x float> <float 3.0, float 4.0, float 5.0, float 6.0>, <4 x float>*@v4f32
     42   ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
     43   ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
     44   ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
     45   ; ALL: ld.w  [[R1:\$w[0-9]+]], 0([[G_PTR]])
     46 
     47   ret void
     48 }
     49 
     50 define void @const_v2f64() nounwind {
     51   ; ALL-LABEL: const_v2f64:
     52   ; Stores constant <2 x double> vectors to @v2f64; only the all-zero case is expected to use an immediate (ldi.b 0).
     53   store volatile <2 x double> <double 0.0, double 0.0>, <2 x double>*@v2f64
     54   ; ALL: ldi.b  [[R1:\$w[0-9]+]], 0
     55   ; The literal 72340172838076673 is 0x0101010101010101 when read as an integer; as a double it is expected to be loaded with ld.d from a '$'-prefixed symbol (presumably a constant-pool label).
     56   store volatile <2 x double> <double 72340172838076673.0, double 72340172838076673.0>, <2 x double>*@v2f64
     57   ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
     58   ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
     59   ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
     60   ; ALL: ld.d  [[R1:\$w[0-9]+]], 0([[G_PTR]])
     61   ; The literal 281479271743489 is 0x0001000100010001 when read as an integer; the same memory load is expected.
     62   store volatile <2 x double> <double 281479271743489.0, double 281479271743489.0>, <2 x double>*@v2f64
     63   ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
     64   ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
     65   ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
     66   ; ALL: ld.d  [[R1:\$w[0-9]+]], 0([[G_PTR]])
     67   ; The literal 4294967297 is 0x0000000100000001 when read as an integer; the same memory load is expected.
     68   store volatile <2 x double> <double 4294967297.0, double 4294967297.0>, <2 x double>*@v2f64
     69   ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
     70   ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
     71   ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
     72   ; ALL: ld.d  [[R1:\$w[0-9]+]], 0([[G_PTR]])
     73   ; Even a splat of 1.0 is expected to be loaded with ld.d rather than built from immediates.
     74   store volatile <2 x double> <double 1.0, double 1.0>, <2 x double>*@v2f64
     75   ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
     76   ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
     77   ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
     78   ; ALL: ld.d  [[R1:\$w[0-9]+]], 0([[G_PTR]])
     79   ; Non-splat vector (lanes differ): expected to be loaded with ld.d.
     80   store volatile <2 x double> <double 1.0, double 31.0>, <2 x double>*@v2f64
     81   ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
     82   ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
     83   ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
     84   ; ALL: ld.d  [[R1:\$w[0-9]+]], 0([[G_PTR]])
     85   ; Another non-splat vector: expected to be loaded with ld.d.
     86   store volatile <2 x double> <double 3.0, double 4.0>, <2 x double>*@v2f64
     87   ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
     88   ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
     89   ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
     90   ; ALL: ld.d  [[R1:\$w[0-9]+]], 0([[G_PTR]])
     91 
     92   ret void
     93 }
     94 
     95 define void @nonconst_v4f32() nounwind {
     96   ; ALL-LABEL: nonconst_v4f32:
     97   ; Builds a <4 x float> by inserting the same scalar loaded from @f32 into every lane, then stores it to @v4f32; expected to become lwc1 followed by splati.w.
     98   %1 = load float , float *@f32
     99   %2 = insertelement <4 x float> undef, float %1, i32 0
    100   %3 = insertelement <4 x float> %2, float %1, i32 1
    101   %4 = insertelement <4 x float> %3, float %1, i32 2
    102   %5 = insertelement <4 x float> %4, float %1, i32 3
    103   store volatile <4 x float> %5, <4 x float>*@v4f32
    104   ; ALL: lwc1 $f[[R1:[0-9]+]], 0(
    105   ; ALL: splati.w [[R2:\$w[0-9]+]], $w[[R1]]
    106   ; The splat check reuses the register number captured from $fN as $wN, so the splat source must be the register lwc1 loaded into.
    107   ret void
    108 }
    109 
    110 define void @nonconst_v2f64() nounwind {
    111   ; ALL-LABEL: nonconst_v2f64:
    112   ; Builds a <2 x double> by inserting the same scalar loaded from @f64 into both lanes, then stores it to @v2f64; expected to become ldc1 followed by splati.d.
    113   %1 = load double , double *@f64
    114   %2 = insertelement <2 x double> undef, double %1, i32 0
    115   %3 = insertelement <2 x double> %2, double %1, i32 1
    116   store volatile <2 x double> %3, <2 x double>*@v2f64
    117   ; ALL: ldc1 $f[[R1:[0-9]+]], 0(
    118   ; ALL: splati.d [[R2:\$w[0-9]+]], $w[[R1]]
    119   ; The splat check reuses the register number captured from $fN as $wN, so the splat source must be the register ldc1 loaded into.
    120   ret void
    121 }
    122 
    123 define float @extract_v4f32() nounwind {
    124   ; ALL-LABEL: extract_v4f32:
    125   ; Loads @v4f32, doubles it with fadd, and returns lane 1; checks that the lane is splatted into $w0 so it can be returned from the low sub-register.
    126   %1 = load <4 x float>, <4 x float>* @v4f32
    127   ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
    128 
    129   %2 = fadd <4 x float> %1, %1
    130   ; ALL-DAG: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
    131   ; The extract reads %2, so the splat source checked below must be the fadd result (R2) rather than the raw load (R1).
    132   %3 = extractelement <4 x float> %2, i32 1
    133   ; Element 1 can be obtained by splatting it across the vector and extracting
    134   ; $w0:sub_lo
    135   ; ALL-DAG: splati.w $w0, [[R2]][1]
    136 
    137   ret float %3
    138 }
    139 
    140 define float @extract_v4f32_elt0() nounwind {
    141   ; ALL-LABEL: extract_v4f32_elt0:
    142   ; Loads @v4f32, doubles it with fadd, and returns lane 0; the fadd is expected to write $w0 directly so no extra move is needed.
    143   %1 = load <4 x float>, <4 x float>* @v4f32
    144   ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
    145 
    146   %2 = fadd <4 x float> %1, %1
    147   ; ALL-DAG: fadd.w $w0, [[R1]], [[R1]]
    148 
    149   %3 = extractelement <4 x float> %2, i32 0
    150   ; Element 0 can be obtained by extracting $w0:sub_lo ($f0)
    151   ; ALL-NOT: copy_u.w
    152   ; ALL-NOT: mtc1
    153   ; The negative checks above forbid copy_u.w and mtc1 after the fadd, i.e. no transfer of the lane through another register.
    154   ret float %3
    155 }
    156 
    157 define float @extract_v4f32_elt2() nounwind {
    158   ; ALL-LABEL: extract_v4f32_elt2:
    159   ; Loads @v4f32, doubles it with fadd, and returns lane 2; checks that the lane is splatted into $w0 so it can be returned from the low sub-register.
    160   %1 = load <4 x float>, <4 x float>* @v4f32
    161   ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
    162 
    163   %2 = fadd <4 x float> %1, %1
    164   ; ALL-DAG: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
    165   ; The extract reads %2, so the splat source checked below must be the fadd result (R2) rather than the raw load (R1).
    166   %3 = extractelement <4 x float> %2, i32 2
    167   ; Element 2 can be obtained by splatting it across the vector and extracting
    168   ; $w0:sub_lo
    169   ; ALL-DAG: splati.w $w0, [[R2]][2]
    170 
    171   ret float %3
    172 }
    173 
    174 define float @extract_v4f32_vidx() nounwind {
    175   ; ALL-LABEL: extract_v4f32_vidx:
    176   ; Loads @v4f32, doubles it with fadd, and returns the lane selected at run time by @i32; checks a splat.w into $w0 indexed by the loaded GPR.
    177   %1 = load <4 x float>, <4 x float>* @v4f32
    178   ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4f32)(
    179   ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v4f32)(
    180   ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v4f32)(
    181   ; ALL-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
    182 
    183   %2 = fadd <4 x float> %1, %1
    184   ; ALL-DAG: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
    185 
    186   %3 = load i32, i32* @i32
    187   ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
    188   ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
    189   ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
    190   ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
    191   ; The extract reads %2, so the splat source checked below must be the fadd result (R2) rather than the raw load (R1).
    192   %4 = extractelement <4 x float> %2, i32 %3
    193   ; ALL-DAG: splat.w $w0, [[R2]]{{\[}}[[IDX]]]
    194 
    195   ret float %4
    196 }
    197 
    198 define double @extract_v2f64() nounwind {
    199   ; ALL-LABEL: extract_v2f64:
    200   ; Loads @v2f64, doubles it with fadd, and returns lane 1; checks that the lane is splatted into $w0 so it can be returned from the 64-bit sub-register.
    201   %1 = load <2 x double>, <2 x double>* @v2f64
    202   ; ALL-DAG: ld.d [[R1:\$w[0-9]+]],
    203 
    204   %2 = fadd <2 x double> %1, %1
    205   ; ALL-DAG: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
    206   ; The extract reads %2, so the splat source checked below must be the fadd result (R2) rather than the raw load (R1).
    207   %3 = extractelement <2 x double> %2, i32 1
    208   ; Element 1 can be obtained by splatting it across the vector and extracting
    209   ; $w0:sub_64
    210   ; ALL-DAG: splati.d $w0, [[R2]][1]
    211   ; ALL-NOT: copy_u.w
    212   ; ALL-NOT: mtc1
    213   ; ALL-NOT: mthc1
    214   ; ALL-NOT: sll
    215   ; ALL-NOT: sra
    216   ; The negative checks above forbid copy_u.w, mtc1, mthc1, sll and sra after the splat, i.e. no transfer of the lane through other registers or shifts.
    217   ret double %3
    218 }
    219 
    220 define double @extract_v2f64_elt0() nounwind {
    221   ; ALL-LABEL: extract_v2f64_elt0:
    222   ; Loads @v2f64, doubles it with fadd, and returns lane 0; the fadd is expected to write $w0 directly so no extra move is needed.
    223   %1 = load <2 x double>, <2 x double>* @v2f64
    224   ; ALL-DAG: ld.d [[R1:\$w[0-9]+]],
    225 
    226   %2 = fadd <2 x double> %1, %1
    227   ; ALL-DAG: fadd.d $w0, [[R1]], [[R1]]
    228 
    229   %3 = extractelement <2 x double> %2, i32 0
    230   ; Element 0 can be obtained by extracting $w0:sub_64 ($f0)
    231   ; ALL-NOT: copy_u.w
    232   ; ALL-NOT: mtc1
    233   ; ALL-NOT: mthc1
    234   ; ALL-NOT: sll
    235   ; ALL-NOT: sra
    236   ; The negative checks above forbid copy_u.w, mtc1, mthc1, sll and sra after the fadd, i.e. no transfer of the lane through other registers or shifts.
    237   ret double %3
    238 }
    239 
    240 define double @extract_v2f64_vidx() nounwind {
    241   ; ALL-LABEL: extract_v2f64_vidx:
    242   ; Loads @v2f64, doubles it with fadd, and returns the lane selected at run time by @i32; checks a splat.d into $w0 indexed by the loaded GPR.
    243   %1 = load <2 x double>, <2 x double>* @v2f64
    244   ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2f64)(
    245   ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v2f64)(
    246   ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v2f64)(
    247   ; ALL-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
    248 
    249   %2 = fadd <2 x double> %1, %1
    250   ; ALL-DAG: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
    251 
    252   %3 = load i32, i32* @i32
    253   ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
    254   ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
    255   ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
    256   ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
    257   ; The extract reads %2, so the splat source checked below must be the fadd result (R2) rather than the raw load (R1).
    258   %4 = extractelement <2 x double> %2, i32 %3
    259   ; ALL-DAG: splat.d $w0, [[R2]]{{\[}}[[IDX]]]
    260 
    261   ret double %4
    262 }
    263 
    264 define void @insert_v4f32(float %a) nounwind {
    265   ; ALL-LABEL: insert_v4f32:
    266   ; Loads @v4f32, replaces lane 1 with the float argument, and stores the vector back; checks a single in-place insve.w from lane 0 of $w12.
    267   %1 = load <4 x float>, <4 x float>* @v4f32
    268   ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
    269 
    270   %2 = insertelement <4 x float> %1, float %a, i32 1
    271   ; float argument passed in $f12
    272   ; ALL-DAG: insve.w [[R1]][1], $w12[0]
    273   ; The store check below reuses R1, so the vector must be stored from the same register that insve.w updated.
    274   store <4 x float> %2, <4 x float>* @v4f32
    275   ; ALL-DAG: st.w [[R1]]
    276 
    277   ret void
    278 }
    279 
    280 define void @insert_v2f64(double %a) nounwind {
    281   ; ALL-LABEL: insert_v2f64:
    282   ; Loads @v2f64, replaces lane 1 with the double argument, and stores the vector back; checks a single in-place insve.d from lane 0 of $w12.
    283   %1 = load <2 x double>, <2 x double>* @v2f64
    284   ; ALL-DAG: ld.d [[R1:\$w[0-9]+]],
    285 
    286   %2 = insertelement <2 x double> %1, double %a, i32 1
    287   ; double argument passed in $f12
    288   ; ALL-DAG: insve.d [[R1]][1], $w12[0]
    289   ; The store check below reuses R1, so the vector must be stored from the same register that insve.d updated.
    290   store <2 x double> %2, <2 x double>* @v2f64
    291   ; ALL-DAG: st.d [[R1]]
    292 
    293   ret void
    294 }
    295 
    296 define void @insert_v4f32_vidx(float %a) nounwind {
    297   ; ALL-LABEL: insert_v4f32_vidx:
    298   ; Loads @v4f32, replaces the lane selected at run time by @i32 with the float argument, and stores the vector back.
    299   %1 = load <4 x float>, <4 x float>* @v4f32
    300   ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4f32)(
    301   ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v4f32)(
    302   ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v4f32)(
    303   ; ALL-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
    304 
    305   %2 = load i32, i32* @i32
    306   ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
    307   ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
    308   ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
    309   ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
    310   ; Expected lowering: scale the index to a byte offset (sll by 2), slide the target lane to position 0 with sld.b, insert with insve.w, then slide back by the negated offset.
    311   %3 = insertelement <4 x float> %1, float %a, i32 %2
    312   ; float argument passed in $f12
    313   ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
    314   ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
    315   ; ALL-DAG: insve.w [[R1]][0], $w12[0]
    316   ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
    317   ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
    318   ; Every step above operates on R1 in place, and the store check below must use that same register.
    319   store <4 x float> %3, <4 x float>* @v4f32
    320   ; ALL-DAG: st.w [[R1]]
    321 
    322   ret void
    323 }
    324 
    325 define void @insert_v2f64_vidx(double %a) nounwind {
    326   ; ALL-LABEL: insert_v2f64_vidx:
    327   ; Loads @v2f64, replaces the lane selected at run time by @i32 with the double argument, and stores the vector back.
    328   %1 = load <2 x double>, <2 x double>* @v2f64
    329   ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2f64)(
    330   ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v2f64)(
    331   ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v2f64)(
    332   ; ALL-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
    333 
    334   %2 = load i32, i32* @i32
    335   ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
    336   ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
    337   ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
    338   ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
    339   ; Expected lowering: scale the index to a byte offset (sll by 3), slide the target lane to position 0 with sld.b, insert with insve.d, then slide back by the negated offset.
    340   %3 = insertelement <2 x double> %1, double %a, i32 %2
    341   ; double argument passed in $f12
    342   ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 3
    343   ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
    344   ; ALL-DAG: insve.d [[R1]][0], $w12[0]
    345   ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
    346   ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
    347   ; Every step above operates on R1 in place, and the store check below must use that same register.
    348   store <2 x double> %3, <2 x double>* @v2f64
    349   ; ALL-DAG: st.d [[R1]]
    350 
    351   ret void
    352 }
    353