Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -mtriple aarch64-apple-darwin -asm-verbose=false -disable-post-ra | FileCheck %s
      2 
      3 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
      4 
      5 ;============ v1f32
      6 
      7 ; WidenVecRes same
      8 define <1 x float> @test_copysign_v1f32_v1f32(<1 x float> %a, <1 x float> %b) #0 {
        ; Copysign where magnitude (%a) and sign source (%b) are both <1 x float>.
        ; Expected code: movi.2s materialises 0x80 << 24 = 0x80000000 (the f32 sign
        ; bit) in every 32-bit lane of v2, then bit.8b copies the bits selected by
        ; that mask from v1 into v0, i.e. only the sign bit is taken from %b.
      9 ; CHECK-LABEL: test_copysign_v1f32_v1f32:
     10 ; CHECK-NEXT:    movi.2s v2, #0x80, lsl #24
     11 ; CHECK-NEXT:    bit.8b v0, v1, v2
     12 ; CHECK-NEXT:    ret
     13   %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b)
     14   ret <1 x float> %r
     15 }
     16 
     17 ; WidenVecRes mismatched
     18 define <1 x float> @test_copysign_v1f32_v1f64(<1 x float> %a, <1 x double> %b) #0 {
        ; Copysign where the sign source is a <1 x double> that is truncated to
        ; <1 x float> before the intrinsic call.
        ; Expected code: a scalar fcvt narrows the sign operand (d1 -> s1), then the
        ; same sign-bit mask (0x80 << 24 per 32-bit lane) and bit sequence is used.
        ; Note that the 128-bit forms (movi.4s / bit.16b) are expected here, whereas
        ; test_copysign_v1f32_v1f32 expects the 64-bit forms (movi.2s / bit.8b).
     19 ; CHECK-LABEL: test_copysign_v1f32_v1f64:
     20 ; CHECK-NEXT:    fcvt s1, d1
     21 ; CHECK-NEXT:    movi.4s v2, #0x80, lsl #24
     22 ; CHECK-NEXT:    bit.16b v0, v1, v2
     23 ; CHECK-NEXT:    ret
     24   %tmp0 = fptrunc <1 x double> %b to <1 x float>
     25   %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %tmp0)
     26   ret <1 x float> %r
     27 }
     28 
     29 declare <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b) #0
     30 
     31 ;============ v1f64
     32 
     33 ; WidenVecOp #1
     34 define <1 x double> @test_copysign_v1f64_v1f32(<1 x double> %a, <1 x float> %b) #0 {
        ; Copysign where the sign source is a <1 x float> that is extended to
        ; <1 x double> before the intrinsic call.
        ; Expected code: a scalar fcvt widens the sign operand (s1 -> d1). The mask
        ; is built by zeroing v2 and negating it as f64: -0.0 has only the sign bit
        ; set, so each 64-bit lane becomes 0x8000000000000000. bit.16b then copies
        ; just that bit from v1 into v0.
     35 ; CHECK-LABEL: test_copysign_v1f64_v1f32:
     36 ; CHECK-NEXT:    fcvt d1, s1
     37 ; CHECK-NEXT:    movi.2d v2, #0000000000000000
     38 ; CHECK-NEXT:    fneg.2d v2, v2
     39 ; CHECK-NEXT:    bit.16b v0, v1, v2
     40 ; CHECK-NEXT:    ret
     41   %tmp0 = fpext <1 x float> %b to <1 x double>
     42   %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %tmp0)
     43   ret <1 x double> %r
     44 }
     45 
     46 define <1 x double> @test_copysign_v1f64_v1f64(<1 x double> %a, <1 x double> %b) #0 {
        ; Copysign where magnitude (%a) and sign source (%b) are both <1 x double>;
        ; no conversion of the sign operand is expected.
        ; Expected code: zero v2, negate it as f64 to obtain -0.0 (sign bit only) in
        ; each 64-bit lane, then bit.16b copies that bit from v1 into v0.
     47 ; CHECK-LABEL: test_copysign_v1f64_v1f64:
     48 ; CHECK-NEXT:    movi.2d v2, #0000000000000000
     49 ; CHECK-NEXT:    fneg.2d v2, v2
     50 ; CHECK-NEXT:    bit.16b v0, v1, v2
     51 ; CHECK-NEXT:    ret
     52   %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b)
     53   ret <1 x double> %r
     54 }
     55 
     56 declare <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b) #0
     57 
     58 ;============ v2f32
     59 
     60 define <2 x float> @test_copysign_v2f32_v2f32(<2 x float> %a, <2 x float> %b) #0 {
        ; Copysign on two <2 x float> operands of matching type.
        ; Expected code: movi.2s puts 0x80 << 24 = 0x80000000 (the f32 sign bit) in
        ; both 32-bit lanes of v2; bit.8b copies the masked bits from v1 into v0.
     61 ; CHECK-LABEL: test_copysign_v2f32_v2f32:
     62 ; CHECK-NEXT:    movi.2s v2, #0x80, lsl #24
     63 ; CHECK-NEXT:    bit.8b v0, v1, v2
     64 ; CHECK-NEXT:    ret
     65   %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
     66   ret <2 x float> %r
     67 }
     68 
     69 define <2 x float> @test_copysign_v2f32_v2f64(<2 x float> %a, <2 x double> %b) #0 {
        ; Copysign where the sign source is a <2 x double> truncated to <2 x float>
        ; before the intrinsic call.
        ; Expected code: a single vector fcvtn narrows both lanes of the sign operand
        ; (v1.2d -> v1.2s), followed by the f32 sign-bit mask and bit.8b.
     70 ; CHECK-LABEL: test_copysign_v2f32_v2f64:
     71 ; CHECK-NEXT:    fcvtn v1.2s, v1.2d
     72 ; CHECK-NEXT:    movi.2s v2, #0x80, lsl #24
     73 ; CHECK-NEXT:    bit.8b v0, v1, v2
     74 ; CHECK-NEXT:    ret
     75   %tmp0 = fptrunc <2 x double> %b to <2 x float>
     76   %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %tmp0)
     77   ret <2 x float> %r
     78 }
     79 
     80 declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0
     81 
     82 ;============ v4f32
     83 
     84 define <4 x float> @test_copysign_v4f32_v4f32(<4 x float> %a, <4 x float> %b) #0 {
        ; Copysign on two <4 x float> operands of matching type.
        ; Expected code: movi.4s puts 0x80 << 24 = 0x80000000 (the f32 sign bit) in
        ; all four 32-bit lanes of v2; bit.16b copies the masked bits from v1 into v0.
     85 ; CHECK-LABEL: test_copysign_v4f32_v4f32:
     86 ; CHECK-NEXT:    movi.4s v2, #0x80, lsl #24
     87 ; CHECK-NEXT:    bit.16b v0, v1, v2
     88 ; CHECK-NEXT:    ret
     89   %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
     90   ret <4 x float> %r
     91 }
     92 
     93 ; SplitVecOp #1
     94 define <4 x float> @test_copysign_v4f32_v4f64(<4 x float> %a, <4 x double> %b) #0 {
        ; Copysign where the sign source is a <4 x double> (two 128-bit registers in
        ; the expected code, v1 and v2) truncated to <4 x float> before the call.
        ; The expected code is fully scalarised rather than using a vector narrow:
        ;   - lanes 1..3 of %a are moved out of v0 into s3, s6 and s7;
        ;   - each double lane of %b is narrowed with its own scalar fcvt;
        ;   - one bit.16b per lane applies the shared sign mask held in v5;
        ;   - three ins.s instructions rebuild the result in v0 (lane 0 is already
        ;     in place).
        ; The exact instruction order below is part of what the test pins, since
        ; every directive after the label must match the next emitted line.
     95 ; CHECK-LABEL: test_copysign_v4f32_v4f64:
     96 ; CHECK-NEXT:    mov s3, v0[1]
     97 ; CHECK-NEXT:    mov d4, v1[1]
     98 ; CHECK-NEXT:    movi.4s v5, #0x80, lsl #24
     99 ; CHECK-NEXT:    fcvt s1, d1
    100 ; CHECK-NEXT:    mov s6, v0[2]
    101 ; CHECK-NEXT:    mov s7, v0[3]
    102 ; CHECK-NEXT:    fcvt s16, d2
    103 ; CHECK-NEXT:    bit.16b v0, v1, v5
    104 ; CHECK-NEXT:    bit.16b v6, v16, v5
    105 ; CHECK-NEXT:    fcvt s1, d4
    106 ; CHECK-NEXT:    bit.16b v3, v1, v5
    107 ; CHECK-NEXT:    mov d1, v2[1]
    108 ; CHECK-NEXT:    fcvt s1, d1
    109 ; CHECK-NEXT:    ins.s v0[1], v3[0]
    110 ; CHECK-NEXT:    ins.s v0[2], v6[0]
    111 ; CHECK-NEXT:    bit.16b v7, v1, v5
    112 ; CHECK-NEXT:    ins.s v0[3], v7[0]
    113 ; CHECK-NEXT:    ret
    114   %tmp0 = fptrunc <4 x double> %b to <4 x float>
    115   %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0)
    116   ret <4 x float> %r
    117 }
    118 
    119 declare <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) #0
    120 
    121 ;============ v2f64
    122 
    123 define <2 x double> @test_copysign_v2f64_v232(<2 x double> %a, <2 x float> %b) #0 {
        ; Copysign where the sign source is a <2 x float> extended to <2 x double>
        ; before the intrinsic call.
        ; Expected code: the f64 sign mask is built first (zero, then fneg to get
        ; -0.0, i.e. only the sign bit, in each 64-bit lane), fcvtl widens both
        ; lanes of the sign operand (v1.2s -> v1.2d), and bit.16b merges the sign.
        ; NOTE(review): the "_v232" suffix breaks the <result>_<sign source> naming
        ; used by the other tests in this file and is presumably a typo for
        ; "_v2f32"; renaming would require updating the label directive as well.
    124 ; CHECK-LABEL: test_copysign_v2f64_v232:
    125 ; CHECK-NEXT:    movi.2d v2, #0000000000000000
    126 ; CHECK-NEXT:    fneg.2d v2, v2
    127 ; CHECK-NEXT:    fcvtl v1.2d, v1.2s
    128 ; CHECK-NEXT:    bit.16b v0, v1, v2
    129 ; CHECK-NEXT:    ret
    130   %tmp0 = fpext <2 x float> %b to <2 x double>
    131   %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %tmp0)
    132   ret <2 x double> %r
    133 }
    134 
    135 define <2 x double> @test_copysign_v2f64_v2f64(<2 x double> %a, <2 x double> %b) #0 {
        ; Copysign on two <2 x double> operands of matching type.
        ; Expected code: zero v2, negate it as f64 to obtain -0.0 (sign bit only) in
        ; both 64-bit lanes, then bit.16b copies that bit from v1 into v0.
    136 ; CHECK-LABEL: test_copysign_v2f64_v2f64:
    137 ; CHECK-NEXT:    movi.2d v2, #0000000000000000
    138 ; CHECK-NEXT:    fneg.2d v2, v2
    139 ; CHECK-NEXT:    bit.16b v0, v1, v2
    140 ; CHECK-NEXT:    ret
    141   %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
    142   ret <2 x double> %r
    143 }
    144 
    145 declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0
    146 
    147 ;============ v4f64
    148 
    149 ; SplitVecRes mismatched
    150 define <4 x double> @test_copysign_v4f64_v4f32(<4 x double> %a, <4 x float> %b) #0 {
        ; Copysign where the sign source is a <4 x float> extended to <4 x double>
        ; before the intrinsic call.
        ; In the expected code the <4 x double> value occupies two registers
        ; (v0 and v1) and the sign source is in v2: fcvtl2 widens the upper two
        ; floats into v4, fcvtl widens the lower two in place, and a single f64
        ; sign mask in v3 (zero negated to -0.0) is shared by both bit.16b merges.
    151 ; CHECK-LABEL: test_copysign_v4f64_v4f32:
    152 ; CHECK-NEXT:    movi.2d v3, #0000000000000000
    153 ; CHECK-NEXT:    fcvtl2 v4.2d, v2.4s
    154 ; CHECK-NEXT:    fcvtl v2.2d, v2.2s
    155 ; CHECK-NEXT:    fneg.2d v3, v3
    156 ; CHECK-NEXT:    bit.16b v1, v4, v3
    157 ; CHECK-NEXT:    bit.16b v0, v2, v3
    158 ; CHECK-NEXT:    ret
    159   %tmp0 = fpext <4 x float> %b to <4 x double>
    160   %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0)
    161   ret <4 x double> %r
    162 }
    163 
    164 ; SplitVecRes same
    165 define <4 x double> @test_copysign_v4f64_v4f64(<4 x double> %a, <4 x double> %b) #0 {
        ; Copysign on two <4 x double> operands of matching type.
        ; In the expected code each operand occupies two 128-bit registers
        ; (v0/v1 for the magnitude, v2/v3 for the sign source). One f64 sign mask is
        ; built in v4 (zero negated to -0.0) and reused by a bit.16b per half.
    166 ; CHECK-LABEL: test_copysign_v4f64_v4f64:
    167 ; CHECK-NEXT:    movi.2d v4, #0000000000000000
    168 ; CHECK-NEXT:    fneg.2d v4, v4
    169 ; CHECK-NEXT:    bit.16b v0, v2, v4
    170 ; CHECK-NEXT:    bit.16b v1, v3, v4
    171 ; CHECK-NEXT:    ret
    172   %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
    173   ret <4 x double> %r
    174 }
    175 
    176 declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0
    177 
    178 attributes #0 = { nounwind }
    179