; Imported via a web code browser (Home | History | Annotate | Download) — only in AArch64
      1 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
      2 
      3 
      4 define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) {
      5 ;CHECK: mul {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
      6 	%tmp3 = mul <8 x i8> %A, %B;
      7 	ret <8 x i8> %tmp3
      8 }
      9 
     10 define <16 x i8> @mul16xi8(<16 x i8> %A, <16 x i8> %B) {
     11 ;CHECK: mul {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
     12 	%tmp3 = mul <16 x i8> %A, %B;
     13 	ret <16 x i8> %tmp3
     14 }
     15 
     16 define <4 x i16> @mul4xi16(<4 x i16> %A, <4 x i16> %B) {
     17 ;CHECK: mul {{v[0-31]+}}.4h, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
     18 	%tmp3 = mul <4 x i16> %A, %B;
     19 	ret <4 x i16> %tmp3
     20 }
     21 
     22 define <8 x i16> @mul8xi16(<8 x i16> %A, <8 x i16> %B) {
     23 ;CHECK: mul {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
     24 	%tmp3 = mul <8 x i16> %A, %B;
     25 	ret <8 x i16> %tmp3
     26 }
     27 
     28 define <2 x i32> @mul2xi32(<2 x i32> %A, <2 x i32> %B) {
     29 ;CHECK: mul {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
     30 	%tmp3 = mul <2 x i32> %A, %B;
     31 	ret <2 x i32> %tmp3
     32 }
     33 
     34 define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) {
     35 ;CHECK: mul {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
     36 	%tmp3 = mul <4 x i32> %A, %B;
     37 	ret <4 x i32> %tmp3
     38 }
     39 
     40  define <2 x float> @mul2xfloat(<2 x float> %A, <2 x float> %B) {
     41 ;CHECK: fmul {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
     42 	%tmp3 = fmul <2 x float> %A, %B;
     43 	ret <2 x float> %tmp3
     44 }
     45 
     46 define <4 x float> @mul4xfloat(<4 x float> %A, <4 x float> %B) {
     47 ;CHECK: fmul {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
     48 	%tmp3 = fmul <4 x float> %A, %B;
     49 	ret <4 x float> %tmp3
     50 }
     51 define <2 x double> @mul2xdouble(<2 x double> %A, <2 x double> %B) {
     52 ;CHECK: fmul {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
     53 	%tmp3 = fmul <2 x double> %A, %B;
     54 	ret <2 x double> %tmp3
     55 }
     56 
     57 
     58  define <2 x float> @div2xfloat(<2 x float> %A, <2 x float> %B) {
     59 ;CHECK: fdiv {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
     60 	%tmp3 = fdiv <2 x float> %A, %B;
     61 	ret <2 x float> %tmp3
     62 }
     63 
     64 define <4 x float> @div4xfloat(<4 x float> %A, <4 x float> %B) {
     65 ;CHECK: fdiv {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
     66 	%tmp3 = fdiv <4 x float> %A, %B;
     67 	ret <4 x float> %tmp3
     68 }
     69 define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) {
     70 ;CHECK: fdiv {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
     71 	%tmp3 = fdiv <2 x double> %A, %B;
     72 	ret <2 x double> %tmp3
     73 }
     74 
     75 declare <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>)
     76 declare <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>)
     77 
     78 define <8 x i8> @poly_mulv8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
     79 ; CHECK: poly_mulv8i8:
     80    %prod = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
     81 ; CHECK: pmul v0.8b, v0.8b, v1.8b
     82    ret <8 x i8> %prod
     83 }
     84 
     85 define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
     86 ; CHECK: poly_mulv16i8:
     87    %prod = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
     88 ; CHECK: pmul v0.16b, v0.16b, v1.16b
     89    ret <16 x i8> %prod
     90 }
     91 
     92 declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>)
     93 declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>)
     94 declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>)
     95 declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>)
     96 
     97 define <4 x i16> @test_sqdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
     98 ; CHECK: test_sqdmulh_v4i16:
     99    %prod = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
    100 ; CHECK: sqdmulh v0.4h, v0.4h, v1.4h
    101    ret <4 x i16> %prod
    102 }
    103 
    104 define <8 x i16> @test_sqdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
    105 ; CHECK: test_sqdmulh_v8i16:
    106    %prod = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
    107 ; CHECK: sqdmulh v0.8h, v0.8h, v1.8h
    108    ret <8 x i16> %prod
    109 }
    110 
    111 define <2 x i32> @test_sqdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
    112 ; CHECK: test_sqdmulh_v2i32:
    113    %prod = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
    114 ; CHECK: sqdmulh v0.2s, v0.2s, v1.2s
    115    ret <2 x i32> %prod
    116 }
    117 
    118 define <4 x i32> @test_sqdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
    119 ; CHECK: test_sqdmulh_v4i32:
    120    %prod = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
    121 ; CHECK: sqdmulh v0.4s, v0.4s, v1.4s
    122    ret <4 x i32> %prod
    123 }
    124 
    125 declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>)
    126 declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>)
    127 declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>)
    128 declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>)
    129 
    130 define <4 x i16> @test_sqrdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
    131 ; CHECK: test_sqrdmulh_v4i16:
    132    %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
    133 ; CHECK: sqrdmulh v0.4h, v0.4h, v1.4h
    134    ret <4 x i16> %prod
    135 }
    136 
    137 define <8 x i16> @test_sqrdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
    138 ; CHECK: test_sqrdmulh_v8i16:
    139    %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
    140 ; CHECK: sqrdmulh v0.8h, v0.8h, v1.8h
    141    ret <8 x i16> %prod
    142 }
    143 
    144 define <2 x i32> @test_sqrdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
    145 ; CHECK: test_sqrdmulh_v2i32:
    146    %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
    147 ; CHECK: sqrdmulh v0.2s, v0.2s, v1.2s
    148    ret <2 x i32> %prod
    149 }
    150 
    151 define <4 x i32> @test_sqrdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
    152 ; CHECK: test_sqrdmulh_v4i32:
    153    %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
    154 ; CHECK: sqrdmulh v0.4s, v0.4s, v1.4s
    155    ret <4 x i32> %prod
    156 }
    157 
    158 declare <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float>, <2 x float>)
    159 declare <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float>, <4 x float>)
    160 declare <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double>, <2 x double>)
    161 
    162 define <2 x float> @fmulx_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
    163 ; Using registers other than v0, v1 and v2 are possible, but would be odd.
    164 ; CHECK: fmulx v0.2s, v0.2s, v1.2s
    165         %val = call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %lhs, <2 x float> %rhs)
    166         ret <2 x float> %val
    167 }
    168 
    169 define <4 x float> @fmulx_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
    170 ; Using registers other than v0, v1 and v2 are possible, but would be odd.
    171 ; CHECK: fmulx v0.4s, v0.4s, v1.4s
    172         %val = call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %lhs, <4 x float> %rhs)
    173         ret <4 x float> %val
    174 }
    175 
    176 define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
    177 ; Using registers other than v0, v1 and v2 are possible, but would be odd.
    178 ; CHECK: fmulx v0.2d, v0.2d, v1.2d
    179         %val = call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs)
    180         ret <2 x double> %val
    181 }
    182