; RUN: llc < %s -march=arm -mattr=+neon,+fp16 | FileCheck %s

define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind {
;CHECK: vcvt_f32tos32:
;CHECK: vcvt.s32.f32
  %tmp1 = load <2 x float>* %A
  %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
  ret <2 x i32> %tmp2
}

define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind {
;CHECK: vcvt_f32tou32:
;CHECK: vcvt.u32.f32
  %tmp1 = load <2 x float>* %A
  %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
  ret <2 x i32> %tmp2
}

define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind {
;CHECK: vcvt_s32tof32:
;CHECK: vcvt.f32.s32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = sitofp <2 x i32> %tmp1 to <2 x float>
  ret <2 x float> %tmp2
}

define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind {
;CHECK: vcvt_u32tof32:
;CHECK: vcvt.f32.u32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = uitofp <2 x i32> %tmp1 to <2 x float>
  ret <2 x float> %tmp2
}

define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind {
;CHECK: vcvtQ_f32tos32:
;CHECK: vcvt.s32.f32
  %tmp1 = load <4 x float>* %A
  %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
  ret <4 x i32> %tmp2
}

define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind {
;CHECK: vcvtQ_f32tou32:
;CHECK: vcvt.u32.f32
  %tmp1 = load <4 x float>* %A
  %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
  ret <4 x i32> %tmp2
}

define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind {
;CHECK: vcvtQ_s32tof32:
;CHECK: vcvt.f32.s32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = sitofp <4 x i32> %tmp1 to <4 x float>
  ret <4 x float> %tmp2
}

define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind {
;CHECK: vcvtQ_u32tof32:
;CHECK: vcvt.f32.u32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = uitofp <4 x i32> %tmp1 to <4 x float>
  ret <4 x float> %tmp2
}
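; The vcvt_n tests below use the fixed-point conversion intrinsics
; (llvm.arm.neon.vcvtfp2fxs/vcvtfp2fxu and vcvtfxs2fp/vcvtfxu2fp); the trailing
; i32 operand is the number of fraction bits, so these are expected to select
; the immediate (#fbits) form of VCVT.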
define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind {
;CHECK: vcvt_n_f32tos32:
;CHECK: vcvt.s32.f32
  %tmp1 = load <2 x float>* %A
  %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %tmp1, i32 1)
  ret <2 x i32> %tmp2
}

define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind {
;CHECK: vcvt_n_f32tou32:
;CHECK: vcvt.u32.f32
  %tmp1 = load <2 x float>* %A
  %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %tmp1, i32 1)
  ret <2 x i32> %tmp2
}

define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind {
;CHECK: vcvt_n_s32tof32:
;CHECK: vcvt.f32.s32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
  ret <2 x float> %tmp2
}

define <2 x float> @vcvt_n_u32tof32(<2 x i32>* %A) nounwind {
;CHECK: vcvt_n_u32tof32:
;CHECK: vcvt.f32.u32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
  ret <2 x float> %tmp2
}

declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone
declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone

define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind {
;CHECK: vcvtQ_n_f32tos32:
;CHECK: vcvt.s32.f32
  %tmp1 = load <4 x float>* %A
  %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %tmp1, i32 1)
  ret <4 x i32> %tmp2
}

define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind {
;CHECK: vcvtQ_n_f32tou32:
;CHECK: vcvt.u32.f32
  %tmp1 = load <4 x float>* %A
  %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %tmp1, i32 1)
  ret <4 x i32> %tmp2
}

define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind {
;CHECK: vcvtQ_n_s32tof32:
;CHECK: vcvt.f32.s32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
  ret <4 x float> %tmp2
}

define <4 x float> @vcvtQ_n_u32tof32(<4 x i32>* %A) nounwind {
;CHECK: vcvtQ_n_u32tof32:
;CHECK: vcvt.f32.u32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
  ret <4 x float> %tmp2
}

declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone
declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
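; The next two tests cover the half-precision conversions: the <4 x i16>
; values hold f16 bit patterns, and the vcvt.f32.f16 / vcvt.f16.f32 forms are
; only available because the RUN line enables +fp16.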
define <4 x float> @vcvt_f16tof32(<4 x i16>* %A) nounwind {
;CHECK: vcvt_f16tof32:
;CHECK: vcvt.f32.f16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %tmp1)
  ret <4 x float> %tmp2
}

define <4 x i16> @vcvt_f32tof16(<4 x float>* %A) nounwind {
;CHECK: vcvt_f32tof16:
;CHECK: vcvt.f16.f32
  %tmp1 = load <4 x float>* %A
  %tmp2 = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %tmp1)
  ret <4 x i16> %tmp2
}

declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) nounwind readnone

; We currently estimate the cost of sext/zext/trunc v8(v16)i32 <-> v8(v16)i8
; instructions as expensive. If lowering is improved the cost model needs to
; change.
; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -march=arm -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST
%T0_5 = type <8 x i8>
%T1_5 = type <8 x i32>
; CHECK: func_cvt5:
define void @func_cvt5(%T0_5* %loadaddr, %T1_5* %storeaddr) {
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
  %v0 = load %T0_5* %loadaddr
; COST: func_cvt5
; COST: cost of 24 {{.*}} sext
  %r = sext %T0_5 %v0 to %T1_5
  store %T1_5 %r, %T1_5* %storeaddr
  ret void
}
;; We currently estimate the cost of this instruction as expensive. If lowering
;; is improved the cost needs to change.
%TA0_5 = type <8 x i8>
%TA1_5 = type <8 x i32>
; CHECK: func_cvt1:
define void @func_cvt1(%TA0_5* %loadaddr, %TA1_5* %storeaddr) {
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
  %v0 = load %TA0_5* %loadaddr
; COST: func_cvt1
; COST: cost of 22 {{.*}} zext
  %r = zext %TA0_5 %v0 to %TA1_5
  store %TA1_5 %r, %TA1_5* %storeaddr
  ret void
}
;; We currently estimate the cost of this instruction as expensive. If lowering
;; is improved the cost needs to change.
%T0_51 = type <8 x i32>
%T1_51 = type <8 x i8>
; CHECK: func_cvt51:
define void @func_cvt51(%T0_51* %loadaddr, %T1_51* %storeaddr) {
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
  %v0 = load %T0_51* %loadaddr
; COST: func_cvt51
; COST: cost of 19 {{.*}} trunc
  %r = trunc %T0_51 %v0 to %T1_51
  store %T1_51 %r, %T1_51* %storeaddr
  ret void
}
;; We currently estimate the cost of this instruction as expensive. If lowering
;; is improved the cost needs to change.
%TT0_5 = type <16 x i8>
%TT1_5 = type <16 x i32>
; CHECK: func_cvt52:
define void @func_cvt52(%TT0_5* %loadaddr, %TT1_5* %storeaddr) {
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
  %v0 = load %TT0_5* %loadaddr
; COST: func_cvt52
; COST: cost of 48 {{.*}} sext
  %r = sext %TT0_5 %v0 to %TT1_5
  store %TT1_5 %r, %TT1_5* %storeaddr
  ret void
}
;; We currently estimate the cost of this instruction as expensive. If lowering
;; is improved the cost needs to change.
%TTA0_5 = type <16 x i8>
%TTA1_5 = type <16 x i32>
; CHECK: func_cvt12:
define void @func_cvt12(%TTA0_5* %loadaddr, %TTA1_5* %storeaddr) {
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
; CHECK: strh
  %v0 = load %TTA0_5* %loadaddr
; COST: func_cvt12
; COST: cost of 44 {{.*}} zext
  %r = zext %TTA0_5 %v0 to %TTA1_5
  store %TTA1_5 %r, %TTA1_5* %storeaddr
  ret void
}
;; We currently estimate the cost of this instruction as expensive. If lowering
;; is improved the cost needs to change.
%TT0_51 = type <16 x i32>
%TT1_51 = type <16 x i8>
; CHECK: func_cvt512:
define void @func_cvt512(%TT0_51* %loadaddr, %TT1_51* %storeaddr) {
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
; CHECK: strb
  %v0 = load %TT0_51* %loadaddr
; COST: func_cvt512
; COST: cost of 38 {{.*}} trunc
  %r = trunc %TT0_51 %v0 to %TT1_51
  store %TT1_51 %r, %TT1_51* %storeaddr
  ret void
}