; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s

; Vector subtraction tests. The file covers four families:
;   * subhn / rsubhn  - narrowing-subtract intrinsics (wide operands, half-width
;                       result), alone and concatenated into a 128-bit result;
;   * ssubl / usubl   - `sub` of two sign-/zero-extended narrow operands;
;   * ssubw / usubw   - `sub` of a wide operand and one sign-/zero-extended
;                       narrow operand.
; Each function only pins the mnemonic(s) expected in the llc output.

;===------------------------------------------------------------------------===
; subhn: narrowing subtract intrinsic
;===------------------------------------------------------------------------===

; <8 x i16> operands loaded from memory, <8 x i8> result.
define <8 x i8> @subhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: subhn8b:
; CHECK: subhn.8b
  %lhs = load <8 x i16>, <8 x i16>* %A
  %rhs = load <8 x i16>, <8 x i16>* %B
  %narrow = call <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i8> %narrow
}

; <4 x i32> operands loaded from memory, <4 x i16> result.
define <4 x i16> @subhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: subhn4h:
; CHECK: subhn.4h
  %lhs = load <4 x i32>, <4 x i32>* %A
  %rhs = load <4 x i32>, <4 x i32>* %B
  %narrow = call <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i16> %narrow
}

; <2 x i64> operands loaded from memory, <2 x i32> result.
define <2 x i32> @subhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
; CHECK-LABEL: subhn2s:
; CHECK: subhn.2s
  %lhs = load <2 x i64>, <2 x i64>* %A
  %rhs = load <2 x i64>, <2 x i64>* %B
  %narrow = call <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64> %lhs, <2 x i64> %rhs)
  ret <2 x i32> %narrow
}

; Two identical subhn calls on %a/%b are concatenated (identity shuffle mask
; 0..15) into one <16 x i8>; the expectation is the plain form immediately
; followed by the "2" form.
define <16 x i8> @subhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind {
; CHECK-LABEL: subhn2_16b:
; CHECK: subhn.8b
; CHECK-NEXT: subhn2.16b
  %lo = tail call <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
  %hi = tail call <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
  %joined = shufflevector <8 x i8> %lo, <8 x i8> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %joined
}

; Same pattern as subhn2_16b with <4 x i32> inputs and an <8 x i16> result.
define <8 x i16> @subhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind {
; CHECK-LABEL: subhn2_8h:
; CHECK: subhn.4h
; CHECK-NEXT: subhn2.8h
  %lo = tail call <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
  %hi = tail call <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
  %joined = shufflevector <4 x i16> %lo, <4 x i16> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %joined
}

; Same pattern as subhn2_16b with <2 x i64> inputs and a <4 x i32> result.
define <4 x i32> @subhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind {
; CHECK-LABEL: subhn2_4s:
; CHECK: subhn.2s
; CHECK-NEXT: subhn2.4s
  %lo = tail call <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
  %hi = tail call <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
  %joined = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %joined
}

; Declarations of the subhn intrinsic overloads used above.
declare <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone

;===------------------------------------------------------------------------===
; rsubhn: same shapes as the subhn tests, using the rsubhn intrinsic
;===------------------------------------------------------------------------===

; <8 x i16> operands loaded from memory, <8 x i8> result.
define <8 x i8> @rsubhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: rsubhn8b:
; CHECK: rsubhn.8b
  %lhs = load <8 x i16>, <8 x i16>* %A
  %rhs = load <8 x i16>, <8 x i16>* %B
  %narrow = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i8> %narrow
}

; <4 x i32> operands loaded from memory, <4 x i16> result.
define <4 x i16> @rsubhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: rsubhn4h:
; CHECK: rsubhn.4h
  %lhs = load <4 x i32>, <4 x i32>* %A
  %rhs = load <4 x i32>, <4 x i32>* %B
  %narrow = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i16> %narrow
}

; <2 x i64> operands loaded from memory, <2 x i32> result.
define <2 x i32> @rsubhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
; CHECK-LABEL: rsubhn2s:
; CHECK: rsubhn.2s
  %lhs = load <2 x i64>, <2 x i64>* %A
  %rhs = load <2 x i64>, <2 x i64>* %B
  %narrow = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %lhs, <2 x i64> %rhs)
  ret <2 x i32> %narrow
}

; Two identical rsubhn calls on %a/%b concatenated into one <16 x i8>; the
; expectation is the plain form immediately followed by the "2" form.
define <16 x i8> @rsubhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind {
; CHECK-LABEL: rsubhn2_16b:
; CHECK: rsubhn.8b
; CHECK-NEXT: rsubhn2.16b
  %lo = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
  %hi = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind
  %joined = shufflevector <8 x i8> %lo, <8 x i8> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %joined
}

; Same pattern as rsubhn2_16b with <4 x i32> inputs and an <8 x i16> result.
define <8 x i16> @rsubhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind {
; CHECK-LABEL: rsubhn2_8h:
; CHECK: rsubhn.4h
; CHECK-NEXT: rsubhn2.8h
  %lo = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
  %hi = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind
  %joined = shufflevector <4 x i16> %lo, <4 x i16> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %joined
}

; Same pattern as rsubhn2_16b with <2 x i64> inputs and a <4 x i32> result.
define <4 x i32> @rsubhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind {
; CHECK-LABEL: rsubhn2_4s:
; CHECK: rsubhn.2s
; CHECK-NEXT: rsubhn2.4s
  %lo = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
  %hi = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind
  %joined = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %joined
}

; Declarations of the rsubhn intrinsic overloads used above.
declare <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone

;===------------------------------------------------------------------------===
; ssubl: sub of two sign-extended 64-bit vectors
;===------------------------------------------------------------------------===

; sext both <8 x i8> operands to <8 x i16>, then subtract.
define <8 x i16> @ssubl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: ssubl8h:
; CHECK: ssubl.8h
  %lhs = load <8 x i8>, <8 x i8>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  %lhs.ext = sext <8 x i8> %lhs to <8 x i16>
  %rhs.ext = sext <8 x i8> %rhs to <8 x i16>
  %diff = sub <8 x i16> %lhs.ext, %rhs.ext
  ret <8 x i16> %diff
}

; sext both <4 x i16> operands to <4 x i32>, then subtract.
define <4 x i32> @ssubl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: ssubl4s:
; CHECK: ssubl.4s
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %lhs.ext = sext <4 x i16> %lhs to <4 x i32>
  %rhs.ext = sext <4 x i16> %rhs to <4 x i32>
  %diff = sub <4 x i32> %lhs.ext, %rhs.ext
  ret <4 x i32> %diff
}

; sext both <2 x i32> operands to <2 x i64>, then subtract.
define <2 x i64> @ssubl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: ssubl2d:
; CHECK: ssubl.2d
  %lhs = load <2 x i32>, <2 x i32>* %A
  %rhs = load <2 x i32>, <2 x i32>* %B
  %lhs.ext = sext <2 x i32> %lhs to <2 x i64>
  %rhs.ext = sext <2 x i32> %rhs to <2 x i64>
  %diff = sub <2 x i64> %lhs.ext, %rhs.ext
  ret <2 x i64> %diff
}

; High-half variants: each operand is a 128-bit load whose upper lanes are
; selected by a shufflevector before the sext.
; NOTE(review): the expected mnemonic is the non-"2" form even though the IR
; uses the high half - presumably the load + extract is narrowed to a 64-bit
; load of the upper half; confirm against actual llc output.
define <8 x i16> @ssubl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: ssubl2_8h:
; CHECK: ssubl.8h
  %lhs.full = load <16 x i8>, <16 x i8>* %A
  %lhs.hi = shufflevector <16 x i8> %lhs.full, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %lhs.ext = sext <8 x i8> %lhs.hi to <8 x i16>

  %rhs.full = load <16 x i8>, <16 x i8>* %B
  %rhs.hi = shufflevector <16 x i8> %rhs.full, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %rhs.ext = sext <8 x i8> %rhs.hi to <8 x i16>

  %diff = sub <8 x i16> %lhs.ext, %rhs.ext
  ret <8 x i16> %diff
}

; Upper four i16 lanes of each operand, sign-extended to i32, then subtracted.
define <4 x i32> @ssubl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: ssubl2_4s:
; CHECK: ssubl.4s
  %lhs.full = load <8 x i16>, <8 x i16>* %A
  %lhs.hi = shufflevector <8 x i16> %lhs.full, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %lhs.ext = sext <4 x i16> %lhs.hi to <4 x i32>

  %rhs.full = load <8 x i16>, <8 x i16>* %B
  %rhs.hi = shufflevector <8 x i16> %rhs.full, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %rhs.ext = sext <4 x i16> %rhs.hi to <4 x i32>

  %diff = sub <4 x i32> %lhs.ext, %rhs.ext
  ret <4 x i32> %diff
}

; Upper two i32 lanes of each operand, sign-extended to i64, then subtracted.
define <2 x i64> @ssubl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: ssubl2_2d:
; CHECK: ssubl.2d
  %lhs.full = load <4 x i32>, <4 x i32>* %A
  %lhs.hi = shufflevector <4 x i32> %lhs.full, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %lhs.ext = sext <2 x i32> %lhs.hi to <2 x i64>

  %rhs.full = load <4 x i32>, <4 x i32>* %B
  %rhs.hi = shufflevector <4 x i32> %rhs.full, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %rhs.ext = sext <2 x i32> %rhs.hi to <2 x i64>

  %diff = sub <2 x i64> %lhs.ext, %rhs.ext
  ret <2 x i64> %diff
}

;===------------------------------------------------------------------------===
; usubl: sub of two zero-extended 64-bit vectors
;===------------------------------------------------------------------------===

; zext both <8 x i8> operands to <8 x i16>, then subtract.
define <8 x i16> @usubl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: usubl8h:
; CHECK: usubl.8h
  %lhs = load <8 x i8>, <8 x i8>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  %lhs.ext = zext <8 x i8> %lhs to <8 x i16>
  %rhs.ext = zext <8 x i8> %rhs to <8 x i16>
  %diff = sub <8 x i16> %lhs.ext, %rhs.ext
  ret <8 x i16> %diff
}

; zext both <4 x i16> operands to <4 x i32>, then subtract.
define <4 x i32> @usubl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: usubl4s:
; CHECK: usubl.4s
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %lhs.ext = zext <4 x i16> %lhs to <4 x i32>
  %rhs.ext = zext <4 x i16> %rhs to <4 x i32>
  %diff = sub <4 x i32> %lhs.ext, %rhs.ext
  ret <4 x i32> %diff
}

; zext both <2 x i32> operands to <2 x i64>, then subtract.
define <2 x i64> @usubl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: usubl2d:
; CHECK: usubl.2d
  %lhs = load <2 x i32>, <2 x i32>* %A
  %rhs = load <2 x i32>, <2 x i32>* %B
  %lhs.ext = zext <2 x i32> %lhs to <2 x i64>
  %rhs.ext = zext <2 x i32> %rhs to <2 x i64>
  %diff = sub <2 x i64> %lhs.ext, %rhs.ext
  ret <2 x i64> %diff
}

; High-half variants: upper lanes of each 128-bit load are selected by a
; shufflevector, zero-extended, then subtracted.
; NOTE(review): as with the signed high-half tests, the expected mnemonic is
; the non-"2" form; confirm this is intentional.
define <8 x i16> @usubl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: usubl2_8h:
; CHECK: usubl.8h
  %lhs.full = load <16 x i8>, <16 x i8>* %A
  %lhs.hi = shufflevector <16 x i8> %lhs.full, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %lhs.ext = zext <8 x i8> %lhs.hi to <8 x i16>

  %rhs.full = load <16 x i8>, <16 x i8>* %B
  %rhs.hi = shufflevector <16 x i8> %rhs.full, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %rhs.ext = zext <8 x i8> %rhs.hi to <8 x i16>

  %diff = sub <8 x i16> %lhs.ext, %rhs.ext
  ret <8 x i16> %diff
}

; Upper four i16 lanes of each operand, zero-extended to i32, then subtracted.
define <4 x i32> @usubl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: usubl2_4s:
; CHECK: usubl.4s
  %lhs.full = load <8 x i16>, <8 x i16>* %A
  %lhs.hi = shufflevector <8 x i16> %lhs.full, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %lhs.ext = zext <4 x i16> %lhs.hi to <4 x i32>

  %rhs.full = load <8 x i16>, <8 x i16>* %B
  %rhs.hi = shufflevector <8 x i16> %rhs.full, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %rhs.ext = zext <4 x i16> %rhs.hi to <4 x i32>

  %diff = sub <4 x i32> %lhs.ext, %rhs.ext
  ret <4 x i32> %diff
}

; Upper two i32 lanes of each operand, zero-extended to i64, then subtracted.
define <2 x i64> @usubl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: usubl2_2d:
; CHECK: usubl.2d
  %lhs.full = load <4 x i32>, <4 x i32>* %A
  %lhs.hi = shufflevector <4 x i32> %lhs.full, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %lhs.ext = zext <2 x i32> %lhs.hi to <2 x i64>

  %rhs.full = load <4 x i32>, <4 x i32>* %B
  %rhs.hi = shufflevector <4 x i32> %rhs.full, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %rhs.ext = zext <2 x i32> %rhs.hi to <2 x i64>

  %diff = sub <2 x i64> %lhs.ext, %rhs.ext
  ret <2 x i64> %diff
}

;===------------------------------------------------------------------------===
; ssubw: sub of a wide vector and a sign-extended narrow vector
;===------------------------------------------------------------------------===

; <8 x i16> minus sext(<8 x i8>).
define <8 x i16> @ssubw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: ssubw8h:
; CHECK: ssubw.8h
  %wide = load <8 x i16>, <8 x i16>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  %rhs.ext = sext <8 x i8> %rhs to <8 x i16>
  %diff = sub <8 x i16> %wide, %rhs.ext
  ret <8 x i16> %diff
}

; <4 x i32> minus sext(<4 x i16>).
define <4 x i32> @ssubw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: ssubw4s:
; CHECK: ssubw.4s
  %wide = load <4 x i32>, <4 x i32>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %rhs.ext = sext <4 x i16> %rhs to <4 x i32>
  %diff = sub <4 x i32> %wide, %rhs.ext
  ret <4 x i32> %diff
}

; <2 x i64> minus sext(<2 x i32>).
define <2 x i64> @ssubw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: ssubw2d:
; CHECK: ssubw.2d
  %wide = load <2 x i64>, <2 x i64>* %A
  %rhs = load <2 x i32>, <2 x i32>* %B
  %rhs.ext = sext <2 x i32> %rhs to <2 x i64>
  %diff = sub <2 x i64> %wide, %rhs.ext
  ret <2 x i64> %diff
}

; High-half variants: the narrow operand is the upper half of a 128-bit load.
; NOTE(review): the expected mnemonic is the non-"2" form; confirm this is
; intentional.
define <8 x i16> @ssubw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: ssubw2_8h:
; CHECK: ssubw.8h
  %wide = load <8 x i16>, <8 x i16>* %A

  %rhs.full = load <16 x i8>, <16 x i8>* %B
  %rhs.hi = shufflevector <16 x i8> %rhs.full, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %rhs.ext = sext <8 x i8> %rhs.hi to <8 x i16>

  %diff = sub <8 x i16> %wide, %rhs.ext
  ret <8 x i16> %diff
}

; <4 x i32> minus sext(upper four i16 lanes of a <8 x i16> load).
define <4 x i32> @ssubw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: ssubw2_4s:
; CHECK: ssubw.4s
  %wide = load <4 x i32>, <4 x i32>* %A

  %rhs.full = load <8 x i16>, <8 x i16>* %B
  %rhs.hi = shufflevector <8 x i16> %rhs.full, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %rhs.ext = sext <4 x i16> %rhs.hi to <4 x i32>

  %diff = sub <4 x i32> %wide, %rhs.ext
  ret <4 x i32> %diff
}

; <2 x i64> minus sext(upper two i32 lanes of a <4 x i32> load).
define <2 x i64> @ssubw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: ssubw2_2d:
; CHECK: ssubw.2d
  %wide = load <2 x i64>, <2 x i64>* %A

  %rhs.full = load <4 x i32>, <4 x i32>* %B
  %rhs.hi = shufflevector <4 x i32> %rhs.full, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %rhs.ext = sext <2 x i32> %rhs.hi to <2 x i64>

  %diff = sub <2 x i64> %wide, %rhs.ext
  ret <2 x i64> %diff
}

;===------------------------------------------------------------------------===
; usubw: sub of a wide vector and a zero-extended narrow vector
;===------------------------------------------------------------------------===

; <8 x i16> minus zext(<8 x i8>).
define <8 x i16> @usubw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: usubw8h:
; CHECK: usubw.8h
  %wide = load <8 x i16>, <8 x i16>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  %rhs.ext = zext <8 x i8> %rhs to <8 x i16>
  %diff = sub <8 x i16> %wide, %rhs.ext
  ret <8 x i16> %diff
}

; <4 x i32> minus zext(<4 x i16>).
define <4 x i32> @usubw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: usubw4s:
; CHECK: usubw.4s
  %wide = load <4 x i32>, <4 x i32>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %rhs.ext = zext <4 x i16> %rhs to <4 x i32>
  %diff = sub <4 x i32> %wide, %rhs.ext
  ret <4 x i32> %diff
}

; <2 x i64> minus zext(<2 x i32>).
define <2 x i64> @usubw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: usubw2d:
; CHECK: usubw.2d
  %wide = load <2 x i64>, <2 x i64>* %A
  %rhs = load <2 x i32>, <2 x i32>* %B
  %rhs.ext = zext <2 x i32> %rhs to <2 x i64>
  %diff = sub <2 x i64> %wide, %rhs.ext
  ret <2 x i64> %diff
}

; High-half variants: the narrow operand is the upper half of a 128-bit load.
; NOTE(review): the expected mnemonic is the non-"2" form; confirm this is
; intentional.
define <8 x i16> @usubw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: usubw2_8h:
; CHECK: usubw.8h
  %wide = load <8 x i16>, <8 x i16>* %A

  %rhs.full = load <16 x i8>, <16 x i8>* %B
  %rhs.hi = shufflevector <16 x i8> %rhs.full, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %rhs.ext = zext <8 x i8> %rhs.hi to <8 x i16>

  %diff = sub <8 x i16> %wide, %rhs.ext
  ret <8 x i16> %diff
}

; <4 x i32> minus zext(upper four i16 lanes of a <8 x i16> load).
define <4 x i32> @usubw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: usubw2_4s:
; CHECK: usubw.4s
  %wide = load <4 x i32>, <4 x i32>* %A

  %rhs.full = load <8 x i16>, <8 x i16>* %B
  %rhs.hi = shufflevector <8 x i16> %rhs.full, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %rhs.ext = zext <4 x i16> %rhs.hi to <4 x i32>

  %diff = sub <4 x i32> %wide, %rhs.ext
  ret <4 x i32> %diff
}

; <2 x i64> minus zext(upper two i32 lanes of a <4 x i32> load).
define <2 x i64> @usubw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: usubw2_2d:
; CHECK: usubw.2d
  %wide = load <2 x i64>, <2 x i64>* %A

  %rhs.full = load <4 x i32>, <4 x i32>* %B
  %rhs.hi = shufflevector <4 x i32> %rhs.full, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %rhs.ext = zext <2 x i32> %rhs.hi to <2 x i64>

  %diff = sub <2 x i64> %wide, %rhs.ext
  ret <2 x i64> %diff
}