; Cost-model and code-generation checks for AArch64 "select"-style shuffles.
;
; Every function below shuffles %v0 and %v1 with a mask that keeps each lane in
; place: even result lanes use mask indices 0, 2, 4, ... (taken from %v0) and
; odd result lanes use mask indices N+1, N+3, ... (taken from %v1, where N is
; the number of lanes). Each result lane therefore comes from the same lane of
; one of the two inputs.
;
; Two tools are run over this file for the aarch64--linux-gnu triple:
;   * opt with the cost-model analysis, verified by the checks that use the
;     cost prefix (one estimated cost per shufflevector instruction);
;   * llc, verified by the checks that use the code prefix (the instructions
;     expected in the generated assembly).
;
; NOTE(review): "-cost-model -analyze" looks like legacy pass manager syntax;
; newer opt releases may need -passes="print<cost-model>" instead -- confirm
; against the LLVM version this test is run with.
; NOTE(review): the assembly checks name exact registers (v0, v1, v2), so they
; presumably depend on the current register assignment -- verify when updating.

; RUN: opt < %s -mtriple=aarch64--linux-gnu -cost-model -analyze | FileCheck %s --check-prefix=COST
; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE

; <8 x i8>: expected cost 42; the assembly is expected to contain a tbl with a
; single-register table.
; COST-LABEL: sel.v8i8
; COST: Found an estimated cost of 42 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
; CODE-LABEL: sel.v8i8
; CODE: tbl v0.8b, { v0.16b }, v2.8b
define <8 x i8> @sel.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
  %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x i8> %tmp0
}

; <16 x i8>: expected cost 90; the assembly is expected to contain a tbl with a
; two-register table (v0, v1).
; COST-LABEL: sel.v16i8
; COST: Found an estimated cost of 90 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
; CODE-LABEL: sel.v16i8
; CODE: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
define <16 x i8> @sel.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
  %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x i8> %tmp0
}

; <4 x i16>: expected cost 18; the assembly is expected to contain rev32
; followed by trn2.
; COST-LABEL: sel.v4i16
; COST: Found an estimated cost of 18 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CODE-LABEL: sel.v4i16
; CODE: rev32 v0.4h, v0.4h
; CODE: trn2 v0.4h, v0.4h, v1.4h
define <4 x i16> @sel.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
  %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i16> %tmp0
}

; <8 x i16>: expected cost 42; the assembly is expected to contain a tbl with a
; two-register table (v0, v1).
; COST-LABEL: sel.v8i16
; COST: Found an estimated cost of 42 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
; CODE-LABEL: sel.v8i16
; CODE: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
define <8 x i16> @sel.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
  %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x i16> %tmp0
}

; <2 x i32>: expected cost 1; the assembly is expected to contain a single
; lane move from v1 into v0.
; COST-LABEL: sel.v2i32
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 3>
; CODE-LABEL: sel.v2i32
; CODE: mov v0.s[1], v1.s[1]
define <2 x i32> @sel.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
  %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 3>
  ret <2 x i32> %tmp0
}

; <4 x i32>: expected cost 2; the assembly is expected to contain rev64
; followed by trn2.
; COST-LABEL: sel.v4i32
; COST: Found an estimated cost of 2 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CODE-LABEL: sel.v4i32
; CODE: rev64 v0.4s, v0.4s
; CODE: trn2 v0.4s, v0.4s, v1.4s
define <4 x i32> @sel.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
  %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %tmp0
}

; <2 x i64>: expected cost 1; the assembly is expected to contain a single
; lane move from v1 into v0.
; COST-LABEL: sel.v2i64
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 3>
; CODE-LABEL: sel.v2i64
; CODE: mov v0.d[1], v1.d[1]
define <2 x i64> @sel.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
  %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %tmp0
}

; <2 x float>: same expectations as the <2 x i32> case (cost 1, one lane move).
; COST-LABEL: sel.v2f32
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 3>
; CODE-LABEL: sel.v2f32
; CODE: mov v0.s[1], v1.s[1]
define <2 x float> @sel.v2f32(<2 x float> %v0, <2 x float> %v1) {
  %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 3>
  ret <2 x float> %tmp0
}

; <4 x float>: same expectations as the <4 x i32> case (cost 2, rev64 followed
; by trn2).
; COST-LABEL: sel.v4f32
; COST: Found an estimated cost of 2 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CODE-LABEL: sel.v4f32
; CODE: rev64 v0.4s, v0.4s
; CODE: trn2 v0.4s, v0.4s, v1.4s
define <4 x float> @sel.v4f32(<4 x float> %v0, <4 x float> %v1) {
  %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %tmp0
}

; <2 x double>: same expectations as the <2 x i64> case (cost 1, one lane move).
; COST-LABEL: sel.v2f64
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 3>
; CODE-LABEL: sel.v2f64
; CODE: mov v0.d[1], v1.d[1]
define <2 x double> @sel.v2f64(<2 x double> %v0, <2 x double> %v1) {
  %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %tmp0
}