; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX

; Cost-model test for integer and int-to-fp cast instructions.
; The analysis is run once with -mcpu=core-avx2 and once with -mcpu=corei7-avx.
; Directives using the shared prefix are verified in both invocations; the
; ones carrying the -AVX2 or -AVX suffix are verified only in the matching one.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; Casts between i1 and i32, as 4-element vectors, 8-element vectors and
; scalars. All expectations here use the shared prefix.
define i32 @add(i32 %arg) {
  ; CHECK-LABEL: for function 'add'

  ; -- Same size registers --
  ; CHECK: cost of 1 {{.*}} zext
  %A = zext <4 x i1> undef to <4 x i32>
  ; CHECK: cost of 2 {{.*}} sext
  %B = sext <4 x i1> undef to <4 x i32>
  ; CHECK: cost of 0 {{.*}} trunc
  %C = trunc <4 x i32> undef to <4 x i1>

  ; -- Different size registers --
  ; Negative checks: the listed costs must not be reported in this region
  ; of the analysis output.
  ; CHECK-NOT: cost of 1 {{.*}} zext
  %D = zext <8 x i1> undef to <8 x i32>
  ; CHECK-NOT: cost of 2 {{.*}} sext
  %E = sext <8 x i1> undef to <8 x i32>
  ; CHECK-NOT: cost of 2 {{.*}} trunc
  %F = trunc <8 x i32> undef to <8 x i1>

  ; -- Scalars --
  ; CHECK: cost of 1 {{.*}} zext
  %G = zext i1 undef to i32
  ; CHECK: cost of 0 {{.*}} trunc
  %H = trunc i32 undef to i1

  ; CHECK: cost of 0 {{.*}} ret
  ret i32 undef
}

; Vector zext / sext / trunc between element widths from i1 up to i64.
; Each instruction has one expected cost per CPU configuration.
define i32 @zext_sext(<8 x i1> %in) {
  ; CHECK-AVX2-LABEL: for function 'zext_sext'
  ; CHECK-AVX-LABEL: for function 'zext_sext'

  ; Extensions of the <8 x i1> argument.
  ; CHECK-AVX2: cost of 3 {{.*}} zext
  ; CHECK-AVX: cost of 4 {{.*}} zext
  %Z = zext <8 x i1> %in to <8 x i32>
  ; CHECK-AVX2: cost of 3 {{.*}} sext
  ; CHECK-AVX: cost of 7 {{.*}} sext
  %S = sext <8 x i1> %in to <8 x i32>

  ; Extensions that double the element width.
  ; CHECK-AVX2: cost of 1 {{.*}} zext
  ; CHECK-AVX: cost of 4 {{.*}} zext
  %A1 = zext <16 x i8> undef to <16 x i16>
  ; CHECK-AVX2: cost of 1 {{.*}} sext
  ; CHECK-AVX: cost of 4 {{.*}} sext
  %A2 = sext <16 x i8> undef to <16 x i16>
  ; CHECK-AVX2: cost of 1 {{.*}} sext
  ; CHECK-AVX: cost of 4 {{.*}} sext
  %A = sext <8 x i16> undef to <8 x i32>
  ; CHECK-AVX2: cost of 1 {{.*}} zext
  ; CHECK-AVX: cost of 4 {{.*}} zext
  %B = zext <8 x i16> undef to <8 x i32>
  ; CHECK-AVX2: cost of 1 {{.*}} sext
  ; CHECK-AVX: cost of 4 {{.*}} sext
  %C = sext <4 x i32> undef to <4 x i64>

  ; Extensions that quadruple the element width.
  ; CHECK-AVX2: cost of 3 {{.*}} zext
  ; CHECK-AVX: cost of 4 {{.*}} zext
  %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
  ; CHECK-AVX2: cost of 3 {{.*}} sext
  ; CHECK-AVX: cost of 7 {{.*}} sext
  %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
  ; CHECK-AVX2: cost of 3 {{.*}} zext
  ; CHECK-AVX: cost of 3 {{.*}} zext
  %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
  ; CHECK-AVX2: cost of 3 {{.*}} sext
  ; CHECK-AVX: cost of 6 {{.*}} sext
  %C.v4i16.s = sext <4 x i16> undef to <4 x i64>

  ; Extensions from i8 to i64.
  ; CHECK-AVX2: cost of 3 {{.*}} zext
  ; CHECK-AVX: cost of 4 {{.*}} zext
  %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
  ; CHECK-AVX2: cost of 3 {{.*}} sext
  ; CHECK-AVX: cost of 6 {{.*}} sext
  %C.v4i8.s = sext <4 x i8> undef to <4 x i64>

  ; CHECK-AVX2: cost of 1 {{.*}} zext
  ; CHECK-AVX: cost of 4 {{.*}} zext
  %D = zext <4 x i32> undef to <4 x i64>

  ; Truncations.
  ; CHECK-AVX2: cost of 2 {{.*}} trunc
  ; CHECK-AVX: cost of 4 {{.*}} trunc
  %E = trunc <4 x i64> undef to <4 x i32>
  ; CHECK-AVX2: cost of 2 {{.*}} trunc
  ; CHECK-AVX: cost of 5 {{.*}} trunc
  %F = trunc <8 x i32> undef to <8 x i16>
  ; CHECK-AVX2: cost of 4 {{.*}} trunc
  ; CHECK-AVX: cost of 4 {{.*}} trunc
  %F1 = trunc <16 x i16> undef to <16 x i8>
  ; CHECK-AVX2: cost of 2 {{.*}} trunc
  ; CHECK-AVX: cost of 4 {{.*}} trunc
  %F2 = trunc <8 x i32> undef to <8 x i8>
  ; CHECK-AVX2: cost of 2 {{.*}} trunc
  ; CHECK-AVX: cost of 4 {{.*}} trunc
  %F3 = trunc <4 x i64> undef to <4 x i8>

  ; Truncation of an <8 x i64> source.
  ; CHECK-AVX2: cost of 4 {{.*}} trunc
  ; CHECK-AVX: cost of 9 {{.*}} trunc
  %G = trunc <8 x i64> undef to <8 x i32>

  ret i32 undef
}

; Extension of an <8 x i1> mask argument to <8 x i32>.
define i32 @masks8(<8 x i1> %in) {
  ; CHECK-AVX2-LABEL: for function 'masks8'
  ; CHECK-AVX-LABEL: for function 'masks8'

  ; CHECK-AVX2: cost of 3 {{.*}} zext
  ; CHECK-AVX: cost of 4 {{.*}} zext
  %Z = zext <8 x i1> %in to <8 x i32>
  ; CHECK-AVX2: cost of 3 {{.*}} sext
  ; CHECK-AVX: cost of 7 {{.*}} sext
  %S = sext <8 x i1> %in to <8 x i32>
  ret i32 undef
}

; Extension of a <4 x i1> mask argument to <4 x i64>.
define i32 @masks4(<4 x i1> %in) {
  ; CHECK-AVX2-LABEL: for function 'masks4'
  ; CHECK-AVX-LABEL: for function 'masks4'

  ; CHECK-AVX2: cost of 3 {{.*}} zext
  ; CHECK-AVX: cost of 4 {{.*}} zext
  %Z = zext <4 x i1> %in to <4 x i64>
  ; CHECK-AVX2: cost of 3 {{.*}} sext
  ; CHECK-AVX: cost of 6 {{.*}} sext
  %S = sext <4 x i1> %in to <4 x i64>
  ret i32 undef
}

; Signed int-to-fp conversions of 4-element vectors (i1, i8, i16, i32 sources)
; to <4 x float> and <4 x double>. Expected costs are shared by both CPUs.
define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
  ; CHECK-LABEL: for function 'sitofp4'

  ; CHECK: cost of 3 {{.*}} sitofp
  %A1 = sitofp <4 x i1> %a to <4 x float>
  ; CHECK: cost of 3 {{.*}} sitofp
  %A2 = sitofp <4 x i1> %a to <4 x double>

  ; CHECK: cost of 3 {{.*}} sitofp
  %B1 = sitofp <4 x i8> %b to <4 x float>
  ; CHECK: cost of 3 {{.*}} sitofp
  %B2 = sitofp <4 x i8> %b to <4 x double>

  ; CHECK: cost of 3 {{.*}} sitofp
  %C1 = sitofp <4 x i16> %c to <4 x float>
  ; CHECK: cost of 3 {{.*}} sitofp
  %C2 = sitofp <4 x i16> %c to <4 x double>

  ; CHECK: cost of 1 {{.*}} sitofp
  %D1 = sitofp <4 x i32> %d to <4 x float>
  ; CHECK: cost of 1 {{.*}} sitofp
  %D2 = sitofp <4 x i32> %d to <4 x double>
  ret void
}

; Signed int-to-fp conversions of 8-element vectors to <8 x float>.
define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
  ; CHECK-LABEL: for function 'sitofp8'

  ; CHECK: cost of 8 {{.*}} sitofp
  %A1 = sitofp <8 x i1> %a to <8 x float>

  ; CHECK: cost of 8 {{.*}} sitofp
  %B1 = sitofp <8 x i8> %b to <8 x float>

  ; CHECK: cost of 5 {{.*}} sitofp
  %C1 = sitofp <8 x i16> %c to <8 x float>

  ; CHECK: cost of 1 {{.*}} sitofp
  %D1 = sitofp <8 x i32> %d to <8 x float>
  ret void
}

; Unsigned int-to-fp conversions of 4-element vectors (i1, i8, i16, i32
; sources) to <4 x float> and <4 x double>.
define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
  ; CHECK-LABEL: for function 'uitofp4'

  ; CHECK: cost of 7 {{.*}} uitofp
  %A1 = uitofp <4 x i1> %a to <4 x float>
  ; CHECK: cost of 7 {{.*}} uitofp
  %A2 = uitofp <4 x i1> %a to <4 x double>

  ; CHECK: cost of 2 {{.*}} uitofp
  %B1 = uitofp <4 x i8> %b to <4 x float>
  ; CHECK: cost of 2 {{.*}} uitofp
  %B2 = uitofp <4 x i8> %b to <4 x double>

  ; CHECK: cost of 2 {{.*}} uitofp
  %C1 = uitofp <4 x i16> %c to <4 x float>
  ; CHECK: cost of 2 {{.*}} uitofp
  %C2 = uitofp <4 x i16> %c to <4 x double>

  ; CHECK: cost of 6 {{.*}} uitofp
  %D1 = uitofp <4 x i32> %d to <4 x float>
  ; CHECK: cost of 6 {{.*}} uitofp
  %D2 = uitofp <4 x i32> %d to <4 x double>
  ret void
}

; Unsigned int-to-fp conversions of 8-element vectors to <8 x float>.
define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
  ; CHECK-LABEL: for function 'uitofp8'

  ; CHECK: cost of 6 {{.*}} uitofp
  %A1 = uitofp <8 x i1> %a to <8 x float>

  ; CHECK: cost of 5 {{.*}} uitofp
  %B1 = uitofp <8 x i8> %b to <8 x float>

  ; CHECK: cost of 5 {{.*}} uitofp
  %C1 = uitofp <8 x i16> %c to <8 x float>

  ; CHECK: cost of 9 {{.*}} uitofp
  %D1 = uitofp <8 x i32> %d to <8 x float>
  ret void
}