; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX

; Cost-model test for cast instructions (zext, sext, trunc, sitofp, uitofp,
; fpext, fptrunc). The module is analyzed three times, once per -mcpu value
; above (knl, core-avx2, corei7-avx). Directives using the plain prefix are
; evaluated in all three runs; directives using the AVX512 / AVX2 / AVX
; suffixed prefixes are evaluated only in the matching run.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; Casts between i1 and i32, for 4-element vectors, 8-element vectors and
; scalars. Expectations here are shared by all three runs.
define i32 @add(i32 %arg) {
; CHECK-LABEL: for function 'add'
  ; -- Same size registers --
  ;CHECK: cost of 1 {{.*}} zext
  %A = zext <4 x i1> undef to <4 x i32>
  ;CHECK: cost of 2 {{.*}} sext
  %B = sext <4 x i1> undef to <4 x i32>
  ;CHECK: cost of 0 {{.*}} trunc
  %C = trunc <4 x i32> undef to <4 x i1>

  ; -- Different size registers --
  ; The 8-element forms are only required NOT to report the 4-element costs.
  ;CHECK-NOT: cost of 1 {{.*}} zext
  %D = zext <8 x i1> undef to <8 x i32>
  ;CHECK-NOT: cost of 2 {{.*}} sext
  %E = sext <8 x i1> undef to <8 x i32>
  ;CHECK-NOT: cost of 2 {{.*}} trunc
  %F = trunc <8 x i32> undef to <8 x i1>

  ; -- scalars --

  ;CHECK: cost of 1 {{.*}} zext
  %G = zext i1 undef to i32
  ;CHECK: cost of 0 {{.*}} trunc
  %H = trunc i32 undef to i1

  ;CHECK: cost of 0 {{.*}} ret
  ret i32 undef
}

; Integer vector extensions and truncations. Most expectations are specific to
; the core-avx2 and corei7-avx runs; the 512-bit wide cases (%D1-%D5, %G-%G2)
; carry expectations for the knl run.
define i32 @zext_sext(<8 x i1> %in) {
; CHECK-AVX2-LABEL: for function 'zext_sext'
; CHECK-AVX-LABEL: for function 'zext_sext'
  ;CHECK-AVX2: cost of 3 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %Z = zext <8 x i1> %in to <8 x i32>
  ;CHECK-AVX2: cost of 3 {{.*}} sext
  ;CHECK-AVX: cost of 7 {{.*}} sext
  %S = sext <8 x i1> %in to <8 x i32>

  ;CHECK-AVX2: cost of 1 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %A1 = zext <16 x i8> undef to <16 x i16>
  ;CHECK-AVX2: cost of 1 {{.*}} sext
  ;CHECK-AVX: cost of 4 {{.*}} sext
  %A2 = sext <16 x i8> undef to <16 x i16>
  ;CHECK-AVX2: cost of 1 {{.*}} sext
  ;CHECK-AVX: cost of 4 {{.*}} sext
  %A = sext <8 x i16> undef to <8 x i32>
  ;CHECK-AVX2: cost of 1 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %B = zext <8 x i16> undef to <8 x i32>
  ;CHECK-AVX2: cost of 1 {{.*}} sext
  ;CHECK-AVX: cost of 4 {{.*}} sext
  %C = sext <4 x i32> undef to <4 x i64>

  ;CHECK-AVX2: cost of 3 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
  ;CHECK-AVX2: cost of 3 {{.*}} sext
  ;CHECK-AVX: cost of 7 {{.*}} sext
  %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
  ;CHECK-AVX2: cost of 3 {{.*}} zext
  ;CHECK-AVX: cost of 3 {{.*}} zext
  %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
  ;CHECK-AVX2: cost of 3 {{.*}} sext
  ;CHECK-AVX: cost of 6 {{.*}} sext
  %C.v4i16.s = sext <4 x i16> undef to <4 x i64>

  ;CHECK-AVX2: cost of 3 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
  ;CHECK-AVX2: cost of 3 {{.*}} sext
  ;CHECK-AVX: cost of 6 {{.*}} sext
  %C.v4i8.s = sext <4 x i8> undef to <4 x i64>

  ;CHECK-AVX2: cost of 1 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %D = zext <4 x i32> undef to <4 x i64>

  ;CHECK-AVX512: cost of 1 {{.*}} %D1 = zext
  %D1 = zext <8 x i32> undef to <8 x i64>

  ;CHECK-AVX512: cost of 1 {{.*}} %D2 = sext
  %D2 = sext <8 x i32> undef to <8 x i64>

  ;CHECK-AVX512: cost of 1 {{.*}} %D3 = zext
  %D3 = zext <16 x i16> undef to <16 x i32>
  ;CHECK-AVX512: cost of 1 {{.*}} %D4 = zext
  %D4 = zext <16 x i8> undef to <16 x i32>
  ;CHECK-AVX512: cost of 2 {{.*}} %D5 = zext
  %D5 = zext <16 x i1> undef to <16 x i32>

  ;CHECK-AVX2: cost of 2 {{.*}} trunc
  ;CHECK-AVX: cost of 4 {{.*}} trunc
  %E = trunc <4 x i64> undef to <4 x i32>
  ;CHECK-AVX2: cost of 2 {{.*}} trunc
  ;CHECK-AVX: cost of 5 {{.*}} trunc
  %F = trunc <8 x i32> undef to <8 x i16>
  ;CHECK-AVX2: cost of 4 {{.*}} trunc
  ;CHECK-AVX: cost of 4 {{.*}} trunc
  %F1 = trunc <16 x i16> undef to <16 x i8>
  ;CHECK-AVX2: cost of 2 {{.*}} trunc
  ;CHECK-AVX: cost of 4 {{.*}} trunc
  %F2 = trunc <8 x i32> undef to <8 x i8>
  ;CHECK-AVX2: cost of 2 {{.*}} trunc
  ;CHECK-AVX: cost of 4 {{.*}} trunc
  %F3 = trunc <4 x i64> undef to <4 x i8>

  ; The knl directive for %G used to be spelled with an underscore after the
  ; prefix word and without the '%' sigil, so it was silently ignored. It is
  ; now a live directive anchored on the value name like its neighbours.
  ; NOTE(review): the expected cost of 1 is carried over from the previously
  ; inert directive; confirm it against the knl run.
  ;CHECK-AVX2: cost of 4 {{.*}} trunc
  ;CHECK-AVX: cost of 9 {{.*}} trunc
  ;CHECK-AVX512: cost of 1 {{.*}} %G = trunc
  %G = trunc <8 x i64> undef to <8 x i32>

  ;CHECK-AVX512: cost of 1 {{.*}} %G1 = trunc
  %G1 = trunc <16 x i32> undef to <16 x i16>

  ;CHECK-AVX512: cost of 1 {{.*}} %G2 = trunc
  %G2 = trunc <16 x i32> undef to <16 x i8>
  ret i32 undef
}

; Extension of an 8-element i1 mask argument to <8 x i32>.
define i32 @masks8(<8 x i1> %in) {
; CHECK-AVX2-LABEL: for function 'masks8'
; CHECK-AVX-LABEL: for function 'masks8'

  ;CHECK-AVX2: cost of 3 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %Z = zext <8 x i1> %in to <8 x i32>
  ;CHECK-AVX2: cost of 3 {{.*}} sext
  ;CHECK-AVX: cost of 7 {{.*}} sext
  %S = sext <8 x i1> %in to <8 x i32>
  ret i32 undef
}

; Extension of a 4-element i1 mask argument to <4 x i64>.
define i32 @masks4(<4 x i1> %in) {
; CHECK-AVX2-LABEL: for function 'masks4'
; CHECK-AVX-LABEL: for function 'masks4'

  ;CHECK-AVX2: cost of 3 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %Z = zext <4 x i1> %in to <4 x i64>
  ;CHECK-AVX2: cost of 3 {{.*}} sext
  ;CHECK-AVX: cost of 6 {{.*}} sext
  %S = sext <4 x i1> %in to <4 x i64>
  ret i32 undef
}

; Signed int-to-fp conversions from 4-element vectors of i1/i8/i16/i32 to
; <4 x float> and <4 x double>. Expectations are shared by all three runs.
define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
; CHECK-LABEL: for function 'sitofp4'
  ; CHECK: cost of 3 {{.*}} sitofp
  %A1 = sitofp <4 x i1> %a to <4 x float>
  ; CHECK: cost of 3 {{.*}} sitofp
  %A2 = sitofp <4 x i1> %a to <4 x double>

  ; CHECK: cost of 3 {{.*}} sitofp
  %B1 = sitofp <4 x i8> %b to <4 x float>
  ; CHECK: cost of 3 {{.*}} sitofp
  %B2 = sitofp <4 x i8> %b to <4 x double>

  ; CHECK: cost of 3 {{.*}} sitofp
  %C1 = sitofp <4 x i16> %c to <4 x float>
  ; CHECK: cost of 3 {{.*}} sitofp
  %C2 = sitofp <4 x i16> %c to <4 x double>

  ; CHECK: cost of 1 {{.*}} sitofp
  %D1 = sitofp <4 x i32> %d to <4 x float>
  ; CHECK: cost of 1 {{.*}} sitofp
  %D2 = sitofp <4 x i32> %d to <4 x double>
  ret void
}

; Signed int-to-fp conversions from 8-element vectors to <8 x float>.
; Expectations are shared by all three runs.
define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
; CHECK-LABEL: for function 'sitofp8'
  ; CHECK: cost of 8 {{.*}} sitofp
  %A1 = sitofp <8 x i1> %a to <8 x float>

  ; CHECK: cost of 8 {{.*}} sitofp
  %B1 = sitofp <8 x i8> %b to <8 x float>

  ; CHECK: cost of 5 {{.*}} sitofp
  %C1 = sitofp <8 x i16> %c to <8 x float>

  ; CHECK: cost of 1 {{.*}} sitofp
  %D1 = sitofp <8 x i32> %d to <8 x float>
  ret void
}

; Unsigned int-to-fp conversions from 4-element vectors. The i32 source cases
; only carry expectations for the core-avx2 run.
define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
; CHECK-LABEL: for function 'uitofp4'
  ; CHECK: cost of 7 {{.*}} uitofp
  %A1 = uitofp <4 x i1> %a to <4 x float>
  ; CHECK: cost of 7 {{.*}} uitofp
  %A2 = uitofp <4 x i1> %a to <4 x double>

  ; CHECK: cost of 2 {{.*}} uitofp
  %B1 = uitofp <4 x i8> %b to <4 x float>
  ; CHECK: cost of 2 {{.*}} uitofp
  %B2 = uitofp <4 x i8> %b to <4 x double>

  ; CHECK: cost of 2 {{.*}} uitofp
  %C1 = uitofp <4 x i16> %c to <4 x float>
  ; CHECK: cost of 2 {{.*}} uitofp
  %C2 = uitofp <4 x i16> %c to <4 x double>

  ; CHECK-AVX2: cost of 6 {{.*}} uitofp
  %D1 = uitofp <4 x i32> %d to <4 x float>
  ; CHECK-AVX2: cost of 6 {{.*}} uitofp
  %D2 = uitofp <4 x i32> %d to <4 x double>
  ret void
}

; Unsigned int-to-fp conversions from 8-element vectors to <8 x float>, with
; per-run expectations.
define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
; CHECK-LABEL: for function 'uitofp8'
  ; CHECK-AVX2: cost of 6 {{.*}} uitofp
  %A1 = uitofp <8 x i1> %a to <8 x float>

  ; CHECK-AVX2: cost of 5 {{.*}} uitofp
  ; CHECK-AVX512: cost of 2 {{.*}} uitofp
  %B1 = uitofp <8 x i8> %b to <8 x float>

  ; CHECK-AVX2: cost of 5 {{.*}} uitofp
  ; CHECK-AVX512: cost of 2 {{.*}} uitofp
  %C1 = uitofp <8 x i16> %c to <8 x float>

  ; CHECK-AVX2: cost of 8 {{.*}} uitofp
  ; CHECK-AVX512: cost of 1 {{.*}} uitofp
  ; CHECK-AVX: cost of 9 {{.*}} uitofp
  %D1 = uitofp <8 x i32> %d to <8 x float>
  ret void
}

; Floating-point extension and truncation between float and double vectors.
; %A1 and %A3 are the same fpext; only %A3 carries a core-avx2 expectation.
define void @fp_conv(<8 x float> %a, <16 x float>%b, <4 x float> %c) {
;CHECK-LABEL: for function 'fp_conv'
  ; CHECK-AVX512: cost of 1 {{.*}} fpext
  %A1 = fpext <8 x float> %a to <8 x double>

  ; CHECK-AVX512: cost of 1 {{.*}} fpext
  %A2 = fpext <4 x float> %c to <4 x double>

  ; CHECK-AVX2: cost of 3 {{.*}} %A3 = fpext
  ; CHECK-AVX512: cost of 1 {{.*}} %A3 = fpext
  %A3 = fpext <8 x float> %a to <8 x double>

  ; CHECK-AVX2: cost of 3 {{.*}} %A4 = fptrunc
  ; CHECK-AVX512: cost of 1 {{.*}} %A4 = fptrunc
  %A4 = fptrunc <8 x double> undef to <8 x float>

  ; CHECK-AVX512: cost of 1 {{.*}} %A5 = fptrunc
  %A5 = fptrunc <4 x double> undef to <4 x float>
  ret void
}