Home | History | Annotate | Download | only in X86
      1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
      2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
      3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
      4 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
      5 
      6 
      7 ; Verify the cost of vector select instructions.
      8 
      9 ; SSE41 added blend instructions with an immediate for <2 x double> and
     10 ; <4 x float>. Integers of the same size should also use those instructions.
     11 
     12 define <2 x i64> @test_2i64(<2 x i64> %a, <2 x i64> %b) {
        ; Picks lanes from %a or %b using the constant mask <true, false>.
        ; All four run configurations expect the select to be costed at 1.
     13 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_2i64':
     14 ; SSE2: Cost Model: {{.*}} 1 for instruction:   %sel = select <2 x i1>
     15 ; SSE41: Cost Model: {{.*}} 1 for instruction:   %sel = select <2 x i1>
     16 ; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <2 x i1>
     17 ; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <2 x i1>
     18   %sel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %a, <2 x i64> %b
     19   ret <2 x i64> %sel
     20 }
     21 
     22 define <2 x double> @test_2double(<2 x double> %a, <2 x double> %b) {
        ; Floating-point counterpart of the <2 x i64> case, with the same constant
        ; mask <true, false>. All four run configurations expect cost 1.
     23 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_2double':
     24 ; SSE2: Cost Model: {{.*}} 1 for instruction:   %sel = select <2 x i1>
     25 ; SSE41: Cost Model: {{.*}} 1 for instruction:   %sel = select <2 x i1>
     26 ; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <2 x i1>
     27 ; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <2 x i1>
     28   %sel = select <2 x i1> <i1 true, i1 false>, <2 x double> %a, <2 x double> %b
     29   ret <2 x double> %sel
     30 }
     31 
     32 define <4 x i32> @test_4i32(<4 x i32> %a, <4 x i32> %b) {
        ; Picks lanes from %a or %b using the constant alternating mask
        ; <true, false, true, false>. All four run configurations expect cost 1.
     33 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_4i32':
     34 ; SSE2: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
     35 ; SSE41: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
     36 ; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
     37 ; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
     38   %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %a, <4 x i32> %b
     39   ret <4 x i32> %sel
     40 }
     41 
     42 define <4 x float> @test_4float(<4 x float> %a, <4 x float> %b) {
        ; Picks lanes from %a or %b using the constant mask
        ; <true, false, true, true>. All four run configurations expect cost 1.
     43 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_4float':
     44 ; SSE2: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
     45 ; SSE41: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
     46 ; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
     47 ; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
     48   %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
     49   ret <4 x float> %sel
     50 }
     51 
     52 define <16 x i8> @test_16i8(<16 x i8> %a, <16 x i8> %b) {
        ; Byte-element case: the constant 16-lane mask repeats the pattern
        ; <true, false, true, true> four times. All four run configurations
        ; expect cost 1.
     53 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_16i8':
     54 ; SSE2: Cost Model: {{.*}} 1 for instruction:   %sel = select <16 x i1>
     55 ; SSE41: Cost Model: {{.*}} 1 for instruction:   %sel = select <16 x i1>
     56 ; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <16 x i1>
     57 ; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <16 x i1>
     58   %sel = select <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true>, <16 x i8> %a, <16 x i8> %b
     59   ret <16 x i8> %sel
     60 }
     61 
     62 ; AVX added blend instructions with an immediate for <4 x double> and
     63 ; <8 x float>. Integers of the same size should also use those instructions.
     64 define <4 x i64> @test_4i64(<4 x i64> %a, <4 x i64> %b) {
        ; 256-bit integer case with the constant mask <true, false, false, true>.
        ; The SSE2 and SSE41 runs expect cost 2; the AVX and AVX2 runs expect
        ; cost 1.
     65 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_4i64':
     66 ; SSE2: Cost Model: {{.*}} 2 for instruction:   %sel = select <4 x i1>
     67 ; SSE41: Cost Model: {{.*}} 2 for instruction:   %sel = select <4 x i1>
     68 ; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
     69 ; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
     70   %sel = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i64> %a, <4 x i64> %b
     71   ret <4 x i64> %sel
     72 }
     73 
     74 define <4 x double> @test_4double(<4 x double> %a, <4 x double> %b) {
        ; 256-bit double case with the constant alternating mask
        ; <true, false, true, false>. The SSE2 and SSE41 runs expect cost 2;
        ; the AVX and AVX2 runs expect cost 1.
     75 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_4double':
     76 ; SSE2: Cost Model: {{.*}} 2 for instruction:   %sel = select <4 x i1>
     77 ; SSE41: Cost Model: {{.*}} 2 for instruction:   %sel = select <4 x i1>
     78 ; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
     79 ; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <4 x i1>
     80   %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %a, <4 x double> %b
     81   ret <4 x double> %sel
     82 }
     83 
     84 define <8 x i32> @test_8i32(<8 x i32> %a, <8 x i32> %b) {
        ; 256-bit i32 case with an irregular constant 8-lane mask. The SSE2 and
        ; SSE41 runs expect cost 2; the AVX and AVX2 runs expect cost 1.
     85 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_8i32':
     86 ; SSE2: Cost Model: {{.*}} 2 for instruction:   %sel = select <8 x i1>
     87 ; SSE41: Cost Model: {{.*}} 2 for instruction:   %sel = select <8 x i1>
     88 ; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <8 x i1>
     89 ; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <8 x i1>
     90   %sel = select <8 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 false>, <8 x i32> %a, <8 x i32> %b
     91   ret <8 x i32> %sel
     92 }
     93 
     94 define <8 x float> @test_8float(<8 x float> %a, <8 x float> %b) {
        ; 256-bit float case; the constant mask takes lanes 0 and 4 from %a and
        ; the remaining lanes from %b. The SSE2 and SSE41 runs expect cost 2;
        ; the AVX and AVX2 runs expect cost 1.
     95 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_8float':
     96 ; SSE2: Cost Model: {{.*}} 2 for instruction:   %sel = select <8 x i1>
     97 ; SSE41: Cost Model: {{.*}} 2 for instruction:   %sel = select <8 x i1>
     98 ; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <8 x i1>
     99 ; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <8 x i1>
    100   %sel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %a, <8 x float> %b
    101   ret <8 x float> %sel
    102 }
    103 
    104 ; AVX2 cases, covering 256-bit vectors of narrow integers (<16 x i16> and <32 x i8>).
    105 define <16 x i16> @test_16i16(<16 x i16> %a, <16 x i16> %b) {
        ; 256-bit i16 case; the constant 16-lane mask repeats the pattern
        ; <true, false, false, false> four times. The SSE2 and SSE41 runs expect
        ; cost 2; the AVX and AVX2 runs expect cost 1.
    106 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_16i16':
    107 ; SSE2: Cost Model: {{.*}} 2 for instruction:   %sel = select <16 x i1>
    108 ; SSE41: Cost Model: {{.*}} 2 for instruction:   %sel = select <16 x i1>
    109 ; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <16 x i1>
    110 ; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <16 x i1>
    111   %sel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i16> %a, <16 x i16> %b
    112   ret <16 x i16> %sel
    113 }
    114 
    115 define <32 x i8> @test_32i8(<32 x i8> %a, <32 x i8> %b) {
        ; 256-bit byte-element case; the constant 32-lane mask repeats the
        ; pattern <true, false, true, true> eight times. The SSE2 and SSE41 runs
        ; expect cost 2; the AVX and AVX2 runs expect cost 1.
    116 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_32i8':
    117 ; SSE2: Cost Model: {{.*}} 2 for instruction:   %sel = select <32 x i1>
    118 ; SSE41: Cost Model: {{.*}} 2 for instruction:   %sel = select <32 x i1>
    119 ; AVX: Cost Model: {{.*}} 1 for instruction:   %sel = select <32 x i1>
    120 ; AVX2: Cost Model: {{.*}} 1 for instruction:   %sel = select <32 x i1>
    121   %sel = select <32 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true>, <32 x i8> %a, <32 x i8> %b
    122   ret <32 x i8> %sel
    123 }
    124 
    125