Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
      2 ; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
      3 ; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
      4 ; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
      5 ; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
      6 ; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
      7 ; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
      8 ;
      9 ; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM
     10 ; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM
     11 ; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2
     12 
     13 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
     14 target triple = "x86_64-apple-macosx10.8.0"
     15 
     16 define i32 @ceil(i32 %arg) {
     17 ; SSE2-LABEL: 'ceil'
     18 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
     19 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
     20 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
     21 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
     22 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
     23 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
     24 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
     25 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
     26 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
     27 ;
     28 ; SSE42-LABEL: 'ceil'
     29 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
     30 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
     31 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
     32 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
     33 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
     34 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
     35 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
     36 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
     37 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
     38 ;
     39 ; AVX-LABEL: 'ceil'
     40 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
     41 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
     42 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
     43 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
     44 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
     45 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
     46 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
     47 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
     48 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
     49 ;
     50 ; AVX512-LABEL: 'ceil'
     51 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
     52 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
     53 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
     54 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
     55 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
     56 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
     57 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
     58 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
     59 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
     60 ;
     61 ; SLM-LABEL: 'ceil'
     62 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
     63 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
     64 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
     65 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
     66 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
     67 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
     68 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
     69 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
     70 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
     71 ;
     72 ; GLM-LABEL: 'ceil'
     73 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
     74 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
     75 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
     76 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
     77 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
     78 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
     79 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
     80 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
     81 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
     82 ;
     83 ; BTVER2-LABEL: 'ceil'
     84 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
     85 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
     86 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
     87 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
     88 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
     89 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
     90 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
     91 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
     92 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
     93 ;
     94   %F32 = call float @llvm.ceil.f32(float undef)
     95   %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
     96   %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
     97   %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
     98 
     99   %F64 = call double @llvm.ceil.f64(double undef)
    100   %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
    101   %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
    102   %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
    103 
    104   ret i32 undef
    105 }
    106 
    107 define i32 @floor(i32 %arg) {
    108 ; SSE2-LABEL: 'floor'
    109 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.floor.f32(float undef)
    110 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
    111 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
    112 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
    113 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.floor.f64(double undef)
    114 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
    115 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
    116 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
    117 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    118 ;
    119 ; SSE42-LABEL: 'floor'
    120 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef)
    121 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
    122 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
    123 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
    124 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef)
    125 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
    126 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
    127 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
    128 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    129 ;
    130 ; AVX-LABEL: 'floor'
    131 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef)
    132 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
    133 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
    134 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
    135 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef)
    136 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
    137 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
    138 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
    139 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    140 ;
    141 ; AVX512-LABEL: 'floor'
    142 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef)
    143 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
    144 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
    145 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
    146 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef)
    147 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
    148 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
    149 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
    150 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    151 ;
    152 ; SLM-LABEL: 'floor'
    153 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef)
    154 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
    155 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
    156 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
    157 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef)
    158 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
    159 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
    160 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
    161 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    162 ;
    163 ; GLM-LABEL: 'floor'
    164 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef)
    165 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
    166 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
    167 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
    168 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef)
    169 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
    170 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
    171 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
    172 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    173 ;
    174 ; BTVER2-LABEL: 'floor'
    175 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef)
    176 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
    177 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
    178 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
    179 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef)
    180 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
    181 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
    182 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
    183 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    184 ;
    185   %F32 = call float @llvm.floor.f32(float undef)
    186   %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
    187   %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
    188   %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
    189 
    190   %F64 = call double @llvm.floor.f64(double undef)
    191   %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
    192   %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
    193   %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
    194 
    195   ret i32 undef
    196 }
    197 
    198 define i32 @nearbyint(i32 %arg) {
    199 ; SSE2-LABEL: 'nearbyint'
    200 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
    201 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
    202 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
    203 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
    204 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
    205 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
    206 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
    207 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
    208 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    209 ;
    210 ; SSE42-LABEL: 'nearbyint'
    211 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
    212 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
    213 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
    214 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
    215 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
    216 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
    217 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
    218 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
    219 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    220 ;
    221 ; AVX-LABEL: 'nearbyint'
    222 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
    223 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
    224 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
    225 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
    226 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
    227 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
    228 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
    229 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
    230 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    231 ;
    232 ; AVX512-LABEL: 'nearbyint'
    233 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
    234 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
    235 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
    236 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
    237 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
    238 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
    239 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
    240 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
    241 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    242 ;
    243 ; SLM-LABEL: 'nearbyint'
    244 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
    245 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
    246 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
    247 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
    248 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
    249 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
    250 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
    251 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
    252 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    253 ;
    254 ; GLM-LABEL: 'nearbyint'
    255 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
    256 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
    257 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
    258 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
    259 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
    260 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
    261 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
    262 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
    263 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    264 ;
    265 ; BTVER2-LABEL: 'nearbyint'
    266 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
    267 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
    268 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
    269 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
    270 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
    271 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
    272 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
    273 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
    274 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    275 ;
    276   %F32 = call float @llvm.nearbyint.f32(float undef)
    277   %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
    278   %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
    279   %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
    280 
    281   %F64 = call double @llvm.nearbyint.f64(double undef)
    282   %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
    283   %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
    284   %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
    285 
    286   ret i32 undef
    287 }
    288 
    289 define i32 @rint(i32 %arg) {
    290 ; SSE2-LABEL: 'rint'
    291 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.rint.f32(float undef)
    292 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
    293 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
    294 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
    295 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.rint.f64(double undef)
    296 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
    297 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
    298 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
    299 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    300 ;
    301 ; SSE42-LABEL: 'rint'
    302 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
    303 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
    304 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
    305 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
    306 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
    307 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
    308 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
    309 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
    310 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    311 ;
    312 ; AVX-LABEL: 'rint'
    313 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
    314 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
    315 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
    316 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
    317 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
    318 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
    319 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
    320 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
    321 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    322 ;
    323 ; AVX512-LABEL: 'rint'
    324 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
    325 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
    326 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
    327 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
    328 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
    329 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
    330 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
    331 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
    332 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    333 ;
    334 ; SLM-LABEL: 'rint'
    335 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
    336 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
    337 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
    338 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
    339 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
    340 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
    341 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
    342 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
    343 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    344 ;
    345 ; GLM-LABEL: 'rint'
    346 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
    347 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
    348 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
    349 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
    350 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
    351 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
    352 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
    353 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
    354 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    355 ;
    356 ; BTVER2-LABEL: 'rint'
    357 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
    358 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
    359 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
    360 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
    361 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
    362 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
    363 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
    364 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
    365 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    366 ;
    367   %F32 = call float @llvm.rint.f32(float undef)
    368   %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
    369   %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
    370   %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
    371 
    372   %F64 = call double @llvm.rint.f64(double undef)
    373   %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
    374   %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
    375   %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
    376 
    377   ret i32 undef
    378 }
    379 
    380 define i32 @trunc(i32 %arg) {
    381 ; SSE2-LABEL: 'trunc'
    382 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
    383 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
    384 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
    385 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
    386 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
    387 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
    388 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
    389 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
    390 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    391 ;
    392 ; SSE42-LABEL: 'trunc'
    393 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
    394 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
    395 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
    396 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
    397 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
    398 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
    399 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
    400 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
    401 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    402 ;
    403 ; AVX-LABEL: 'trunc'
    404 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
    405 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
    406 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
    407 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
    408 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
    409 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
    410 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
    411 ; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
    412 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    413 ;
    414 ; AVX512-LABEL: 'trunc'
    415 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
    416 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
    417 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
    418 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
    419 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
    420 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
    421 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
    422 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
    423 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    424 ;
    425 ; SLM-LABEL: 'trunc'
    426 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
    427 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
    428 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
    429 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
    430 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
    431 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
    432 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
    433 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
    434 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    435 ;
    436 ; GLM-LABEL: 'trunc'
    437 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
    438 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
    439 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
    440 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
    441 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
    442 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
    443 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
    444 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
    445 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    446 ;
    447 ; BTVER2-LABEL: 'trunc'
    448 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
    449 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
    450 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
    451 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
    452 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
    453 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
    454 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
    455 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
    456 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
    457 ;
    458   %F32 = call float @llvm.trunc.f32(float undef)
    459   %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
    460   %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
    461   %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
    462 
    463   %F64 = call double @llvm.trunc.f64(double undef)
    464   %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
    465   %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
    466   %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
    467 
    468   ret i32 undef
    469 }
    470 
    471 declare float @llvm.ceil.f32(float)
    472 declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
    473 declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
    474 declare <16 x float> @llvm.ceil.v16f32(<16 x float>)
    475 
    476 declare double @llvm.ceil.f64(double)
    477 declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
    478 declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
    479 declare <8 x double> @llvm.ceil.v8f64(<8 x double>)
    480 
    481 declare float @llvm.floor.f32(float)
    482 declare <4 x float> @llvm.floor.v4f32(<4 x float>)
    483 declare <8 x float> @llvm.floor.v8f32(<8 x float>)
    484 declare <16 x float> @llvm.floor.v16f32(<16 x float>)
    485 
    486 declare double @llvm.floor.f64(double)
    487 declare <2 x double> @llvm.floor.v2f64(<2 x double>)
    488 declare <4 x double> @llvm.floor.v4f64(<4 x double>)
    489 declare <8 x double> @llvm.floor.v8f64(<8 x double>)
    490 
    491 declare float @llvm.nearbyint.f32(float)
    492 declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
    493 declare <8 x float> @llvm.nearbyint.v8f32(<8 x float>)
    494 declare <16 x float> @llvm.nearbyint.v16f32(<16 x float>)
    495 
    496 declare double @llvm.nearbyint.f64(double)
    497 declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
    498 declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>)
    499 declare <8 x double> @llvm.nearbyint.v8f64(<8 x double>)
    500 
    501 declare float @llvm.rint.f32(float)
    502 declare <4 x float> @llvm.rint.v4f32(<4 x float>)
    503 declare <8 x float> @llvm.rint.v8f32(<8 x float>)
    504 declare <16 x float> @llvm.rint.v16f32(<16 x float>)
    505 
    506 declare double @llvm.rint.f64(double)
    507 declare <2 x double> @llvm.rint.v2f64(<2 x double>)
    508 declare <4 x double> @llvm.rint.v4f64(<4 x double>)
    509 declare <8 x double> @llvm.rint.v8f64(<8 x double>)
    510 
    511 declare float @llvm.trunc.f32(float)
    512 declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
    513 declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
    514 declare <16 x float> @llvm.trunc.v16f32(<16 x float>)
    515 
    516 declare double @llvm.trunc.f64(double)
    517 declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
    518 declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
    519 declare <8 x double> @llvm.trunc.v8f64(<8 x double>)
    520