; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SLM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1

; Each function below is compiled once per run line above. The run lines pair a
; CPU model with either SSE4.2 disabled (the *-SSE prefixes, legacy encodings) or
; AVX2 disabled (VEX-encoded "v" mnemonics), and the expected assembly carries the
; "sched" annotation printed by -print-schedule for that CPU model.
; Every function uses its instruction twice: once with both inputs in registers
; and once with an input loaded through the pointer argument.

; blendpd: a register/register blend, an fadd, then a blend whose second input is
; loaded from %a2.
define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_blendpd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SLM-LABEL: test_blendpd:
; SLM:       # %bb.0:
; SLM-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00]
; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [4:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_blendpd:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_blendpd:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_blendpd:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_blendpd:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_blendpd:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_blendpd:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_blendpd:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_blendpd:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_blendpd:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_blendpd:
; SKX:       # %bb.0:
; SKX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_blendpd:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_blendpd:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
; BTVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_blendpd:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [8:0.50]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_blendpd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
; ZNVER1-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 3>
  %2 = load <2 x double>, <2 x double> *%a2, align 16
  %3 = fadd <2 x double> %a1, %1
  %4 = shufflevector <2 x double> %3, <2 x double> %2, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %4
}

; blendps: a register/register blend and a blend against the vector loaded from
; %a2, with the two results combined by an fadd.
define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_blendps:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
; GENERIC-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SLM-LABEL: test_blendps:
; SLM:       # %bb.0:
; SLM-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:1.00]
; SLM-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [4:1.00]
; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_blendps:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
; SANDY-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_blendps:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
; SANDY-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_blendps:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
; HASWELL-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_blendps:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
; HASWELL-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_blendps:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
; BROADWELL-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:0.50]
; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_blendps:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
; BROADWELL-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:0.50]
; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_blendps:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
; SKYLAKE-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_blendps:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
; SKYLAKE-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_blendps:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
; SKX-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_blendps:
; SKX:       # %bb.0:
; SKX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
; SKX-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_blendps:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
; BTVER2-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:1.00]
; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_blendps:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
; BTVER2-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:1.00]
; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_blendps:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
; ZNVER1-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [8:0.50]
; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_blendps:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
; ZNVER1-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [8:0.50]
; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = shufflevector <4 x float> %a1, <4 x float> %2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  %4 = fadd <4 x float> %1, %3
  ret <4 x float> %4
}

; blendvpd: two chained calls to the blendvpd intrinsic sharing the mask %a2; the
; second call takes its middle operand from the vector loaded from %a3.
define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) {
; GENERIC-LABEL: test_blendvpd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    movapd %xmm0, %xmm3 # sched: [1:1.00]
; GENERIC-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
; GENERIC-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
; GENERIC-NEXT:    movapd %xmm3, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SLM-LABEL: test_blendvpd:
; SLM:       # %bb.0:
; SLM-NEXT:    movapd %xmm0, %xmm3 # sched: [1:0.50]
; SLM-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [1:1.00]
; SLM-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [4:1.00]
; SLM-NEXT:    movapd %xmm3, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_blendvpd:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:1.00]
; SANDY-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
; SANDY-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
; SANDY-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_blendvpd:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_blendvpd:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:1.00]
; HASWELL-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
; HASWELL-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
; HASWELL-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_blendvpd:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_blendvpd:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:1.00]
; BROADWELL-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
; BROADWELL-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
; BROADWELL-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_blendvpd:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; BROADWELL-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_blendvpd:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
; SKYLAKE-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
; SKYLAKE-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_blendvpd:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
; SKYLAKE-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_blendvpd:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:0.33]
; SKX-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
; SKX-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
; SKX-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_blendvpd:
; SKX:       # %bb.0:
; SKX-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
; SKX-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_blendvpd:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
; BTVER2-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
; BTVER2-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_blendvpd:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; BTVER2-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_blendvpd:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [1:0.50]
; ZNVER1-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.50]
; ZNVER1-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_blendvpd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; ZNVER1-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  %2 = load <2 x double>, <2 x double> *%a3, align 16
  %3 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %1, <2 x double> %2, <2 x double> %a2)
  ret <2 x double> %3
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

; blendvps: two chained calls to the blendvps intrinsic sharing the mask %a2; the
; second call takes its middle operand from the vector loaded from %a3.
; The load states `align 16` explicitly, as the other vector loads in this file
; do, so the expected memory-operand form does not depend on the default
; alignment of an unannotated load.
define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) {
; GENERIC-LABEL: test_blendvps:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    movaps %xmm0, %xmm3 # sched: [1:1.00]
; GENERIC-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
; GENERIC-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
; GENERIC-NEXT:    movaps %xmm3, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SLM-LABEL: test_blendvps:
; SLM:       # %bb.0:
; SLM-NEXT:    movaps %xmm0, %xmm3 # sched: [1:0.50]
; SLM-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [1:1.00]
; SLM-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [4:1.00]
; SLM-NEXT:    movaps %xmm3, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_blendvps:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:1.00]
; SANDY-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
; SANDY-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
; SANDY-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_blendvps:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_blendvps:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:1.00]
; HASWELL-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
; HASWELL-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
; HASWELL-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_blendvps:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_blendvps:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:1.00]
; BROADWELL-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
; BROADWELL-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
; BROADWELL-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_blendvps:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; BROADWELL-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_blendvps:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
; SKYLAKE-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
; SKYLAKE-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_blendvps:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
; SKYLAKE-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_blendvps:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:0.33]
; SKX-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
; SKX-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
; SKX-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_blendvps:
; SKX:       # %bb.0:
; SKX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
; SKX-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_blendvps:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
; BTVER2-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
; BTVER2-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_blendvps:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; BTVER2-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_blendvps:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [1:0.50]
; ZNVER1-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.50]
; ZNVER1-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_blendvps:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; ZNVER1-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  %2 = load <4 x float>, <4 x float> *%a3, align 16
  %3 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %1, <4 x float> %2, <4 x float> %a2)
  ret <4 x float> %3
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

; dppd: two chained calls to the dppd intrinsic with immediate 7; the second call
; takes its second operand from the vector loaded from %a2.
define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_dppd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
; GENERIC-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SLM-LABEL: test_dppd:
; SLM:       # %bb.0:
; SLM-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_dppd:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_dppd:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_dppd:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
; HASWELL-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_dppd:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_dppd:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
; BROADWELL-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [14:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_dppd:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; BROADWELL-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_dppd:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
; SKYLAKE-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_dppd:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; SKYLAKE-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_dppd:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
; SKX-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_dppd:
; SKX:       # %bb.0:
; SKX-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; SKX-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_dppd:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:3.00]
; BTVER2-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [14:3.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_dppd:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:3.00]
; BTVER2-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:3.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_dppd:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_dppd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
  %2 = load <2 x double>, <2 x double> *%a2, align 16
  %3 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %1, <2 x double> %2, i8 7)
  ret <2 x double> %3
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone

; dpps: two chained calls to the dpps intrinsic with immediate 7; the second call
; takes its second operand from the vector loaded from %a2.
define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_dpps:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [12:2.00]
; GENERIC-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [18:2.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; SLM-LABEL: test_dpps:
; SLM:       # %bb.0:
; SLM-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_dpps:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [12:2.00]
; SANDY-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [18:2.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_dpps:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00]
; SANDY-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [18:2.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_dpps:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [14:2.00]
; HASWELL-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [20:2.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_dpps:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00]
; HASWELL-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [20:2.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_dpps:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [14:2.00]
; BROADWELL-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [19:2.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_dpps:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00]
; BROADWELL-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:2.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_dpps:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [13:1.50]
; SKYLAKE-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [19:1.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_dpps:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.50]
; SKYLAKE-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_dpps:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [13:1.33]
; SKX-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [19:1.33]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_dpps:
; SKX:       # %bb.0:
; SKX-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.33]
; SKX-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.33]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_dpps:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [11:3.00]
; BTVER2-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [16:3.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_dpps:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [11:3.00]
; BTVER2-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [16:3.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_dpps:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_dpps:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %1, <4 x float> %2, i8 7)
  ret <4 x float> %3
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone

725 define i32 @test_extractps(<4 x float> %a0, i32 *%a1) { 726 ; GENERIC-LABEL: test_extractps: 727 ; GENERIC: # %bb.0: 728 ; GENERIC-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00] 729 ; GENERIC-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:1.00] 730 ; GENERIC-NEXT: retq # sched: [1:1.00] 731 ; 732 ; SLM-LABEL: test_extractps: 733 ; SLM: # %bb.0: 734 ; SLM-NEXT: extractps $3, %xmm0, %eax # sched: [1:1.00] 735 ; SLM-NEXT: extractps $1, %xmm0, (%rdi) # sched: [4:2.00] 736 ; SLM-NEXT: retq # sched: [4:1.00] 737 ; 738 ; SANDY-SSE-LABEL: test_extractps: 739 ; SANDY-SSE: # %bb.0: 740 ; SANDY-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00] 741 ; SANDY-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:1.00] 742 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 743 ; 744 ; SANDY-LABEL: test_extractps: 745 ; SANDY: # %bb.0: 746 ; SANDY-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00] 747 ; SANDY-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [5:1.00] 748 ; 
SANDY-NEXT: retq # sched: [1:1.00] 749 ; 750 ; HASWELL-SSE-LABEL: test_extractps: 751 ; HASWELL-SSE: # %bb.0: 752 ; HASWELL-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:1.00] 753 ; HASWELL-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00] 754 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 755 ; 756 ; HASWELL-LABEL: test_extractps: 757 ; HASWELL: # %bb.0: 758 ; HASWELL-NEXT: vextractps $3, %xmm0, %eax # sched: [2:1.00] 759 ; HASWELL-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00] 760 ; HASWELL-NEXT: retq # sched: [7:1.00] 761 ; 762 ; BROADWELL-SSE-LABEL: test_extractps: 763 ; BROADWELL-SSE: # %bb.0: 764 ; BROADWELL-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:1.00] 765 ; BROADWELL-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00] 766 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 767 ; 768 ; BROADWELL-LABEL: test_extractps: 769 ; BROADWELL: # %bb.0: 770 ; BROADWELL-NEXT: vextractps $3, %xmm0, %eax # sched: [2:1.00] 771 ; BROADWELL-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00] 772 ; BROADWELL-NEXT: retq # sched: [7:1.00] 773 ; 774 ; SKYLAKE-SSE-LABEL: test_extractps: 775 ; SKYLAKE-SSE: # %bb.0: 776 ; SKYLAKE-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00] 777 ; SKYLAKE-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00] 778 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 779 ; 780 ; SKYLAKE-LABEL: test_extractps: 781 ; SKYLAKE: # %bb.0: 782 ; SKYLAKE-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00] 783 ; SKYLAKE-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00] 784 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 785 ; 786 ; SKX-SSE-LABEL: test_extractps: 787 ; SKX-SSE: # %bb.0: 788 ; SKX-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00] 789 ; SKX-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00] 790 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 791 ; 792 ; SKX-LABEL: test_extractps: 793 ; SKX: # %bb.0: 794 ; SKX-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00] 795 ; SKX-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00] 796 ; SKX-NEXT: 
retq # sched: [7:1.00] 797 ; 798 ; BTVER2-SSE-LABEL: test_extractps: 799 ; BTVER2-SSE: # %bb.0: 800 ; BTVER2-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00] 801 ; BTVER2-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [3:1.00] 802 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 803 ; 804 ; BTVER2-LABEL: test_extractps: 805 ; BTVER2: # %bb.0: 806 ; BTVER2-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00] 807 ; BTVER2-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [3:1.00] 808 ; BTVER2-NEXT: retq # sched: [4:1.00] 809 ; 810 ; ZNVER1-SSE-LABEL: test_extractps: 811 ; ZNVER1-SSE: # %bb.0: 812 ; ZNVER1-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:2.00] 813 ; ZNVER1-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:2.50] 814 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 815 ; 816 ; ZNVER1-LABEL: test_extractps: 817 ; ZNVER1: # %bb.0: 818 ; ZNVER1-NEXT: vextractps $3, %xmm0, %eax # sched: [2:2.00] 819 ; ZNVER1-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [5:2.50] 820 ; ZNVER1-NEXT: retq # sched: [1:0.50] 821 %1 = extractelement <4 x float> %a0, i32 3 822 %2 = extractelement <4 x float> %a0, i32 1 823 %3 = bitcast float %1 to i32 824 %4 = bitcast float %2 to i32 825 store i32 %4, i32 *%a1 826 ret i32 %3 827 } 828 829 define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2) { 830 ; GENERIC-LABEL: test_insertps: 831 ; GENERIC: # %bb.0: 832 ; GENERIC-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] 833 ; GENERIC-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] 834 ; GENERIC-NEXT: retq # sched: [1:1.00] 835 ; 836 ; SLM-LABEL: test_insertps: 837 ; SLM: # %bb.0: 838 ; SLM-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] 839 ; SLM-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [4:1.00] 840 ; SLM-NEXT: retq # sched: [4:1.00] 841 ; 842 ; SANDY-SSE-LABEL: test_insertps: 843 ; SANDY-SSE: # %bb.0: 844 ; SANDY-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] 845 ; 
SANDY-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] 846 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 847 ; 848 ; SANDY-LABEL: test_insertps: 849 ; SANDY: # %bb.0: 850 ; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] 851 ; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] 852 ; SANDY-NEXT: retq # sched: [1:1.00] 853 ; 854 ; HASWELL-SSE-LABEL: test_insertps: 855 ; HASWELL-SSE: # %bb.0: 856 ; HASWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] 857 ; HASWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] 858 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 859 ; 860 ; HASWELL-LABEL: test_insertps: 861 ; HASWELL: # %bb.0: 862 ; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] 863 ; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] 864 ; HASWELL-NEXT: retq # sched: [7:1.00] 865 ; 866 ; BROADWELL-SSE-LABEL: test_insertps: 867 ; BROADWELL-SSE: # %bb.0: 868 ; BROADWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] 869 ; BROADWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00] 870 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 871 ; 872 ; BROADWELL-LABEL: test_insertps: 873 ; BROADWELL: # %bb.0: 874 ; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] 875 ; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00] 876 ; BROADWELL-NEXT: retq # sched: [7:1.00] 877 ; 878 ; SKYLAKE-SSE-LABEL: test_insertps: 879 ; SKYLAKE-SSE: # %bb.0: 880 ; SKYLAKE-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] 881 ; SKYLAKE-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] 882 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 883 ; 884 ; SKYLAKE-LABEL: test_insertps: 885 ; SKYLAKE: # %bb.0: 886 ; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] 887 ; 
SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] 888 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 889 ; 890 ; SKX-SSE-LABEL: test_insertps: 891 ; SKX-SSE: # %bb.0: 892 ; SKX-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] 893 ; SKX-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] 894 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 895 ; 896 ; SKX-LABEL: test_insertps: 897 ; SKX: # %bb.0: 898 ; SKX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] 899 ; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] 900 ; SKX-NEXT: retq # sched: [7:1.00] 901 ; 902 ; BTVER2-SSE-LABEL: test_insertps: 903 ; BTVER2-SSE: # %bb.0: 904 ; BTVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50] 905 ; BTVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00] 906 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 907 ; 908 ; BTVER2-LABEL: test_insertps: 909 ; BTVER2: # %bb.0: 910 ; BTVER2-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50] 911 ; BTVER2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00] 912 ; BTVER2-NEXT: retq # sched: [4:1.00] 913 ; 914 ; ZNVER1-SSE-LABEL: test_insertps: 915 ; ZNVER1-SSE: # %bb.0: 916 ; ZNVER1-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50] 917 ; ZNVER1-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [8:0.50] 918 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 919 ; 920 ; ZNVER1-LABEL: test_insertps: 921 ; ZNVER1: # %bb.0: 922 ; ZNVER1-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50] 923 ; ZNVER1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [8:0.50] 924 ; ZNVER1-NEXT: retq # sched: [1:0.50] 925 %1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 17) 926 %2 = load float, float *%a2 927 %3 = insertelement <4 x float> %1, float %2, i32 3 928 ret <4 x float> %3 929 } 930 declare <4 x float> 
@llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone 931 932 define <2 x i64> @test_movntdqa(i8* %a0) { 933 ; GENERIC-LABEL: test_movntdqa: 934 ; GENERIC: # %bb.0: 935 ; GENERIC-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] 936 ; GENERIC-NEXT: retq # sched: [1:1.00] 937 ; 938 ; SLM-LABEL: test_movntdqa: 939 ; SLM: # %bb.0: 940 ; SLM-NEXT: movntdqa (%rdi), %xmm0 # sched: [3:1.00] 941 ; SLM-NEXT: retq # sched: [4:1.00] 942 ; 943 ; SANDY-SSE-LABEL: test_movntdqa: 944 ; SANDY-SSE: # %bb.0: 945 ; SANDY-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] 946 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 947 ; 948 ; SANDY-LABEL: test_movntdqa: 949 ; SANDY: # %bb.0: 950 ; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] 951 ; SANDY-NEXT: retq # sched: [1:1.00] 952 ; 953 ; HASWELL-SSE-LABEL: test_movntdqa: 954 ; HASWELL-SSE: # %bb.0: 955 ; HASWELL-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] 956 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 957 ; 958 ; HASWELL-LABEL: test_movntdqa: 959 ; HASWELL: # %bb.0: 960 ; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] 961 ; HASWELL-NEXT: retq # sched: [7:1.00] 962 ; 963 ; BROADWELL-SSE-LABEL: test_movntdqa: 964 ; BROADWELL-SSE: # %bb.0: 965 ; BROADWELL-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:0.50] 966 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 967 ; 968 ; BROADWELL-LABEL: test_movntdqa: 969 ; BROADWELL: # %bb.0: 970 ; BROADWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:0.50] 971 ; BROADWELL-NEXT: retq # sched: [7:1.00] 972 ; 973 ; SKYLAKE-SSE-LABEL: test_movntdqa: 974 ; SKYLAKE-SSE: # %bb.0: 975 ; SKYLAKE-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] 976 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 977 ; 978 ; SKYLAKE-LABEL: test_movntdqa: 979 ; SKYLAKE: # %bb.0: 980 ; SKYLAKE-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] 981 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 982 ; 983 ; SKX-SSE-LABEL: test_movntdqa: 984 ; SKX-SSE: # %bb.0: 985 ; SKX-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] 
986 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 987 ; 988 ; SKX-LABEL: test_movntdqa: 989 ; SKX: # %bb.0: 990 ; SKX-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] 991 ; SKX-NEXT: retq # sched: [7:1.00] 992 ; 993 ; BTVER2-SSE-LABEL: test_movntdqa: 994 ; BTVER2-SSE: # %bb.0: 995 ; BTVER2-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:1.00] 996 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 997 ; 998 ; BTVER2-LABEL: test_movntdqa: 999 ; BTVER2: # %bb.0: 1000 ; BTVER2-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:1.00] 1001 ; BTVER2-NEXT: retq # sched: [4:1.00] 1002 ; 1003 ; ZNVER1-SSE-LABEL: test_movntdqa: 1004 ; ZNVER1-SSE: # %bb.0: 1005 ; ZNVER1-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [8:0.50] 1006 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1007 ; 1008 ; ZNVER1-LABEL: test_movntdqa: 1009 ; ZNVER1: # %bb.0: 1010 ; ZNVER1-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [8:0.50] 1011 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1012 %1 = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %a0) 1013 ret <2 x i64> %1 1014 } 1015 declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readnone 1016 1017 define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { 1018 ; GENERIC-LABEL: test_mpsadbw: 1019 ; GENERIC: # %bb.0: 1020 ; GENERIC-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00] 1021 ; GENERIC-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:1.00] 1022 ; GENERIC-NEXT: retq # sched: [1:1.00] 1023 ; 1024 ; SLM-LABEL: test_mpsadbw: 1025 ; SLM: # %bb.0: 1026 ; SLM-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00] 1027 ; SLM-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:1.00] 1028 ; SLM-NEXT: retq # sched: [4:1.00] 1029 ; 1030 ; SANDY-SSE-LABEL: test_mpsadbw: 1031 ; SANDY-SSE: # %bb.0: 1032 ; SANDY-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00] 1033 ; SANDY-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:1.00] 1034 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1035 ; 1036 ; SANDY-LABEL: test_mpsadbw: 1037 ; SANDY: # %bb.0: 1038 ; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:1.00] 
1039 ; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.00] 1040 ; SANDY-NEXT: retq # sched: [1:1.00] 1041 ; 1042 ; HASWELL-SSE-LABEL: test_mpsadbw: 1043 ; HASWELL-SSE: # %bb.0: 1044 ; HASWELL-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:2.00] 1045 ; HASWELL-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:2.00] 1046 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1047 ; 1048 ; HASWELL-LABEL: test_mpsadbw: 1049 ; HASWELL: # %bb.0: 1050 ; HASWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00] 1051 ; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:2.00] 1052 ; HASWELL-NEXT: retq # sched: [7:1.00] 1053 ; 1054 ; BROADWELL-SSE-LABEL: test_mpsadbw: 1055 ; BROADWELL-SSE: # %bb.0: 1056 ; BROADWELL-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:2.00] 1057 ; BROADWELL-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [12:2.00] 1058 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1059 ; 1060 ; BROADWELL-LABEL: test_mpsadbw: 1061 ; BROADWELL: # %bb.0: 1062 ; BROADWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00] 1063 ; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [12:2.00] 1064 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1065 ; 1066 ; SKYLAKE-SSE-LABEL: test_mpsadbw: 1067 ; SKYLAKE-SSE: # %bb.0: 1068 ; SKYLAKE-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [4:2.00] 1069 ; SKYLAKE-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:2.00] 1070 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1071 ; 1072 ; SKYLAKE-LABEL: test_mpsadbw: 1073 ; SKYLAKE: # %bb.0: 1074 ; SKYLAKE-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] 1075 ; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] 1076 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1077 ; 1078 ; SKX-SSE-LABEL: test_mpsadbw: 1079 ; SKX-SSE: # %bb.0: 1080 ; SKX-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [4:2.00] 1081 ; SKX-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:2.00] 1082 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1083 ; 1084 ; SKX-LABEL: test_mpsadbw: 1085 ; SKX: # %bb.0: 
1086 ; SKX-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] 1087 ; SKX-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] 1088 ; SKX-NEXT: retq # sched: [7:1.00] 1089 ; 1090 ; BTVER2-SSE-LABEL: test_mpsadbw: 1091 ; BTVER2-SSE: # %bb.0: 1092 ; BTVER2-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [3:2.00] 1093 ; BTVER2-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [8:2.00] 1094 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1095 ; 1096 ; BTVER2-LABEL: test_mpsadbw: 1097 ; BTVER2: # %bb.0: 1098 ; BTVER2-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 1099 ; BTVER2-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] 1100 ; BTVER2-NEXT: retq # sched: [4:1.00] 1101 ; 1102 ; ZNVER1-SSE-LABEL: test_mpsadbw: 1103 ; ZNVER1-SSE: # %bb.0: 1104 ; ZNVER1-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [100:0.25] 1105 ; ZNVER1-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [100:0.25] 1106 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1107 ; 1108 ; ZNVER1-LABEL: test_mpsadbw: 1109 ; ZNVER1: # %bb.0: 1110 ; ZNVER1-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [100:0.25] 1111 ; ZNVER1-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [100:0.25] 1112 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1113 %1 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) 1114 %2 = bitcast <8 x i16> %1 to <16 x i8> 1115 %3 = load <16 x i8>, <16 x i8> *%a2, align 16 1116 %4 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %2, <16 x i8> %3, i8 7) 1117 ret <8 x i16> %4 1118 } 1119 declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone 1120 1121 define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 1122 ; GENERIC-LABEL: test_packusdw: 1123 ; GENERIC: # %bb.0: 1124 ; GENERIC-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50] 1125 ; GENERIC-NEXT: packusdw (%rdi), %xmm0 # sched: [7:0.50] 1126 ; GENERIC-NEXT: retq # sched: [1:1.00] 1127 ; 1128 ; SLM-LABEL: test_packusdw: 1129 ; SLM: # %bb.0: 1130 ; SLM-NEXT: packusdw %xmm1, 
%xmm0 # sched: [1:1.00] 1131 ; SLM-NEXT: packusdw (%rdi), %xmm0 # sched: [4:1.00] 1132 ; SLM-NEXT: retq # sched: [4:1.00] 1133 ; 1134 ; SANDY-SSE-LABEL: test_packusdw: 1135 ; SANDY-SSE: # %bb.0: 1136 ; SANDY-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50] 1137 ; SANDY-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:0.50] 1138 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1139 ; 1140 ; SANDY-LABEL: test_packusdw: 1141 ; SANDY: # %bb.0: 1142 ; SANDY-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1143 ; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 1144 ; SANDY-NEXT: retq # sched: [1:1.00] 1145 ; 1146 ; HASWELL-SSE-LABEL: test_packusdw: 1147 ; HASWELL-SSE: # %bb.0: 1148 ; HASWELL-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00] 1149 ; HASWELL-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00] 1150 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1151 ; 1152 ; HASWELL-LABEL: test_packusdw: 1153 ; HASWELL: # %bb.0: 1154 ; HASWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 1155 ; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 1156 ; HASWELL-NEXT: retq # sched: [7:1.00] 1157 ; 1158 ; BROADWELL-SSE-LABEL: test_packusdw: 1159 ; BROADWELL-SSE: # %bb.0: 1160 ; BROADWELL-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00] 1161 ; BROADWELL-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [6:1.00] 1162 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1163 ; 1164 ; BROADWELL-LABEL: test_packusdw: 1165 ; BROADWELL: # %bb.0: 1166 ; BROADWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 1167 ; BROADWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 1168 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1169 ; 1170 ; SKYLAKE-SSE-LABEL: test_packusdw: 1171 ; SKYLAKE-SSE: # %bb.0: 1172 ; SKYLAKE-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00] 1173 ; SKYLAKE-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00] 1174 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1175 ; 1176 ; SKYLAKE-LABEL: test_packusdw: 1177 ; SKYLAKE: # %bb.0: 1178 ; SKYLAKE-NEXT: 
vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 1179 ; SKYLAKE-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 1180 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1181 ; 1182 ; SKX-SSE-LABEL: test_packusdw: 1183 ; SKX-SSE: # %bb.0: 1184 ; SKX-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00] 1185 ; SKX-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00] 1186 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1187 ; 1188 ; SKX-LABEL: test_packusdw: 1189 ; SKX: # %bb.0: 1190 ; SKX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 1191 ; SKX-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 1192 ; SKX-NEXT: retq # sched: [7:1.00] 1193 ; 1194 ; BTVER2-SSE-LABEL: test_packusdw: 1195 ; BTVER2-SSE: # %bb.0: 1196 ; BTVER2-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50] 1197 ; BTVER2-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [6:1.00] 1198 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1199 ; 1200 ; BTVER2-LABEL: test_packusdw: 1201 ; BTVER2: # %bb.0: 1202 ; BTVER2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1203 ; BTVER2-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 1204 ; BTVER2-NEXT: retq # sched: [4:1.00] 1205 ; 1206 ; ZNVER1-SSE-LABEL: test_packusdw: 1207 ; ZNVER1-SSE: # %bb.0: 1208 ; ZNVER1-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.25] 1209 ; ZNVER1-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [8:0.50] 1210 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1211 ; 1212 ; ZNVER1-LABEL: test_packusdw: 1213 ; ZNVER1: # %bb.0: 1214 ; ZNVER1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 1215 ; ZNVER1-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 1216 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1217 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) 1218 %2 = bitcast <8 x i16> %1 to <4 x i32> 1219 %3 = load <4 x i32>, <4 x i32> *%a2, align 16 1220 %4 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %2, <4 x i32> %3) 1221 ret <8 x i16> %4 1222 } 1223 declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone 
1224 1225 define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16 x i8> *%a3) { 1226 ; GENERIC-LABEL: test_pblendvb: 1227 ; GENERIC: # %bb.0: 1228 ; GENERIC-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] 1229 ; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] 1230 ; GENERIC-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:1.00] 1231 ; GENERIC-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00] 1232 ; GENERIC-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] 1233 ; GENERIC-NEXT: retq # sched: [1:1.00] 1234 ; 1235 ; SLM-LABEL: test_pblendvb: 1236 ; SLM: # %bb.0: 1237 ; SLM-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.50] 1238 ; SLM-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] 1239 ; SLM-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [1:1.00] 1240 ; SLM-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [4:1.00] 1241 ; SLM-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.50] 1242 ; SLM-NEXT: retq # sched: [4:1.00] 1243 ; 1244 ; SANDY-SSE-LABEL: test_pblendvb: 1245 ; SANDY-SSE: # %bb.0: 1246 ; SANDY-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] 1247 ; SANDY-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] 1248 ; SANDY-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:1.00] 1249 ; SANDY-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00] 1250 ; SANDY-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] 1251 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1252 ; 1253 ; SANDY-LABEL: test_pblendvb: 1254 ; SANDY: # %bb.0: 1255 ; SANDY-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 1256 ; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 1257 ; SANDY-NEXT: retq # sched: [1:1.00] 1258 ; 1259 ; HASWELL-SSE-LABEL: test_pblendvb: 1260 ; HASWELL-SSE: # %bb.0: 1261 ; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] 1262 ; HASWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] 1263 ; HASWELL-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00] 1264 ; HASWELL-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:2.00] 1265 ; HASWELL-SSE-NEXT: movdqa 
%xmm3, %xmm0 # sched: [1:0.33] 1266 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1267 ; 1268 ; HASWELL-LABEL: test_pblendvb: 1269 ; HASWELL: # %bb.0: 1270 ; HASWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] 1271 ; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] 1272 ; HASWELL-NEXT: retq # sched: [7:1.00] 1273 ; 1274 ; BROADWELL-SSE-LABEL: test_pblendvb: 1275 ; BROADWELL-SSE: # %bb.0: 1276 ; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] 1277 ; BROADWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] 1278 ; BROADWELL-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00] 1279 ; BROADWELL-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00] 1280 ; BROADWELL-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] 1281 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1282 ; 1283 ; BROADWELL-LABEL: test_pblendvb: 1284 ; BROADWELL: # %bb.0: 1285 ; BROADWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] 1286 ; BROADWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] 1287 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1288 ; 1289 ; SKYLAKE-SSE-LABEL: test_pblendvb: 1290 ; SKYLAKE-SSE: # %bb.0: 1291 ; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] 1292 ; SKYLAKE-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33] 1293 ; SKYLAKE-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:0.67] 1294 ; SKYLAKE-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:0.67] 1295 ; SKYLAKE-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] 1296 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1297 ; 1298 ; SKYLAKE-LABEL: test_pblendvb: 1299 ; SKYLAKE: # %bb.0: 1300 ; SKYLAKE-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] 1301 ; SKYLAKE-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] 1302 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1303 ; 1304 ; SKX-SSE-LABEL: test_pblendvb: 1305 ; SKX-SSE: # %bb.0: 1306 ; SKX-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] 1307 ; SKX-SSE-NEXT: movaps %xmm2, %xmm0 # sched: 
[1:0.33] 1308 ; SKX-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:0.67] 1309 ; SKX-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:0.67] 1310 ; SKX-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] 1311 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1312 ; 1313 ; SKX-LABEL: test_pblendvb: 1314 ; SKX: # %bb.0: 1315 ; SKX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] 1316 ; SKX-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] 1317 ; SKX-NEXT: retq # sched: [7:1.00] 1318 ; 1319 ; BTVER2-SSE-LABEL: test_pblendvb: 1320 ; BTVER2-SSE: # %bb.0: 1321 ; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.50] 1322 ; BTVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] 1323 ; BTVER2-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00] 1324 ; BTVER2-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00] 1325 ; BTVER2-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.50] 1326 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1327 ; 1328 ; BTVER2-LABEL: test_pblendvb: 1329 ; BTVER2: # %bb.0: 1330 ; BTVER2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] 1331 ; BTVER2-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] 1332 ; BTVER2-NEXT: retq # sched: [4:1.00] 1333 ; 1334 ; ZNVER1-SSE-LABEL: test_pblendvb: 1335 ; ZNVER1-SSE: # %bb.0: 1336 ; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.25] 1337 ; ZNVER1-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.25] 1338 ; ZNVER1-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [1:1.00] 1339 ; ZNVER1-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00] 1340 ; ZNVER1-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.25] 1341 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1342 ; 1343 ; ZNVER1-LABEL: test_pblendvb: 1344 ; ZNVER1: # %bb.0: 1345 ; ZNVER1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 1346 ; ZNVER1-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 1347 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1348 %1 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, 
<16 x i8> %a2) 1349 %2 = load <16 x i8>, <16 x i8> *%a3, align 16 1350 %3 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %1, <16 x i8> %2, <16 x i8> %a2) 1351 ret <16 x i8> %3 1352 } 1353 declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone 1354 1355 define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 1356 ; GENERIC-LABEL: test_pblendw: 1357 ; GENERIC: # %bb.0: 1358 ; GENERIC-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] 1359 ; GENERIC-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50] 1360 ; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 1361 ; GENERIC-NEXT: retq # sched: [1:1.00] 1362 ; 1363 ; SLM-LABEL: test_pblendw: 1364 ; SLM: # %bb.0: 1365 ; SLM-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] 1366 ; SLM-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [4:1.00] 1367 ; SLM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 1368 ; SLM-NEXT: retq # sched: [4:1.00] 1369 ; 1370 ; SANDY-SSE-LABEL: test_pblendw: 1371 ; SANDY-SSE: # %bb.0: 1372 ; SANDY-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] 1373 ; SANDY-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50] 1374 ; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 1375 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1376 ; 1377 ; SANDY-LABEL: test_pblendw: 1378 ; SANDY: # %bb.0: 1379 ; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] 1380 ; SANDY-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50] 1381 ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1382 ; SANDY-NEXT: retq # sched: [1:1.00] 1383 ; 1384 ; HASWELL-SSE-LABEL: test_pblendw: 1385 ; HASWELL-SSE: # 
%bb.0: 1386 ; HASWELL-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] 1387 ; HASWELL-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00] 1388 ; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 1389 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1390 ; 1391 ; HASWELL-LABEL: test_pblendw: 1392 ; HASWELL: # %bb.0: 1393 ; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] 1394 ; HASWELL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00] 1395 ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1396 ; HASWELL-NEXT: retq # sched: [7:1.00] 1397 ; 1398 ; BROADWELL-SSE-LABEL: test_pblendw: 1399 ; BROADWELL-SSE: # %bb.0: 1400 ; BROADWELL-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] 1401 ; BROADWELL-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00] 1402 ; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 1403 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1404 ; 1405 ; BROADWELL-LABEL: test_pblendw: 1406 ; BROADWELL: # %bb.0: 1407 ; BROADWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] 1408 ; BROADWELL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00] 1409 ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1410 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1411 ; 1412 ; SKYLAKE-SSE-LABEL: test_pblendw: 1413 ; SKYLAKE-SSE: # %bb.0: 1414 ; SKYLAKE-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] 1415 ; SKYLAKE-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00] 1416 ; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] 1417 ; SKYLAKE-SSE-NEXT: retq 
# sched: [7:1.00] 1418 ; 1419 ; SKYLAKE-LABEL: test_pblendw: 1420 ; SKYLAKE: # %bb.0: 1421 ; SKYLAKE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] 1422 ; SKYLAKE-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00] 1423 ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1424 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1425 ; 1426 ; SKX-SSE-LABEL: test_pblendw: 1427 ; SKX-SSE: # %bb.0: 1428 ; SKX-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] 1429 ; SKX-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00] 1430 ; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] 1431 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1432 ; 1433 ; SKX-LABEL: test_pblendw: 1434 ; SKX: # %bb.0: 1435 ; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] 1436 ; SKX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00] 1437 ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1438 ; SKX-NEXT: retq # sched: [7:1.00] 1439 ; 1440 ; BTVER2-SSE-LABEL: test_pblendw: 1441 ; BTVER2-SSE: # %bb.0: 1442 ; BTVER2-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] 1443 ; BTVER2-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00] 1444 ; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 1445 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1446 ; 1447 ; BTVER2-LABEL: test_pblendw: 1448 ; BTVER2: # %bb.0: 1449 ; BTVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] 1450 ; BTVER2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00] 1451 ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1452 ; BTVER2-NEXT: retq # sched: 
[4:1.00] 1453 ; 1454 ; ZNVER1-SSE-LABEL: test_pblendw: 1455 ; ZNVER1-SSE: # %bb.0: 1456 ; ZNVER1-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.33] 1457 ; ZNVER1-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [8:0.50] 1458 ; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] 1459 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1460 ; 1461 ; ZNVER1-LABEL: test_pblendw: 1462 ; ZNVER1: # %bb.0: 1463 ; ZNVER1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.33] 1464 ; ZNVER1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [8:0.50] 1465 ; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 1466 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1467 %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> 1468 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 1469 %3 = shufflevector <8 x i16> %a1, <8 x i16> %2, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15> 1470 %4 = add <8 x i16> %1, %3 1471 ret <8 x i16> %4 1472 } 1473 1474 define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 1475 ; GENERIC-LABEL: test_pcmpeqq: 1476 ; GENERIC: # %bb.0: 1477 ; GENERIC-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] 1478 ; GENERIC-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] 1479 ; GENERIC-NEXT: retq # sched: [1:1.00] 1480 ; 1481 ; SLM-LABEL: test_pcmpeqq: 1482 ; SLM: # %bb.0: 1483 ; SLM-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] 1484 ; SLM-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [4:1.00] 1485 ; SLM-NEXT: retq # sched: [4:1.00] 1486 ; 1487 ; SANDY-SSE-LABEL: test_pcmpeqq: 1488 ; SANDY-SSE: # %bb.0: 1489 ; SANDY-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] 1490 ; SANDY-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] 1491 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1492 ; 1493 ; SANDY-LABEL: test_pcmpeqq: 1494 ; 
SANDY: # %bb.0: 1495 ; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1496 ; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 1497 ; SANDY-NEXT: retq # sched: [1:1.00] 1498 ; 1499 ; HASWELL-SSE-LABEL: test_pcmpeqq: 1500 ; HASWELL-SSE: # %bb.0: 1501 ; HASWELL-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] 1502 ; HASWELL-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] 1503 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1504 ; 1505 ; HASWELL-LABEL: test_pcmpeqq: 1506 ; HASWELL: # %bb.0: 1507 ; HASWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1508 ; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 1509 ; HASWELL-NEXT: retq # sched: [7:1.00] 1510 ; 1511 ; BROADWELL-SSE-LABEL: test_pcmpeqq: 1512 ; BROADWELL-SSE: # %bb.0: 1513 ; BROADWELL-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] 1514 ; BROADWELL-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [6:0.50] 1515 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1516 ; 1517 ; BROADWELL-LABEL: test_pcmpeqq: 1518 ; BROADWELL: # %bb.0: 1519 ; BROADWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1520 ; BROADWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 1521 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1522 ; 1523 ; SKYLAKE-SSE-LABEL: test_pcmpeqq: 1524 ; SKYLAKE-SSE: # %bb.0: 1525 ; SKYLAKE-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] 1526 ; SKYLAKE-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] 1527 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1528 ; 1529 ; SKYLAKE-LABEL: test_pcmpeqq: 1530 ; SKYLAKE: # %bb.0: 1531 ; SKYLAKE-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1532 ; SKYLAKE-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 1533 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1534 ; 1535 ; SKX-SSE-LABEL: test_pcmpeqq: 1536 ; SKX-SSE: # %bb.0: 1537 ; SKX-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] 1538 ; SKX-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] 1539 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1540 ; 1541 ; SKX-LABEL: test_pcmpeqq: 1542 ; 
SKX: # %bb.0: 1543 ; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1544 ; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 1545 ; SKX-NEXT: retq # sched: [7:1.00] 1546 ; 1547 ; BTVER2-SSE-LABEL: test_pcmpeqq: 1548 ; BTVER2-SSE: # %bb.0: 1549 ; BTVER2-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] 1550 ; BTVER2-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [6:1.00] 1551 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1552 ; 1553 ; BTVER2-LABEL: test_pcmpeqq: 1554 ; BTVER2: # %bb.0: 1555 ; BTVER2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1556 ; BTVER2-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 1557 ; BTVER2-NEXT: retq # sched: [4:1.00] 1558 ; 1559 ; ZNVER1-SSE-LABEL: test_pcmpeqq: 1560 ; ZNVER1-SSE: # %bb.0: 1561 ; ZNVER1-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.25] 1562 ; ZNVER1-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [8:0.50] 1563 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1564 ; 1565 ; ZNVER1-LABEL: test_pcmpeqq: 1566 ; ZNVER1: # %bb.0: 1567 ; ZNVER1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 1568 ; ZNVER1-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 1569 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1570 %1 = icmp eq <2 x i64> %a0, %a1 1571 %2 = sext <2 x i1> %1 to <2 x i64> 1572 %3 = load <2 x i64>, <2 x i64>*%a2, align 16 1573 %4 = icmp eq <2 x i64> %2, %3 1574 %5 = sext <2 x i1> %4 to <2 x i64> 1575 ret <2 x i64> %5 1576 } 1577 1578 define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) { 1579 ; GENERIC-LABEL: test_pextrb: 1580 ; GENERIC: # %bb.0: 1581 ; GENERIC-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00] 1582 ; GENERIC-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:1.00] 1583 ; GENERIC-NEXT: retq # sched: [1:1.00] 1584 ; 1585 ; SLM-LABEL: test_pextrb: 1586 ; SLM: # %bb.0: 1587 ; SLM-NEXT: pextrb $3, %xmm0, %eax # sched: [1:1.00] 1588 ; SLM-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [4:2.00] 1589 ; SLM-NEXT: retq # sched: [4:1.00] 1590 ; 1591 ; SANDY-SSE-LABEL: test_pextrb: 1592 ; SANDY-SSE: # %bb.0: 1593 ; SANDY-SSE-NEXT: 
pextrb $3, %xmm0, %eax # sched: [3:1.00] 1594 ; SANDY-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:1.00] 1595 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1596 ; 1597 ; SANDY-LABEL: test_pextrb: 1598 ; SANDY: # %bb.0: 1599 ; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] 1600 ; SANDY-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00] 1601 ; SANDY-NEXT: retq # sched: [1:1.00] 1602 ; 1603 ; HASWELL-SSE-LABEL: test_pextrb: 1604 ; HASWELL-SSE: # %bb.0: 1605 ; HASWELL-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:1.00] 1606 ; HASWELL-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00] 1607 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1608 ; 1609 ; HASWELL-LABEL: test_pextrb: 1610 ; HASWELL: # %bb.0: 1611 ; HASWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00] 1612 ; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00] 1613 ; HASWELL-NEXT: retq # sched: [7:1.00] 1614 ; 1615 ; BROADWELL-SSE-LABEL: test_pextrb: 1616 ; BROADWELL-SSE: # %bb.0: 1617 ; BROADWELL-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:1.00] 1618 ; BROADWELL-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00] 1619 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1620 ; 1621 ; BROADWELL-LABEL: test_pextrb: 1622 ; BROADWELL: # %bb.0: 1623 ; BROADWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00] 1624 ; BROADWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00] 1625 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1626 ; 1627 ; SKYLAKE-SSE-LABEL: test_pextrb: 1628 ; SKYLAKE-SSE: # %bb.0: 1629 ; SKYLAKE-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00] 1630 ; SKYLAKE-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00] 1631 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1632 ; 1633 ; SKYLAKE-LABEL: test_pextrb: 1634 ; SKYLAKE: # %bb.0: 1635 ; SKYLAKE-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] 1636 ; SKYLAKE-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00] 1637 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1638 ; 1639 ; SKX-SSE-LABEL: test_pextrb: 1640 ; SKX-SSE: # %bb.0: 1641 ; SKX-SSE-NEXT: pextrb $3, %xmm0, 
%eax # sched: [3:1.00] 1642 ; SKX-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00] 1643 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1644 ; 1645 ; SKX-LABEL: test_pextrb: 1646 ; SKX: # %bb.0: 1647 ; SKX-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] 1648 ; SKX-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00] 1649 ; SKX-NEXT: retq # sched: [7:1.00] 1650 ; 1651 ; BTVER2-SSE-LABEL: test_pextrb: 1652 ; BTVER2-SSE: # %bb.0: 1653 ; BTVER2-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00] 1654 ; BTVER2-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [3:1.00] 1655 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1656 ; 1657 ; BTVER2-LABEL: test_pextrb: 1658 ; BTVER2: # %bb.0: 1659 ; BTVER2-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] 1660 ; BTVER2-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [3:1.00] 1661 ; BTVER2-NEXT: retq # sched: [4:1.00] 1662 ; 1663 ; ZNVER1-SSE-LABEL: test_pextrb: 1664 ; ZNVER1-SSE: # %bb.0: 1665 ; ZNVER1-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:2.00] 1666 ; ZNVER1-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:3.00] 1667 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1668 ; 1669 ; ZNVER1-LABEL: test_pextrb: 1670 ; ZNVER1: # %bb.0: 1671 ; ZNVER1-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:2.00] 1672 ; ZNVER1-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:3.00] 1673 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1674 %1 = extractelement <16 x i8> %a0, i32 3 1675 %2 = extractelement <16 x i8> %a0, i32 1 1676 store i8 %2, i8 *%a1 1677 %3 = zext i8 %1 to i32 1678 ret i32 %3 1679 } 1680 1681 define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { 1682 ; GENERIC-LABEL: test_pextrd: 1683 ; GENERIC: # %bb.0: 1684 ; GENERIC-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] 1685 ; GENERIC-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] 1686 ; GENERIC-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00] 1687 ; GENERIC-NEXT: retq # sched: [1:1.00] 1688 ; 1689 ; SLM-LABEL: test_pextrd: 1690 ; SLM: # %bb.0: 1691 ; SLM-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] 1692 ; SLM-NEXT: pextrd $3, %xmm0, %eax # 
sched: [1:1.00] 1693 ; SLM-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [4:2.00] 1694 ; SLM-NEXT: retq # sched: [4:1.00] 1695 ; 1696 ; SANDY-SSE-LABEL: test_pextrd: 1697 ; SANDY-SSE: # %bb.0: 1698 ; SANDY-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] 1699 ; SANDY-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] 1700 ; SANDY-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00] 1701 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1702 ; 1703 ; SANDY-LABEL: test_pextrd: 1704 ; SANDY: # %bb.0: 1705 ; SANDY-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] 1706 ; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] 1707 ; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00] 1708 ; SANDY-NEXT: retq # sched: [1:1.00] 1709 ; 1710 ; HASWELL-SSE-LABEL: test_pextrd: 1711 ; HASWELL-SSE: # %bb.0: 1712 ; HASWELL-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] 1713 ; HASWELL-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:1.00] 1714 ; HASWELL-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00] 1715 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1716 ; 1717 ; HASWELL-LABEL: test_pextrd: 1718 ; HASWELL: # %bb.0: 1719 ; HASWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] 1720 ; HASWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00] 1721 ; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] 1722 ; HASWELL-NEXT: retq # sched: [7:1.00] 1723 ; 1724 ; BROADWELL-SSE-LABEL: test_pextrd: 1725 ; BROADWELL-SSE: # %bb.0: 1726 ; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] 1727 ; BROADWELL-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:1.00] 1728 ; BROADWELL-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00] 1729 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1730 ; 1731 ; BROADWELL-LABEL: test_pextrd: 1732 ; BROADWELL: # %bb.0: 1733 ; BROADWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] 1734 ; BROADWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00] 1735 ; BROADWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] 1736 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1737 ; 1738 
; SKYLAKE-SSE-LABEL: test_pextrd: 1739 ; SKYLAKE-SSE: # %bb.0: 1740 ; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.33] 1741 ; SKYLAKE-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] 1742 ; SKYLAKE-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00] 1743 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1744 ; 1745 ; SKYLAKE-LABEL: test_pextrd: 1746 ; SKYLAKE: # %bb.0: 1747 ; SKYLAKE-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33] 1748 ; SKYLAKE-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] 1749 ; SKYLAKE-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] 1750 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1751 ; 1752 ; SKX-SSE-LABEL: test_pextrd: 1753 ; SKX-SSE: # %bb.0: 1754 ; SKX-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.33] 1755 ; SKX-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] 1756 ; SKX-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00] 1757 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1758 ; 1759 ; SKX-LABEL: test_pextrd: 1760 ; SKX: # %bb.0: 1761 ; SKX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33] 1762 ; SKX-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] 1763 ; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] 1764 ; SKX-NEXT: retq # sched: [7:1.00] 1765 ; 1766 ; BTVER2-SSE-LABEL: test_pextrd: 1767 ; BTVER2-SSE: # %bb.0: 1768 ; BTVER2-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] 1769 ; BTVER2-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] 1770 ; BTVER2-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [3:1.00] 1771 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1772 ; 1773 ; BTVER2-LABEL: test_pextrd: 1774 ; BTVER2: # %bb.0: 1775 ; BTVER2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] 1776 ; BTVER2-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] 1777 ; BTVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [3:1.00] 1778 ; BTVER2-NEXT: retq # sched: [4:1.00] 1779 ; 1780 ; ZNVER1-SSE-LABEL: test_pextrd: 1781 ; ZNVER1-SSE: # %bb.0: 1782 ; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.25] 1783 ; ZNVER1-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:2.00] 
1784 ; ZNVER1-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:3.00] 1785 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1786 ; 1787 ; ZNVER1-LABEL: test_pextrd: 1788 ; ZNVER1: # %bb.0: 1789 ; ZNVER1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.25] 1790 ; ZNVER1-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:2.00] 1791 ; ZNVER1-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:3.00] 1792 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1793 %1 = add <4 x i32> %a0, %a0 1794 %2 = extractelement <4 x i32> %1, i32 3 1795 %3 = extractelement <4 x i32> %1, i32 1 1796 store i32 %3, i32 *%a1 1797 ret i32 %2 1798 } 1799 1800 define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) { 1801 ; GENERIC-LABEL: test_pextrq: 1802 ; GENERIC: # %bb.0: 1803 ; GENERIC-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00] 1804 ; GENERIC-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:1.00] 1805 ; GENERIC-NEXT: retq # sched: [1:1.00] 1806 ; 1807 ; SLM-LABEL: test_pextrq: 1808 ; SLM: # %bb.0: 1809 ; SLM-NEXT: pextrq $1, %xmm0, %rax # sched: [1:1.00] 1810 ; SLM-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [4:2.00] 1811 ; SLM-NEXT: retq # sched: [4:1.00] 1812 ; 1813 ; SANDY-SSE-LABEL: test_pextrq: 1814 ; SANDY-SSE: # %bb.0: 1815 ; SANDY-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00] 1816 ; SANDY-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:1.00] 1817 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1818 ; 1819 ; SANDY-LABEL: test_pextrq: 1820 ; SANDY: # %bb.0: 1821 ; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] 1822 ; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] 1823 ; SANDY-NEXT: retq # sched: [1:1.00] 1824 ; 1825 ; HASWELL-SSE-LABEL: test_pextrq: 1826 ; HASWELL-SSE: # %bb.0: 1827 ; HASWELL-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:1.00] 1828 ; HASWELL-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00] 1829 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1830 ; 1831 ; HASWELL-LABEL: test_pextrq: 1832 ; HASWELL: # %bb.0: 1833 ; HASWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00] 1834 ; HASWELL-NEXT: 
vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] 1835 ; HASWELL-NEXT: retq # sched: [7:1.00] 1836 ; 1837 ; BROADWELL-SSE-LABEL: test_pextrq: 1838 ; BROADWELL-SSE: # %bb.0: 1839 ; BROADWELL-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:1.00] 1840 ; BROADWELL-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00] 1841 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1842 ; 1843 ; BROADWELL-LABEL: test_pextrq: 1844 ; BROADWELL: # %bb.0: 1845 ; BROADWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00] 1846 ; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] 1847 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1848 ; 1849 ; SKYLAKE-SSE-LABEL: test_pextrq: 1850 ; SKYLAKE-SSE: # %bb.0: 1851 ; SKYLAKE-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00] 1852 ; SKYLAKE-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00] 1853 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1854 ; 1855 ; SKYLAKE-LABEL: test_pextrq: 1856 ; SKYLAKE: # %bb.0: 1857 ; SKYLAKE-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] 1858 ; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] 1859 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1860 ; 1861 ; SKX-SSE-LABEL: test_pextrq: 1862 ; SKX-SSE: # %bb.0: 1863 ; SKX-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00] 1864 ; SKX-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00] 1865 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1866 ; 1867 ; SKX-LABEL: test_pextrq: 1868 ; SKX: # %bb.0: 1869 ; SKX-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] 1870 ; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] 1871 ; SKX-NEXT: retq # sched: [7:1.00] 1872 ; 1873 ; BTVER2-SSE-LABEL: test_pextrq: 1874 ; BTVER2-SSE: # %bb.0: 1875 ; BTVER2-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00] 1876 ; BTVER2-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [3:1.00] 1877 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1878 ; 1879 ; BTVER2-LABEL: test_pextrq: 1880 ; BTVER2: # %bb.0: 1881 ; BTVER2-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] 1882 ; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [3:1.00] 
1883 ; BTVER2-NEXT: retq # sched: [4:1.00] 1884 ; 1885 ; ZNVER1-SSE-LABEL: test_pextrq: 1886 ; ZNVER1-SSE: # %bb.0: 1887 ; ZNVER1-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:2.00] 1888 ; ZNVER1-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:3.00] 1889 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1890 ; 1891 ; ZNVER1-LABEL: test_pextrq: 1892 ; ZNVER1: # %bb.0: 1893 ; ZNVER1-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:2.00] 1894 ; ZNVER1-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:3.00] 1895 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1896 %1 = extractelement <2 x i64> %a0, i32 1 1897 %2 = extractelement <2 x i64> %a0, i32 1 1898 store i64 %2, i64 *%a2 1899 ret i64 %1 1900 } 1901 1902 define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) { 1903 ; GENERIC-LABEL: test_pextrw: 1904 ; GENERIC: # %bb.0: 1905 ; GENERIC-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] 1906 ; GENERIC-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00] 1907 ; GENERIC-NEXT: retq # sched: [1:1.00] 1908 ; 1909 ; SLM-LABEL: test_pextrw: 1910 ; SLM: # %bb.0: 1911 ; SLM-NEXT: pextrw $3, %xmm0, %eax # sched: [1:1.00] 1912 ; SLM-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [4:2.00] 1913 ; SLM-NEXT: retq # sched: [4:1.00] 1914 ; 1915 ; SANDY-SSE-LABEL: test_pextrw: 1916 ; SANDY-SSE: # %bb.0: 1917 ; SANDY-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] 1918 ; SANDY-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00] 1919 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1920 ; 1921 ; SANDY-LABEL: test_pextrw: 1922 ; SANDY: # %bb.0: 1923 ; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] 1924 ; SANDY-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00] 1925 ; SANDY-NEXT: retq # sched: [1:1.00] 1926 ; 1927 ; HASWELL-SSE-LABEL: test_pextrw: 1928 ; HASWELL-SSE: # %bb.0: 1929 ; HASWELL-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:1.00] 1930 ; HASWELL-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00] 1931 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1932 ; 1933 ; HASWELL-LABEL: test_pextrw: 1934 ; HASWELL: # %bb.0: 1935 ; 
HASWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00] 1936 ; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00] 1937 ; HASWELL-NEXT: retq # sched: [7:1.00] 1938 ; 1939 ; BROADWELL-SSE-LABEL: test_pextrw: 1940 ; BROADWELL-SSE: # %bb.0: 1941 ; BROADWELL-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:1.00] 1942 ; BROADWELL-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00] 1943 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1944 ; 1945 ; BROADWELL-LABEL: test_pextrw: 1946 ; BROADWELL: # %bb.0: 1947 ; BROADWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00] 1948 ; BROADWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00] 1949 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1950 ; 1951 ; SKYLAKE-SSE-LABEL: test_pextrw: 1952 ; SKYLAKE-SSE: # %bb.0: 1953 ; SKYLAKE-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] 1954 ; SKYLAKE-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00] 1955 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1956 ; 1957 ; SKYLAKE-LABEL: test_pextrw: 1958 ; SKYLAKE: # %bb.0: 1959 ; SKYLAKE-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] 1960 ; SKYLAKE-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00] 1961 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1962 ; 1963 ; SKX-SSE-LABEL: test_pextrw: 1964 ; SKX-SSE: # %bb.0: 1965 ; SKX-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] 1966 ; SKX-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00] 1967 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1968 ; 1969 ; SKX-LABEL: test_pextrw: 1970 ; SKX: # %bb.0: 1971 ; SKX-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] 1972 ; SKX-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00] 1973 ; SKX-NEXT: retq # sched: [7:1.00] 1974 ; 1975 ; BTVER2-SSE-LABEL: test_pextrw: 1976 ; BTVER2-SSE: # %bb.0: 1977 ; BTVER2-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] 1978 ; BTVER2-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [3:1.00] 1979 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1980 ; 1981 ; BTVER2-LABEL: test_pextrw: 1982 ; BTVER2: # %bb.0: 1983 ; BTVER2-NEXT: vpextrw $3, %xmm0, %eax # 
sched: [3:1.00] 1984 ; BTVER2-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [3:1.00] 1985 ; BTVER2-NEXT: retq # sched: [4:1.00] 1986 ; 1987 ; ZNVER1-SSE-LABEL: test_pextrw: 1988 ; ZNVER1-SSE: # %bb.0: 1989 ; ZNVER1-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:2.00] 1990 ; ZNVER1-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:3.00] 1991 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1992 ; 1993 ; ZNVER1-LABEL: test_pextrw: 1994 ; ZNVER1: # %bb.0: 1995 ; ZNVER1-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:2.00] 1996 ; ZNVER1-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:3.00] 1997 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1998 %1 = extractelement <8 x i16> %a0, i32 3 1999 %2 = extractelement <8 x i16> %a0, i32 1 2000 store i16 %2, i16 *%a1 2001 %3 = zext i16 %1 to i32 2002 ret i32 %3 2003 } 2004 2005 define <8 x i16> @test_phminposuw(<8 x i16> *%a0) { 2006 ; GENERIC-LABEL: test_phminposuw: 2007 ; GENERIC: # %bb.0: 2008 ; GENERIC-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00] 2009 ; GENERIC-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00] 2010 ; GENERIC-NEXT: retq # sched: [1:1.00] 2011 ; 2012 ; SLM-LABEL: test_phminposuw: 2013 ; SLM: # %bb.0: 2014 ; SLM-NEXT: phminposuw (%rdi), %xmm0 # sched: [7:1.00] 2015 ; SLM-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00] 2016 ; SLM-NEXT: retq # sched: [4:1.00] 2017 ; 2018 ; SANDY-SSE-LABEL: test_phminposuw: 2019 ; SANDY-SSE: # %bb.0: 2020 ; SANDY-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00] 2021 ; SANDY-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00] 2022 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2023 ; 2024 ; SANDY-LABEL: test_phminposuw: 2025 ; SANDY: # %bb.0: 2026 ; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00] 2027 ; SANDY-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] 2028 ; SANDY-NEXT: retq # sched: [1:1.00] 2029 ; 2030 ; HASWELL-SSE-LABEL: test_phminposuw: 2031 ; HASWELL-SSE: # %bb.0: 2032 ; HASWELL-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00] 2033 ; HASWELL-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: 
[5:1.00] 2034 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2035 ; 2036 ; HASWELL-LABEL: test_phminposuw: 2037 ; HASWELL: # %bb.0: 2038 ; HASWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00] 2039 ; HASWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] 2040 ; HASWELL-NEXT: retq # sched: [7:1.00] 2041 ; 2042 ; BROADWELL-SSE-LABEL: test_phminposuw: 2043 ; BROADWELL-SSE: # %bb.0: 2044 ; BROADWELL-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:1.00] 2045 ; BROADWELL-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00] 2046 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2047 ; 2048 ; BROADWELL-LABEL: test_phminposuw: 2049 ; BROADWELL: # %bb.0: 2050 ; BROADWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:1.00] 2051 ; BROADWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] 2052 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2053 ; 2054 ; SKYLAKE-SSE-LABEL: test_phminposuw: 2055 ; SKYLAKE-SSE: # %bb.0: 2056 ; SKYLAKE-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:0.50] 2057 ; SKYLAKE-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:0.50] 2058 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2059 ; 2060 ; SKYLAKE-LABEL: test_phminposuw: 2061 ; SKYLAKE: # %bb.0: 2062 ; SKYLAKE-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:0.50] 2063 ; SKYLAKE-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.50] 2064 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2065 ; 2066 ; SKX-SSE-LABEL: test_phminposuw: 2067 ; SKX-SSE: # %bb.0: 2068 ; SKX-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:1.00] 2069 ; SKX-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00] 2070 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2071 ; 2072 ; SKX-LABEL: test_phminposuw: 2073 ; SKX: # %bb.0: 2074 ; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:1.00] 2075 ; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:1.00] 2076 ; SKX-NEXT: retq # sched: [7:1.00] 2077 ; 2078 ; BTVER2-SSE-LABEL: test_phminposuw: 2079 ; BTVER2-SSE: # %bb.0: 2080 ; BTVER2-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [7:1.00] 2081 ; BTVER2-SSE-NEXT: phminposuw %xmm0, %xmm0 # 
sched: [2:1.00] 2082 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2083 ; 2084 ; BTVER2-LABEL: test_phminposuw: 2085 ; BTVER2: # %bb.0: 2086 ; BTVER2-NEXT: vphminposuw (%rdi), %xmm0 # sched: [7:1.00] 2087 ; BTVER2-NEXT: vphminposuw %xmm0, %xmm0 # sched: [2:1.00] 2088 ; BTVER2-NEXT: retq # sched: [4:1.00] 2089 ; 2090 ; ZNVER1-SSE-LABEL: test_phminposuw: 2091 ; ZNVER1-SSE: # %bb.0: 2092 ; ZNVER1-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00] 2093 ; ZNVER1-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00] 2094 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2095 ; 2096 ; ZNVER1-LABEL: test_phminposuw: 2097 ; ZNVER1: # %bb.0: 2098 ; ZNVER1-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00] 2099 ; ZNVER1-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:1.00] 2100 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2101 %1 = load <8 x i16>, <8 x i16> *%a0, align 16 2102 %2 = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %1) 2103 %3 = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %2) 2104 ret <8 x i16> %3 2105 } 2106 declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone 2107 2108 define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) { 2109 ; GENERIC-LABEL: test_pinsrb: 2110 ; GENERIC: # %bb.0: 2111 ; GENERIC-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:1.00] 2112 ; GENERIC-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50] 2113 ; GENERIC-NEXT: retq # sched: [1:1.00] 2114 ; 2115 ; SLM-LABEL: test_pinsrb: 2116 ; SLM: # %bb.0: 2117 ; SLM-NEXT: pinsrb $1, %edi, %xmm0 # sched: [1:1.00] 2118 ; SLM-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [4:1.00] 2119 ; SLM-NEXT: retq # sched: [4:1.00] 2120 ; 2121 ; SANDY-SSE-LABEL: test_pinsrb: 2122 ; SANDY-SSE: # %bb.0: 2123 ; SANDY-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:1.00] 2124 ; SANDY-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50] 2125 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2126 ; 2127 ; SANDY-LABEL: test_pinsrb: 2128 ; SANDY: # %bb.0: 2129 ; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] 2130 ; 
SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] 2131 ; SANDY-NEXT: retq # sched: [1:1.00] 2132 ; 2133 ; HASWELL-SSE-LABEL: test_pinsrb: 2134 ; HASWELL-SSE: # %bb.0: 2135 ; HASWELL-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00] 2136 ; HASWELL-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00] 2137 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2138 ; 2139 ; HASWELL-LABEL: test_pinsrb: 2140 ; HASWELL: # %bb.0: 2141 ; HASWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] 2142 ; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] 2143 ; HASWELL-NEXT: retq # sched: [7:1.00] 2144 ; 2145 ; BROADWELL-SSE-LABEL: test_pinsrb: 2146 ; BROADWELL-SSE: # %bb.0: 2147 ; BROADWELL-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00] 2148 ; BROADWELL-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00] 2149 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2150 ; 2151 ; BROADWELL-LABEL: test_pinsrb: 2152 ; BROADWELL: # %bb.0: 2153 ; BROADWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] 2154 ; BROADWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] 2155 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2156 ; 2157 ; SKYLAKE-SSE-LABEL: test_pinsrb: 2158 ; SKYLAKE-SSE: # %bb.0: 2159 ; SKYLAKE-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00] 2160 ; SKYLAKE-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00] 2161 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2162 ; 2163 ; SKYLAKE-LABEL: test_pinsrb: 2164 ; SKYLAKE: # %bb.0: 2165 ; SKYLAKE-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] 2166 ; SKYLAKE-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] 2167 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2168 ; 2169 ; SKX-SSE-LABEL: test_pinsrb: 2170 ; SKX-SSE: # %bb.0: 2171 ; SKX-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00] 2172 ; SKX-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00] 2173 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2174 ; 2175 ; SKX-LABEL: test_pinsrb: 2176 ; SKX: # %bb.0: 2177 ; SKX-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 
# sched: [2:2.00] 2178 ; SKX-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] 2179 ; SKX-NEXT: retq # sched: [7:1.00] 2180 ; 2181 ; BTVER2-SSE-LABEL: test_pinsrb: 2182 ; BTVER2-SSE: # %bb.0: 2183 ; BTVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [7:0.50] 2184 ; BTVER2-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [4:1.00] 2185 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2186 ; 2187 ; BTVER2-LABEL: test_pinsrb: 2188 ; BTVER2: # %bb.0: 2189 ; BTVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [7:0.50] 2190 ; BTVER2-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [4:1.00] 2191 ; BTVER2-NEXT: retq # sched: [4:1.00] 2192 ; 2193 ; ZNVER1-SSE-LABEL: test_pinsrb: 2194 ; ZNVER1-SSE: # %bb.0: 2195 ; ZNVER1-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [1:0.25] 2196 ; ZNVER1-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [8:0.50] 2197 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2198 ; 2199 ; ZNVER1-LABEL: test_pinsrb: 2200 ; ZNVER1: # %bb.0: 2201 ; ZNVER1-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.25] 2202 ; ZNVER1-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [8:0.50] 2203 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2204 %1 = insertelement <16 x i8> %a0, i8 %a1, i32 1 2205 %2 = load i8, i8 *%a2 2206 %3 = insertelement <16 x i8> %1, i8 %2, i32 3 2207 ret <16 x i8> %3 2208 } 2209 2210 define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) { 2211 ; GENERIC-LABEL: test_pinsrd: 2212 ; GENERIC: # %bb.0: 2213 ; GENERIC-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:1.00] 2214 ; GENERIC-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [7:0.50] 2215 ; GENERIC-NEXT: retq # sched: [1:1.00] 2216 ; 2217 ; SLM-LABEL: test_pinsrd: 2218 ; SLM: # %bb.0: 2219 ; SLM-NEXT: pinsrd $1, %edi, %xmm0 # sched: [1:1.00] 2220 ; SLM-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [4:1.00] 2221 ; SLM-NEXT: retq # sched: [4:1.00] 2222 ; 2223 ; SANDY-SSE-LABEL: test_pinsrd: 2224 ; SANDY-SSE: # %bb.0: 2225 ; SANDY-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:1.00] 2226 ; SANDY-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # 
sched: [7:0.50] 2227 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2228 ; 2229 ; SANDY-LABEL: test_pinsrd: 2230 ; SANDY: # %bb.0: 2231 ; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] 2232 ; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] 2233 ; SANDY-NEXT: retq # sched: [1:1.00] 2234 ; 2235 ; HASWELL-SSE-LABEL: test_pinsrd: 2236 ; HASWELL-SSE: # %bb.0: 2237 ; HASWELL-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00] 2238 ; HASWELL-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00] 2239 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2240 ; 2241 ; HASWELL-LABEL: test_pinsrd: 2242 ; HASWELL: # %bb.0: 2243 ; HASWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] 2244 ; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] 2245 ; HASWELL-NEXT: retq # sched: [7:1.00] 2246 ; 2247 ; BROADWELL-SSE-LABEL: test_pinsrd: 2248 ; BROADWELL-SSE: # %bb.0: 2249 ; BROADWELL-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00] 2250 ; BROADWELL-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00] 2251 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2252 ; 2253 ; BROADWELL-LABEL: test_pinsrd: 2254 ; BROADWELL: # %bb.0: 2255 ; BROADWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] 2256 ; BROADWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] 2257 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2258 ; 2259 ; SKYLAKE-SSE-LABEL: test_pinsrd: 2260 ; SKYLAKE-SSE: # %bb.0: 2261 ; SKYLAKE-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00] 2262 ; SKYLAKE-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00] 2263 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2264 ; 2265 ; SKYLAKE-LABEL: test_pinsrd: 2266 ; SKYLAKE: # %bb.0: 2267 ; SKYLAKE-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] 2268 ; SKYLAKE-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] 2269 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2270 ; 2271 ; SKX-SSE-LABEL: test_pinsrd: 2272 ; SKX-SSE: # %bb.0: 2273 ; SKX-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00] 2274 ; 
SKX-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00] 2275 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2276 ; 2277 ; SKX-LABEL: test_pinsrd: 2278 ; SKX: # %bb.0: 2279 ; SKX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] 2280 ; SKX-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] 2281 ; SKX-NEXT: retq # sched: [7:1.00] 2282 ; 2283 ; BTVER2-SSE-LABEL: test_pinsrd: 2284 ; BTVER2-SSE: # %bb.0: 2285 ; BTVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [7:0.50] 2286 ; BTVER2-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [4:1.00] 2287 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2288 ; 2289 ; BTVER2-LABEL: test_pinsrd: 2290 ; BTVER2: # %bb.0: 2291 ; BTVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [7:0.50] 2292 ; BTVER2-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [4:1.00] 2293 ; BTVER2-NEXT: retq # sched: [4:1.00] 2294 ; 2295 ; ZNVER1-SSE-LABEL: test_pinsrd: 2296 ; ZNVER1-SSE: # %bb.0: 2297 ; ZNVER1-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [1:0.25] 2298 ; ZNVER1-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [8:0.50] 2299 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2300 ; 2301 ; ZNVER1-LABEL: test_pinsrd: 2302 ; ZNVER1: # %bb.0: 2303 ; ZNVER1-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.25] 2304 ; ZNVER1-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [8:0.50] 2305 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2306 %1 = insertelement <4 x i32> %a0, i32 %a1, i32 1 2307 %2 = load i32, i32 *%a2 2308 %3 = insertelement <4 x i32> %1, i32 %2, i32 3 2309 ret <4 x i32> %3 2310 } 2311 ; Scheduling test for (v)pinsrq - inserts i64 %a2 into lane 1 of %a0 and an i64 loaded from %a3 into lane 1 of %a1, then adds the two vectors; the checks cover the register-source and memory-source forms plus the (v)paddq. 2312 define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { 2313 ; GENERIC-LABEL: test_pinsrq: 2314 ; GENERIC: # %bb.0: 2315 ; GENERIC-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:1.00] 2316 ; GENERIC-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50] 2317 ; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 2318 ; GENERIC-NEXT: retq # sched: [1:1.00] 2319 ; 2320 ; SLM-LABEL: test_pinsrq: 2321 ; SLM: # %bb.0: 2322 ; SLM-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [4:1.00] 
2323 ; SLM-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [1:1.00] 2324 ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 2325 ; SLM-NEXT: retq # sched: [4:1.00] 2326 ; 2327 ; SANDY-SSE-LABEL: test_pinsrq: 2328 ; SANDY-SSE: # %bb.0: 2329 ; SANDY-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:1.00] 2330 ; SANDY-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50] 2331 ; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 2332 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2333 ; 2334 ; SANDY-LABEL: test_pinsrq: 2335 ; SANDY: # %bb.0: 2336 ; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00] 2337 ; SANDY-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [7:0.50] 2338 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2339 ; SANDY-NEXT: retq # sched: [1:1.00] 2340 ; 2341 ; HASWELL-SSE-LABEL: test_pinsrq: 2342 ; HASWELL-SSE: # %bb.0: 2343 ; HASWELL-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00] 2344 ; HASWELL-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00] 2345 ; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 2346 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2347 ; 2348 ; HASWELL-LABEL: test_pinsrq: 2349 ; HASWELL: # %bb.0: 2350 ; HASWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] 2351 ; HASWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00] 2352 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2353 ; HASWELL-NEXT: retq # sched: [7:1.00] 2354 ; 2355 ; BROADWELL-SSE-LABEL: test_pinsrq: 2356 ; BROADWELL-SSE: # %bb.0: 2357 ; BROADWELL-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00] 2358 ; BROADWELL-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00] 2359 ; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 2360 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2361 ; 2362 ; BROADWELL-LABEL: test_pinsrq: 2363 ; BROADWELL: # %bb.0: 2364 ; BROADWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] 2365 ; BROADWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00] 2366 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, 
%xmm0 # sched: [1:0.50] 2367 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2368 ; 2369 ; SKYLAKE-SSE-LABEL: test_pinsrq: 2370 ; SKYLAKE-SSE: # %bb.0: 2371 ; SKYLAKE-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00] 2372 ; SKYLAKE-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00] 2373 ; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 2374 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2375 ; 2376 ; SKYLAKE-LABEL: test_pinsrq: 2377 ; SKYLAKE: # %bb.0: 2378 ; SKYLAKE-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] 2379 ; SKYLAKE-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00] 2380 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 2381 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2382 ; 2383 ; SKX-SSE-LABEL: test_pinsrq: 2384 ; SKX-SSE: # %bb.0: 2385 ; SKX-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00] 2386 ; SKX-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00] 2387 ; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 2388 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2389 ; 2390 ; SKX-LABEL: test_pinsrq: 2391 ; SKX: # %bb.0: 2392 ; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] 2393 ; SKX-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00] 2394 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 2395 ; SKX-NEXT: retq # sched: [7:1.00] 2396 ; 2397 ; BTVER2-SSE-LABEL: test_pinsrq: 2398 ; BTVER2-SSE: # %bb.0: 2399 ; BTVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [7:0.50] 2400 ; BTVER2-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [4:1.00] 2401 ; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 2402 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2403 ; 2404 ; BTVER2-LABEL: test_pinsrq: 2405 ; BTVER2: # %bb.0: 2406 ; BTVER2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [7:0.50] 2407 ; BTVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [4:1.00] 2408 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2409 ; BTVER2-NEXT: retq # sched: [4:1.00] 2410 ; 2411 ; ZNVER1-SSE-LABEL: test_pinsrq: 2412 ; ZNVER1-SSE: # %bb.0: 
2413 ; ZNVER1-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [8:0.50] 2414 ; ZNVER1-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [1:0.25] 2415 ; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] 2416 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2417 ; 2418 ; ZNVER1-LABEL: test_pinsrq: 2419 ; ZNVER1: # %bb.0: 2420 ; ZNVER1-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [8:0.50] 2421 ; ZNVER1-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:0.25] 2422 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 2423 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2424 %1 = insertelement <2 x i64> %a0, i64 %a2, i32 1 2425 %2 = load i64, i64 *%a3 2426 %3 = insertelement <2 x i64> %a1, i64 %2, i32 1 2427 %4 = add <2 x i64> %1, %3 2428 ret <2 x i64> %4 2429 } 2430 ; Scheduling test for (v)pmaxsb - calls the llvm.x86.sse41.pmaxsb intrinsic on %a0 and %a1, then again on that result and a vector loaded (align 16) from %a2, so both the register-source and the memory-source forms are checked. 2431 define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { 2432 ; GENERIC-LABEL: test_pmaxsb: 2433 ; GENERIC: # %bb.0: 2434 ; GENERIC-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] 2435 ; GENERIC-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] 2436 ; GENERIC-NEXT: retq # sched: [1:1.00] 2437 ; 2438 ; SLM-LABEL: test_pmaxsb: 2439 ; SLM: # %bb.0: 2440 ; SLM-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] 2441 ; SLM-NEXT: pmaxsb (%rdi), %xmm0 # sched: [4:1.00] 2442 ; SLM-NEXT: retq # sched: [4:1.00] 2443 ; 2444 ; SANDY-SSE-LABEL: test_pmaxsb: 2445 ; SANDY-SSE: # %bb.0: 2446 ; SANDY-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] 2447 ; SANDY-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] 2448 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2449 ; 2450 ; SANDY-LABEL: test_pmaxsb: 2451 ; SANDY: # %bb.0: 2452 ; SANDY-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2453 ; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 2454 ; SANDY-NEXT: retq # sched: [1:1.00] 2455 ; 2456 ; HASWELL-SSE-LABEL: test_pmaxsb: 2457 ; HASWELL-SSE: # %bb.0: 2458 ; HASWELL-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] 2459 ; HASWELL-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] 2460 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 
2461 ; 2462 ; HASWELL-LABEL: test_pmaxsb: 2463 ; HASWELL: # %bb.0: 2464 ; HASWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2465 ; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 2466 ; HASWELL-NEXT: retq # sched: [7:1.00] 2467 ; 2468 ; BROADWELL-SSE-LABEL: test_pmaxsb: 2469 ; BROADWELL-SSE: # %bb.0: 2470 ; BROADWELL-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] 2471 ; BROADWELL-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [6:0.50] 2472 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2473 ; 2474 ; BROADWELL-LABEL: test_pmaxsb: 2475 ; BROADWELL: # %bb.0: 2476 ; BROADWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2477 ; BROADWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 2478 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2479 ; 2480 ; SKYLAKE-SSE-LABEL: test_pmaxsb: 2481 ; SKYLAKE-SSE: # %bb.0: 2482 ; SKYLAKE-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] 2483 ; SKYLAKE-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] 2484 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2485 ; 2486 ; SKYLAKE-LABEL: test_pmaxsb: 2487 ; SKYLAKE: # %bb.0: 2488 ; SKYLAKE-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2489 ; SKYLAKE-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 2490 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2491 ; 2492 ; SKX-SSE-LABEL: test_pmaxsb: 2493 ; SKX-SSE: # %bb.0: 2494 ; SKX-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] 2495 ; SKX-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] 2496 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2497 ; 2498 ; SKX-LABEL: test_pmaxsb: 2499 ; SKX: # %bb.0: 2500 ; SKX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2501 ; SKX-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 2502 ; SKX-NEXT: retq # sched: [7:1.00] 2503 ; 2504 ; BTVER2-SSE-LABEL: test_pmaxsb: 2505 ; BTVER2-SSE: # %bb.0: 2506 ; BTVER2-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] 2507 ; BTVER2-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [6:1.00] 2508 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2509 ; 2510 ; BTVER2-LABEL: test_pmaxsb: 
2511 ; BTVER2: # %bb.0: 2512 ; BTVER2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2513 ; BTVER2-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 2514 ; BTVER2-NEXT: retq # sched: [4:1.00] 2515 ; 2516 ; ZNVER1-SSE-LABEL: test_pmaxsb: 2517 ; ZNVER1-SSE: # %bb.0: 2518 ; ZNVER1-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.25] 2519 ; ZNVER1-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [8:0.50] 2520 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2521 ; 2522 ; ZNVER1-LABEL: test_pmaxsb: 2523 ; ZNVER1: # %bb.0: 2524 ; ZNVER1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 2525 ; ZNVER1-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 2526 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2527 %1 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) 2528 %2 = load <16 x i8>, <16 x i8> *%a2, align 16 2529 %3 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %1, <16 x i8> %2) 2530 ret <16 x i8> %3 2531 } 2532 declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone 2533 ; Scheduling test for (v)pmaxsd - calls the llvm.x86.sse41.pmaxsd intrinsic on %a0 and %a1, then again on that result and a vector loaded (align 16) from %a2, so both the register-source and the memory-source forms are checked. 2534 define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 2535 ; GENERIC-LABEL: test_pmaxsd: 2536 ; GENERIC: # %bb.0: 2537 ; GENERIC-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] 2538 ; GENERIC-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] 2539 ; GENERIC-NEXT: retq # sched: [1:1.00] 2540 ; 2541 ; SLM-LABEL: test_pmaxsd: 2542 ; SLM: # %bb.0: 2543 ; SLM-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] 2544 ; SLM-NEXT: pmaxsd (%rdi), %xmm0 # sched: [4:1.00] 2545 ; SLM-NEXT: retq # sched: [4:1.00] 2546 ; 2547 ; SANDY-SSE-LABEL: test_pmaxsd: 2548 ; SANDY-SSE: # %bb.0: 2549 ; SANDY-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] 2550 ; SANDY-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] 2551 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2552 ; 2553 ; SANDY-LABEL: test_pmaxsd: 2554 ; SANDY: # %bb.0: 2555 ; SANDY-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2556 ; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 2557 ; SANDY-NEXT: retq # sched: 
[1:1.00] 2558 ; 2559 ; HASWELL-SSE-LABEL: test_pmaxsd: 2560 ; HASWELL-SSE: # %bb.0: 2561 ; HASWELL-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] 2562 ; HASWELL-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] 2563 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2564 ; 2565 ; HASWELL-LABEL: test_pmaxsd: 2566 ; HASWELL: # %bb.0: 2567 ; HASWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2568 ; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 2569 ; HASWELL-NEXT: retq # sched: [7:1.00] 2570 ; 2571 ; BROADWELL-SSE-LABEL: test_pmaxsd: 2572 ; BROADWELL-SSE: # %bb.0: 2573 ; BROADWELL-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] 2574 ; BROADWELL-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [6:0.50] 2575 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2576 ; 2577 ; BROADWELL-LABEL: test_pmaxsd: 2578 ; BROADWELL: # %bb.0: 2579 ; BROADWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2580 ; BROADWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 2581 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2582 ; 2583 ; SKYLAKE-SSE-LABEL: test_pmaxsd: 2584 ; SKYLAKE-SSE: # %bb.0: 2585 ; SKYLAKE-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] 2586 ; SKYLAKE-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] 2587 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2588 ; 2589 ; SKYLAKE-LABEL: test_pmaxsd: 2590 ; SKYLAKE: # %bb.0: 2591 ; SKYLAKE-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2592 ; SKYLAKE-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 2593 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2594 ; 2595 ; SKX-SSE-LABEL: test_pmaxsd: 2596 ; SKX-SSE: # %bb.0: 2597 ; SKX-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] 2598 ; SKX-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] 2599 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2600 ; 2601 ; SKX-LABEL: test_pmaxsd: 2602 ; SKX: # %bb.0: 2603 ; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2604 ; SKX-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 2605 ; SKX-NEXT: retq # sched: [7:1.00] 2606 ; 2607 ; 
BTVER2-SSE-LABEL: test_pmaxsd: 2608 ; BTVER2-SSE: # %bb.0: 2609 ; BTVER2-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] 2610 ; BTVER2-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [6:1.00] 2611 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2612 ; 2613 ; BTVER2-LABEL: test_pmaxsd: 2614 ; BTVER2: # %bb.0: 2615 ; BTVER2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2616 ; BTVER2-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 2617 ; BTVER2-NEXT: retq # sched: [4:1.00] 2618 ; 2619 ; ZNVER1-SSE-LABEL: test_pmaxsd: 2620 ; ZNVER1-SSE: # %bb.0: 2621 ; ZNVER1-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.25] 2622 ; ZNVER1-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [8:0.50] 2623 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2624 ; 2625 ; ZNVER1-LABEL: test_pmaxsd: 2626 ; ZNVER1: # %bb.0: 2627 ; ZNVER1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 2628 ; ZNVER1-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 2629 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2630 %1 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) 2631 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 2632 %3 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %1, <4 x i32> %2) 2633 ret <4 x i32> %3 2634 } 2635 declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone 2636 ; Scheduling test for (v)pmaxud - calls the llvm.x86.sse41.pmaxud intrinsic on %a0 and %a1, then again on that result and a vector loaded (align 16) from %a2, so both the register-source and the memory-source forms are checked. 2637 define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 2638 ; GENERIC-LABEL: test_pmaxud: 2639 ; GENERIC: # %bb.0: 2640 ; GENERIC-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] 2641 ; GENERIC-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] 2642 ; GENERIC-NEXT: retq # sched: [1:1.00] 2643 ; 2644 ; SLM-LABEL: test_pmaxud: 2645 ; SLM: # %bb.0: 2646 ; SLM-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] 2647 ; SLM-NEXT: pmaxud (%rdi), %xmm0 # sched: [4:1.00] 2648 ; SLM-NEXT: retq # sched: [4:1.00] 2649 ; 2650 ; SANDY-SSE-LABEL: test_pmaxud: 2651 ; SANDY-SSE: # %bb.0: 2652 ; SANDY-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] 2653 ; SANDY-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] 2654 ; 
SANDY-SSE-NEXT: retq # sched: [1:1.00] 2655 ; 2656 ; SANDY-LABEL: test_pmaxud: 2657 ; SANDY: # %bb.0: 2658 ; SANDY-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2659 ; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 2660 ; SANDY-NEXT: retq # sched: [1:1.00] 2661 ; 2662 ; HASWELL-SSE-LABEL: test_pmaxud: 2663 ; HASWELL-SSE: # %bb.0: 2664 ; HASWELL-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] 2665 ; HASWELL-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] 2666 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2667 ; 2668 ; HASWELL-LABEL: test_pmaxud: 2669 ; HASWELL: # %bb.0: 2670 ; HASWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2671 ; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 2672 ; HASWELL-NEXT: retq # sched: [7:1.00] 2673 ; 2674 ; BROADWELL-SSE-LABEL: test_pmaxud: 2675 ; BROADWELL-SSE: # %bb.0: 2676 ; BROADWELL-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] 2677 ; BROADWELL-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [6:0.50] 2678 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2679 ; 2680 ; BROADWELL-LABEL: test_pmaxud: 2681 ; BROADWELL: # %bb.0: 2682 ; BROADWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2683 ; BROADWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 2684 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2685 ; 2686 ; SKYLAKE-SSE-LABEL: test_pmaxud: 2687 ; SKYLAKE-SSE: # %bb.0: 2688 ; SKYLAKE-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] 2689 ; SKYLAKE-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] 2690 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2691 ; 2692 ; SKYLAKE-LABEL: test_pmaxud: 2693 ; SKYLAKE: # %bb.0: 2694 ; SKYLAKE-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2695 ; SKYLAKE-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 2696 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2697 ; 2698 ; SKX-SSE-LABEL: test_pmaxud: 2699 ; SKX-SSE: # %bb.0: 2700 ; SKX-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] 2701 ; SKX-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] 2702 ; SKX-SSE-NEXT: retq # 
sched: [7:1.00] 2703 ; 2704 ; SKX-LABEL: test_pmaxud: 2705 ; SKX: # %bb.0: 2706 ; SKX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2707 ; SKX-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 2708 ; SKX-NEXT: retq # sched: [7:1.00] 2709 ; 2710 ; BTVER2-SSE-LABEL: test_pmaxud: 2711 ; BTVER2-SSE: # %bb.0: 2712 ; BTVER2-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] 2713 ; BTVER2-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [6:1.00] 2714 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2715 ; 2716 ; BTVER2-LABEL: test_pmaxud: 2717 ; BTVER2: # %bb.0: 2718 ; BTVER2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2719 ; BTVER2-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 2720 ; BTVER2-NEXT: retq # sched: [4:1.00] 2721 ; 2722 ; ZNVER1-SSE-LABEL: test_pmaxud: 2723 ; ZNVER1-SSE: # %bb.0: 2724 ; ZNVER1-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.25] 2725 ; ZNVER1-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [8:0.50] 2726 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2727 ; 2728 ; ZNVER1-LABEL: test_pmaxud: 2729 ; ZNVER1: # %bb.0: 2730 ; ZNVER1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 2731 ; ZNVER1-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 2732 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2733 %1 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) 2734 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 2735 %3 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %1, <4 x i32> %2) 2736 ret <4 x i32> %3 2737 } 2738 declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone 2739 ; Scheduling test for (v)pmaxuw - calls the llvm.x86.sse41.pmaxuw intrinsic on %a0 and %a1, then again on that result and a vector loaded (align 16) from %a2, so both the register-source and the memory-source forms are checked. 2740 define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 2741 ; GENERIC-LABEL: test_pmaxuw: 2742 ; GENERIC: # %bb.0: 2743 ; GENERIC-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] 2744 ; GENERIC-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] 2745 ; GENERIC-NEXT: retq # sched: [1:1.00] 2746 ; 2747 ; SLM-LABEL: test_pmaxuw: 2748 ; SLM: # %bb.0: 2749 ; SLM-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] 2750 ; SLM-NEXT: pmaxuw (%rdi), %xmm0 # 
sched: [4:1.00] 2751 ; SLM-NEXT: retq # sched: [4:1.00] 2752 ; 2753 ; SANDY-SSE-LABEL: test_pmaxuw: 2754 ; SANDY-SSE: # %bb.0: 2755 ; SANDY-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] 2756 ; SANDY-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] 2757 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2758 ; 2759 ; SANDY-LABEL: test_pmaxuw: 2760 ; SANDY: # %bb.0: 2761 ; SANDY-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2762 ; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 2763 ; SANDY-NEXT: retq # sched: [1:1.00] 2764 ; 2765 ; HASWELL-SSE-LABEL: test_pmaxuw: 2766 ; HASWELL-SSE: # %bb.0: 2767 ; HASWELL-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] 2768 ; HASWELL-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] 2769 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2770 ; 2771 ; HASWELL-LABEL: test_pmaxuw: 2772 ; HASWELL: # %bb.0: 2773 ; HASWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2774 ; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 2775 ; HASWELL-NEXT: retq # sched: [7:1.00] 2776 ; 2777 ; BROADWELL-SSE-LABEL: test_pmaxuw: 2778 ; BROADWELL-SSE: # %bb.0: 2779 ; BROADWELL-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] 2780 ; BROADWELL-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [6:0.50] 2781 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2782 ; 2783 ; BROADWELL-LABEL: test_pmaxuw: 2784 ; BROADWELL: # %bb.0: 2785 ; BROADWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2786 ; BROADWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 2787 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2788 ; 2789 ; SKYLAKE-SSE-LABEL: test_pmaxuw: 2790 ; SKYLAKE-SSE: # %bb.0: 2791 ; SKYLAKE-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] 2792 ; SKYLAKE-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] 2793 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2794 ; 2795 ; SKYLAKE-LABEL: test_pmaxuw: 2796 ; SKYLAKE: # %bb.0: 2797 ; SKYLAKE-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2798 ; SKYLAKE-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 
2799 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2800 ; 2801 ; SKX-SSE-LABEL: test_pmaxuw: 2802 ; SKX-SSE: # %bb.0: 2803 ; SKX-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] 2804 ; SKX-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] 2805 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2806 ; 2807 ; SKX-LABEL: test_pmaxuw: 2808 ; SKX: # %bb.0: 2809 ; SKX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2810 ; SKX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 2811 ; SKX-NEXT: retq # sched: [7:1.00] 2812 ; 2813 ; BTVER2-SSE-LABEL: test_pmaxuw: 2814 ; BTVER2-SSE: # %bb.0: 2815 ; BTVER2-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] 2816 ; BTVER2-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [6:1.00] 2817 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2818 ; 2819 ; BTVER2-LABEL: test_pmaxuw: 2820 ; BTVER2: # %bb.0: 2821 ; BTVER2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2822 ; BTVER2-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 2823 ; BTVER2-NEXT: retq # sched: [4:1.00] 2824 ; 2825 ; ZNVER1-SSE-LABEL: test_pmaxuw: 2826 ; ZNVER1-SSE: # %bb.0: 2827 ; ZNVER1-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.25] 2828 ; ZNVER1-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [8:0.50] 2829 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2830 ; 2831 ; ZNVER1-LABEL: test_pmaxuw: 2832 ; ZNVER1: # %bb.0: 2833 ; ZNVER1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 2834 ; ZNVER1-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 2835 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2836 %1 = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) 2837 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 2838 %3 = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %1, <8 x i16> %2) 2839 ret <8 x i16> %3 2840 } 2841 declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone 2842 ; Scheduling test for (v)pminsb - calls the llvm.x86.sse41.pminsb intrinsic on %a0 and %a1, then again on that result and a vector loaded (align 16) from %a2, so both the register-source and the memory-source forms are checked. 2843 define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { 2844 ; GENERIC-LABEL: test_pminsb: 2845 ; GENERIC: # %bb.0: 2846 ; GENERIC-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] 
2847 ; GENERIC-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] 2848 ; GENERIC-NEXT: retq # sched: [1:1.00] 2849 ; 2850 ; SLM-LABEL: test_pminsb: 2851 ; SLM: # %bb.0: 2852 ; SLM-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] 2853 ; SLM-NEXT: pminsb (%rdi), %xmm0 # sched: [4:1.00] 2854 ; SLM-NEXT: retq # sched: [4:1.00] 2855 ; 2856 ; SANDY-SSE-LABEL: test_pminsb: 2857 ; SANDY-SSE: # %bb.0: 2858 ; SANDY-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] 2859 ; SANDY-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] 2860 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2861 ; 2862 ; SANDY-LABEL: test_pminsb: 2863 ; SANDY: # %bb.0: 2864 ; SANDY-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2865 ; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 2866 ; SANDY-NEXT: retq # sched: [1:1.00] 2867 ; 2868 ; HASWELL-SSE-LABEL: test_pminsb: 2869 ; HASWELL-SSE: # %bb.0: 2870 ; HASWELL-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] 2871 ; HASWELL-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] 2872 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2873 ; 2874 ; HASWELL-LABEL: test_pminsb: 2875 ; HASWELL: # %bb.0: 2876 ; HASWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2877 ; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 2878 ; HASWELL-NEXT: retq # sched: [7:1.00] 2879 ; 2880 ; BROADWELL-SSE-LABEL: test_pminsb: 2881 ; BROADWELL-SSE: # %bb.0: 2882 ; BROADWELL-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] 2883 ; BROADWELL-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [6:0.50] 2884 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2885 ; 2886 ; BROADWELL-LABEL: test_pminsb: 2887 ; BROADWELL: # %bb.0: 2888 ; BROADWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2889 ; BROADWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 2890 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2891 ; 2892 ; SKYLAKE-SSE-LABEL: test_pminsb: 2893 ; SKYLAKE-SSE: # %bb.0: 2894 ; SKYLAKE-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] 2895 ; SKYLAKE-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: 
[7:0.50] 2896 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2897 ; 2898 ; SKYLAKE-LABEL: test_pminsb: 2899 ; SKYLAKE: # %bb.0: 2900 ; SKYLAKE-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2901 ; SKYLAKE-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 2902 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2903 ; 2904 ; SKX-SSE-LABEL: test_pminsb: 2905 ; SKX-SSE: # %bb.0: 2906 ; SKX-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] 2907 ; SKX-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] 2908 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2909 ; 2910 ; SKX-LABEL: test_pminsb: 2911 ; SKX: # %bb.0: 2912 ; SKX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2913 ; SKX-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 2914 ; SKX-NEXT: retq # sched: [7:1.00] 2915 ; 2916 ; BTVER2-SSE-LABEL: test_pminsb: 2917 ; BTVER2-SSE: # %bb.0: 2918 ; BTVER2-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] 2919 ; BTVER2-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [6:1.00] 2920 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2921 ; 2922 ; BTVER2-LABEL: test_pminsb: 2923 ; BTVER2: # %bb.0: 2924 ; BTVER2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2925 ; BTVER2-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 2926 ; BTVER2-NEXT: retq # sched: [4:1.00] 2927 ; 2928 ; ZNVER1-SSE-LABEL: test_pminsb: 2929 ; ZNVER1-SSE: # %bb.0: 2930 ; ZNVER1-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.25] 2931 ; ZNVER1-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [8:0.50] 2932 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2933 ; 2934 ; ZNVER1-LABEL: test_pminsb: 2935 ; ZNVER1: # %bb.0: 2936 ; ZNVER1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 2937 ; ZNVER1-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 2938 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2939 %1 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) 2940 %2 = load <16 x i8>, <16 x i8> *%a2, align 16 2941 %3 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %1, <16 x i8> %2) 2942 ret <16 x i8> %3 2943 } 2944 declare <16 x i8> 
@llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone

; test_pminsd - scheduling-annotation test for the @llvm.x86.sse41.pminsd intrinsic.
; The IR applies the intrinsic to %a0 and %a1, then to that result and a vector
; loaded through %a2 (align 16). For every CPU configuration the assertions
; below expect one register-operand and one memory-operand (v)pminsd, each
; followed by the "# sched: [..]" annotation that llc prints under -print-schedule.
; The assertion lines are autogenerated (see the note on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_pminsd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SLM-LABEL: test_pminsd:
; SLM: # %bb.0:
; SLM-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: pminsd (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_pminsd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
; SANDY-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_pminsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_pminsd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
; HASWELL-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_pminsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_pminsd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
; BROADWELL-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [6:0.50]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pminsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_pminsd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
; SKYLAKE-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pminsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_pminsd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
; SKX-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pminsd:
; SKX: # %bb.0:
; SKX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_pminsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_pminsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_pminsd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_pminsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; First call: both operands are function arguments.
  %1 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
  ; Second call: one operand is loaded through %a2, giving the "(%rdi)" form above.
  %2 = load <4 x i32>, <4 x i32> *%a2, align 16
  %3 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %1, <4 x i32> %2)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone

; test_pminud - scheduling-annotation test for the @llvm.x86.sse41.pminud intrinsic.
; Same shape as the other pmin* tests in this file: the intrinsic is applied to
; %a0 and %a1, then to that result and a vector loaded through %a2 (align 16).
; The assertions expect a register-operand and a memory-operand (v)pminud with
; a "# sched: [..]" annotation for every CPU configuration. Autogenerated by
; utils/update_llc_test_checks.py; regenerate instead of hand-editing.
define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_pminud:
; GENERIC: # %bb.0:
; GENERIC-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SLM-LABEL: test_pminud:
; SLM: # %bb.0:
; SLM-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: pminud (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_pminud:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
; SANDY-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_pminud:
; SANDY: # %bb.0:
; SANDY-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_pminud:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
; HASWELL-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_pminud:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_pminud:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
; BROADWELL-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [6:0.50]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pminud:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_pminud:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
; SKYLAKE-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pminud:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_pminud:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
; SKX-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pminud:
; SKX: # %bb.0:
; SKX-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_pminud:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_pminud:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_pminud:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_pminud:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; First call: both operands are function arguments.
  %1 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
  ; Second call: one operand is loaded through %a2, giving the "(%rdi)" form above.
  %2 = load <4 x i32>, <4 x i32> *%a2, align 16
  %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %1, <4 x i32> %2)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone

; test_pminuw - scheduling-annotation test for the @llvm.x86.sse41.pminuw intrinsic.
; The intrinsic is applied to the <8 x i16> arguments %a0 and %a1, then to that
; result and a vector loaded through %a2 (align 16). The assertions expect a
; register-operand and a memory-operand (v)pminuw with a "# sched: [..]"
; annotation for every CPU configuration. Autogenerated by
; utils/update_llc_test_checks.py; regenerate instead of hand-editing.
define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; GENERIC-LABEL: test_pminuw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SLM-LABEL: test_pminuw:
; SLM: # %bb.0:
; SLM-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: pminuw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_pminuw:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
; SANDY-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_pminuw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_pminuw:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
; HASWELL-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_pminuw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_pminuw:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
; BROADWELL-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [6:0.50]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pminuw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_pminuw:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
; SKYLAKE-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pminuw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_pminuw:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
; SKX-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pminuw:
; SKX: # %bb.0:
; SKX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_pminuw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_pminuw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_pminuw:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_pminuw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; First call: both operands are function arguments.
  %1 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
  ; Second call: one operand is loaded through %a2, giving the "(%rdi)" form above.
  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
  %3 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %1, <8 x i16> %2)
  ret <8 x i16> %3
}
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone

; test_pmovsxbw - scheduling-annotation test for (v)pmovsxbw.
; The IR takes elements 0-7 of %a0, sign-extends them to <8 x i16>, sign-extends
; an <8 x i8> loaded from %a1 (align 1), and adds the two results. The
; assertions expect a register-source (v)pmovsxbw, a memory-source (v)pmovsxbw
; and a (v)paddw. Instruction order and register assignment differ between CPU
; configurations, and the SLM output has an extra movdqa into %xmm0.
; Autogenerated by utils/update_llc_test_checks.py; regenerate instead of hand-editing.
define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; GENERIC-LABEL: test_pmovsxbw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
; GENERIC-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SLM-LABEL: test_pmovsxbw:
; SLM: # %bb.0:
; SLM-NEXT: pmovsxbw (%rdi), %xmm1 # sched: [4:1.00]
; SLM-NEXT: pmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
; SLM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50]
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_pmovsxbw:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
; SANDY-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [7:0.50]
; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_pmovsxbw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_pmovsxbw:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
; HASWELL-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_pmovsxbw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_pmovsxbw:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pmovsxbw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_pmovsxbw:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
; SKYLAKE-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pmovsxbw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_pmovsxbw:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
; SKX-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmovsxbw:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
; SKX-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_pmovsxbw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
; BTVER2-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_pmovsxbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
; BTVER2-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_pmovsxbw:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_pmovsxbw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [8:0.50]
; ZNVER1-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Elements 0-7 of the register argument, sign-extended to i16.
  %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = sext <8 x i8> %1 to <8 x i16>
  ; Eight bytes loaded from %a1, sign-extended to i16.
  %3 = load <8 x i8>, <8 x i8>* %a1, align 1
  %4 = sext <8 x i8> %3 to <8 x i16>
  ; The add uses both extended values, so both appear in the output.
  %5 = add <8 x i16> %2, %4
  ret <8 x i16> %5
}

; test_pmovsxbd - scheduling-annotation test for (v)pmovsxbd.
; The IR takes elements 0-3 of %a0, sign-extends them to <4 x i32>, sign-extends
; a <4 x i8> loaded from %a1 (align 1), and adds the two results. The
; assertions expect a register-source (v)pmovsxbd, a memory-source (v)pmovsxbd
; and a (v)paddd. Instruction order and register assignment differ between CPU
; configurations, and the SLM output has an extra movdqa into %xmm0.
; Autogenerated by utils/update_llc_test_checks.py; regenerate instead of hand-editing.
define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; GENERIC-LABEL: test_pmovsxbd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
; GENERIC-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SLM-LABEL: test_pmovsxbd:
; SLM: # %bb.0:
; SLM-NEXT: pmovsxbd (%rdi), %xmm1 # sched: [4:1.00]
; SLM-NEXT: pmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_pmovsxbd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
; SANDY-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [7:0.50]
; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_pmovsxbd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_pmovsxbd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
; HASWELL-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_pmovsxbd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_pmovsxbd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pmovsxbd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_pmovsxbd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
; SKYLAKE-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pmovsxbd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_pmovsxbd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
; SKX-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmovsxbd:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
; SKX-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_pmovsxbd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
; BTVER2-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_pmovsxbd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
; BTVER2-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_pmovsxbd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_pmovsxbd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [8:0.50]
; ZNVER1-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Elements 0-3 of the register argument, sign-extended to i32.
  %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = sext <4 x i8> %1 to <4 x i32>
  ; Four bytes loaded from %a1, sign-extended to i32.
  %3 = load <4 x i8>, <4 x i8>* %a1, align 1
  %4 = sext <4 x i8> %3 to <4 x i32>
  ; The add uses both extended values, so both appear in the output.
  %5 = add <4 x i32> %2, %4
  ret <4 x i32> %5
}

; test_pmovsxbq - scheduling-annotation test for (v)pmovsxbq.
; The IR takes elements 0-1 of %a0, sign-extends them to <2 x i64>, sign-extends
; a <2 x i8> loaded from %a1 (align 1), and adds the two results. The
; assertions expect a register-source (v)pmovsxbq, a memory-source (v)pmovsxbq
; and a (v)paddq. Instruction order and register assignment differ between CPU
; configurations, and the SLM output has an extra movdqa into %xmm0.
; Autogenerated by utils/update_llc_test_checks.py; regenerate instead of hand-editing.
define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; GENERIC-LABEL: test_pmovsxbq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
; GENERIC-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SLM-LABEL: test_pmovsxbq:
; SLM: # %bb.0:
; SLM-NEXT: pmovsxbq (%rdi), %xmm1 # sched: [4:1.00]
; SLM-NEXT: pmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_pmovsxbq:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
; SANDY-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [7:0.50]
; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_pmovsxbq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_pmovsxbq:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
; HASWELL-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_pmovsxbq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_pmovsxbq:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pmovsxbq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_pmovsxbq:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
; SKYLAKE-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pmovsxbq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_pmovsxbq:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
; SKX-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmovsxbq:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
; SKX-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_pmovsxbq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
; BTVER2-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_pmovsxbq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
; BTVER2-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_pmovsxbq:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_pmovsxbq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [8:0.50]
; ZNVER1-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Elements 0-1 of the register argument, sign-extended to i64.
  %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %2 = sext <2 x i8> %1 to <2 x i64>
  ; Two bytes loaded from %a1, sign-extended to i64.
  %3 = load <2 x i8>, <2 x i8>* %a1, align 1
  %4 = sext <2 x i8> %3 to <2 x i64>
  ; The add uses both extended values, so both appear in the output.
  %5 = add <2 x i64> %2, %4
  ret <2 x i64> %5
}

; test_pmovsxdq - scheduling-annotation test for (v)pmovsxdq.
; The IR takes elements 0-1 of the <4 x i32> argument %a0, sign-extends them to
; <2 x i64>, sign-extends a <2 x i32> loaded from %a1 (align 1), and adds the
; two results. The assertions expect a register-source (v)pmovsxdq, a
; memory-source (v)pmovsxdq and a (v)paddq. Instruction order and register
; assignment differ between CPU configurations, and the SLM output has an
; extra movdqa into %xmm0.
; Autogenerated by utils/update_llc_test_checks.py; regenerate instead of hand-editing.
define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; GENERIC-LABEL: test_pmovsxdq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
; GENERIC-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [7:0.50]
; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SLM-LABEL: test_pmovsxdq:
; SLM: # %bb.0:
; SLM-NEXT: pmovsxdq (%rdi), %xmm1 # sched: [4:1.00]
; SLM-NEXT: pmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_pmovsxdq:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
; SANDY-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [7:0.50]
; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_pmovsxdq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_pmovsxdq:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
; HASWELL-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_pmovsxdq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_pmovsxdq:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pmovsxdq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_pmovsxdq:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
; SKYLAKE-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pmovsxdq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_pmovsxdq:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
; SKX-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmovsxdq:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
; SKX-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_pmovsxdq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
; BTVER2-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_pmovsxdq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
; BTVER2-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_pmovsxdq:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_pmovsxdq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [8:0.50]
; ZNVER1-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; Elements 0-1 of the register argument, sign-extended to i64.
  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %2 = sext <2 x i32> %1 to <2 x i64>
  ; Two i32 values loaded from %a1, sign-extended to i64.
  %3 = load <2 x i32>, <2 x i32>* %a1, align 1
  %4 = sext <2 x i32> %3 to <2 x i64>
  ; The add uses both extended values, so both appear in the output.
  %5 = add <2 x i64> %2, %4
  ret <2 x i64> %5
}

define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; GENERIC-LABEL: test_pmovsxwd:
;
GENERIC: # %bb.0: 3742 ; GENERIC-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50] 3743 ; GENERIC-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [7:0.50] 3744 ; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 3745 ; GENERIC-NEXT: retq # sched: [1:1.00] 3746 ; 3747 ; SLM-LABEL: test_pmovsxwd: 3748 ; SLM: # %bb.0: 3749 ; SLM-NEXT: pmovsxwd (%rdi), %xmm1 # sched: [4:1.00] 3750 ; SLM-NEXT: pmovsxwd %xmm0, %xmm0 # sched: [1:1.00] 3751 ; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] 3752 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 3753 ; SLM-NEXT: retq # sched: [4:1.00] 3754 ; 3755 ; SANDY-SSE-LABEL: test_pmovsxwd: 3756 ; SANDY-SSE: # %bb.0: 3757 ; SANDY-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50] 3758 ; SANDY-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [7:0.50] 3759 ; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 3760 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3761 ; 3762 ; SANDY-LABEL: test_pmovsxwd: 3763 ; SANDY: # %bb.0: 3764 ; SANDY-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50] 3765 ; SANDY-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [7:0.50] 3766 ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 3767 ; SANDY-NEXT: retq # sched: [1:1.00] 3768 ; 3769 ; HASWELL-SSE-LABEL: test_pmovsxwd: 3770 ; HASWELL-SSE: # %bb.0: 3771 ; HASWELL-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00] 3772 ; HASWELL-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00] 3773 ; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 3774 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 3775 ; 3776 ; HASWELL-LABEL: test_pmovsxwd: 3777 ; HASWELL: # %bb.0: 3778 ; HASWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] 3779 ; HASWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00] 3780 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 3781 ; HASWELL-NEXT: retq # sched: [7:1.00] 3782 ; 3783 ; BROADWELL-SSE-LABEL: test_pmovsxwd: 3784 ; BROADWELL-SSE: # %bb.0: 3785 ; BROADWELL-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00] 3786 ; BROADWELL-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: 
[6:1.00] 3787 ; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 3788 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3789 ; 3790 ; BROADWELL-LABEL: test_pmovsxwd: 3791 ; BROADWELL: # %bb.0: 3792 ; BROADWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] 3793 ; BROADWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00] 3794 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 3795 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3796 ; 3797 ; SKYLAKE-SSE-LABEL: test_pmovsxwd: 3798 ; SKYLAKE-SSE: # %bb.0: 3799 ; SKYLAKE-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00] 3800 ; SKYLAKE-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00] 3801 ; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 3802 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 3803 ; 3804 ; SKYLAKE-LABEL: test_pmovsxwd: 3805 ; SKYLAKE: # %bb.0: 3806 ; SKYLAKE-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] 3807 ; SKYLAKE-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00] 3808 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 3809 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3810 ; 3811 ; SKX-SSE-LABEL: test_pmovsxwd: 3812 ; SKX-SSE: # %bb.0: 3813 ; SKX-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00] 3814 ; SKX-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00] 3815 ; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 3816 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3817 ; 3818 ; SKX-LABEL: test_pmovsxwd: 3819 ; SKX: # %bb.0: 3820 ; SKX-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] 3821 ; SKX-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00] 3822 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 3823 ; SKX-NEXT: retq # sched: [7:1.00] 3824 ; 3825 ; BTVER2-SSE-LABEL: test_pmovsxwd: 3826 ; BTVER2-SSE: # %bb.0: 3827 ; BTVER2-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50] 3828 ; BTVER2-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00] 3829 ; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 3830 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3831 ; 3832 ; BTVER2-LABEL: test_pmovsxwd: 3833 ; 
BTVER2: # %bb.0: 3834 ; BTVER2-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00] 3835 ; BTVER2-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50] 3836 ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 3837 ; BTVER2-NEXT: retq # sched: [4:1.00] 3838 ; 3839 ; ZNVER1-SSE-LABEL: test_pmovsxwd: 3840 ; ZNVER1-SSE: # %bb.0: 3841 ; ZNVER1-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.25] 3842 ; ZNVER1-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [8:0.50] 3843 ; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] 3844 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3845 ; 3846 ; ZNVER1-LABEL: test_pmovsxwd: 3847 ; ZNVER1: # %bb.0: 3848 ; ZNVER1-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [8:0.50] 3849 ; ZNVER1-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.25] 3850 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 3851 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3852 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 3853 %2 = sext <4 x i16> %1 to <4 x i32> 3854 %3 = load <4 x i16>, <4 x i16>* %a1, align 1 3855 %4 = sext <4 x i16> %3 to <4 x i32> 3856 %5 = add <4 x i32> %2, %4 3857 ret <4 x i32> %5 3858 } 3859 3860 define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { 3861 ; GENERIC-LABEL: test_pmovsxwq: 3862 ; GENERIC: # %bb.0: 3863 ; GENERIC-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50] 3864 ; GENERIC-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [7:0.50] 3865 ; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 3866 ; GENERIC-NEXT: retq # sched: [1:1.00] 3867 ; 3868 ; SLM-LABEL: test_pmovsxwq: 3869 ; SLM: # %bb.0: 3870 ; SLM-NEXT: pmovsxwq (%rdi), %xmm1 # sched: [4:1.00] 3871 ; SLM-NEXT: pmovsxwq %xmm0, %xmm0 # sched: [1:1.00] 3872 ; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] 3873 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 3874 ; SLM-NEXT: retq # sched: [4:1.00] 3875 ; 3876 ; SANDY-SSE-LABEL: test_pmovsxwq: 3877 ; SANDY-SSE: # %bb.0: 3878 ; SANDY-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50] 3879 ; SANDY-SSE-NEXT: pmovsxwq 
(%rdi), %xmm0 # sched: [7:0.50] 3880 ; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 3881 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3882 ; 3883 ; SANDY-LABEL: test_pmovsxwq: 3884 ; SANDY: # %bb.0: 3885 ; SANDY-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50] 3886 ; SANDY-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [7:0.50] 3887 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 3888 ; SANDY-NEXT: retq # sched: [1:1.00] 3889 ; 3890 ; HASWELL-SSE-LABEL: test_pmovsxwq: 3891 ; HASWELL-SSE: # %bb.0: 3892 ; HASWELL-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00] 3893 ; HASWELL-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] 3894 ; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 3895 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 3896 ; 3897 ; HASWELL-LABEL: test_pmovsxwq: 3898 ; HASWELL: # %bb.0: 3899 ; HASWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] 3900 ; HASWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00] 3901 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 3902 ; HASWELL-NEXT: retq # sched: [7:1.00] 3903 ; 3904 ; BROADWELL-SSE-LABEL: test_pmovsxwq: 3905 ; BROADWELL-SSE: # %bb.0: 3906 ; BROADWELL-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00] 3907 ; BROADWELL-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] 3908 ; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 3909 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3910 ; 3911 ; BROADWELL-LABEL: test_pmovsxwq: 3912 ; BROADWELL: # %bb.0: 3913 ; BROADWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] 3914 ; BROADWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00] 3915 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 3916 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3917 ; 3918 ; SKYLAKE-SSE-LABEL: test_pmovsxwq: 3919 ; SKYLAKE-SSE: # %bb.0: 3920 ; SKYLAKE-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00] 3921 ; SKYLAKE-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] 3922 ; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 3923 ; SKYLAKE-SSE-NEXT: retq # 
sched: [7:1.00] 3924 ; 3925 ; SKYLAKE-LABEL: test_pmovsxwq: 3926 ; SKYLAKE: # %bb.0: 3927 ; SKYLAKE-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] 3928 ; SKYLAKE-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00] 3929 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 3930 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3931 ; 3932 ; SKX-SSE-LABEL: test_pmovsxwq: 3933 ; SKX-SSE: # %bb.0: 3934 ; SKX-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00] 3935 ; SKX-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] 3936 ; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 3937 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3938 ; 3939 ; SKX-LABEL: test_pmovsxwq: 3940 ; SKX: # %bb.0: 3941 ; SKX-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] 3942 ; SKX-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00] 3943 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 3944 ; SKX-NEXT: retq # sched: [7:1.00] 3945 ; 3946 ; BTVER2-SSE-LABEL: test_pmovsxwq: 3947 ; BTVER2-SSE: # %bb.0: 3948 ; BTVER2-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50] 3949 ; BTVER2-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] 3950 ; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 3951 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3952 ; 3953 ; BTVER2-LABEL: test_pmovsxwq: 3954 ; BTVER2: # %bb.0: 3955 ; BTVER2-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00] 3956 ; BTVER2-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50] 3957 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 3958 ; BTVER2-NEXT: retq # sched: [4:1.00] 3959 ; 3960 ; ZNVER1-SSE-LABEL: test_pmovsxwq: 3961 ; ZNVER1-SSE: # %bb.0: 3962 ; ZNVER1-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.25] 3963 ; ZNVER1-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [8:0.50] 3964 ; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] 3965 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3966 ; 3967 ; ZNVER1-LABEL: test_pmovsxwq: 3968 ; ZNVER1: # %bb.0: 3969 ; ZNVER1-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [8:0.50] 3970 ; ZNVER1-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: 
[1:0.25] 3971 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 3972 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3973 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1> 3974 %2 = sext <2 x i16> %1 to <2 x i64> 3975 %3 = load <2 x i16>, <2 x i16>* %a1, align 1 3976 %4 = sext <2 x i16> %3 to <2 x i64> 3977 %5 = add <2 x i64> %2, %4 3978 ret <2 x i64> %5 3979 } 3980 3981 define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { 3982 ; GENERIC-LABEL: test_pmovzxbw: 3983 ; GENERIC: # %bb.0: 3984 ; GENERIC-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] 3985 ; GENERIC-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50] 3986 ; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 3987 ; GENERIC-NEXT: retq # sched: [1:1.00] 3988 ; 3989 ; SLM-LABEL: test_pmovzxbw: 3990 ; SLM: # %bb.0: 3991 ; SLM-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [4:1.00] 3992 ; SLM-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 3993 ; SLM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50] 3994 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 3995 ; SLM-NEXT: retq # sched: [4:1.00] 3996 ; 3997 ; SANDY-SSE-LABEL: test_pmovzxbw: 3998 ; SANDY-SSE: # %bb.0: 3999 ; SANDY-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] 4000 ; SANDY-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50] 4001 ; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 4002 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4003 ; 4004 ; 
SANDY-LABEL: test_pmovzxbw: 4005 ; SANDY: # %bb.0: 4006 ; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] 4007 ; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50] 4008 ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4009 ; SANDY-NEXT: retq # sched: [1:1.00] 4010 ; 4011 ; HASWELL-SSE-LABEL: test_pmovzxbw: 4012 ; HASWELL-SSE: # %bb.0: 4013 ; HASWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 4014 ; HASWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] 4015 ; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 4016 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4017 ; 4018 ; HASWELL-LABEL: test_pmovzxbw: 4019 ; HASWELL: # %bb.0: 4020 ; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 4021 ; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] 4022 ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4023 ; HASWELL-NEXT: retq # sched: [7:1.00] 4024 ; 4025 ; BROADWELL-SSE-LABEL: test_pmovzxbw: 4026 ; BROADWELL-SSE: # %bb.0: 4027 ; BROADWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 4028 ; BROADWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] 4029 ; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 4030 ; 
BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4031 ; 4032 ; BROADWELL-LABEL: test_pmovzxbw: 4033 ; BROADWELL: # %bb.0: 4034 ; BROADWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 4035 ; BROADWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] 4036 ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4037 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4038 ; 4039 ; SKYLAKE-SSE-LABEL: test_pmovzxbw: 4040 ; SKYLAKE-SSE: # %bb.0: 4041 ; SKYLAKE-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 4042 ; SKYLAKE-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] 4043 ; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] 4044 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4045 ; 4046 ; SKYLAKE-LABEL: test_pmovzxbw: 4047 ; SKYLAKE: # %bb.0: 4048 ; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 4049 ; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] 4050 ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 4051 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4052 ; 4053 ; SKX-SSE-LABEL: test_pmovzxbw: 4054 ; SKX-SSE: # %bb.0: 4055 ; SKX-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 4056 ; SKX-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] 4057 ; SKX-SSE-NEXT: 
paddw %xmm1, %xmm0 # sched: [1:0.33] 4058 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4059 ; 4060 ; SKX-LABEL: test_pmovzxbw: 4061 ; SKX: # %bb.0: 4062 ; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 4063 ; SKX-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] 4064 ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 4065 ; SKX-NEXT: retq # sched: [7:1.00] 4066 ; 4067 ; BTVER2-SSE-LABEL: test_pmovzxbw: 4068 ; BTVER2-SSE: # %bb.0: 4069 ; BTVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] 4070 ; BTVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] 4071 ; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 4072 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4073 ; 4074 ; BTVER2-LABEL: test_pmovzxbw: 4075 ; BTVER2: # %bb.0: 4076 ; BTVER2-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] 4077 ; BTVER2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] 4078 ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4079 ; BTVER2-NEXT: retq # sched: [4:1.00] 4080 ; 4081 ; ZNVER1-SSE-LABEL: test_pmovzxbw: 4082 ; ZNVER1-SSE: # %bb.0: 4083 ; ZNVER1-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25] 4084 ; ZNVER1-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50] 4085 ; 
ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] 4086 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4087 ; 4088 ; ZNVER1-LABEL: test_pmovzxbw: 4089 ; ZNVER1: # %bb.0: 4090 ; ZNVER1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50] 4091 ; ZNVER1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25] 4092 ; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 4093 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4094 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 4095 %2 = zext <8 x i8> %1 to <8 x i16> 4096 %3 = load <8 x i8>, <8 x i8>* %a1, align 1 4097 %4 = zext <8 x i8> %3 to <8 x i16> 4098 %5 = add <8 x i16> %2, %4 4099 ret <8 x i16> %5 4100 } 4101 4102 define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { 4103 ; GENERIC-LABEL: test_pmovzxbd: 4104 ; GENERIC: # %bb.0: 4105 ; GENERIC-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] 4106 ; GENERIC-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50] 4107 ; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 4108 ; GENERIC-NEXT: retq # sched: [1:1.00] 4109 ; 4110 ; SLM-LABEL: test_pmovzxbd: 4111 ; SLM: # %bb.0: 4112 ; SLM-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [4:1.00] 4113 ; SLM-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] 4114 ; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] 4115 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 4116 ; SLM-NEXT: retq # sched: [4:1.00] 4117 ; 4118 ; SANDY-SSE-LABEL: 
test_pmovzxbd: 4119 ; SANDY-SSE: # %bb.0: 4120 ; SANDY-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] 4121 ; SANDY-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50] 4122 ; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 4123 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4124 ; 4125 ; SANDY-LABEL: test_pmovzxbd: 4126 ; SANDY: # %bb.0: 4127 ; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] 4128 ; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50] 4129 ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4130 ; SANDY-NEXT: retq # sched: [1:1.00] 4131 ; 4132 ; HASWELL-SSE-LABEL: test_pmovzxbd: 4133 ; HASWELL-SSE: # %bb.0: 4134 ; HASWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] 4135 ; HASWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] 4136 ; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 4137 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4138 ; 4139 ; HASWELL-LABEL: test_pmovzxbd: 4140 ; HASWELL: # %bb.0: 4141 ; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] 4142 ; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] 4143 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4144 ; HASWELL-NEXT: retq # sched: [7:1.00] 4145 ; 4146 ; BROADWELL-SSE-LABEL: test_pmovzxbd: 4147 ; BROADWELL-SSE: # %bb.0: 4148 
; BROADWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] 4149 ; BROADWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] 4150 ; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 4151 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4152 ; 4153 ; BROADWELL-LABEL: test_pmovzxbd: 4154 ; BROADWELL: # %bb.0: 4155 ; BROADWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] 4156 ; BROADWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] 4157 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4158 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4159 ; 4160 ; SKYLAKE-SSE-LABEL: test_pmovzxbd: 4161 ; SKYLAKE-SSE: # %bb.0: 4162 ; SKYLAKE-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] 4163 ; SKYLAKE-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] 4164 ; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 4165 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4166 ; 4167 ; SKYLAKE-LABEL: test_pmovzxbd: 4168 ; SKYLAKE: # %bb.0: 4169 ; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] 4170 ; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] 4171 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 4172 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4173 ; 4174 ; SKX-SSE-LABEL: test_pmovzxbd: 4175 ; SKX-SSE: # %bb.0: 4176 ; SKX-SSE-NEXT: 
pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] 4177 ; SKX-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] 4178 ; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 4179 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4180 ; 4181 ; SKX-LABEL: test_pmovzxbd: 4182 ; SKX: # %bb.0: 4183 ; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] 4184 ; SKX-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] 4185 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 4186 ; SKX-NEXT: retq # sched: [7:1.00] 4187 ; 4188 ; BTVER2-SSE-LABEL: test_pmovzxbd: 4189 ; BTVER2-SSE: # %bb.0: 4190 ; BTVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] 4191 ; BTVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] 4192 ; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 4193 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4194 ; 4195 ; BTVER2-LABEL: test_pmovzxbd: 4196 ; BTVER2: # %bb.0: 4197 ; BTVER2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] 4198 ; BTVER2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] 4199 ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4200 ; BTVER2-NEXT: retq # sched: [4:1.00] 4201 ; 4202 ; ZNVER1-SSE-LABEL: test_pmovzxbd: 4203 ; ZNVER1-SSE: # %bb.0: 4204 ; ZNVER1-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25] 4205 ; ZNVER1-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50] 4206 ; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] 4207 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4208 ; 4209 ; ZNVER1-LABEL: test_pmovzxbd: 4210 ; ZNVER1: # %bb.0: 4211 ; ZNVER1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50] 4212 ; ZNVER1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25] 4213 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 4214 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4215 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4216 %2 = zext <4 x i8> %1 to <4 x i32> 4217 %3 = load <4 x i8>, <4 x i8>* %a1, align 1 4218 %4 = zext <4 x i8> %3 to <4 x i32> 4219 %5 = add <4 x i32> %2, %4 4220 ret <4 x i32> %5 4221 } 4222 4223 define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { 4224 ; GENERIC-LABEL: test_pmovzxbq: 4225 ; GENERIC: # %bb.0: 4226 ; GENERIC-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] 4227 ; GENERIC-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50] 4228 ; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 4229 ; GENERIC-NEXT: retq # sched: [1:1.00] 4230 ; 4231 ; SLM-LABEL: test_pmovzxbq: 4232 ; SLM: # %bb.0: 4233 ; SLM-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [4:1.00] 4234 ; SLM-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 
sched: [1:1.00] 4235 ; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] 4236 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 4237 ; SLM-NEXT: retq # sched: [4:1.00] 4238 ; 4239 ; SANDY-SSE-LABEL: test_pmovzxbq: 4240 ; SANDY-SSE: # %bb.0: 4241 ; SANDY-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] 4242 ; SANDY-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50] 4243 ; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 4244 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4245 ; 4246 ; SANDY-LABEL: test_pmovzxbq: 4247 ; SANDY: # %bb.0: 4248 ; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] 4249 ; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50] 4250 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4251 ; SANDY-NEXT: retq # sched: [1:1.00] 4252 ; 4253 ; HASWELL-SSE-LABEL: test_pmovzxbq: 4254 ; HASWELL-SSE: # %bb.0: 4255 ; HASWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] 4256 ; HASWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] 4257 ; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 4258 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4259 ; 4260 ; HASWELL-LABEL: test_pmovzxbq: 4261 ; HASWELL: # %bb.0: 4262 ; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] 4263 ; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] 4264 ; HASWELL-NEXT: vpaddq %xmm1, 
%xmm0, %xmm0 # sched: [1:0.50] 4265 ; HASWELL-NEXT: retq # sched: [7:1.00] 4266 ; 4267 ; BROADWELL-SSE-LABEL: test_pmovzxbq: 4268 ; BROADWELL-SSE: # %bb.0: 4269 ; BROADWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] 4270 ; BROADWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] 4271 ; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 4272 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4273 ; 4274 ; BROADWELL-LABEL: test_pmovzxbq: 4275 ; BROADWELL: # %bb.0: 4276 ; BROADWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] 4277 ; BROADWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] 4278 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4279 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4280 ; 4281 ; SKYLAKE-SSE-LABEL: test_pmovzxbq: 4282 ; SKYLAKE-SSE: # %bb.0: 4283 ; SKYLAKE-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] 4284 ; SKYLAKE-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] 4285 ; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 4286 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4287 ; 4288 ; SKYLAKE-LABEL: test_pmovzxbq: 4289 ; SKYLAKE: # %bb.0: 4290 ; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] 4291 ; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] 4292 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 4293 ; 
SKYLAKE-NEXT: retq # sched: [7:1.00] 4294 ; 4295 ; SKX-SSE-LABEL: test_pmovzxbq: 4296 ; SKX-SSE: # %bb.0: 4297 ; SKX-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] 4298 ; SKX-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] 4299 ; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 4300 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4301 ; 4302 ; SKX-LABEL: test_pmovzxbq: 4303 ; SKX: # %bb.0: 4304 ; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] 4305 ; SKX-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] 4306 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 4307 ; SKX-NEXT: retq # sched: [7:1.00] 4308 ; 4309 ; BTVER2-SSE-LABEL: test_pmovzxbq: 4310 ; BTVER2-SSE: # %bb.0: 4311 ; BTVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] 4312 ; BTVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] 4313 ; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 4314 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4315 ; 4316 ; BTVER2-LABEL: test_pmovzxbq: 4317 ; BTVER2: # %bb.0: 4318 ; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] 4319 ; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] 4320 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4321 ; BTVER2-NEXT: retq # sched: [4:1.00] 4322 ; 4323 ; ZNVER1-SSE-LABEL: test_pmovzxbq: 4324 ; ZNVER1-SSE: # %bb.0: 4325 ; 
ZNVER1-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.25] 4326 ; ZNVER1-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50] 4327 ; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] 4328 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4329 ; 4330 ; ZNVER1-LABEL: test_pmovzxbq: 4331 ; ZNVER1: # %bb.0: 4332 ; ZNVER1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50] 4333 ; ZNVER1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.25] 4334 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 4335 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4336 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <2 x i32> <i32 0, i32 1> 4337 %2 = zext <2 x i8> %1 to <2 x i64> 4338 %3 = load <2 x i8>, <2 x i8>* %a1, align 1 4339 %4 = zext <2 x i8> %3 to <2 x i64> 4340 %5 = add <2 x i64> %2, %4 4341 ret <2 x i64> %5 4342 } 4343 4344 define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { 4345 ; GENERIC-LABEL: test_pmovzxdq: 4346 ; GENERIC: # %bb.0: 4347 ; GENERIC-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] 4348 ; GENERIC-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50] 4349 ; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 4350 ; GENERIC-NEXT: retq # sched: [1:1.00] 4351 ; 4352 ; SLM-LABEL: test_pmovzxdq: 4353 ; SLM: # %bb.0: 4354 ; SLM-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [4:1.00] 4355 ; SLM-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] 4356 ; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] 4357 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 4358 ; SLM-NEXT: retq # sched: [4:1.00] 4359 ; 4360 ; SANDY-SSE-LABEL: test_pmovzxdq: 4361 ; SANDY-SSE: # 
%bb.0: 4362 ; SANDY-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] 4363 ; SANDY-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50] 4364 ; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 4365 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4366 ; 4367 ; SANDY-LABEL: test_pmovzxdq: 4368 ; SANDY: # %bb.0: 4369 ; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] 4370 ; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [7:0.50] 4371 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4372 ; SANDY-NEXT: retq # sched: [1:1.00] 4373 ; 4374 ; HASWELL-SSE-LABEL: test_pmovzxdq: 4375 ; HASWELL-SSE: # %bb.0: 4376 ; HASWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] 4377 ; HASWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] 4378 ; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 4379 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4380 ; 4381 ; HASWELL-LABEL: test_pmovzxdq: 4382 ; HASWELL: # %bb.0: 4383 ; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] 4384 ; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00] 4385 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4386 ; HASWELL-NEXT: retq # sched: [7:1.00] 4387 ; 4388 ; BROADWELL-SSE-LABEL: test_pmovzxdq: 4389 ; BROADWELL-SSE: # %bb.0: 4390 ; BROADWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] 4391 ; BROADWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] 4392 ; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 4393 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4394 ; 4395 ; BROADWELL-LABEL: test_pmovzxdq: 4396 ; BROADWELL: # %bb.0: 4397 ; BROADWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] 4398 ; BROADWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00] 4399 ; 
BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4400 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4401 ; 4402 ; SKYLAKE-SSE-LABEL: test_pmovzxdq: 4403 ; SKYLAKE-SSE: # %bb.0: 4404 ; SKYLAKE-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] 4405 ; SKYLAKE-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] 4406 ; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 4407 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4408 ; 4409 ; SKYLAKE-LABEL: test_pmovzxdq: 4410 ; SKYLAKE: # %bb.0: 4411 ; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] 4412 ; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00] 4413 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 4414 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4415 ; 4416 ; SKX-SSE-LABEL: test_pmovzxdq: 4417 ; SKX-SSE: # %bb.0: 4418 ; SKX-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] 4419 ; SKX-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] 4420 ; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 4421 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4422 ; 4423 ; SKX-LABEL: test_pmovzxdq: 4424 ; SKX: # %bb.0: 4425 ; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] 4426 ; SKX-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00] 4427 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 4428 ; SKX-NEXT: retq # sched: [7:1.00] 4429 ; 4430 ; BTVER2-SSE-LABEL: test_pmovzxdq: 4431 ; BTVER2-SSE: # %bb.0: 4432 ; BTVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] 4433 ; BTVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] 4434 ; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 4435 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4436 ; 4437 ; BTVER2-LABEL: test_pmovzxdq: 4438 ; BTVER2: # %bb.0: 4439 ; BTVER2-NEXT: vpmovzxdq {{.*#+}} xmm1 = 
mem[0],zero,mem[1],zero sched: [6:1.00] 4440 ; BTVER2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] 4441 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4442 ; BTVER2-NEXT: retq # sched: [4:1.00] 4443 ; 4444 ; ZNVER1-SSE-LABEL: test_pmovzxdq: 4445 ; ZNVER1-SSE: # %bb.0: 4446 ; ZNVER1-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.25] 4447 ; ZNVER1-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [8:0.50] 4448 ; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] 4449 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4450 ; 4451 ; ZNVER1-LABEL: test_pmovzxdq: 4452 ; ZNVER1: # %bb.0: 4453 ; ZNVER1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [8:0.50] 4454 ; ZNVER1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.25] 4455 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 4456 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4457 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 4458 %2 = zext <2 x i32> %1 to <2 x i64> 4459 %3 = load <2 x i32>, <2 x i32>* %a1, align 1 4460 %4 = zext <2 x i32> %3 to <2 x i64> 4461 %5 = add <2 x i64> %2, %4 4462 ret <2 x i64> %5 4463 } 4464 4465 define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { 4466 ; GENERIC-LABEL: test_pmovzxwd: 4467 ; GENERIC: # %bb.0: 4468 ; GENERIC-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] 4469 ; GENERIC-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50] 4470 ; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 4471 ; GENERIC-NEXT: retq # sched: [1:1.00] 4472 ; 4473 ; SLM-LABEL: test_pmovzxwd: 4474 ; SLM: # %bb.0: 4475 ; SLM-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [4:1.00] 4476 ; SLM-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] 4477 ; SLM-NEXT: paddd %xmm0, %xmm1 # 
sched: [1:0.50] 4478 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 4479 ; SLM-NEXT: retq # sched: [4:1.00] 4480 ; 4481 ; SANDY-SSE-LABEL: test_pmovzxwd: 4482 ; SANDY-SSE: # %bb.0: 4483 ; SANDY-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] 4484 ; SANDY-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50] 4485 ; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 4486 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4487 ; 4488 ; SANDY-LABEL: test_pmovzxwd: 4489 ; SANDY: # %bb.0: 4490 ; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] 4491 ; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50] 4492 ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4493 ; SANDY-NEXT: retq # sched: [1:1.00] 4494 ; 4495 ; HASWELL-SSE-LABEL: test_pmovzxwd: 4496 ; HASWELL-SSE: # %bb.0: 4497 ; HASWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] 4498 ; HASWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] 4499 ; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 4500 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4501 ; 4502 ; HASWELL-LABEL: test_pmovzxwd: 4503 ; HASWELL: # %bb.0: 4504 ; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] 4505 ; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] 4506 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4507 ; HASWELL-NEXT: retq # sched: [7:1.00] 4508 ; 4509 ; BROADWELL-SSE-LABEL: test_pmovzxwd: 4510 ; BROADWELL-SSE: # %bb.0: 4511 ; BROADWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] 4512 ; BROADWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] 4513 ; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 4514 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4515 ; 4516 ; BROADWELL-LABEL: test_pmovzxwd: 4517 ; BROADWELL: # %bb.0: 4518 ; BROADWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] 4519 ; BROADWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] 4520 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4521 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4522 ; 4523 ; SKYLAKE-SSE-LABEL: test_pmovzxwd: 4524 ; SKYLAKE-SSE: # %bb.0: 4525 ; SKYLAKE-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] 4526 ; SKYLAKE-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] 4527 ; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 4528 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4529 ; 4530 ; SKYLAKE-LABEL: test_pmovzxwd: 4531 ; SKYLAKE: # %bb.0: 4532 ; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] 4533 ; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] 4534 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 4535 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4536 ; 4537 ; SKX-SSE-LABEL: test_pmovzxwd: 4538 ; SKX-SSE: # %bb.0: 4539 ; SKX-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] 4540 ; SKX-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] 4541 ; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 4542 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4543 ; 4544 ; SKX-LABEL: test_pmovzxwd: 4545 ; SKX: # %bb.0: 4546 ; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] 4547 ; 
SKX-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] 4548 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 4549 ; SKX-NEXT: retq # sched: [7:1.00] 4550 ; 4551 ; BTVER2-SSE-LABEL: test_pmovzxwd: 4552 ; BTVER2-SSE: # %bb.0: 4553 ; BTVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] 4554 ; BTVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] 4555 ; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 4556 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4557 ; 4558 ; BTVER2-LABEL: test_pmovzxwd: 4559 ; BTVER2: # %bb.0: 4560 ; BTVER2-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] 4561 ; BTVER2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] 4562 ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4563 ; BTVER2-NEXT: retq # sched: [4:1.00] 4564 ; 4565 ; ZNVER1-SSE-LABEL: test_pmovzxwd: 4566 ; ZNVER1-SSE: # %bb.0: 4567 ; ZNVER1-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.25] 4568 ; ZNVER1-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50] 4569 ; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] 4570 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4571 ; 4572 ; ZNVER1-LABEL: test_pmovzxwd: 4573 ; ZNVER1: # %bb.0: 4574 ; ZNVER1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50] 4575 ; ZNVER1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.25] 4576 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 4577 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4578 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4579 %2 = zext <4 x i16> %1 to <4 x i32> 4580 %3 = load <4 x i16>, <4 x i16>* 
%a1, align 1 4581 %4 = zext <4 x i16> %3 to <4 x i32> 4582 %5 = add <4 x i32> %2, %4 4583 ret <4 x i32> %5 4584 } 4585 4586 define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { 4587 ; GENERIC-LABEL: test_pmovzxwq: 4588 ; GENERIC: # %bb.0: 4589 ; GENERIC-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] 4590 ; GENERIC-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50] 4591 ; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 4592 ; GENERIC-NEXT: retq # sched: [1:1.00] 4593 ; 4594 ; SLM-LABEL: test_pmovzxwq: 4595 ; SLM: # %bb.0: 4596 ; SLM-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [4:1.00] 4597 ; SLM-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] 4598 ; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] 4599 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 4600 ; SLM-NEXT: retq # sched: [4:1.00] 4601 ; 4602 ; SANDY-SSE-LABEL: test_pmovzxwq: 4603 ; SANDY-SSE: # %bb.0: 4604 ; SANDY-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] 4605 ; SANDY-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50] 4606 ; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 4607 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4608 ; 4609 ; SANDY-LABEL: test_pmovzxwq: 4610 ; SANDY: # %bb.0: 4611 ; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] 4612 ; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50] 4613 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4614 ; SANDY-NEXT: retq # sched: [1:1.00] 4615 ; 4616 ; HASWELL-SSE-LABEL: test_pmovzxwq: 4617 ; HASWELL-SSE: # %bb.0: 4618 ; HASWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] 4619 ; HASWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 
= mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] 4620 ; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 4621 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4622 ; 4623 ; HASWELL-LABEL: test_pmovzxwq: 4624 ; HASWELL: # %bb.0: 4625 ; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] 4626 ; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] 4627 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4628 ; HASWELL-NEXT: retq # sched: [7:1.00] 4629 ; 4630 ; BROADWELL-SSE-LABEL: test_pmovzxwq: 4631 ; BROADWELL-SSE: # %bb.0: 4632 ; BROADWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] 4633 ; BROADWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] 4634 ; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 4635 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4636 ; 4637 ; BROADWELL-LABEL: test_pmovzxwq: 4638 ; BROADWELL: # %bb.0: 4639 ; BROADWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] 4640 ; BROADWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] 4641 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4642 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4643 ; 4644 ; SKYLAKE-SSE-LABEL: test_pmovzxwq: 4645 ; SKYLAKE-SSE: # %bb.0: 4646 ; SKYLAKE-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] 4647 ; SKYLAKE-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] 4648 ; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 4649 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4650 ; 4651 ; SKYLAKE-LABEL: test_pmovzxwq: 4652 ; SKYLAKE: # %bb.0: 4653 ; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] 4654 ; 
SKYLAKE-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] 4655 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 4656 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4657 ; 4658 ; SKX-SSE-LABEL: test_pmovzxwq: 4659 ; SKX-SSE: # %bb.0: 4660 ; SKX-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] 4661 ; SKX-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] 4662 ; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 4663 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4664 ; 4665 ; SKX-LABEL: test_pmovzxwq: 4666 ; SKX: # %bb.0: 4667 ; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] 4668 ; SKX-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] 4669 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 4670 ; SKX-NEXT: retq # sched: [7:1.00] 4671 ; 4672 ; BTVER2-SSE-LABEL: test_pmovzxwq: 4673 ; BTVER2-SSE: # %bb.0: 4674 ; BTVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] 4675 ; BTVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] 4676 ; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 4677 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4678 ; 4679 ; BTVER2-LABEL: test_pmovzxwq: 4680 ; BTVER2: # %bb.0: 4681 ; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] 4682 ; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] 4683 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4684 ; BTVER2-NEXT: retq # sched: [4:1.00] 4685 ; 4686 ; ZNVER1-SSE-LABEL: test_pmovzxwq: 4687 ; ZNVER1-SSE: # %bb.0: 4688 ; ZNVER1-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.25] 4689 ; ZNVER1-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:0.50] 4690 ; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] 4691 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4692 ; 4693 ; ZNVER1-LABEL: test_pmovzxwq: 4694 ; ZNVER1: # %bb.0: 4695 ; ZNVER1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:0.50] 4696 ; ZNVER1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.25] 4697 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 4698 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4699 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1> 4700 %2 = zext <2 x i16> %1 to <2 x i64> 4701 %3 = load <2 x i16>, <2 x i16>* %a1, align 1 4702 %4 = zext <2 x i16> %3 to <2 x i64> 4703 %5 = add <2 x i64> %2, %4 4704 ret <2 x i64> %5 4705 } 4706 4707 define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 4708 ; GENERIC-LABEL: test_pmuldq: 4709 ; GENERIC: # %bb.0: 4710 ; GENERIC-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00] 4711 ; GENERIC-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00] 4712 ; GENERIC-NEXT: retq # sched: [1:1.00] 4713 ; 4714 ; SLM-LABEL: test_pmuldq: 4715 ; SLM: # %bb.0: 4716 ; SLM-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:1.00] 4717 ; SLM-NEXT: pmuldq (%rdi), %xmm0 # sched: [7:1.00] 4718 ; SLM-NEXT: retq # sched: [4:1.00] 4719 ; 4720 ; SANDY-SSE-LABEL: test_pmuldq: 4721 ; SANDY-SSE: # %bb.0: 4722 ; SANDY-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00] 4723 ; SANDY-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00] 4724 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4725 ; 4726 ; SANDY-LABEL: test_pmuldq: 4727 ; SANDY: # %bb.0: 4728 ; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 4729 ; SANDY-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 4730 ; SANDY-NEXT: retq # sched: [1:1.00] 4731 ; 4732 ; HASWELL-SSE-LABEL: test_pmuldq: 4733 ; HASWELL-SSE: # %bb.0: 4734 ; HASWELL-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00] 4735 ; HASWELL-SSE-NEXT: pmuldq 
(%rdi), %xmm0 # sched: [11:1.00] 4736 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4737 ; 4738 ; HASWELL-LABEL: test_pmuldq: 4739 ; HASWELL: # %bb.0: 4740 ; HASWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 4741 ; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 4742 ; HASWELL-NEXT: retq # sched: [7:1.00] 4743 ; 4744 ; BROADWELL-SSE-LABEL: test_pmuldq: 4745 ; BROADWELL-SSE: # %bb.0: 4746 ; BROADWELL-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00] 4747 ; BROADWELL-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:1.00] 4748 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4749 ; 4750 ; BROADWELL-LABEL: test_pmuldq: 4751 ; BROADWELL: # %bb.0: 4752 ; BROADWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 4753 ; BROADWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 4754 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4755 ; 4756 ; SKYLAKE-SSE-LABEL: test_pmuldq: 4757 ; SKYLAKE-SSE: # %bb.0: 4758 ; SKYLAKE-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.50] 4759 ; SKYLAKE-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:0.50] 4760 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4761 ; 4762 ; SKYLAKE-LABEL: test_pmuldq: 4763 ; SKYLAKE: # %bb.0: 4764 ; SKYLAKE-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4765 ; SKYLAKE-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 4766 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4767 ; 4768 ; SKX-SSE-LABEL: test_pmuldq: 4769 ; SKX-SSE: # %bb.0: 4770 ; SKX-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.50] 4771 ; SKX-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:0.50] 4772 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4773 ; 4774 ; SKX-LABEL: test_pmuldq: 4775 ; SKX: # %bb.0: 4776 ; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4777 ; SKX-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 4778 ; SKX-NEXT: retq # sched: [7:1.00] 4779 ; 4780 ; BTVER2-SSE-LABEL: test_pmuldq: 4781 ; BTVER2-SSE: # %bb.0: 4782 ; BTVER2-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [2:1.00] 4783 ; BTVER2-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [7:1.00] 
4784 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4785 ; 4786 ; BTVER2-LABEL: test_pmuldq: 4787 ; BTVER2: # %bb.0: 4788 ; BTVER2-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 4789 ; BTVER2-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 4790 ; BTVER2-NEXT: retq # sched: [4:1.00] 4791 ; 4792 ; ZNVER1-SSE-LABEL: test_pmuldq: 4793 ; ZNVER1-SSE: # %bb.0: 4794 ; ZNVER1-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:1.00] 4795 ; ZNVER1-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00] 4796 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4797 ; 4798 ; ZNVER1-LABEL: test_pmuldq: 4799 ; ZNVER1: # %bb.0: 4800 ; ZNVER1-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:1.00] 4801 ; ZNVER1-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 4802 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4803 %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) 4804 %2 = bitcast <2 x i64> %1 to <4 x i32> 4805 %3 = load <4 x i32>, <4 x i32> *%a2, align 16 4806 %4 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %2, <4 x i32> %3) 4807 ret <2 x i64> %4 4808 } 4809 declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone 4810 4811 define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 4812 ; GENERIC-LABEL: test_pmulld: 4813 ; GENERIC: # %bb.0: 4814 ; GENERIC-NEXT: pmulld %xmm1, %xmm0 # sched: [5:1.00] 4815 ; GENERIC-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00] 4816 ; GENERIC-NEXT: retq # sched: [1:1.00] 4817 ; 4818 ; SLM-LABEL: test_pmulld: 4819 ; SLM: # %bb.0: 4820 ; SLM-NEXT: pmulld %xmm1, %xmm0 # sched: [4:1.00] 4821 ; SLM-NEXT: pmulld (%rdi), %xmm0 # sched: [7:1.00] 4822 ; SLM-NEXT: retq # sched: [4:1.00] 4823 ; 4824 ; SANDY-SSE-LABEL: test_pmulld: 4825 ; SANDY-SSE: # %bb.0: 4826 ; SANDY-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [5:1.00] 4827 ; SANDY-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00] 4828 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4829 ; 4830 ; SANDY-LABEL: test_pmulld: 4831 ; SANDY: # %bb.0: 4832 ; SANDY-NEXT: vpmulld %xmm1, 
%xmm0, %xmm0 # sched: [5:1.00] 4833 ; SANDY-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 4834 ; SANDY-NEXT: retq # sched: [1:1.00] 4835 ; 4836 ; HASWELL-SSE-LABEL: test_pmulld: 4837 ; HASWELL-SSE: # %bb.0: 4838 ; HASWELL-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:2.00] 4839 ; HASWELL-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:2.00] 4840 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4841 ; 4842 ; HASWELL-LABEL: test_pmulld: 4843 ; HASWELL: # %bb.0: 4844 ; HASWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00] 4845 ; HASWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:2.00] 4846 ; HASWELL-NEXT: retq # sched: [7:1.00] 4847 ; 4848 ; BROADWELL-SSE-LABEL: test_pmulld: 4849 ; BROADWELL-SSE: # %bb.0: 4850 ; BROADWELL-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:2.00] 4851 ; BROADWELL-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [15:2.00] 4852 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4853 ; 4854 ; BROADWELL-LABEL: test_pmulld: 4855 ; BROADWELL: # %bb.0: 4856 ; BROADWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00] 4857 ; BROADWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [15:2.00] 4858 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4859 ; 4860 ; SKYLAKE-SSE-LABEL: test_pmulld: 4861 ; SKYLAKE-SSE: # %bb.0: 4862 ; SKYLAKE-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:1.00] 4863 ; SKYLAKE-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:1.00] 4864 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4865 ; 4866 ; SKYLAKE-LABEL: test_pmulld: 4867 ; SKYLAKE: # %bb.0: 4868 ; SKYLAKE-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:1.00] 4869 ; SKYLAKE-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00] 4870 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4871 ; 4872 ; SKX-SSE-LABEL: test_pmulld: 4873 ; SKX-SSE: # %bb.0: 4874 ; SKX-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:1.00] 4875 ; SKX-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:1.00] 4876 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4877 ; 4878 ; SKX-LABEL: test_pmulld: 4879 ; SKX: # %bb.0: 4880 ; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # 
sched: [10:1.00] 4881 ; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00] 4882 ; SKX-NEXT: retq # sched: [7:1.00] 4883 ; 4884 ; BTVER2-SSE-LABEL: test_pmulld: 4885 ; BTVER2-SSE: # %bb.0: 4886 ; BTVER2-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [4:2.00] 4887 ; BTVER2-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [9:2.00] 4888 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4889 ; 4890 ; BTVER2-LABEL: test_pmulld: 4891 ; BTVER2: # %bb.0: 4892 ; BTVER2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [4:2.00] 4893 ; BTVER2-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [9:2.00] 4894 ; BTVER2-NEXT: retq # sched: [4:1.00] 4895 ; 4896 ; ZNVER1-SSE-LABEL: test_pmulld: 4897 ; ZNVER1-SSE: # %bb.0: 4898 ; ZNVER1-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [4:1.00] 4899 ; ZNVER1-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00] 4900 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4901 ; 4902 ; ZNVER1-LABEL: test_pmulld: 4903 ; ZNVER1: # %bb.0: 4904 ; ZNVER1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [4:1.00] 4905 ; ZNVER1-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 4906 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4907 %1 = mul <4 x i32> %a0, %a1 4908 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 4909 %3 = mul <4 x i32> %1, %2 4910 ret <4 x i32> %3 4911 } 4912 4913 define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 4914 ; GENERIC-LABEL: test_ptest: 4915 ; GENERIC: # %bb.0: 4916 ; GENERIC-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00] 4917 ; GENERIC-NEXT: setb %al # sched: [1:0.50] 4918 ; GENERIC-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] 4919 ; GENERIC-NEXT: setb %cl # sched: [1:0.50] 4920 ; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33] 4921 ; GENERIC-NEXT: movzbl %cl, %eax # sched: [1:0.33] 4922 ; GENERIC-NEXT: retq # sched: [1:1.00] 4923 ; 4924 ; SLM-LABEL: test_ptest: 4925 ; SLM: # %bb.0: 4926 ; SLM-NEXT: ptest %xmm1, %xmm0 # sched: [1:0.50] 4927 ; SLM-NEXT: setb %al # sched: [1:0.50] 4928 ; SLM-NEXT: ptest (%rdi), %xmm0 # sched: [4:1.00] 4929 ; SLM-NEXT: setb %cl # sched: 
[1:0.50] 4930 ; SLM-NEXT: andb %al, %cl # sched: [1:0.50] 4931 ; SLM-NEXT: movzbl %cl, %eax # sched: [1:0.50] 4932 ; SLM-NEXT: retq # sched: [4:1.00] 4933 ; 4934 ; SANDY-SSE-LABEL: test_ptest: 4935 ; SANDY-SSE: # %bb.0: 4936 ; SANDY-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00] 4937 ; SANDY-SSE-NEXT: setb %al # sched: [1:0.50] 4938 ; SANDY-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] 4939 ; SANDY-SSE-NEXT: setb %cl # sched: [1:0.50] 4940 ; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33] 4941 ; SANDY-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.33] 4942 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4943 ; 4944 ; SANDY-LABEL: test_ptest: 4945 ; SANDY: # %bb.0: 4946 ; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] 4947 ; SANDY-NEXT: setb %al # sched: [1:0.50] 4948 ; SANDY-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00] 4949 ; SANDY-NEXT: setb %cl # sched: [1:0.50] 4950 ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] 4951 ; SANDY-NEXT: movzbl %cl, %eax # sched: [1:0.33] 4952 ; SANDY-NEXT: retq # sched: [1:1.00] 4953 ; 4954 ; HASWELL-SSE-LABEL: test_ptest: 4955 ; HASWELL-SSE: # %bb.0: 4956 ; HASWELL-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00] 4957 ; HASWELL-SSE-NEXT: setb %al # sched: [1:0.50] 4958 ; HASWELL-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] 4959 ; HASWELL-SSE-NEXT: setb %cl # sched: [1:0.50] 4960 ; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 4961 ; HASWELL-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] 4962 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4963 ; 4964 ; HASWELL-LABEL: test_ptest: 4965 ; HASWELL: # %bb.0: 4966 ; HASWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] 4967 ; HASWELL-NEXT: setb %al # sched: [1:0.50] 4968 ; HASWELL-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00] 4969 ; HASWELL-NEXT: setb %cl # sched: [1:0.50] 4970 ; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] 4971 ; HASWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25] 4972 ; HASWELL-NEXT: retq # sched: [7:1.00] 4973 ; 4974 ; BROADWELL-SSE-LABEL: test_ptest: 4975 ; BROADWELL-SSE: # %bb.0: 
4976 ; BROADWELL-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00] 4977 ; BROADWELL-SSE-NEXT: setb %al # sched: [1:0.50] 4978 ; BROADWELL-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [7:1.00] 4979 ; BROADWELL-SSE-NEXT: setb %cl # sched: [1:0.50] 4980 ; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 4981 ; BROADWELL-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] 4982 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4983 ; 4984 ; BROADWELL-LABEL: test_ptest: 4985 ; BROADWELL: # %bb.0: 4986 ; BROADWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] 4987 ; BROADWELL-NEXT: setb %al # sched: [1:0.50] 4988 ; BROADWELL-NEXT: vptest (%rdi), %xmm0 # sched: [7:1.00] 4989 ; BROADWELL-NEXT: setb %cl # sched: [1:0.50] 4990 ; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] 4991 ; BROADWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25] 4992 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4993 ; 4994 ; SKYLAKE-SSE-LABEL: test_ptest: 4995 ; SKYLAKE-SSE: # %bb.0: 4996 ; SKYLAKE-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00] 4997 ; SKYLAKE-SSE-NEXT: setb %al # sched: [1:0.50] 4998 ; SKYLAKE-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [9:1.00] 4999 ; SKYLAKE-SSE-NEXT: setb %cl # sched: [1:0.50] 5000 ; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 5001 ; SKYLAKE-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] 5002 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 5003 ; 5004 ; SKYLAKE-LABEL: test_ptest: 5005 ; SKYLAKE: # %bb.0: 5006 ; SKYLAKE-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00] 5007 ; SKYLAKE-NEXT: setb %al # sched: [1:0.50] 5008 ; SKYLAKE-NEXT: vptest (%rdi), %xmm0 # sched: [9:1.00] 5009 ; SKYLAKE-NEXT: setb %cl # sched: [1:0.50] 5010 ; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25] 5011 ; SKYLAKE-NEXT: movzbl %cl, %eax # sched: [1:0.25] 5012 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5013 ; 5014 ; SKX-SSE-LABEL: test_ptest: 5015 ; SKX-SSE: # %bb.0: 5016 ; SKX-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00] 5017 ; SKX-SSE-NEXT: setb %al # sched: [1:0.50] 5018 ; SKX-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [9:1.00] 5019 ; 
SKX-SSE-NEXT: setb %cl # sched: [1:0.50] 5020 ; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 5021 ; SKX-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] 5022 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 5023 ; 5024 ; SKX-LABEL: test_ptest: 5025 ; SKX: # %bb.0: 5026 ; SKX-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00] 5027 ; SKX-NEXT: setb %al # sched: [1:0.50] 5028 ; SKX-NEXT: vptest (%rdi), %xmm0 # sched: [9:1.00] 5029 ; SKX-NEXT: setb %cl # sched: [1:0.50] 5030 ; SKX-NEXT: andb %al, %cl # sched: [1:0.25] 5031 ; SKX-NEXT: movzbl %cl, %eax # sched: [1:0.25] 5032 ; SKX-NEXT: retq # sched: [7:1.00] 5033 ; 5034 ; BTVER2-SSE-LABEL: test_ptest: 5035 ; BTVER2-SSE: # %bb.0: 5036 ; BTVER2-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00] 5037 ; BTVER2-SSE-NEXT: setb %al # sched: [1:0.50] 5038 ; BTVER2-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] 5039 ; BTVER2-SSE-NEXT: setb %cl # sched: [1:0.50] 5040 ; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] 5041 ; BTVER2-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.50] 5042 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 5043 ; 5044 ; BTVER2-LABEL: test_ptest: 5045 ; BTVER2: # %bb.0: 5046 ; BTVER2-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00] 5047 ; BTVER2-NEXT: setb %al # sched: [1:0.50] 5048 ; BTVER2-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00] 5049 ; BTVER2-NEXT: setb %cl # sched: [1:0.50] 5050 ; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50] 5051 ; BTVER2-NEXT: movzbl %cl, %eax # sched: [1:0.50] 5052 ; BTVER2-NEXT: retq # sched: [4:1.00] 5053 ; 5054 ; ZNVER1-SSE-LABEL: test_ptest: 5055 ; ZNVER1-SSE: # %bb.0: 5056 ; ZNVER1-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [1:1.00] 5057 ; ZNVER1-SSE-NEXT: setb %al # sched: [1:0.25] 5058 ; ZNVER1-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] 5059 ; ZNVER1-SSE-NEXT: setb %cl # sched: [1:0.25] 5060 ; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 5061 ; ZNVER1-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] 5062 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 5063 ; 5064 ; ZNVER1-LABEL: test_ptest: 5065 ; ZNVER1: # %bb.0: 
; ZNVER1-NEXT: vptest %xmm1, %xmm0 # sched: [1:1.00]
; ZNVER1-NEXT: setb %al # sched: [1:0.25]
; ZNVER1-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00]
; ZNVER1-NEXT: setb %cl # sched: [1:0.25]
; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25]
; ZNVER1-NEXT: movzbl %cl, %eax # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1)
  %2 = load <2 x i64>, <2 x i64> *%a2, align 16
  %3 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %2)
  %4 = and i32 %1, %3
  ret i32 %4
}
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone

; Packed double rounding. The body calls @llvm.x86.sse41.round.pd with
; immediate 7 once on the argument %a0 and once on a <2 x double> loaded from
; %a1, then adds the two results with fadd. The expected assembly for every
; CPU configuration therefore holds one register-source and one memory-source
; roundpd (vroundpd in the VEX-encoded configurations).
; The bracketed pair after each "# sched" marker is emitted because the run
; lines at the top of the file pass -print-schedule to llc.
; NOTE(review): the pair is presumably latency and reciprocal throughput from
; the selected scheduling model - confirm before relying on it.
; The directive lines below are autogenerated (see the note on the first line
; of the file); regenerate them with the script instead of editing by hand.
define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) {
; GENERIC-LABEL: test_roundpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
; GENERIC-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SLM-LABEL: test_roundpd:
; SLM: # %bb.0:
; SLM-NEXT: roundpd $7, (%rdi), %xmm1 # sched: [6:1.00]
; SLM-NEXT: roundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_roundpd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_roundpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [9:1.00]
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_roundpd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [6:0.50]
; HASWELL-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [12:2.00]
; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_roundpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [6:0.50]
; HASWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [12:2.00]
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_roundpd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: roundpd $7, (%rdi), %xmm1 # sched: [11:2.00]
; BROADWELL-SSE-NEXT: roundpd $7, %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_roundpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [11:2.00]
; BROADWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_roundpd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:1.00]
; SKYLAKE-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:1.00]
; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_roundpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:1.00]
; SKYLAKE-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [14:1.00]
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_roundpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:1.00]
; SKX-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:1.00]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_roundpd:
; SKX: # %bb.0:
; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:1.00]
; SKX-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [14:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_roundpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
; BTVER2-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_roundpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [8:1.00]
; BTVER2-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_roundpd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [4:1.00]
; ZNVER1-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [11:1.00]
; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_roundpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [11:1.00]
; ZNVER1-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [4:1.00]
; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
  %2 = load <2 x double>, <2 x double> *%a1, align 16
  %3 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %2, i32 7)
  %4 = fadd <2 x double> %1, %3
  ret <2 x double> %4
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone

; Packed single rounding. Same shape as the packed double test: the body calls
; @llvm.x86.sse41.round.ps with immediate 7 on %a0 and on a <4 x float> loaded
; from %a1, then adds the two results with fadd, so each configuration is
; expected to show a register-source and a memory-source roundps or vroundps.
define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) {
; GENERIC-LABEL: test_roundps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
; GENERIC-NEXT: roundps $7, (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SLM-LABEL: test_roundps:
; SLM: # %bb.0:
; SLM-NEXT: roundps $7, (%rdi), %xmm1 # sched: [6:1.00]
; SLM-NEXT: roundps $7, %xmm0, %xmm0 # sched: [3:1.00]
; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_roundps:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_roundps:
; SANDY: # %bb.0:
; SANDY-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [9:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_roundps:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [6:0.50]
; HASWELL-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [12:2.00]
; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_roundps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [6:0.50]
; HASWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [12:2.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_roundps:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: roundps $7, (%rdi), %xmm1 # sched: [11:2.00]
; BROADWELL-SSE-NEXT: roundps $7, %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_roundps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [11:2.00]
; BROADWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_roundps:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:1.00]
; SKYLAKE-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:1.00]
; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_roundps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:1.00]
; SKYLAKE-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [14:1.00]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_roundps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:1.00]
; SKX-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:1.00]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_roundps:
; SKX: # %bb.0:
; SKX-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:1.00]
; SKX-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [14:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_roundps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
; BTVER2-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_roundps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [8:1.00]
; BTVER2-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_roundps:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [4:1.00]
; ZNVER1-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [11:1.00]
; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_roundps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [11:1.00]
; ZNVER1-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [4:1.00]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
  %2 = load <4 x float>, <4 x float> *%a1, align 16
  %3 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %2, i32 7)
  %4 = fadd <4 x float> %1, %3
  ret <4 x float> %4
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone

; Scalar double rounding. The body calls @llvm.x86.sse41.round.sd with
; immediate 7 twice, both times with %a0 as the first vector operand: once with
; %a1 as the second operand and once with a <2 x double> loaded from %a2. The
; two results are added with fadd. In the expected output the non-VEX
; configurations start by copying %xmm0 into %xmm2 with movapd, while the VEX
; configurations use the three-operand vroundsd and need no copy.
define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_roundsd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
; GENERIC-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
; GENERIC-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SLM-LABEL: test_roundsd:
; SLM: # %bb.0:
; SLM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
; SLM-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
; SLM-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_roundsd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
; SANDY-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
; SANDY-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_roundsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_roundsd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
; HASWELL-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [6:0.50]
; HASWELL-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [12:2.00]
; HASWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_roundsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50]
; HASWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_roundsd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [11:2.00]
; BROADWELL-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [6:0.50]
; BROADWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_roundsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00]
; BROADWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: vaddpd %xmm2, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_roundsd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:1.00]
; SKYLAKE-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:1.00]
; SKYLAKE-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_roundsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_roundsd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33]
; SKX-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:1.00]
; SKX-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:1.00]
; SKX-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_roundsd:
; SKX: # %bb.0:
; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
; SKX-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_roundsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
; BTVER2-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
; BTVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_roundsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; BTVER2-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_roundsd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [11:1.00]
; ZNVER1-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [4:1.00]
; ZNVER1-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_roundsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00]
; ZNVER1-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7)
  %2 = load <2 x double>, <2 x double>* %a2, align 16
  %3 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %2, i32 7)
  %4 = fadd <2 x double> %1, %3
  ret <2 x double> %4
}
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone

; Scalar single rounding. Same shape as the scalar double test: the body calls
; @llvm.x86.sse41.round.ss with immediate 7 twice with %a0 as the first vector
; operand, once with %a1 and once with a <4 x float> loaded from %a2 as the
; second operand, then adds the two results with fadd. The non-VEX expected
; sequences begin with a movaps copy of %xmm0 into %xmm2; the VEX sequences use
; the three-operand vroundss.
define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_roundss:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
; GENERIC-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
; GENERIC-NEXT: roundss $7, (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SLM-LABEL: test_roundss:
; SLM: # %bb.0:
; SLM-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50]
; SLM-NEXT: roundss $7, (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
; SLM-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_roundss:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
; SANDY-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
; SANDY-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_roundss:
; SANDY: # %bb.0:
; SANDY-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_roundss:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
; HASWELL-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [6:0.50]
; HASWELL-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [12:2.00]
; HASWELL-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_roundss:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50]
; HASWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_roundss:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [11:2.00]
; BROADWELL-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [6:0.50]
; BROADWELL-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_roundss:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00]
; BROADWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: vaddps %xmm2, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_roundss:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:1.00]
; SKYLAKE-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:1.00]
; SKYLAKE-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_roundss:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_roundss:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.33]
; SKX-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:1.00]
; SKX-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:1.00]
; SKX-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_roundss:
; SKX: # %bb.0:
; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
; SKX-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_roundss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50]
; BTVER2-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
; BTVER2-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_roundss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; BTVER2-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_roundss:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [11:1.00]
; ZNVER1-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [4:1.00]
; ZNVER1-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_roundss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00]
; ZNVER1-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7)
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %2, i32 7)
  %4 = fadd <4 x float> %1, %3
  ret <4 x float> %4
}
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone