; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4a | FileCheck %s --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=ZNVER1

; Every function in this file wraps exactly one SSE4A intrinsic call. The three
; llc invocations above compile the file with -print-schedule for the x86-64
; (the only one that passes -mattr=+sse4a explicitly), btver2 and znver1 CPU
; settings, and the checks pin the emitted instruction together with its
; trailing "# sched" annotation.
; NOTE(review): the bracketed pair in each annotation is presumably
; latency and reciprocal throughput from the CPU scheduling model - confirm
; against the -print-schedule implementation before relying on that reading.

; extrq, register form: both operands arrive in xmm registers and the expected
; output is a single extrq followed by retq.
define <2 x i64> @test_extrq(<2 x i64> %a0, <16 x i8> %a1) {
; GENERIC-LABEL: test_extrq:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    extrq %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_extrq:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    extrq %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_extrq:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    extrq %xmm1, %xmm0 # sched: [2:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %a0, <16 x i8> %a1)
  ret <2 x i64> %1
}
declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>)

; extrq, immediate form: the intrinsic is called with (i8 3, i8 2) and the
; expected assembly prints the two immediates in the opposite order ($2, $3).
define <2 x i64> @test_extrqi(<2 x i64> %a0) {
; GENERIC-LABEL: test_extrqi:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    extrq $2, $3, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_extrqi:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    extrq $2, $3, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_extrqi:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    extrq $2, $3, %xmm0 # sched: [2:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a0, i8 3, i8 2)
  ret <2 x i64> %1
}
declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8)

; insertq, register form: expected output is a single insertq on xmm1/xmm0
; followed by retq.
define <2 x i64> @test_insertq(<2 x i64> %a0, <2 x i64> %a1) {
; GENERIC-LABEL: test_insertq:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    insertq %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_insertq:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    insertq %xmm1, %xmm0 # sched: [2:2.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_insertq:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    insertq %xmm1, %xmm0 # sched: [4:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %1
}
declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>)

; insertq, immediate form: the intrinsic is called with (i8 5, i8 6) and the
; expected assembly prints the two immediates in the opposite order ($6, $5).
define <2 x i64> @test_insertqi(<2 x i64> %a0, <2 x i64> %a1) {
; GENERIC-LABEL: test_insertqi:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    insertq $6, $5, %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_insertqi:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    insertq $6, $5, %xmm1, %xmm0 # sched: [2:2.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_insertqi:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    insertq $6, $5, %xmm1, %xmm0 # sched: [4:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a0, <2 x i64> %a1, i8 5, i8 6)
  ret <2 x i64> %1
}
declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8)

; movntsd: the pointer argument is expected in rdi and the vector in xmm0; the
; expected output is a single movntsd store to (%rdi) followed by retq.
define void @test_movntsd(i8* %p, <2 x double> %a) {
; GENERIC-LABEL: test_movntsd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    movntsd %xmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_movntsd:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    movntsd %xmm0, (%rdi) # sched: [3:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movntsd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    movntsd %xmm0, (%rdi) # sched: [8:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  tail call void @llvm.x86.sse4a.movnt.sd(i8* %p, <2 x double> %a)
  ret void
}
declare void @llvm.x86.sse4a.movnt.sd(i8*, <2 x double>)

; movntss: calls the llvm.x86.sse4a.movnt.ss intrinsic with a pointer and a
; <4 x float> value. For each of the three checked CPU configurations the
; expected output is a single movntss store of xmm0 to (%rdi) followed by retq;
; only the "# sched" annotations differ between configurations.
define void @test_movntss(i8* %p, <4 x float> %a) {
; GENERIC-LABEL: test_movntss:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    movntss %xmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_movntss:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    movntss %xmm0, (%rdi) # sched: [3:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movntss:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    movntss %xmm0, (%rdi) # sched: [8:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  tail call void @llvm.x86.sse4a.movnt.ss(i8* %p, <4 x float> %a)
  ret void
}
declare void @llvm.x86.sse4a.movnt.ss(i8*, <4 x float>)