; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE --check-prefix=ALL
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=ALL
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512f < %s | FileCheck %s --check-prefix=KNL

; Verify that fast-isel knows how to select aligned/unaligned vector loads.
; Also verify that the selected load instruction is in the correct domain.

define <16 x i8> @test_v16i8(<16 x i8>* %V) {
; ALL-LABEL: test_v16i8:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 16
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16(<8 x i16>* %V) {
; ALL-LABEL: test_v8i16:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 16
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32(<4 x i32>* %V) {
; ALL-LABEL: test_v4i32:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 16
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64(<2 x i64>* %V) {
; ALL-LABEL: test_v2i64:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 16
  ret <2 x i64> %0
}

define <16 x i8> @test_v16i8_unaligned(<16 x i8>* %V) {
; ALL-LABEL: test_v16i8_unaligned:
; SSE: movdqu (%rdi), %xmm0
; AVX: vmovdqu (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 4
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_unaligned(<8 x i16>* %V) {
; ALL-LABEL: test_v8i16_unaligned:
; SSE: movdqu (%rdi), %xmm0
; AVX: vmovdqu (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 4
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_unaligned(<4 x i32>* %V) {
; ALL-LABEL: test_v4i32_unaligned:
; SSE: movdqu (%rdi), %xmm0
; AVX: vmovdqu (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 4
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_unaligned(<2 x i64>* %V) {
; ALL-LABEL: test_v2i64_unaligned:
; SSE: movdqu (%rdi), %xmm0
; AVX: vmovdqu (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 4
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32(<4 x float>* %V) {
; ALL-LABEL: test_v4f32:
; SSE: movaps (%rdi), %xmm0
; AVX: vmovaps (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 16
  ret <4 x float> %0
}

define <2 x double> @test_v2f64(<2 x double>* %V) {
; ALL-LABEL: test_v2f64:
; SSE: movapd (%rdi), %xmm0
; AVX: vmovapd (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 16
  ret <2 x double> %0
}

define <4 x float> @test_v4f32_unaligned(<4 x float>* %V) {
; ALL-LABEL: test_v4f32_unaligned:
; SSE: movups (%rdi), %xmm0
; AVX: vmovups (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 4
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_unaligned(<2 x double>* %V) {
; ALL-LABEL: test_v2f64_unaligned:
; SSE: movupd (%rdi), %xmm0
; AVX: vmovupd (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 4
  ret <2 x double> %0
}

define <16 x i8> @test_v16i8_abi_alignment(<16 x i8>* %V) {
; ALL-LABEL: test_v16i8_abi_alignment:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_abi_alignment(<8 x i16>* %V) {
; ALL-LABEL: test_v8i16_abi_alignment:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_abi_alignment(<4 x i32>* %V) {
; ALL-LABEL: test_v4i32_abi_alignment:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_abi_alignment(<2 x i64>* %V) {
; ALL-LABEL: test_v2i64_abi_alignment:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32_abi_alignment(<4 x float>* %V) {
; ALL-LABEL: test_v4f32_abi_alignment:
; SSE: movaps (%rdi), %xmm0
; AVX: vmovaps (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x float>, <4 x float>* %V
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_abi_alignment(<2 x double>* %V) {
; ALL-LABEL: test_v2f64_abi_alignment:
; SSE: movapd (%rdi), %xmm0
; AVX: vmovapd (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x double>, <2 x double>* %V
  ret <2 x double> %0
}

define <8 x i64> @test_v8i64_alignment(<8 x i64>* %V) {
; KNL-LABEL: test_v8i64_alignment:
; KNL: # BB#0: # %entry
; KNL-NEXT: vmovdqa64 (%rdi), %zmm0
; KNL-NEXT: retq
entry:
  %0 = load <8 x i64>, <8 x i64>* %V, align 64
  ret <8 x i64> %0
}

define <8 x i64> @test_v8i64(<8 x i64>* %V) {
; KNL-LABEL: test_v8i64:
; KNL: # BB#0: # %entry
; KNL-NEXT: vmovdqu64 (%rdi), %zmm0
; KNL-NEXT: retq
entry:
  %0 = load <8 x i64>, <8 x i64>* %V, align 4
  ret <8 x i64> %0
}