1 ; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp | FileCheck %s 2 ; PR14824. The test is presented by Jiangning Liu. If the ld/st optimization algorithm is changed, this test case may fail. 3 ; Also if the machine code for ld/st optimizor is changed, this test case may fail. If so, remove this test. 4 5 define void @sample_test(<8 x i64> * %secondSource, <8 x i64> * %source, <8 x i64> * %dest) nounwind { 6 ; CHECK: sample_test 7 ; CHECK-NOT: vldmia 8 ; CHECK: add 9 entry: 10 11 ; Load %source 12 %s0 = load <8 x i64> * %source, align 64 13 %arrayidx64 = getelementptr inbounds <8 x i64> * %source, i32 6 14 %s120 = load <8 x i64> * %arrayidx64, align 64 15 %s122 = bitcast <8 x i64> %s120 to i512 16 %data.i.i677.48.extract.shift = lshr i512 %s122, 384 17 %data.i.i677.48.extract.trunc = trunc i512 %data.i.i677.48.extract.shift to i64 18 %s123 = insertelement <8 x i64> undef, i64 %data.i.i677.48.extract.trunc, i32 0 19 %data.i.i677.32.extract.shift = lshr i512 %s122, 256 20 %data.i.i677.32.extract.trunc = trunc i512 %data.i.i677.32.extract.shift to i64 21 %s124 = insertelement <8 x i64> %s123, i64 %data.i.i677.32.extract.trunc, i32 1 22 %data.i.i677.16.extract.shift = lshr i512 %s122, 128 23 %data.i.i677.16.extract.trunc = trunc i512 %data.i.i677.16.extract.shift to i64 24 %s125 = insertelement <8 x i64> %s124, i64 %data.i.i677.16.extract.trunc, i32 2 25 %data.i.i677.56.extract.shift = lshr i512 %s122, 448 26 %data.i.i677.56.extract.trunc = trunc i512 %data.i.i677.56.extract.shift to i64 27 %s126 = insertelement <8 x i64> %s125, i64 %data.i.i677.56.extract.trunc, i32 3 28 %data.i.i677.24.extract.shift = lshr i512 %s122, 192 29 %data.i.i677.24.extract.trunc = trunc i512 %data.i.i677.24.extract.shift to i64 30 %s127 = insertelement <8 x i64> %s126, i64 %data.i.i677.24.extract.trunc, i32 4 31 %s128 = insertelement <8 x i64> %s127, i64 %data.i.i677.32.extract.trunc, i32 5 32 %s129 = insertelement <8 x i64> %s128, i64 %data.i.i677.16.extract.trunc, i32 6 33 %s130 = insertelement <8 x i64> %s129, i64 %data.i.i677.56.extract.trunc, i32 7 34 35 ; Load %secondSource 36 %s1 = load <8 x i64> * %secondSource, align 64 37 %arrayidx67 = getelementptr inbounds <8 x i64> * %secondSource, i32 6 38 %s121 = load <8 x i64> * %arrayidx67, align 64 39 %s131 = bitcast <8 x i64> %s121 to i512 40 %data.i1.i676.48.extract.shift = lshr i512 %s131, 384 41 %data.i1.i676.48.extract.trunc = trunc i512 %data.i1.i676.48.extract.shift to i64 42 %s132 = insertelement <8 x i64> undef, i64 %data.i1.i676.48.extract.trunc, i32 0 43 %data.i1.i676.32.extract.shift = lshr i512 %s131, 256 44 %data.i1.i676.32.extract.trunc = trunc i512 %data.i1.i676.32.extract.shift to i64 45 %s133 = insertelement <8 x i64> %s132, i64 %data.i1.i676.32.extract.trunc, i32 1 46 %data.i1.i676.16.extract.shift = lshr i512 %s131, 128 47 %data.i1.i676.16.extract.trunc = trunc i512 %data.i1.i676.16.extract.shift to i64 48 %s134 = insertelement <8 x i64> %s133, i64 %data.i1.i676.16.extract.trunc, i32 2 49 %data.i1.i676.56.extract.shift = lshr i512 %s131, 448 50 %data.i1.i676.56.extract.trunc = trunc i512 %data.i1.i676.56.extract.shift to i64 51 %s135 = insertelement <8 x i64> %s134, i64 %data.i1.i676.56.extract.trunc, i32 3 52 %data.i1.i676.24.extract.shift = lshr i512 %s131, 192 53 %data.i1.i676.24.extract.trunc = trunc i512 %data.i1.i676.24.extract.shift to i64 54 %s136 = insertelement <8 x i64> %s135, i64 %data.i1.i676.24.extract.trunc, i32 4 55 %s137 = insertelement <8 x i64> %s136, i64 %data.i1.i676.32.extract.trunc, i32 5 56 %s138 = insertelement <8 x i64> %s137, i64 %data.i1.i676.16.extract.trunc, i32 6 57 %s139 = insertelement <8 x i64> %s138, i64 %data.i1.i676.56.extract.trunc, i32 7 58 59 ; Operations about %Source and %secondSource 60 %vecinit28.i.i699 = shufflevector <8 x i64> %s139, <8 x i64> %s130, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 undef, i32 undef, i32 undef> 61 %vecinit35.i.i700 = shufflevector <8 x i64> %vecinit28.i.i699, <8 x i64> %s139, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 undef, i32 undef> 62 %vecinit42.i.i701 = shufflevector <8 x i64> %vecinit35.i.i700, <8 x i64> %s139, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 undef> 63 %vecinit49.i.i702 = shufflevector <8 x i64> %vecinit42.i.i701, <8 x i64> %s130, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15> 64 %arrayidx72 = getelementptr inbounds <8 x i64> * %dest, i32 6 65 store <8 x i64> %vecinit49.i.i702, <8 x i64> * %arrayidx72, align 64 66 %arrayidx78 = getelementptr inbounds <8 x i64> * %secondSource, i32 7 67 %s141 = load <8 x i64> * %arrayidx78, align 64 68 %s151 = bitcast <8 x i64> %s141 to i512 69 %data.i1.i649.32.extract.shift = lshr i512 %s151, 256 70 %data.i1.i649.32.extract.trunc = trunc i512 %data.i1.i649.32.extract.shift to i64 71 %s152 = insertelement <8 x i64> undef, i64 %data.i1.i649.32.extract.trunc, i32 0 72 %s153 = insertelement <8 x i64> %s152, i64 %data.i1.i649.32.extract.trunc, i32 1 73 %data.i1.i649.16.extract.shift = lshr i512 %s151, 128 74 %data.i1.i649.16.extract.trunc = trunc i512 %data.i1.i649.16.extract.shift to i64 75 %s154 = insertelement <8 x i64> %s153, i64 %data.i1.i649.16.extract.trunc, i32 2 76 %data.i1.i649.8.extract.shift = lshr i512 %s151, 64 77 %data.i1.i649.8.extract.trunc = trunc i512 %data.i1.i649.8.extract.shift to i64 78 %s155 = insertelement <8 x i64> %s154, i64 %data.i1.i649.8.extract.trunc, i32 3 79 %arrayidx83 = getelementptr inbounds <8 x i64> * %dest, i32 7 80 store <8 x i64> %s155, <8 x i64> * %arrayidx83, align 64 81 ret void 82 } 83