Home | History | Annotate | Download | only in PowerPC
      1 ; RUN: llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec < %s | FileCheck %s
      2 
      3 ;; This test ensures that MergeConsecutiveStores does not attempt to
      4 ;; merge stores or loads when doing so would result in unaligned
      5 ;; memory operations (unless the target supports those, e.g. X86).
      6 
      7 ;; This issue happens in other situations for other targets, but PPC
      8 ;; with AltiVec extensions was chosen for the test because it does not
      9 ;; support unaligned access with AltiVec instructions. If the 4
     10 ;; loads/stores get merged to a v4i32 vector type, severely bad code
     11 ;; gets generated: it painstakingly copies the values to a temporary
     12 ;; location on the stack, with vector ops, in order to then use
     13 ;; integer ops to load from the temporary stack location and store to
     14 ;; the final location. Yuck!
     15 
     16 %struct.X = type { i32, i32, i32, i32 }
     17 
     18 @fx = common global %struct.X zeroinitializer, align 4
     19 @fy = common global %struct.X zeroinitializer, align 4
     20 
     21 ;; In this test case, lvx and stvx instructions should NOT be
     22 ;; generated, as the alignment is not sufficient for it to be
     23 ;; worthwhile.
     24 
     25 ;; CHECK-LABEL: f:
     26 ;; CHECK:      lwzu
     27 ;; CHECK-NEXT: lwz
     28 ;; CHECK-NEXT: lwz
     29 ;; CHECK-NEXT: lwz
     30 ;; CHECK-NEXT: stwu
     31 ;; CHECK-NEXT: stw
     32 ;; CHECK-NEXT: stw
     33 ;; CHECK-NEXT: stw
     34 ;; CHECK-NEXT: blr
     ;; Copies the four i32 fields of @fx into @fy one field at a time.
     ;; All four loads are issued first, followed by the four stores, and
     ;; every access is marked align 4, matching the alignment that @fx
     ;; and @fy are declared with.
     define void @f() {
     copy:
       ; Read fields 0..3 of @fx.
       %x0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 4
       %x1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4
       %x2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4
       %x3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4
       ; Write them to the corresponding fields of @fy.
       store i32 %x0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 4
       store i32 %x1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4
       store i32 %x2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4
       store i32 %x3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4
       ret void
     }
     47 
     48 @gx = common global %struct.X zeroinitializer, align 16
     49 @gy = common global %struct.X zeroinitializer, align 16
     50 
     51 ;; In this test, lvx and stvx instructions SHOULD be generated, as
     52 ;; the 16-byte alignment of the new load/store is acceptable.
     53 ;; CHECK-LABEL: g:
     54 ;; CHECK: lvx
     55 ;; CHECK: stvx
     56 ;; CHECK: blr
     ;; Copies the four i32 fields of @gx into @gy one field at a time.
     ;; The access to field 0 is marked align 16 and the remaining fields
     ;; align 4.
     ;;
     ;; The function operates on @gx/@gy, which are declared with align 16,
     ;; so the align 16 annotation on the field-0 load and store is actually
     ;; true. Using the align-4 globals @fx/@fy here would overstate the
     ;; alignment of those accesses, which the IR semantics treat as
     ;; undefined behavior, and would leave @gx/@gy unused.
     define void @g() {
     entry:
       ; Read fields 0..3 of @gx; field 0 sits at the 16-byte-aligned base.
       %0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @gx, i32 0, i32 0), align 16
       %1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @gx, i32 0, i32 1), align 4
       %2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @gx, i32 0, i32 2), align 4
       %3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @gx, i32 0, i32 3), align 4
       ; Write them to the corresponding fields of @gy.
       store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @gy, i32 0, i32 0), align 16
       store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @gy, i32 0, i32 1), align 4
       store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @gy, i32 0, i32 2), align 4
       store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @gy, i32 0, i32 3), align 4
       ret void
     }
     69