Home | History | Annotate | Download | only in X86
      1 ; Intel chips with slow unaligned memory accesses
      2 
      3 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3      2>&1 | FileCheck %s --check-prefix=SLOW
      4 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3m     2>&1 | FileCheck %s --check-prefix=SLOW
      5 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium-m     2>&1 | FileCheck %s --check-prefix=SLOW
      6 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4      2>&1 | FileCheck %s --check-prefix=SLOW
      7 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m     2>&1 | FileCheck %s --check-prefix=SLOW
      8 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=yonah         2>&1 | FileCheck %s --check-prefix=SLOW
      9 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=prescott      2>&1 | FileCheck %s --check-prefix=SLOW
     10 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nocona        2>&1 | FileCheck %s --check-prefix=SLOW
     11 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=core2         2>&1 | FileCheck %s --check-prefix=SLOW
     12 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=penryn        2>&1 | FileCheck %s --check-prefix=SLOW
     13 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bonnell       2>&1 | FileCheck %s --check-prefix=SLOW
     14 
     15 ; Intel chips with fast unaligned memory accesses
     16 
     17 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont    2>&1 | FileCheck %s --check-prefix=FAST
     18 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem       2>&1 | FileCheck %s --check-prefix=FAST
     19 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere      2>&1 | FileCheck %s --check-prefix=FAST
     20 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=sandybridge   2>&1 | FileCheck %s --check-prefix=FAST
     21 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=ivybridge     2>&1 | FileCheck %s --check-prefix=FAST
     22 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=haswell       2>&1 | FileCheck %s --check-prefix=FAST
     23 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell     2>&1 | FileCheck %s --check-prefix=FAST
     24 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=knl           2>&1 | FileCheck %s --check-prefix=FAST
     25 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=skylake       2>&1 | FileCheck %s --check-prefix=FAST
     26 
     27 ; AMD chips with slow unaligned memory accesses
     28 
     29 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-4      2>&1 | FileCheck %s --check-prefix=SLOW
     30 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-xp     2>&1 | FileCheck %s --check-prefix=SLOW
     31 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8            2>&1 | FileCheck %s --check-prefix=SLOW
     32 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron       2>&1 | FileCheck %s --check-prefix=SLOW
     33 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64      2>&1 | FileCheck %s --check-prefix=SLOW
     34 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-fx     2>&1 | FileCheck %s --check-prefix=SLOW
     35 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8-sse3       2>&1 | FileCheck %s --check-prefix=SLOW
     36 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron-sse3  2>&1 | FileCheck %s --check-prefix=SLOW
     37 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64-sse3 2>&1 | FileCheck %s --check-prefix=SLOW
     38 
     39 ; AMD chips with fast unaligned memory accesses
     40 
     41 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10      2>&1 | FileCheck %s --check-prefix=FAST
     42 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona     2>&1 | FileCheck %s --check-prefix=FAST
     43 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1        2>&1 | FileCheck %s --check-prefix=FAST
     44 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver2        2>&1 | FileCheck %s --check-prefix=FAST
     45 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1        2>&1 | FileCheck %s --check-prefix=FAST
     46 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2        2>&1 | FileCheck %s --check-prefix=FAST
     47 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3        2>&1 | FileCheck %s --check-prefix=FAST
     48 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4        2>&1 | FileCheck %s --check-prefix=FAST
     49 
     50 ; Other chips with slow unaligned memory accesses
     51 
     52 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=c3-2          2>&1 | FileCheck %s --check-prefix=SLOW
     53 
     54 ; Verify that the slow/fast unaligned memory attribute is set correctly for each CPU model.
     55 ; Slow chips use 4-byte stores. Fast chips with SSE or later use something other than 4-byte stores.
     56 ; Chips that don't have SSE use 4-byte stores either way, so they're not tested.
     57 
     58 ; Also verify that SSE4.2 or SSE4a imply fast unaligned accesses.
     59 
     60 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=sse4.2       2>&1 | FileCheck %s --check-prefix=FAST
     61 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=sse4a        2>&1 | FileCheck %s --check-prefix=FAST
     62 
     63 define void @store_zeros(i8* %a) {
     64 ; SLOW-NOT: not a recognized processor
     65 ; SLOW-LABEL: store_zeros:
     66 ; SLOW:       # BB#0:
     67 ; SLOW-NEXT:    movl
     68 ; SLOW-NEXT:    movl
     69 ; SLOW-NEXT:    movl
     70 ; SLOW-NEXT:    movl
     71 ; SLOW-NEXT:    movl
     72 ; SLOW-NEXT:    movl
     73 ; SLOW-NEXT:    movl
     74 ; SLOW-NEXT:    movl
     75 ; SLOW-NEXT:    movl
     76 ; SLOW-NEXT:    movl
     77 ; SLOW-NEXT:    movl
     78 ; SLOW-NEXT:    movl
     79 ; SLOW-NEXT:    movl
     80 ; SLOW-NEXT:    movl
     81 ; SLOW-NEXT:    movl
     82 ; SLOW-NEXT:    movl
     83 ; SLOW-NEXT:    movl
     84 ;
     85 ; FAST-NOT: not a recognized processor
     86 ; FAST-LABEL: store_zeros:
     87 ; FAST:       # BB#0:
     88 ; FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
     89 ; FAST-NOT:     movl
     90   call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 64, i32 1, i1 false)
     91   ret void
     92 }
     93 
     94 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
     95 
     96