Home | History | Annotate | Download | only in X86
      1 ; RUN: llc -mtriple=i686-windows -mattr=+sse2 < %s | FileCheck %s
      2 
      3 target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32" ; little-endian, 32-bit pointers; S32 declares a natural stack alignment of only 32 bits (4 bytes)
      4 target triple = "i686-pc-windows-msvc" ; 32-bit x86 with the MSVC environment
      5 
      6 ; There is a conflict between lowering the X86 memory intrinsics and the "base"
      7 ; register used to address stack locals.  See X86RegisterInfo::hasBaseRegister
      8 ; for when this is necessary. Typically, we choose ESI for the base register,
      9 ; which all of the X86 string instructions use.
     10 
     11 declare void @escape_vla_and_icmp(i8*, i1 zeroext) ; external function; each test passes it the VLA pointer (or null) and the extracted compare bit
     12 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) ; operands: dest, src, byte count, isvolatile
     13 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) ; operands: dest, fill byte, byte count, isvolatile
     14 
     15 define i32 @memcpy_novla_vector(<4 x i32>* %vp0, i8* %a, i8* %b, i32 %n, i1 zeroext %cond) { ; over-aligned local but no dynamic alloca: the expected output still copies with rep;movsl
     16   %foo = alloca <4 x i32>, align 16 ; 16-byte-aligned local, i.e. more than the 4-byte stack alignment declared by S32
     17   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %a, i8* align 4 %b, i32 128, i1 false) ; fixed-size 128-byte, 4-byte-aligned, non-volatile copy
     18   br i1 %cond, label %spill_vectors, label %no_vectors
     19 
     20 no_vectors:
     21   ret i32 0
     22 
     23 spill_vectors: ; vector loads, a vector compare, and a call that follow the memcpy
     24   %vp1 = getelementptr <4 x i32>, <4 x i32>* %vp0, i32 1
     25   %v0 = load <4 x i32>, <4 x i32>* %vp0
     26   %v1 = load <4 x i32>, <4 x i32>* %vp1
     27   %vicmp = icmp slt <4 x i32> %v0, %v1
     28   %icmp = extractelement <4 x i1> %vicmp, i32 0
     29   call void @escape_vla_and_icmp(i8* null, i1 zeroext %icmp) ; null pointer argument: this variant allocates no VLA
     30   %r = extractelement <4 x i32> %v0, i32 0 ; %v0 is read here, so it is live across the call above
     31   ret i32 %r
     32 }
     33 ; Expected output: ESP is realigned to 16 bytes, then the copy is done inline as 32 dwords (128 bytes) with ECX as count and ESI/EDI loaded for the string instruction.
     34 ; CHECK-LABEL: _memcpy_novla_vector:
     35 ; CHECK: andl $-16, %esp
     36 ; CHECK-DAG: movl $32, %ecx
     37 ; CHECK-DAG: movl {{.*}}, %esi
     38 ; CHECK-DAG: movl {{.*}}, %edi
     39 ; CHECK: rep;movsl
     40 
     41 define i32 @memcpy_vla_vector(<4 x i32>* %vp0, i8* %a, i8* %b, i32 %n, i1 zeroext %cond) { ; over-aligned local plus a VLA: the expected output copies via a call to _memcpy, not rep;movsl
     42   %foo = alloca <4 x i32>, align 16 ; 16-byte-aligned local, i.e. more than the 4-byte stack alignment declared by S32
     43   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %a, i8* align 4 %b, i32 128, i1 false) ; same fixed-size 128-byte copy as in @memcpy_novla_vector
     44   br i1 %cond, label %spill_vectors, label %no_vectors
     45 
     46 no_vectors:
     47   ret i32 0
     48 
     49 spill_vectors: ; vector loads, a vector compare, a dynamic alloca, and a call
     50   %vp1 = getelementptr <4 x i32>, <4 x i32>* %vp0, i32 1
     51   %v0 = load <4 x i32>, <4 x i32>* %vp0
     52   %v1 = load <4 x i32>, <4 x i32>* %vp1
     53   %vicmp = icmp slt <4 x i32> %v0, %v1
     54   %icmp = extractelement <4 x i1> %vicmp, i32 0
     55   %vla = alloca i8, i32 %n ; dynamically sized stack allocation of %n bytes
     56   call void @escape_vla_and_icmp(i8* %vla, i1 zeroext %icmp) ; the VLA pointer is passed to an external function
     57   %r = extractelement <4 x i32> %v0, i32 0 ; %v0 is read here, so it is live across the call above
     58   ret i32 %r
     59 }
     60 ; Expected output: ESP is realigned, ESI is set up from ESP (the base register), the 128-byte copy becomes a call to _memcpy, and __chkstk is called (presumably for the VLA).
     61 ; CHECK-LABEL: _memcpy_vla_vector:
     62 ; CHECK: andl $-16, %esp
     63 ; CHECK: movl %esp, %esi
     64 ; CHECK: pushl $128
     65 ; CHECK: calll _memcpy
     66 ; CHECK: calll __chkstk
     67 
     68 ; stosd doesn't clobber esi, so memset can still be lowered to rep;stosl while esi is the base register.
     69 
     70 define i32 @memset_vla_vector(<4 x i32>* %vp0, i8* %a, i32 %n, i1 zeroext %cond) { ; over-aligned local plus a VLA, with memset: the expected output still uses rep;stosl
     71   %foo = alloca <4 x i32>, align 16 ; 16-byte-aligned local, i.e. more than the 4-byte stack alignment declared by S32
     72   call void @llvm.memset.p0i8.i32(i8* align 4 %a, i8 42, i32 128, i1 false) ; fill 128 bytes with the byte 42 (0x2A), 4-byte-aligned, non-volatile
     73   br i1 %cond, label %spill_vectors, label %no_vectors
     74 
     75 no_vectors:
     76   ret i32 0
     77 
     78 spill_vectors: ; vector loads, a vector compare, a dynamic alloca, and a call
     79   %vp1 = getelementptr <4 x i32>, <4 x i32>* %vp0, i32 1
     80   %v0 = load <4 x i32>, <4 x i32>* %vp0
     81   %v1 = load <4 x i32>, <4 x i32>* %vp1
     82   %vicmp = icmp slt <4 x i32> %v0, %v1
     83   %icmp = extractelement <4 x i1> %vicmp, i32 0
     84   %vla = alloca i8, i32 %n ; dynamically sized stack allocation of %n bytes
     85   call void @escape_vla_and_icmp(i8* %vla, i1 zeroext %icmp) ; the VLA pointer is passed to an external function
     86   %r = extractelement <4 x i32> %v0, i32 0 ; %v0 is read here, so it is live across the call above
     87   ret i32 %r
     88 }
     89 ; Expected output: ESI is set up from ESP, then an inline fill with EAX = 0x2A replicated into all four bytes, ECX = 32 dwords, EDI = destination; no further movl into ESI is allowed before rep;stosl.
     90 ; CHECK-LABEL: _memset_vla_vector:
     91 ; CHECK: andl $-16, %esp
     92 ; CHECK: movl %esp, %esi
     93 ; CHECK-DAG: movl $707406378, %eax        # imm = 0x2A2A2A2A
     94 ; CHECK-DAG: movl $32, %ecx
     95 ; CHECK-DAG: movl {{.*}}, %edi
     96 ; CHECK-NOT: movl {{.*}}, %esi
     97 ; CHECK: rep;stosl
     98 
     99 ; Add a test for memcmp if we ever add a special lowering for it.
    100