Home | History | Annotate | Download | only in X86
      1 ; RUN: llc -mcpu=core2 < %s | FileCheck %s
      2 ; ModuleID = '<stdin>'
      3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
      4 target triple = "x86_64-apple-macosx10.6.6"
      5 
      6 %0 = type { double } ; unnamed struct wrapping one double; allocated as %__u.i in the double tests
      7 %union.anon = type { float } ; struct wrapping one float; allocated as %__u.i in the float tests
      8 
      9 define i32 @double_signbit(double %d1) nounwind uwtable readnone ssp {
        ; Returns the sign bit of %d1 as an i32: bitcast to i64, lshr by 63, trunc.
        ; Expected codegen is movmskpd immediately followed by an and, with no shr.
     10 entry:
        ; Anchor the directives below to this function's label (as @t1/@t2 do), so a
        ; movmskpd emitted for a different function cannot satisfy them.
        ; CHECK: double_signbit:
     11   %__x.addr.i = alloca double, align 8
     12   %__u.i = alloca %0, align 8
     13   %0 = bitcast double* %__x.addr.i to i8* ; result is not used in this function
     14   %1 = bitcast %0* %__u.i to i8* ; result is not used in this function
     15   store double %d1, double* %__x.addr.i, align 8
     16   %__f.i = getelementptr inbounds %0* %__u.i, i64 0, i32 0
     17   store double %d1, double* %__f.i, align 8
        ; Neither stack slot is ever loaded from; the returned value derives only
        ; from the bitcast of %d1 below. (The ".i" suffixes suggest inlining
        ; residue -- not confirmed by this file.)
     18   %tmp = bitcast double %d1 to i64
     19 ; CHECK-NOT: shr
     20 ; CHECK: movmskpd
     21 ; CHECK-NEXT: and
     22   %tmp1 = lshr i64 %tmp, 63 ; move bit 63 down to bit 0
     23   %shr.i = trunc i64 %tmp1 to i32
     24   ret i32 %shr.i
     25 }
     26 
     27 define i32 @double_add_signbit(double %d1, double %d2) nounwind uwtable readnone ssp {
        ; Returns the sign bit of (%d1 + %d2) as an i32: fadd, bitcast to i64,
        ; lshr by 63, trunc. Expected codegen is movmskpd immediately followed by
        ; an and, with no shr.
     28 entry:
        ; Anchor the directives below to this function's label (as @t1/@t2 do), so a
        ; movmskpd emitted for a different function cannot satisfy them.
        ; CHECK: double_add_signbit:
     29   %__x.addr.i = alloca double, align 8
     30   %__u.i = alloca %0, align 8
     31   %add = fadd double %d1, %d2
     32   %0 = bitcast double* %__x.addr.i to i8* ; result is not used in this function
     33   %1 = bitcast %0* %__u.i to i8* ; result is not used in this function
     34   store double %add, double* %__x.addr.i, align 8
     35   %__f.i = getelementptr inbounds %0* %__u.i, i64 0, i32 0
     36   store double %add, double* %__f.i, align 8
        ; Neither stack slot is ever loaded from; the returned value derives only
        ; from the bitcast of %add below.
     37   %tmp = bitcast double %add to i64
     38 ; CHECK-NOT: shr
     39 ; CHECK: movmskpd
     40 ; CHECK-NEXT: and
     41   %tmp1 = lshr i64 %tmp, 63 ; move bit 63 down to bit 0
     42   %shr.i = trunc i64 %tmp1 to i32
     43   ret i32 %shr.i
     44 }
     45 
     46 define i32 @float_signbit(float %f1) nounwind uwtable readnone ssp {
        ; Returns the sign bit of %f1 as an i32: bitcast to i32, lshr by 31.
        ; Expected codegen is movmskps immediately followed by an and, with no shr.
     47 entry:
        ; Anchor the directives below to this function's label (as @t1/@t2 do), so a
        ; movmskps emitted for a different function cannot satisfy them.
        ; CHECK: float_signbit:
     48   %__x.addr.i = alloca float, align 4
     49   %__u.i = alloca %union.anon, align 4
     50   %0 = bitcast float* %__x.addr.i to i8* ; result is not used in this function
     51   %1 = bitcast %union.anon* %__u.i to i8* ; result is not used in this function
     52   store float %f1, float* %__x.addr.i, align 4
     53   %__f.i = getelementptr inbounds %union.anon* %__u.i, i64 0, i32 0
     54   store float %f1, float* %__f.i, align 4
        ; Neither stack slot is ever loaded from; the returned value derives only
        ; from the bitcast of %f1 below.
     55   %2 = bitcast float %f1 to i32
     56 ; CHECK-NOT: shr
     57 ; CHECK: movmskps
     58 ; CHECK-NEXT: and
     59   %shr.i = lshr i32 %2, 31 ; move bit 31 down to bit 0
     60   ret i32 %shr.i
     61 }
     62 
     63 define i32 @float_add_signbit(float %f1, float %f2) nounwind uwtable readnone ssp {
        ; Returns the sign bit of (%f1 + %f2) as an i32: fadd, bitcast to i32,
        ; lshr by 31. Expected codegen is movmskps immediately followed by an and,
        ; with no shr.
     64 entry:
        ; Anchor the directives below to this function's label (as @t1/@t2 do), so a
        ; movmskps emitted for a different function cannot satisfy them.
        ; CHECK: float_add_signbit:
     65   %__x.addr.i = alloca float, align 4
     66   %__u.i = alloca %union.anon, align 4
     67   %add = fadd float %f1, %f2
     68   %0 = bitcast float* %__x.addr.i to i8* ; result is not used in this function
     69   %1 = bitcast %union.anon* %__u.i to i8* ; result is not used in this function
     70   store float %add, float* %__x.addr.i, align 4
     71   %__f.i = getelementptr inbounds %union.anon* %__u.i, i64 0, i32 0
     72   store float %add, float* %__f.i, align 4
        ; Neither stack slot is ever loaded from; the returned value derives only
        ; from the bitcast of %add below.
     73   %2 = bitcast float %add to i32
     74 ; CHECK-NOT: shr
     75 ; CHECK: movmskps
     76 ; CHECK-NEXT: and
     77   %shr.i = lshr i32 %2, 31 ; move bit 31 down to bit 0
     78   ret i32 %shr.i
     79 }
     80 
     81 ; rdar://10247336
     82 ; movmskp{s|d} only set low 4/2 bits, high bits are known zero
     83 
     84 define i32 @t1(<4 x float> %x, i32* nocapture %indexTable) nounwind uwtable readonly ssp {
        ; Calls the SSE movmsk.ps intrinsic on %x, sign-extends the i32 result to
        ; i64, and uses it to index %indexTable; returns the loaded i32.
        ; The directives require a movmskps after the t1 label and forbid a movslq
        ; after it, i.e. the sext must not cost an instruction.
     85 entry:
     86 ; CHECK: t1:
     87 ; CHECK: movmskps
     88 ; CHECK-NOT: movslq
     89   %0 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %x) nounwind
     90   %idxprom = sext i32 %0 to i64 ; the sign extension under test
     91   %arrayidx = getelementptr inbounds i32* %indexTable, i64 %idxprom
     92   %1 = load i32* %arrayidx, align 4
     93   ret i32 %1
     94 }
     95 
     96 define i32 @t2(<4 x float> %x, i32* nocapture %indexTable) nounwind uwtable readonly ssp {
        ; Same shape as @t1, but for the SSE2 movmsk.pd intrinsic: %x is bitcast to
        ; <2 x double>, the intrinsic's i32 result is sign-extended to i64 and used
        ; to index %indexTable; returns the loaded i32.
        ; The directives require a movmskpd after the t2 label and forbid a movslq
        ; after it.
     97 entry:
     98 ; CHECK: t2:
     99 ; CHECK: movmskpd
    100 ; CHECK-NOT: movslq
    101   %0 = bitcast <4 x float> %x to <2 x double> ; reinterpret the argument for the pd intrinsic
    102   %1 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %0) nounwind
    103   %idxprom = sext i32 %1 to i64 ; the sign extension under test
    104   %arrayidx = getelementptr inbounds i32* %indexTable, i64 %idxprom
    105   %2 = load i32* %arrayidx, align 4
    106   ret i32 %2
    107 }
    108 
    109 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone ; intrinsic called by @t2
    110 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone ; intrinsic called by @t1
    111