; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

@src64 = common global [4 x i64] zeroinitializer, align 32
@dst64 = common global [4 x i64] zeroinitializer, align 32
@src32 = common global [8 x i32] zeroinitializer, align 32
@dst32 = common global [8 x i32] zeroinitializer, align 32
@src16 = common global [16 x i16] zeroinitializer, align 32
@dst16 = common global [16 x i16] zeroinitializer, align 32
@src8  = common global [32 x i8] zeroinitializer, align 32
@dst8  = common global [32 x i8] zeroinitializer, align 32

declare i64 @llvm.cttz.i64(i64, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i16 @llvm.cttz.i16(i16, i1)
declare  i8 @llvm.cttz.i8(i8, i1)

;
; CTTZ
;

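; Each test below loads scalar elements, applies llvm.cttz.* to each one and stores
; the results back. The autogenerated CHECK lines (shared between the SSE and AVX
; prefixes) currently expect the loads, calls and stores to remain scalar.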
define void @cttz_2i64() #0 {
; CHECK-LABEL: @cttz_2i64(
; CHECK-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
; CHECK-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
; CHECK-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
; CHECK-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
; CHECK-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
; CHECK-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
; CHECK-NEXT:    ret void
;
  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
  %cttz0 = call i64 @llvm.cttz.i64(i64 %ld0, i1 0)
  %cttz1 = call i64 @llvm.cttz.i64(i64 %ld1, i1 0)
  store i64 %cttz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
  store i64 %cttz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
  ret void
}

define void @cttz_4i64() #0 {
; CHECK-LABEL: @cttz_4i64(
; CHECK-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
; CHECK-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
; CHECK-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
; CHECK-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
; CHECK-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
; CHECK-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
; CHECK-NEXT:    [[CTTZ2:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD2]], i1 false)
; CHECK-NEXT:    [[CTTZ3:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD3]], i1 false)
; CHECK-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
; CHECK-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
; CHECK-NEXT:    store i64 [[CTTZ2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
; CHECK-NEXT:    store i64 [[CTTZ3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
; CHECK-NEXT:    ret void
;
  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
  %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
  %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
  %cttz0 = call i64 @llvm.cttz.i64(i64 %ld0, i1 0)
  %cttz1 = call i64 @llvm.cttz.i64(i64 %ld1, i1 0)
  %cttz2 = call i64 @llvm.cttz.i64(i64 %ld2, i1 0)
  %cttz3 = call i64 @llvm.cttz.i64(i64 %ld3, i1 0)
  store i64 %cttz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
  store i64 %cttz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
  store i64 %cttz2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
  store i64 %cttz3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
  ret void
}

define void @cttz_4i32() #0 {
; CHECK-LABEL: @cttz_4i32(
; CHECK-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
; CHECK-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
; CHECK-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
; CHECK-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
; CHECK-NEXT:    [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false)
; CHECK-NEXT:    [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false)
; CHECK-NEXT:    [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false)
; CHECK-NEXT:    [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 false)
; CHECK-NEXT:    store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
; CHECK-NEXT:    store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
; CHECK-NEXT:    store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
; CHECK-NEXT:    store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
; CHECK-NEXT:    ret void
;
  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
  %cttz0 = call i32 @llvm.cttz.i32(i32 %ld0, i1 0)
  %cttz1 = call i32 @llvm.cttz.i32(i32 %ld1, i1 0)
  %cttz2 = call i32 @llvm.cttz.i32(i32 %ld2, i1 0)
  %cttz3 = call i32 @llvm.cttz.i32(i32 %ld3, i1 0)
  store i32 %cttz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
  store i32 %cttz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
  store i32 %cttz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
  store i32 %cttz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
  ret void
}

define void @cttz_8i32() #0 {
; CHECK-LABEL: @cttz_8i32(
; CHECK-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
; CHECK-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
; CHECK-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
; CHECK-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
; CHECK-NEXT:    [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
; CHECK-NEXT:    [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
; CHECK-NEXT:    [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
; CHECK-NEXT:    [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
; CHECK-NEXT:    [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false)
; CHECK-NEXT:    [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false)
; CHECK-NEXT:    [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false)
; CHECK-NEXT:    [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 false)
; CHECK-NEXT:    [[CTTZ4:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD4]], i1 false)
; CHECK-NEXT:    [[CTTZ5:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD5]], i1 false)
; CHECK-NEXT:    [[CTTZ6:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD6]], i1 false)
; CHECK-NEXT:    [[CTTZ7:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD7]], i1 false)
; CHECK-NEXT:    store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
; CHECK-NEXT:    store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
; CHECK-NEXT:    store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
; CHECK-NEXT:    store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
; CHECK-NEXT:    store i32 [[CTTZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
; CHECK-NEXT:    store i32 [[CTTZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
; CHECK-NEXT:    store i32 [[CTTZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
; CHECK-NEXT:    store i32 [[CTTZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
; CHECK-NEXT:    ret void
;
  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
  %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
  %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
  %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
  %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
  %cttz0 = call i32 @llvm.cttz.i32(i32 %ld0, i1 0)
  %cttz1 = call i32 @llvm.cttz.i32(i32 %ld1, i1 0)
  %cttz2 = call i32 @llvm.cttz.i32(i32 %ld2, i1 0)
  %cttz3 = call i32 @llvm.cttz.i32(i32 %ld3, i1 0)
  %cttz4 = call i32 @llvm.cttz.i32(i32 %ld4, i1 0)
  %cttz5 = call i32 @llvm.cttz.i32(i32 %ld5, i1 0)
  %cttz6 = call i32 @llvm.cttz.i32(i32 %ld6, i1 0)
  %cttz7 = call i32 @llvm.cttz.i32(i32 %ld7, i1 0)
  store i32 %cttz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
  store i32 %cttz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
  store i32 %cttz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
  store i32 %cttz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
  store i32 %cttz4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
  store i32 %cttz5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
  store i32 %cttz6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
  store i32 %cttz7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
  ret void
}

define void @cttz_8i16() #0 {
; CHECK-LABEL: @cttz_8i16(
; CHECK-NEXT:    [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
; CHECK-NEXT:    [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
; CHECK-NEXT:    [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
; CHECK-NEXT:    [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
; CHECK-NEXT:    [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
; CHECK-NEXT:    [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
; CHECK-NEXT:    [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
; CHECK-NEXT:    [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
; CHECK-NEXT:    [[CTTZ0:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD0]], i1 false)
; CHECK-NEXT:    [[CTTZ1:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD1]], i1 false)
; CHECK-NEXT:    [[CTTZ2:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD2]], i1 false)
; CHECK-NEXT:    [[CTTZ3:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD3]], i1 false)
; CHECK-NEXT:    [[CTTZ4:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD4]], i1 false)
; CHECK-NEXT:    [[CTTZ5:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD5]], i1 false)
; CHECK-NEXT:    [[CTTZ6:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD6]], i1 false)
; CHECK-NEXT:    [[CTTZ7:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD7]], i1 false)
; CHECK-NEXT:    store i16 [[CTTZ0]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
; CHECK-NEXT:    store i16 [[CTTZ1]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
; CHECK-NEXT:    store i16 [[CTTZ2]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
; CHECK-NEXT:    store i16 [[CTTZ3]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
; CHECK-NEXT:    store i16 [[CTTZ4]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
; CHECK-NEXT:    store i16 [[CTTZ5]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
; CHECK-NEXT:    store i16 [[CTTZ6]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
; CHECK-NEXT:    store i16 [[CTTZ7]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
; CHECK-NEXT:    ret void
;
  %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
  %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
  %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
  %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
  %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
  %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
  %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
  %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
  %cttz0 = call i16 @llvm.cttz.i16(i16 %ld0, i1 0)
  %cttz1 = call i16 @llvm.cttz.i16(i16 %ld1, i1 0)
  %cttz2 = call i16 @llvm.cttz.i16(i16 %ld2, i1 0)
  %cttz3 = call i16 @llvm.cttz.i16(i16 %ld3, i1 0)
  %cttz4 = call i16 @llvm.cttz.i16(i16 %ld4, i1 0)
  %cttz5 = call i16 @llvm.cttz.i16(i16 %ld5, i1 0)
  %cttz6 = call i16 @llvm.cttz.i16(i16 %ld6, i1 0)
  %cttz7 = call i16 @llvm.cttz.i16(i16 %ld7, i1 0)
  store i16 %cttz0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
  store i16 %cttz1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
  store i16 %cttz2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
  store i16 %cttz3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
  store i16 %cttz4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
  store i16 %cttz5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
  store i16 %cttz6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
  store i16 %cttz7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
  ret void
}

define void @cttz_16i16() #0 {
; CHECK-LABEL: @cttz_16i16(
; CHECK-NEXT:    [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
; CHECK-NEXT:    [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
; CHECK-NEXT:    [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
; CHECK-NEXT:    [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
; CHECK-NEXT:    [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
; CHECK-NEXT:    [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
; CHECK-NEXT:    [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
; CHECK-NEXT:    [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
; CHECK-NEXT:    [[LD8:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8), align 2
; CHECK-NEXT:    [[LD9:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 9), align 2
; CHECK-NEXT:    [[LD10:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
; CHECK-NEXT:    [[LD11:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
; CHECK-NEXT:    [[LD12:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
; CHECK-NEXT:    [[LD13:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
; CHECK-NEXT:    [[LD14:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
; CHECK-NEXT:    [[LD15:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
; CHECK-NEXT:    [[CTTZ0:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD0]], i1 false)
; CHECK-NEXT:    [[CTTZ1:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD1]], i1 false)
; CHECK-NEXT:    [[CTTZ2:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD2]], i1 false)
; CHECK-NEXT:    [[CTTZ3:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD3]], i1 false)
; CHECK-NEXT:    [[CTTZ4:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD4]], i1 false)
; CHECK-NEXT:    [[CTTZ5:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD5]], i1 false)
; CHECK-NEXT:    [[CTTZ6:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD6]], i1 false)
; CHECK-NEXT:    [[CTTZ7:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD7]], i1 false)
; CHECK-NEXT:    [[CTTZ8:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD8]], i1 false)
; CHECK-NEXT:    [[CTTZ9:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD9]], i1 false)
; CHECK-NEXT:    [[CTTZ10:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD10]], i1 false)
; CHECK-NEXT:    [[CTTZ11:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD11]], i1 false)
; CHECK-NEXT:    [[CTTZ12:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD12]], i1 false)
; CHECK-NEXT:    [[CTTZ13:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD13]], i1 false)
; CHECK-NEXT:    [[CTTZ14:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD14]], i1 false)
; CHECK-NEXT:    [[CTTZ15:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD15]], i1 false)
; CHECK-NEXT:    store i16 [[CTTZ0]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
; CHECK-NEXT:    store i16 [[CTTZ1]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
; CHECK-NEXT:    store i16 [[CTTZ2]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
; CHECK-NEXT:    store i16 [[CTTZ3]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
; CHECK-NEXT:    store i16 [[CTTZ4]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
; CHECK-NEXT:    store i16 [[CTTZ5]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
; CHECK-NEXT:    store i16 [[CTTZ6]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
; CHECK-NEXT:    store i16 [[CTTZ7]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
; CHECK-NEXT:    store i16 [[CTTZ8]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8), align 2
; CHECK-NEXT:    store i16 [[CTTZ9]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 9), align 2
; CHECK-NEXT:    store i16 [[CTTZ10]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
; CHECK-NEXT:    store i16 [[CTTZ11]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
; CHECK-NEXT:    store i16 [[CTTZ12]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
; CHECK-NEXT:    store i16 [[CTTZ13]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
; CHECK-NEXT:    store i16 [[CTTZ14]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
; CHECK-NEXT:    store i16 [[CTTZ15]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
; CHECK-NEXT:    ret void
;
  %ld0  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  0), align 2
  %ld1  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  1), align 2
  %ld2  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  2), align 2
  %ld3  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  3), align 2
  %ld4  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  4), align 2
  %ld5  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  5), align 2
  %ld6  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  6), align 2
  %ld7  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  7), align 2
  %ld8  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  8), align 2
  %ld9  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  9), align 2
  %ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
  %ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
  %ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
  %ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
  %ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
  %ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
  %cttz0  = call i16 @llvm.cttz.i16(i16 %ld0, i1 0)
  %cttz1  = call i16 @llvm.cttz.i16(i16 %ld1, i1 0)
  %cttz2  = call i16 @llvm.cttz.i16(i16 %ld2, i1 0)
  %cttz3  = call i16 @llvm.cttz.i16(i16 %ld3, i1 0)
  %cttz4  = call i16 @llvm.cttz.i16(i16 %ld4, i1 0)
  %cttz5  = call i16 @llvm.cttz.i16(i16 %ld5, i1 0)
  %cttz6  = call i16 @llvm.cttz.i16(i16 %ld6, i1 0)
  %cttz7  = call i16 @llvm.cttz.i16(i16 %ld7, i1 0)
  %cttz8  = call i16 @llvm.cttz.i16(i16 %ld8, i1 0)
  %cttz9  = call i16 @llvm.cttz.i16(i16 %ld9, i1 0)
  %cttz10 = call i16 @llvm.cttz.i16(i16 %ld10, i1 0)
  %cttz11 = call i16 @llvm.cttz.i16(i16 %ld11, i1 0)
  %cttz12 = call i16 @llvm.cttz.i16(i16 %ld12, i1 0)
  %cttz13 = call i16 @llvm.cttz.i16(i16 %ld13, i1 0)
  %cttz14 = call i16 @llvm.cttz.i16(i16 %ld14, i1 0)
  %cttz15 = call i16 @llvm.cttz.i16(i16 %ld15, i1 0)
  store i16 %cttz0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  0), align 2
  store i16 %cttz1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  1), align 2
  store i16 %cttz2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  2), align 2
  store i16 %cttz3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  3), align 2
  store i16 %cttz4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  4), align 2
  store i16 %cttz5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  5), align 2
  store i16 %cttz6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  6), align 2
  store i16 %cttz7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  7), align 2
  store i16 %cttz8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  8), align 2
  store i16 %cttz9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  9), align 2
  store i16 %cttz10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
  store i16 %cttz11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
  store i16 %cttz12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
  store i16 %cttz13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
  store i16 %cttz14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
  store i16 %cttz15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
  ret void
}

define void @cttz_16i8() #0 {
; CHECK-LABEL: @cttz_16i8(
; CHECK-NEXT:    [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
; CHECK-NEXT:    [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
; CHECK-NEXT:    [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
; CHECK-NEXT:    [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
; CHECK-NEXT:    [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
; CHECK-NEXT:    [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
; CHECK-NEXT:    [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
; CHECK-NEXT:    [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
; CHECK-NEXT:    [[LD8:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
; CHECK-NEXT:    [[LD9:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
; CHECK-NEXT:    [[LD10:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
; CHECK-NEXT:    [[LD11:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
; CHECK-NEXT:    [[LD12:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
; CHECK-NEXT:    [[LD13:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
; CHECK-NEXT:    [[LD14:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
; CHECK-NEXT:    [[LD15:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
; CHECK-NEXT:    [[CTTZ0:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD0]], i1 false)
; CHECK-NEXT:    [[CTTZ1:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD1]], i1 false)
; CHECK-NEXT:    [[CTTZ2:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD2]], i1 false)
; CHECK-NEXT:    [[CTTZ3:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD3]], i1 false)
; CHECK-NEXT:    [[CTTZ4:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD4]], i1 false)
; CHECK-NEXT:    [[CTTZ5:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD5]], i1 false)
; CHECK-NEXT:    [[CTTZ6:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD6]], i1 false)
; CHECK-NEXT:    [[CTTZ7:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD7]], i1 false)
; CHECK-NEXT:    [[CTTZ8:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD8]], i1 false)
; CHECK-NEXT:    [[CTTZ9:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD9]], i1 false)
; CHECK-NEXT:    [[CTTZ10:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD10]], i1 false)
; CHECK-NEXT:    [[CTTZ11:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD11]], i1 false)
; CHECK-NEXT:    [[CTTZ12:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD12]], i1 false)
; CHECK-NEXT:    [[CTTZ13:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD13]], i1 false)
; CHECK-NEXT:    [[CTTZ14:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD14]], i1 false)
; CHECK-NEXT:    [[CTTZ15:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD15]], i1 false)
; CHECK-NEXT:    store i8 [[CTTZ0]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
; CHECK-NEXT:    store i8 [[CTTZ1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
; CHECK-NEXT:    store i8 [[CTTZ2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
; CHECK-NEXT:    store i8 [[CTTZ3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
; CHECK-NEXT:    store i8 [[CTTZ4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
; CHECK-NEXT:    store i8 [[CTTZ5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
; CHECK-NEXT:    store i8 [[CTTZ6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
; CHECK-NEXT:    store i8 [[CTTZ7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
; CHECK-NEXT:    store i8 [[CTTZ8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
; CHECK-NEXT:    store i8 [[CTTZ9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
; CHECK-NEXT:    store i8 [[CTTZ10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
; CHECK-NEXT:    store i8 [[CTTZ11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
; CHECK-NEXT:    store i8 [[CTTZ12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
; CHECK-NEXT:    store i8 [[CTTZ13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
; CHECK-NEXT:    store i8 [[CTTZ14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
; CHECK-NEXT:    store i8 [[CTTZ15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
; CHECK-NEXT:    ret void
;
  %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
  %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
  %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
  %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
  %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
  %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
  %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
  %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
  %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
  %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
  %cttz0  = call i8 @llvm.cttz.i8(i8 %ld0, i1 0)
  %cttz1  = call i8 @llvm.cttz.i8(i8 %ld1, i1 0)
  %cttz2  = call i8 @llvm.cttz.i8(i8 %ld2, i1 0)
  %cttz3  = call i8 @llvm.cttz.i8(i8 %ld3, i1 0)
  %cttz4  = call i8 @llvm.cttz.i8(i8 %ld4, i1 0)
  %cttz5  = call i8 @llvm.cttz.i8(i8 %ld5, i1 0)
  %cttz6  = call i8 @llvm.cttz.i8(i8 %ld6, i1 0)
  %cttz7  = call i8 @llvm.cttz.i8(i8 %ld7, i1 0)
  %cttz8  = call i8 @llvm.cttz.i8(i8 %ld8, i1 0)
  %cttz9  = call i8 @llvm.cttz.i8(i8 %ld9, i1 0)
  %cttz10 = call i8 @llvm.cttz.i8(i8 %ld10, i1 0)
  %cttz11 = call i8 @llvm.cttz.i8(i8 %ld11, i1 0)
  %cttz12 = call i8 @llvm.cttz.i8(i8 %ld12, i1 0)
  %cttz13 = call i8 @llvm.cttz.i8(i8 %ld13, i1 0)
  %cttz14 = call i8 @llvm.cttz.i8(i8 %ld14, i1 0)
  %cttz15 = call i8 @llvm.cttz.i8(i8 %ld15, i1 0)
  store i8 %cttz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
  store i8 %cttz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
  store i8 %cttz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
  store i8 %cttz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
  store i8 %cttz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
  store i8 %cttz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
  store i8 %cttz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
  store i8 %cttz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
  store i8 %cttz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
  store i8 %cttz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
  store i8 %cttz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
  store i8 %cttz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
  store i8 %cttz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
  store i8 %cttz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
  store i8 %cttz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
  store i8 %cttz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
  ret void
}

define void @cttz_32i8() #0 {
; CHECK-LABEL: @cttz_32i8(
; CHECK-NEXT:    [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
; CHECK-NEXT:    [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
; CHECK-NEXT:    [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
; CHECK-NEXT:    [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
; CHECK-NEXT:    [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
; CHECK-NEXT:    [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
; CHECK-NEXT:    [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
; CHECK-NEXT:    [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
; CHECK-NEXT:    [[LD8:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
; CHECK-NEXT:    [[LD9:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
; CHECK-NEXT:    [[LD10:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
; CHECK-NEXT:    [[LD11:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
; CHECK-NEXT:    [[LD12:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
; CHECK-NEXT:    [[LD13:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
; CHECK-NEXT:    [[LD14:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
; CHECK-NEXT:    [[LD15:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
; CHECK-NEXT:    [[LD16:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
; CHECK-NEXT:    [[LD17:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
; CHECK-NEXT:    [[LD18:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
; CHECK-NEXT:    [[LD19:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
; CHECK-NEXT:    [[LD20:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
; CHECK-NEXT:    [[LD21:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
; CHECK-NEXT:    [[LD22:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
; CHECK-NEXT:    [[LD23:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
; CHECK-NEXT:    [[LD24:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
; CHECK-NEXT:    [[LD25:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
; CHECK-NEXT:    [[LD26:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
; CHECK-NEXT:    [[LD27:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
; CHECK-NEXT:    [[LD28:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
; CHECK-NEXT:    [[LD29:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
; CHECK-NEXT:    [[LD30:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
; CHECK-NEXT:    [[LD31:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
; CHECK-NEXT:    [[CTTZ0:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD0]], i1 false)
; CHECK-NEXT:    [[CTTZ1:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD1]], i1 false)
; CHECK-NEXT:    [[CTTZ2:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD2]], i1 false)
; CHECK-NEXT:    [[CTTZ3:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD3]], i1 false)
; CHECK-NEXT:    [[CTTZ4:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD4]], i1 false)
; CHECK-NEXT:    [[CTTZ5:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD5]], i1 false)
; CHECK-NEXT:    [[CTTZ6:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD6]], i1 false)
; CHECK-NEXT:    [[CTTZ7:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD7]], i1 false)
; CHECK-NEXT:    [[CTTZ8:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD8]], i1 false)
; CHECK-NEXT:    [[CTTZ9:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD9]], i1 false)
; CHECK-NEXT:    [[CTTZ10:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD10]], i1 false)
; CHECK-NEXT:    [[CTTZ11:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD11]], i1 false)
; CHECK-NEXT:    [[CTTZ12:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD12]], i1 false)
; CHECK-NEXT:    [[CTTZ13:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD13]], i1 false)
; CHECK-NEXT:    [[CTTZ14:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD14]], i1 false)
; CHECK-NEXT:    [[CTTZ15:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD15]], i1 false)
; CHECK-NEXT:    [[CTTZ16:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD16]], i1 false)
; CHECK-NEXT:    [[CTTZ17:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD17]], i1 false)
; CHECK-NEXT:    [[CTTZ18:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD18]], i1 false)
; CHECK-NEXT:    [[CTTZ19:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD19]], i1 false)
; CHECK-NEXT:    [[CTTZ20:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD20]], i1 false)
; CHECK-NEXT:    [[CTTZ21:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD21]], i1 false)
; CHECK-NEXT:    [[CTTZ22:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD22]], i1 false)
; CHECK-NEXT:    [[CTTZ23:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD23]], i1 false)
; CHECK-NEXT:    [[CTTZ24:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD24]], i1 false)
; CHECK-NEXT:    [[CTTZ25:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD25]], i1 false)
; CHECK-NEXT:    [[CTTZ26:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD26]], i1 false)
; CHECK-NEXT:    [[CTTZ27:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD27]], i1 false)
; CHECK-NEXT:    [[CTTZ28:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD28]], i1 false)
; CHECK-NEXT:    [[CTTZ29:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD29]], i1 false)
; CHECK-NEXT:    [[CTTZ30:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD30]], i1 false)
; CHECK-NEXT:    [[CTTZ31:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD31]], i1 false)
; CHECK-NEXT:    store i8 [[CTTZ0]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
; CHECK-NEXT:    store i8 [[CTTZ1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
; CHECK-NEXT:    store i8 [[CTTZ2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
; CHECK-NEXT:    store i8 [[CTTZ3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
; CHECK-NEXT:    store i8 [[CTTZ4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
; CHECK-NEXT:    store i8 [[CTTZ5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
; CHECK-NEXT:    store i8 [[CTTZ6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
; CHECK-NEXT:    store i8 [[CTTZ7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
; CHECK-NEXT:    store i8 [[CTTZ8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
; CHECK-NEXT:    store i8 [[CTTZ9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
; CHECK-NEXT:    store i8 [[CTTZ10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
; CHECK-NEXT:    store i8 [[CTTZ11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
; CHECK-NEXT:    store i8 [[CTTZ12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
; CHECK-NEXT:    store i8 [[CTTZ13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
; CHECK-NEXT:    store i8 [[CTTZ14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
; CHECK-NEXT:    store i8 [[CTTZ15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
; CHECK-NEXT:    store i8 [[CTTZ16]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
; CHECK-NEXT:    store i8 [[CTTZ17]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
; CHECK-NEXT:    store i8 [[CTTZ18]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
; CHECK-NEXT:    store i8 [[CTTZ19]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
; CHECK-NEXT:    store i8 [[CTTZ20]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
; CHECK-NEXT:    store i8 [[CTTZ21]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
; CHECK-NEXT:    store i8 [[CTTZ22]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
; CHECK-NEXT:    store i8 [[CTTZ23]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
; CHECK-NEXT:    store i8 [[CTTZ24]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
; CHECK-NEXT:    store i8 [[CTTZ25]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
; CHECK-NEXT:    store i8 [[CTTZ26]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
; CHECK-NEXT:    store i8 [[CTTZ27]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
; CHECK-NEXT:    store i8 [[CTTZ28]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
; CHECK-NEXT:    store i8 [[CTTZ29]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
; CHECK-NEXT:    store i8 [[CTTZ30]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
; CHECK-NEXT:    store i8 [[CTTZ31]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
; CHECK-NEXT:    ret void
;
  %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
  %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
  %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
  %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
  %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
  %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
  %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
  %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
  %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
  %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
  %ld16 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
  %ld17 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
  %ld18 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
  %ld19 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
  %ld20 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
  %ld21 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
  %ld22 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
  %ld23 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
  %ld24 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
  %ld25 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
  %ld26 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
  %ld27 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
  %ld28 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
  %ld29 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
  %ld30 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
  %ld31 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
  %cttz0  = call i8 @llvm.cttz.i8(i8 %ld0, i1 0)
  %cttz1  = call i8 @llvm.cttz.i8(i8 %ld1, i1 0)
  %cttz2  = call i8 @llvm.cttz.i8(i8 %ld2, i1 0)
  %cttz3  = call i8 @llvm.cttz.i8(i8 %ld3, i1 0)
  %cttz4  = call i8 @llvm.cttz.i8(i8 %ld4, i1 0)
  %cttz5  = call i8 @llvm.cttz.i8(i8 %ld5, i1 0)
  %cttz6  = call i8 @llvm.cttz.i8(i8 %ld6, i1 0)
  %cttz7  = call i8 @llvm.cttz.i8(i8 %ld7, i1 0)
  %cttz8  = call i8 @llvm.cttz.i8(i8 %ld8, i1 0)
  %cttz9  = call i8 @llvm.cttz.i8(i8 %ld9, i1 0)
  %cttz10 = call i8 @llvm.cttz.i8(i8 %ld10, i1 0)
  %cttz11 = call i8 @llvm.cttz.i8(i8 %ld11, i1 0)
  %cttz12 = call i8 @llvm.cttz.i8(i8 %ld12, i1 0)
  %cttz13 = call i8 @llvm.cttz.i8(i8 %ld13, i1 0)
  %cttz14 = call i8 @llvm.cttz.i8(i8 %ld14, i1 0)
  %cttz15 = call i8 @llvm.cttz.i8(i8 %ld15, i1 0)
  %cttz16 = call i8 @llvm.cttz.i8(i8 %ld16, i1 0)
  %cttz17 = call i8 @llvm.cttz.i8(i8 %ld17, i1 0)
  %cttz18 = call i8 @llvm.cttz.i8(i8 %ld18, i1 0)
  %cttz19 = call i8 @llvm.cttz.i8(i8 %ld19, i1 0)
  %cttz20 = call i8 @llvm.cttz.i8(i8 %ld20, i1 0)
  %cttz21 = call i8 @llvm.cttz.i8(i8 %ld21, i1 0)
  %cttz22 = call i8 @llvm.cttz.i8(i8 %ld22, i1 0)
  %cttz23 = call i8 @llvm.cttz.i8(i8 %ld23, i1 0)
  %cttz24 = call i8 @llvm.cttz.i8(i8 %ld24, i1 0)
  %cttz25 = call i8 @llvm.cttz.i8(i8 %ld25, i1 0)
  %cttz26 = call i8 @llvm.cttz.i8(i8 %ld26, i1 0)
  %cttz27 = call i8 @llvm.cttz.i8(i8 %ld27, i1 0)
  %cttz28 = call i8 @llvm.cttz.i8(i8 %ld28, i1 0)
  %cttz29 = call i8 @llvm.cttz.i8(i8 %ld29, i1 0)
  %cttz30 = call i8 @llvm.cttz.i8(i8 %ld30, i1 0)
  %cttz31 = call i8 @llvm.cttz.i8(i8 %ld31, i1 0)
  store i8 %cttz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
  store i8 %cttz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
  store i8 %cttz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
  store i8 %cttz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
  store i8 %cttz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
  store i8 %cttz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
  store i8 %cttz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
  store i8 %cttz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
  store i8 %cttz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
  store i8 %cttz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
  store i8 %cttz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
  store i8 %cttz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
  store i8 %cttz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
  store i8 %cttz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
  store i8 %cttz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
  store i8 %cttz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
  store i8 %cttz16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
  store i8 %cttz17, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
  store i8 %cttz18, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
  store i8 %cttz19, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
  store i8 %cttz20, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
  store i8 %cttz21, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
  store i8 %cttz22, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
  store i8 %cttz23, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
  store i8 %cttz24, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
  store i8 %cttz25, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
  store i8 %cttz26, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
  store i8 %cttz27, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
  store i8 %cttz28, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
  store i8 %cttz29, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
  store i8 %cttz30, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
  store i8 %cttz31, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
  ret void
}

    622 ;
    623 ; CTTZ_ZERO_UNDEF
    624 ;
    625 
    626 define void @cttz_undef_2i64() #0 {
    627 ; CHECK-LABEL: @cttz_undef_2i64(
    628 ; CHECK-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
    629 ; CHECK-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
    630 ; CHECK-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
    631 ; CHECK-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
    632 ; CHECK-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
    633 ; CHECK-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
    634 ; CHECK-NEXT:    ret void
    635 ;
    636   %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
    637   %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
    638   %cttz0 = call i64 @llvm.cttz.i64(i64 %ld0, i1 -1)
    639   %cttz1 = call i64 @llvm.cttz.i64(i64 %ld1, i1 -1)
    640   store i64 %cttz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
    641   store i64 %cttz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
    642   ret void
    643 }
    644 
    645 define void @cttz_undef_4i64() #0 {
    646 ; CHECK-LABEL: @cttz_undef_4i64(
    647 ; CHECK-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
    648 ; CHECK-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
    649 ; CHECK-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
    650 ; CHECK-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
    651 ; CHECK-NEXT:    [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
    652 ; CHECK-NEXT:    [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
    653 ; CHECK-NEXT:    [[CTTZ2:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD2]], i1 true)
    654 ; CHECK-NEXT:    [[CTTZ3:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD3]], i1 true)
    655 ; CHECK-NEXT:    store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
    656 ; CHECK-NEXT:    store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
    657 ; CHECK-NEXT:    store i64 [[CTTZ2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
    658 ; CHECK-NEXT:    store i64 [[CTTZ3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
    659 ; CHECK-NEXT:    ret void
    660 ;
    661   %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
    662   %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
    663   %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
    664   %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
    665   %cttz0 = call i64 @llvm.cttz.i64(i64 %ld0, i1 -1)
    666   %cttz1 = call i64 @llvm.cttz.i64(i64 %ld1, i1 -1)
    667   %cttz2 = call i64 @llvm.cttz.i64(i64 %ld2, i1 -1)
    668   %cttz3 = call i64 @llvm.cttz.i64(i64 %ld3, i1 -1)
    669   store i64 %cttz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
    670   store i64 %cttz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
    671   store i64 %cttz2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
    672   store i64 %cttz3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
    673   ret void
    674 }
    675 
    676 define void @cttz_undef_4i32() #0 {
    677 ; CHECK-LABEL: @cttz_undef_4i32(
    678 ; CHECK-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
    679 ; CHECK-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
    680 ; CHECK-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
    681 ; CHECK-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
    682 ; CHECK-NEXT:    [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 true)
    683 ; CHECK-NEXT:    [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 true)
    684 ; CHECK-NEXT:    [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 true)
    685 ; CHECK-NEXT:    [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 true)
    686 ; CHECK-NEXT:    store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
    687 ; CHECK-NEXT:    store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
    688 ; CHECK-NEXT:    store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
    689 ; CHECK-NEXT:    store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
    690 ; CHECK-NEXT:    ret void
    691 ;
    692   %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
    693   %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
    694   %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
    695   %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
    696   %cttz0 = call i32 @llvm.cttz.i32(i32 %ld0, i1 -1)
    697   %cttz1 = call i32 @llvm.cttz.i32(i32 %ld1, i1 -1)
    698   %cttz2 = call i32 @llvm.cttz.i32(i32 %ld2, i1 -1)
    699   %cttz3 = call i32 @llvm.cttz.i32(i32 %ld3, i1 -1)
    700   store i32 %cttz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
    701   store i32 %cttz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
    702   store i32 %cttz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
    703   store i32 %cttz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
    704   ret void
    705 }
    706 
    707 define void @cttz_undef_8i32() #0 {
    708 ; CHECK-LABEL: @cttz_undef_8i32(
    709 ; CHECK-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
    710 ; CHECK-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
    711 ; CHECK-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
    712 ; CHECK-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
    713 ; CHECK-NEXT:    [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
    714 ; CHECK-NEXT:    [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
    715 ; CHECK-NEXT:    [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
    716 ; CHECK-NEXT:    [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
    717 ; CHECK-NEXT:    [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 true)
    718 ; CHECK-NEXT:    [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 true)
    719 ; CHECK-NEXT:    [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 true)
    720 ; CHECK-NEXT:    [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 true)
    721 ; CHECK-NEXT:    [[CTTZ4:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD4]], i1 true)
    722 ; CHECK-NEXT:    [[CTTZ5:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD5]], i1 true)
    723 ; CHECK-NEXT:    [[CTTZ6:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD6]], i1 true)
    724 ; CHECK-NEXT:    [[CTTZ7:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD7]], i1 true)
    725 ; CHECK-NEXT:    store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
    726 ; CHECK-NEXT:    store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
    727 ; CHECK-NEXT:    store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
    728 ; CHECK-NEXT:    store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
    729 ; CHECK-NEXT:    store i32 [[CTTZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
    730 ; CHECK-NEXT:    store i32 [[CTTZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
    731 ; CHECK-NEXT:    store i32 [[CTTZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
    732 ; CHECK-NEXT:    store i32 [[CTTZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
    733 ; CHECK-NEXT:    ret void
    734 ;
    735   %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
    736   %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
    737   %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
    738   %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
    739   %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
    740   %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
    741   %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
    742   %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
    743   %cttz0 = call i32 @llvm.cttz.i32(i32 %ld0, i1 -1)
    744   %cttz1 = call i32 @llvm.cttz.i32(i32 %ld1, i1 -1)
    745   %cttz2 = call i32 @llvm.cttz.i32(i32 %ld2, i1 -1)
    746   %cttz3 = call i32 @llvm.cttz.i32(i32 %ld3, i1 -1)
    747   %cttz4 = call i32 @llvm.cttz.i32(i32 %ld4, i1 -1)
    748   %cttz5 = call i32 @llvm.cttz.i32(i32 %ld5, i1 -1)
    749   %cttz6 = call i32 @llvm.cttz.i32(i32 %ld6, i1 -1)
    750   %cttz7 = call i32 @llvm.cttz.i32(i32 %ld7, i1 -1)
    751   store i32 %cttz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
    752   store i32 %cttz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
    753   store i32 %cttz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
    754   store i32 %cttz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
    755   store i32 %cttz4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
    756   store i32 %cttz5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
    757   store i32 %cttz6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
    758   store i32 %cttz7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
    759   ret void
    760 }
    761 
    762 define void @cttz_undef_8i16() #0 {
    763 ; CHECK-LABEL: @cttz_undef_8i16(
    764 ; CHECK-NEXT:    [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
    765 ; CHECK-NEXT:    [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
    766 ; CHECK-NEXT:    [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
    767 ; CHECK-NEXT:    [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
    768 ; CHECK-NEXT:    [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
    769 ; CHECK-NEXT:    [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
    770 ; CHECK-NEXT:    [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
    771 ; CHECK-NEXT:    [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
    772 ; CHECK-NEXT:    [[CTTZ0:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD0]], i1 true)
    773 ; CHECK-NEXT:    [[CTTZ1:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD1]], i1 true)
    774 ; CHECK-NEXT:    [[CTTZ2:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD2]], i1 true)
    775 ; CHECK-NEXT:    [[CTTZ3:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD3]], i1 true)
    776 ; CHECK-NEXT:    [[CTTZ4:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD4]], i1 true)
    777 ; CHECK-NEXT:    [[CTTZ5:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD5]], i1 true)
    778 ; CHECK-NEXT:    [[CTTZ6:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD6]], i1 true)
    779 ; CHECK-NEXT:    [[CTTZ7:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD7]], i1 true)
    780 ; CHECK-NEXT:    store i16 [[CTTZ0]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
    781 ; CHECK-NEXT:    store i16 [[CTTZ1]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
    782 ; CHECK-NEXT:    store i16 [[CTTZ2]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
    783 ; CHECK-NEXT:    store i16 [[CTTZ3]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
    784 ; CHECK-NEXT:    store i16 [[CTTZ4]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
    785 ; CHECK-NEXT:    store i16 [[CTTZ5]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
    786 ; CHECK-NEXT:    store i16 [[CTTZ6]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
    787 ; CHECK-NEXT:    store i16 [[CTTZ7]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
    788 ; CHECK-NEXT:    ret void
    789 ;
    790   %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
    791   %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
    792   %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
    793   %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
    794   %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
    795   %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
    796   %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
    797   %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
    798   %cttz0 = call i16 @llvm.cttz.i16(i16 %ld0, i1 -1)
    799   %cttz1 = call i16 @llvm.cttz.i16(i16 %ld1, i1 -1)
    800   %cttz2 = call i16 @llvm.cttz.i16(i16 %ld2, i1 -1)
    801   %cttz3 = call i16 @llvm.cttz.i16(i16 %ld3, i1 -1)
    802   %cttz4 = call i16 @llvm.cttz.i16(i16 %ld4, i1 -1)
    803   %cttz5 = call i16 @llvm.cttz.i16(i16 %ld5, i1 -1)
    804   %cttz6 = call i16 @llvm.cttz.i16(i16 %ld6, i1 -1)
    805   %cttz7 = call i16 @llvm.cttz.i16(i16 %ld7, i1 -1)
    806   store i16 %cttz0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
    807   store i16 %cttz1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
    808   store i16 %cttz2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
    809   store i16 %cttz3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
    810   store i16 %cttz4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
    811   store i16 %cttz5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
    812   store i16 %cttz6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
    813   store i16 %cttz7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
    814   ret void
    815 }
    816 
    817 define void @cttz_undef_16i16() #0 {
    818 ; CHECK-LABEL: @cttz_undef_16i16(
    819 ; CHECK-NEXT:    [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
    820 ; CHECK-NEXT:    [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
    821 ; CHECK-NEXT:    [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
    822 ; CHECK-NEXT:    [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
    823 ; CHECK-NEXT:    [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
    824 ; CHECK-NEXT:    [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
    825 ; CHECK-NEXT:    [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
    826 ; CHECK-NEXT:    [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
    827 ; CHECK-NEXT:    [[LD8:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8), align 2
    828 ; CHECK-NEXT:    [[LD9:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 9), align 2
    829 ; CHECK-NEXT:    [[LD10:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
    830 ; CHECK-NEXT:    [[LD11:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
    831 ; CHECK-NEXT:    [[LD12:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
    832 ; CHECK-NEXT:    [[LD13:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
    833 ; CHECK-NEXT:    [[LD14:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
    834 ; CHECK-NEXT:    [[LD15:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
    835 ; CHECK-NEXT:    [[CTTZ0:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD0]], i1 true)
    836 ; CHECK-NEXT:    [[CTTZ1:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD1]], i1 true)
    837 ; CHECK-NEXT:    [[CTTZ2:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD2]], i1 true)
    838 ; CHECK-NEXT:    [[CTTZ3:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD3]], i1 true)
    839 ; CHECK-NEXT:    [[CTTZ4:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD4]], i1 true)
    840 ; CHECK-NEXT:    [[CTTZ5:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD5]], i1 true)
    841 ; CHECK-NEXT:    [[CTTZ6:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD6]], i1 true)
    842 ; CHECK-NEXT:    [[CTTZ7:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD7]], i1 true)
    843 ; CHECK-NEXT:    [[CTTZ8:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD8]], i1 true)
    844 ; CHECK-NEXT:    [[CTTZ9:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD9]], i1 true)
    845 ; CHECK-NEXT:    [[CTTZ10:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD10]], i1 true)
    846 ; CHECK-NEXT:    [[CTTZ11:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD11]], i1 true)
    847 ; CHECK-NEXT:    [[CTTZ12:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD12]], i1 true)
    848 ; CHECK-NEXT:    [[CTTZ13:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD13]], i1 true)
    849 ; CHECK-NEXT:    [[CTTZ14:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD14]], i1 true)
    850 ; CHECK-NEXT:    [[CTTZ15:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD15]], i1 true)
    851 ; CHECK-NEXT:    store i16 [[CTTZ0]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
    852 ; CHECK-NEXT:    store i16 [[CTTZ1]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
    853 ; CHECK-NEXT:    store i16 [[CTTZ2]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
    854 ; CHECK-NEXT:    store i16 [[CTTZ3]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
    855 ; CHECK-NEXT:    store i16 [[CTTZ4]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
    856 ; CHECK-NEXT:    store i16 [[CTTZ5]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
    857 ; CHECK-NEXT:    store i16 [[CTTZ6]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
    858 ; CHECK-NEXT:    store i16 [[CTTZ7]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
    859 ; CHECK-NEXT:    store i16 [[CTTZ8]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8), align 2
    860 ; CHECK-NEXT:    store i16 [[CTTZ9]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 9), align 2
    861 ; CHECK-NEXT:    store i16 [[CTTZ10]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
    862 ; CHECK-NEXT:    store i16 [[CTTZ11]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
    863 ; CHECK-NEXT:    store i16 [[CTTZ12]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
    864 ; CHECK-NEXT:    store i16 [[CTTZ13]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
    865 ; CHECK-NEXT:    store i16 [[CTTZ14]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
    866 ; CHECK-NEXT:    store i16 [[CTTZ15]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
    867 ; CHECK-NEXT:    ret void
    868 ;
    869   %ld0  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  0), align 2
    870   %ld1  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  1), align 2
    871   %ld2  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  2), align 2
    872   %ld3  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  3), align 2
    873   %ld4  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  4), align 2
    874   %ld5  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  5), align 2
    875   %ld6  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  6), align 2
    876   %ld7  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  7), align 2
    877   %ld8  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  8), align 2
    878   %ld9  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  9), align 2
    879   %ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
    880   %ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
    881   %ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
    882   %ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
    883   %ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
    884   %ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
    885   %cttz0  = call i16 @llvm.cttz.i16(i16 %ld0, i1 -1)
    886   %cttz1  = call i16 @llvm.cttz.i16(i16 %ld1, i1 -1)
    887   %cttz2  = call i16 @llvm.cttz.i16(i16 %ld2, i1 -1)
    888   %cttz3  = call i16 @llvm.cttz.i16(i16 %ld3, i1 -1)
    889   %cttz4  = call i16 @llvm.cttz.i16(i16 %ld4, i1 -1)
    890   %cttz5  = call i16 @llvm.cttz.i16(i16 %ld5, i1 -1)
    891   %cttz6  = call i16 @llvm.cttz.i16(i16 %ld6, i1 -1)
    892   %cttz7  = call i16 @llvm.cttz.i16(i16 %ld7, i1 -1)
    893   %cttz8  = call i16 @llvm.cttz.i16(i16 %ld8, i1 -1)
    894   %cttz9  = call i16 @llvm.cttz.i16(i16 %ld9, i1 -1)
    895   %cttz10 = call i16 @llvm.cttz.i16(i16 %ld10, i1 -1)
    896   %cttz11 = call i16 @llvm.cttz.i16(i16 %ld11, i1 -1)
    897   %cttz12 = call i16 @llvm.cttz.i16(i16 %ld12, i1 -1)
    898   %cttz13 = call i16 @llvm.cttz.i16(i16 %ld13, i1 -1)
    899   %cttz14 = call i16 @llvm.cttz.i16(i16 %ld14, i1 -1)
    900   %cttz15 = call i16 @llvm.cttz.i16(i16 %ld15, i1 -1)
    901   store i16 %cttz0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  0), align 2
    902   store i16 %cttz1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  1), align 2
    903   store i16 %cttz2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  2), align 2
    904   store i16 %cttz3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  3), align 2
    905   store i16 %cttz4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  4), align 2
    906   store i16 %cttz5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  5), align 2
    907   store i16 %cttz6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  6), align 2
    908   store i16 %cttz7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  7), align 2
    909   store i16 %cttz8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  8), align 2
    910   store i16 %cttz9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  9), align 2
    911   store i16 %cttz10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
    912   store i16 %cttz11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
    913   store i16 %cttz12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
    914   store i16 %cttz13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
    915   store i16 %cttz14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
    916   store i16 %cttz15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
    917   ret void
    918 }
    919 
    920 define void @cttz_undef_16i8() #0 {
    921 ; CHECK-LABEL: @cttz_undef_16i8(
    922 ; CHECK-NEXT:    [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
    923 ; CHECK-NEXT:    [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
    924 ; CHECK-NEXT:    [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
    925 ; CHECK-NEXT:    [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
    926 ; CHECK-NEXT:    [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
    927 ; CHECK-NEXT:    [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
    928 ; CHECK-NEXT:    [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
    929 ; CHECK-NEXT:    [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
    930 ; CHECK-NEXT:    [[LD8:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
    931 ; CHECK-NEXT:    [[LD9:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
    932 ; CHECK-NEXT:    [[LD10:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
    933 ; CHECK-NEXT:    [[LD11:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
    934 ; CHECK-NEXT:    [[LD12:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
    935 ; CHECK-NEXT:    [[LD13:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
    936 ; CHECK-NEXT:    [[LD14:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
    937 ; CHECK-NEXT:    [[LD15:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
    938 ; CHECK-NEXT:    [[CTTZ0:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD0]], i1 true)
    939 ; CHECK-NEXT:    [[CTTZ1:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD1]], i1 true)
    940 ; CHECK-NEXT:    [[CTTZ2:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD2]], i1 true)
    941 ; CHECK-NEXT:    [[CTTZ3:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD3]], i1 true)
    942 ; CHECK-NEXT:    [[CTTZ4:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD4]], i1 true)
    943 ; CHECK-NEXT:    [[CTTZ5:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD5]], i1 true)
    944 ; CHECK-NEXT:    [[CTTZ6:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD6]], i1 true)
    945 ; CHECK-NEXT:    [[CTTZ7:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD7]], i1 true)
    946 ; CHECK-NEXT:    [[CTTZ8:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD8]], i1 true)
    947 ; CHECK-NEXT:    [[CTTZ9:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD9]], i1 true)
    948 ; CHECK-NEXT:    [[CTTZ10:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD10]], i1 true)
    949 ; CHECK-NEXT:    [[CTTZ11:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD11]], i1 true)
    950 ; CHECK-NEXT:    [[CTTZ12:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD12]], i1 true)
    951 ; CHECK-NEXT:    [[CTTZ13:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD13]], i1 true)
    952 ; CHECK-NEXT:    [[CTTZ14:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD14]], i1 true)
    953 ; CHECK-NEXT:    [[CTTZ15:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD15]], i1 true)
    954 ; CHECK-NEXT:    store i8 [[CTTZ0]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
    955 ; CHECK-NEXT:    store i8 [[CTTZ1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
    956 ; CHECK-NEXT:    store i8 [[CTTZ2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
    957 ; CHECK-NEXT:    store i8 [[CTTZ3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
    958 ; CHECK-NEXT:    store i8 [[CTTZ4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
    959 ; CHECK-NEXT:    store i8 [[CTTZ5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
    960 ; CHECK-NEXT:    store i8 [[CTTZ6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
    961 ; CHECK-NEXT:    store i8 [[CTTZ7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
    962 ; CHECK-NEXT:    store i8 [[CTTZ8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
    963 ; CHECK-NEXT:    store i8 [[CTTZ9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
    964 ; CHECK-NEXT:    store i8 [[CTTZ10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
    965 ; CHECK-NEXT:    store i8 [[CTTZ11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
    966 ; CHECK-NEXT:    store i8 [[CTTZ12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
    967 ; CHECK-NEXT:    store i8 [[CTTZ13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
    968 ; CHECK-NEXT:    store i8 [[CTTZ14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
    969 ; CHECK-NEXT:    store i8 [[CTTZ15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
    970 ; CHECK-NEXT:    ret void
    971 ;
    972   %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
    973   %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
    974   %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
    975   %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
    976   %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
    977   %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
    978   %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
    979   %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
    980   %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
    981   %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
    982   %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
    983   %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
    984   %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
    985   %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
    986   %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
    987   %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
    988   %cttz0  = call i8 @llvm.cttz.i8(i8 %ld0, i1 -1)
    989   %cttz1  = call i8 @llvm.cttz.i8(i8 %ld1, i1 -1)
    990   %cttz2  = call i8 @llvm.cttz.i8(i8 %ld2, i1 -1)
    991   %cttz3  = call i8 @llvm.cttz.i8(i8 %ld3, i1 -1)
    992   %cttz4  = call i8 @llvm.cttz.i8(i8 %ld4, i1 -1)
    993   %cttz5  = call i8 @llvm.cttz.i8(i8 %ld5, i1 -1)
    994   %cttz6  = call i8 @llvm.cttz.i8(i8 %ld6, i1 -1)
    995   %cttz7  = call i8 @llvm.cttz.i8(i8 %ld7, i1 -1)
    996   %cttz8  = call i8 @llvm.cttz.i8(i8 %ld8, i1 -1)
    997   %cttz9  = call i8 @llvm.cttz.i8(i8 %ld9, i1 -1)
    998   %cttz10 = call i8 @llvm.cttz.i8(i8 %ld10, i1 -1)
    999   %cttz11 = call i8 @llvm.cttz.i8(i8 %ld11, i1 -1)
   1000   %cttz12 = call i8 @llvm.cttz.i8(i8 %ld12, i1 -1)
   1001   %cttz13 = call i8 @llvm.cttz.i8(i8 %ld13, i1 -1)
   1002   %cttz14 = call i8 @llvm.cttz.i8(i8 %ld14, i1 -1)
   1003   %cttz15 = call i8 @llvm.cttz.i8(i8 %ld15, i1 -1)
   1004   store i8 %cttz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
   1005   store i8 %cttz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
   1006   store i8 %cttz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
   1007   store i8 %cttz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
   1008   store i8 %cttz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
   1009   store i8 %cttz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
   1010   store i8 %cttz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
   1011   store i8 %cttz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
   1012   store i8 %cttz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
   1013   store i8 %cttz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
   1014   store i8 %cttz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
   1015   store i8 %cttz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
   1016   store i8 %cttz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
   1017   store i8 %cttz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
   1018   store i8 %cttz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
   1019   store i8 %cttz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
   1020   ret void
   1021 }
   1022 
   1023 define void @cttz_undef_32i8() #0 {
   1024 ; CHECK-LABEL: @cttz_undef_32i8(
   1025 ; CHECK-NEXT:    [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
   1026 ; CHECK-NEXT:    [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
   1027 ; CHECK-NEXT:    [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
   1028 ; CHECK-NEXT:    [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
   1029 ; CHECK-NEXT:    [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
   1030 ; CHECK-NEXT:    [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
   1031 ; CHECK-NEXT:    [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
   1032 ; CHECK-NEXT:    [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
   1033 ; CHECK-NEXT:    [[LD8:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
   1034 ; CHECK-NEXT:    [[LD9:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
   1035 ; CHECK-NEXT:    [[LD10:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
   1036 ; CHECK-NEXT:    [[LD11:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
   1037 ; CHECK-NEXT:    [[LD12:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
   1038 ; CHECK-NEXT:    [[LD13:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
   1039 ; CHECK-NEXT:    [[LD14:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
   1040 ; CHECK-NEXT:    [[LD15:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
   1041 ; CHECK-NEXT:    [[LD16:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
   1042 ; CHECK-NEXT:    [[LD17:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
   1043 ; CHECK-NEXT:    [[LD18:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
   1044 ; CHECK-NEXT:    [[LD19:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
   1045 ; CHECK-NEXT:    [[LD20:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
   1046 ; CHECK-NEXT:    [[LD21:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
   1047 ; CHECK-NEXT:    [[LD22:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
   1048 ; CHECK-NEXT:    [[LD23:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
   1049 ; CHECK-NEXT:    [[LD24:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
   1050 ; CHECK-NEXT:    [[LD25:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
   1051 ; CHECK-NEXT:    [[LD26:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
   1052 ; CHECK-NEXT:    [[LD27:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
   1053 ; CHECK-NEXT:    [[LD28:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
   1054 ; CHECK-NEXT:    [[LD29:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
   1055 ; CHECK-NEXT:    [[LD30:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
   1056 ; CHECK-NEXT:    [[LD31:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
   1057 ; CHECK-NEXT:    [[CTTZ0:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD0]], i1 true)
   1058 ; CHECK-NEXT:    [[CTTZ1:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD1]], i1 true)
   1059 ; CHECK-NEXT:    [[CTTZ2:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD2]], i1 true)
   1060 ; CHECK-NEXT:    [[CTTZ3:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD3]], i1 true)
   1061 ; CHECK-NEXT:    [[CTTZ4:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD4]], i1 true)
   1062 ; CHECK-NEXT:    [[CTTZ5:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD5]], i1 true)
   1063 ; CHECK-NEXT:    [[CTTZ6:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD6]], i1 true)
   1064 ; CHECK-NEXT:    [[CTTZ7:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD7]], i1 true)
   1065 ; CHECK-NEXT:    [[CTTZ8:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD8]], i1 true)
   1066 ; CHECK-NEXT:    [[CTTZ9:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD9]], i1 true)
   1067 ; CHECK-NEXT:    [[CTTZ10:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD10]], i1 true)
   1068 ; CHECK-NEXT:    [[CTTZ11:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD11]], i1 true)
   1069 ; CHECK-NEXT:    [[CTTZ12:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD12]], i1 true)
   1070 ; CHECK-NEXT:    [[CTTZ13:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD13]], i1 true)
   1071 ; CHECK-NEXT:    [[CTTZ14:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD14]], i1 true)
   1072 ; CHECK-NEXT:    [[CTTZ15:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD15]], i1 true)
   1073 ; CHECK-NEXT:    [[CTTZ16:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD16]], i1 true)
   1074 ; CHECK-NEXT:    [[CTTZ17:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD17]], i1 true)
   1075 ; CHECK-NEXT:    [[CTTZ18:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD18]], i1 true)
   1076 ; CHECK-NEXT:    [[CTTZ19:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD19]], i1 true)
   1077 ; CHECK-NEXT:    [[CTTZ20:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD20]], i1 true)
   1078 ; CHECK-NEXT:    [[CTTZ21:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD21]], i1 true)
   1079 ; CHECK-NEXT:    [[CTTZ22:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD22]], i1 true)
   1080 ; CHECK-NEXT:    [[CTTZ23:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD23]], i1 true)
   1081 ; CHECK-NEXT:    [[CTTZ24:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD24]], i1 true)
   1082 ; CHECK-NEXT:    [[CTTZ25:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD25]], i1 true)
   1083 ; CHECK-NEXT:    [[CTTZ26:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD26]], i1 true)
   1084 ; CHECK-NEXT:    [[CTTZ27:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD27]], i1 true)
   1085 ; CHECK-NEXT:    [[CTTZ28:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD28]], i1 true)
   1086 ; CHECK-NEXT:    [[CTTZ29:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD29]], i1 true)
   1087 ; CHECK-NEXT:    [[CTTZ30:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD30]], i1 true)
   1088 ; CHECK-NEXT:    [[CTTZ31:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD31]], i1 true)
   1089 ; CHECK-NEXT:    store i8 [[CTTZ0]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
   1090 ; CHECK-NEXT:    store i8 [[CTTZ1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
   1091 ; CHECK-NEXT:    store i8 [[CTTZ2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
   1092 ; CHECK-NEXT:    store i8 [[CTTZ3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
   1093 ; CHECK-NEXT:    store i8 [[CTTZ4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
   1094 ; CHECK-NEXT:    store i8 [[CTTZ5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
   1095 ; CHECK-NEXT:    store i8 [[CTTZ6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
   1096 ; CHECK-NEXT:    store i8 [[CTTZ7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
   1097 ; CHECK-NEXT:    store i8 [[CTTZ8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
   1098 ; CHECK-NEXT:    store i8 [[CTTZ9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
   1099 ; CHECK-NEXT:    store i8 [[CTTZ10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
   1100 ; CHECK-NEXT:    store i8 [[CTTZ11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
   1101 ; CHECK-NEXT:    store i8 [[CTTZ12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
   1102 ; CHECK-NEXT:    store i8 [[CTTZ13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
   1103 ; CHECK-NEXT:    store i8 [[CTTZ14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
   1104 ; CHECK-NEXT:    store i8 [[CTTZ15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
   1105 ; CHECK-NEXT:    store i8 [[CTTZ16]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
   1106 ; CHECK-NEXT:    store i8 [[CTTZ17]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
   1107 ; CHECK-NEXT:    store i8 [[CTTZ18]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
   1108 ; CHECK-NEXT:    store i8 [[CTTZ19]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
   1109 ; CHECK-NEXT:    store i8 [[CTTZ20]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
   1110 ; CHECK-NEXT:    store i8 [[CTTZ21]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
   1111 ; CHECK-NEXT:    store i8 [[CTTZ22]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
   1112 ; CHECK-NEXT:    store i8 [[CTTZ23]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
   1113 ; CHECK-NEXT:    store i8 [[CTTZ24]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
   1114 ; CHECK-NEXT:    store i8 [[CTTZ25]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
   1115 ; CHECK-NEXT:    store i8 [[CTTZ26]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
   1116 ; CHECK-NEXT:    store i8 [[CTTZ27]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
   1117 ; CHECK-NEXT:    store i8 [[CTTZ28]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
   1118 ; CHECK-NEXT:    store i8 [[CTTZ29]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
   1119 ; CHECK-NEXT:    store i8 [[CTTZ30]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
   1120 ; CHECK-NEXT:    store i8 [[CTTZ31]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
   1121 ; CHECK-NEXT:    ret void
   1122 ;
   1123   %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
   1124   %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
   1125   %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
   1126   %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
   1127   %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
   1128   %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
   1129   %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
   1130   %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
   1131   %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
   1132   %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
   1133   %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
   1134   %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
   1135   %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
   1136   %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
   1137   %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
   1138   %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
   1139   %ld16 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
   1140   %ld17 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
   1141   %ld18 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
   1142   %ld19 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
   1143   %ld20 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
   1144   %ld21 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
   1145   %ld22 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
   1146   %ld23 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
   1147   %ld24 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
   1148   %ld25 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
   1149   %ld26 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
   1150   %ld27 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
   1151   %ld28 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
   1152   %ld29 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
   1153   %ld30 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
   1154   %ld31 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
   1155   %cttz0  = call i8 @llvm.cttz.i8(i8 %ld0, i1 -1)
   1156   %cttz1  = call i8 @llvm.cttz.i8(i8 %ld1, i1 -1)
   1157   %cttz2  = call i8 @llvm.cttz.i8(i8 %ld2, i1 -1)
   1158   %cttz3  = call i8 @llvm.cttz.i8(i8 %ld3, i1 -1)
   1159   %cttz4  = call i8 @llvm.cttz.i8(i8 %ld4, i1 -1)
   1160   %cttz5  = call i8 @llvm.cttz.i8(i8 %ld5, i1 -1)
   1161   %cttz6  = call i8 @llvm.cttz.i8(i8 %ld6, i1 -1)
   1162   %cttz7  = call i8 @llvm.cttz.i8(i8 %ld7, i1 -1)
   1163   %cttz8  = call i8 @llvm.cttz.i8(i8 %ld8, i1 -1)
   1164   %cttz9  = call i8 @llvm.cttz.i8(i8 %ld9, i1 -1)
   1165   %cttz10 = call i8 @llvm.cttz.i8(i8 %ld10, i1 -1)
   1166   %cttz11 = call i8 @llvm.cttz.i8(i8 %ld11, i1 -1)
   1167   %cttz12 = call i8 @llvm.cttz.i8(i8 %ld12, i1 -1)
   1168   %cttz13 = call i8 @llvm.cttz.i8(i8 %ld13, i1 -1)
   1169   %cttz14 = call i8 @llvm.cttz.i8(i8 %ld14, i1 -1)
   1170   %cttz15 = call i8 @llvm.cttz.i8(i8 %ld15, i1 -1)
   1171   %cttz16 = call i8 @llvm.cttz.i8(i8 %ld16, i1 -1)
   1172   %cttz17 = call i8 @llvm.cttz.i8(i8 %ld17, i1 -1)
   1173   %cttz18 = call i8 @llvm.cttz.i8(i8 %ld18, i1 -1)
   1174   %cttz19 = call i8 @llvm.cttz.i8(i8 %ld19, i1 -1)
   1175   %cttz20 = call i8 @llvm.cttz.i8(i8 %ld20, i1 -1)
   1176   %cttz21 = call i8 @llvm.cttz.i8(i8 %ld21, i1 -1)
   1177   %cttz22 = call i8 @llvm.cttz.i8(i8 %ld22, i1 -1)
   1178   %cttz23 = call i8 @llvm.cttz.i8(i8 %ld23, i1 -1)
   1179   %cttz24 = call i8 @llvm.cttz.i8(i8 %ld24, i1 -1)
   1180   %cttz25 = call i8 @llvm.cttz.i8(i8 %ld25, i1 -1)
   1181   %cttz26 = call i8 @llvm.cttz.i8(i8 %ld26, i1 -1)
   1182   %cttz27 = call i8 @llvm.cttz.i8(i8 %ld27, i1 -1)
   1183   %cttz28 = call i8 @llvm.cttz.i8(i8 %ld28, i1 -1)
   1184   %cttz29 = call i8 @llvm.cttz.i8(i8 %ld29, i1 -1)
   1185   %cttz30 = call i8 @llvm.cttz.i8(i8 %ld30, i1 -1)
   1186   %cttz31 = call i8 @llvm.cttz.i8(i8 %ld31, i1 -1)
   1187   store i8 %cttz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
   1188   store i8 %cttz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
   1189   store i8 %cttz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
   1190   store i8 %cttz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
   1191   store i8 %cttz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
   1192   store i8 %cttz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
   1193   store i8 %cttz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
   1194   store i8 %cttz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
   1195   store i8 %cttz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
   1196   store i8 %cttz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
   1197   store i8 %cttz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
   1198   store i8 %cttz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
   1199   store i8 %cttz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
   1200   store i8 %cttz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
   1201   store i8 %cttz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
   1202   store i8 %cttz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
   1203   store i8 %cttz16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
   1204   store i8 %cttz17, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
   1205   store i8 %cttz18, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
   1206   store i8 %cttz19, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
   1207   store i8 %cttz20, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
   1208   store i8 %cttz21, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
   1209   store i8 %cttz22, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
   1210   store i8 %cttz23, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
   1211   store i8 %cttz24, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
   1212   store i8 %cttz25, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
   1213   store i8 %cttz26, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
   1214   store i8 %cttz27, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
   1215   store i8 %cttz28, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
   1216   store i8 %cttz29, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
   1217   store i8 %cttz30, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
   1218   store i8 %cttz31, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
   1219   ret void
   1220 }
   1221 
   1222 attributes #0 = { nounwind }
   1223