Home | History | Annotate | Download | only in OpenMP
      1 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
      2 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
      3 // RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
      4 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
      5 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
      6 // RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
      7 // expected-no-diagnostics
      8 #ifndef HEADER
      9 #define HEADER
     10 
     11 
     12 // CHECK-DAG: [[GA:@.+]] = global double 1.000000e+00
     13 // CHECK-DAG: [[GB:@.+]] = global double 2.000000e+00
     14 // CHECK-DAG: [[GC:@.+]] = global double 3.000000e+00
     15 // CHECK-DAG: [[GD:@.+]] = global double 4.000000e+00
     16 // CHECK-DAG: [[FA:@.+]] = internal global float 5.000000e+00
     17 // CHECK-DAG: [[FB:@.+]] = internal global float 6.000000e+00
     18 // CHECK-DAG: [[FC:@.+]] = internal global float 7.000000e+00
     19 // CHECK-DAG: [[FD:@.+]] = internal global float 8.000000e+00
     20 // CHECK-DAG: [[BA:@.+]] = internal global float 9.000000e+00
     21 // CHECK-DAG: [[BB:@.+]] = internal global float 1.000000e+01
     22 // CHECK-DAG: [[BC:@.+]] = internal global float 1.100000e+01
     23 // CHECK-DAG: [[BD:@.+]] = internal global float 1.200000e+01
     24 // CHECK-DAG: [[TBA:@.+]] = {{.*}}global float 1.700000e+01
     25 // CHECK-DAG: [[TBB:@.+]] = {{.*}}global float 1.800000e+01
     26 // CHECK-DAG: [[TBC:@.+]] = {{.*}}global float 1.900000e+01
     27 // CHECK-DAG: [[TBD:@.+]] = {{.*}}global float 2.000000e+01
     28 
     29 double Ga = 1.0;  // read by the `if` clause of every target region below
     30 double Gb = 2.0;  // updated inside every target region below
     31 double Gc = 3.0;  // read by the `if` clause of every nested parallel region below
     32 double Gd = 4.0;  // updated inside every nested parallel region below
     33 
     34 // CHECK: define {{.*}} @{{.*}}foo{{.*}}(
     35 // CHECK-SAME: i16 {{[^,]*}}[[A:%[^,]+]],
     36 // CHECK-SAME: i16 {{[^,]*}}[[B:%[^,]+]],
     37 // CHECK-SAME: i16 {{[^,]*}}[[C:%[^,]+]],
     38 // CHECK-SAME: i16 {{[^,]*}}[[D:%[^,]+]])
     39 // CHECK: [[LA:%.+]] = alloca i16
     40 // CHECK: [[LB:%.+]] = alloca i16
     41 // CHECK: [[LC:%.+]] = alloca i16
     42 // CHECK: [[LD:%.+]] = alloca i16
     43 int foo(short a, short b, short c, short d){
          // Scenario: a `target` region with a nested `parallel` region, each
          // guarded by an `if` clause. The target body updates b, Gb and Sb; the
          // nested parallel body updates d, Gd and Sd. a, Ga and Sa feed the
          // target `if` clause; c, Gc and Sc feed the parallel `if` clause.
          // The FileCheck directives interleaved below describe the IR expected
          // for this function and for the outlined functions generated from it.
     44   static float Sa = 5.0;
     45   static float Sb = 6.0;
     46   static float Sc = 7.0;
     47   static float Sd = 8.0;
     48 
     49   // CHECK-DAG:    [[VALLB:%.+]] = load i16, i16* [[LB]],
     50   // CHECK-64-DAG: [[VALGB:%.+]] = load double, double* [[GB]],
     51   // CHECK-DAG:    [[VALFB:%.+]] = load float, float* [[FB]],
     52   // CHECK-64-DAG: [[VALGC:%.+]] = load double, double* [[GC]],
     53   // CHECK-DAG:    [[VALLC:%.+]] = load i16, i16* [[LC]],
     54   // CHECK-DAG:    [[VALFC:%.+]] = load float, float* [[FC]],
     55   // CHECK-DAG:    [[VALLD:%.+]] = load i16, i16* [[LD]],
     56   // CHECK-64-DAG: [[VALGD:%.+]] = load double, double* [[GD]],
     57   // CHECK-DAG:    [[VALFD:%.+]] = load float, float* [[FD]],
     58 
     59   // 3 local vars being captured.
     60 
     61   // CHECK-DAG: store i16 [[VALLB]], i16* [[CONVLB:%.+]],
     62   // CHECK-DAG: [[CONVLB]] = bitcast i[[sz:64|32]]* [[CADDRLB:%.+]] to i16*
     63   // CHECK-DAG: [[CVALLB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLB]],
     64   // CHECK-DAG: [[CPTRLB:%.+]] = inttoptr i[[sz]] [[CVALLB]] to i8*
     65   // CHECK-DAG: store i8* [[CPTRLB]], i8** [[GEPLB:%.+]],
     66   // CHECK-DAG: [[GEPLB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
     67 
     68   // CHECK-DAG: store i16 [[VALLC]], i16* [[CONVLC:%.+]],
     69   // CHECK-DAG: [[CONVLC]] = bitcast i[[sz]]* [[CADDRLC:%.+]] to i16*
     70   // CHECK-DAG: [[CVALLC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLC]],
     71   // CHECK-DAG: [[CPTRLC:%.+]] = inttoptr i[[sz]] [[CVALLC]] to i8*
     72   // CHECK-DAG: store i8* [[CPTRLC]], i8** [[GEPLC:%.+]],
     73   // CHECK-DAG: [[GEPLC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
     74 
     75   // CHECK-DAG: store i16 [[VALLD]], i16* [[CONVLD:%.+]],
     76   // CHECK-DAG: [[CONVLD]] = bitcast i[[sz]]* [[CADDRLD:%.+]] to i16*
     77   // CHECK-DAG: [[CVALLD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLD]],
     78   // CHECK-DAG: [[CPTRLD:%.+]] = inttoptr i[[sz]] [[CVALLD]] to i8*
     79   // CHECK-DAG: store i8* [[CPTRLD]], i8** [[GEPLD:%.+]],
     80   // CHECK-DAG: [[GEPLD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
     81 
     82   // 3 static vars being captured.
     83 
     84   // CHECK-DAG: store float [[VALFB]], float* [[CONVFB:%.+]],
     85   // CHECK-DAG: [[CONVFB]] = bitcast i[[sz]]* [[CADDRFB:%.+]] to float*
     86   // CHECK-DAG: [[CVALFB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFB]],
     87   // CHECK-DAG: [[CPTRFB:%.+]] = inttoptr i[[sz]] [[CVALFB]] to i8*
     88   // CHECK-DAG: store i8* [[CPTRFB]], i8** [[GEPFB:%.+]],
     89   // CHECK-DAG: [[GEPFB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
     90 
     91   // CHECK-DAG: store float [[VALFC]], float* [[CONVFC:%.+]],
     92   // CHECK-DAG: [[CONVFC]] = bitcast i[[sz]]* [[CADDRFC:%.+]] to float*
     93   // CHECK-DAG: [[CVALFC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFC]],
     94   // CHECK-DAG: [[CPTRFC:%.+]] = inttoptr i[[sz]] [[CVALFC]] to i8*
     95   // CHECK-DAG: store i8* [[CPTRFC]], i8** [[GEPFC:%.+]],
     96   // CHECK-DAG: [[GEPFC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
     97 
     98   // CHECK-DAG: store float [[VALFD]], float* [[CONVFD:%.+]],
     99   // CHECK-DAG: [[CONVFD]] = bitcast i[[sz]]* [[CADDRFD:%.+]] to float*
    100   // CHECK-DAG: [[CVALFD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFD]],
    101   // CHECK-DAG: [[CPTRFD:%.+]] = inttoptr i[[sz]] [[CVALFD]] to i8*
    102   // CHECK-DAG: store i8* [[CPTRFD]], i8** [[GEPFD:%.+]],
    103   // CHECK-DAG: [[GEPFD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    104 
    105   // 3 global vars being captured (Gb, Gc, Gd).
          // The 64-bit prefix expects each double to be loaded and packed into
          // the argument slot; the 32-bit prefix expects the global's address
          // instead (presumably because a double does not fit a 32-bit slot).
    106 
    107   // CHECK-64-DAG: store double [[VALGB]], double* [[CONVGB:%.+]],
    108   // CHECK-64-DAG: [[CONVGB]] = bitcast i[[sz]]* [[CADDRGB:%.+]] to double*
    109   // CHECK-64-DAG: [[CVALGB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGB]],
    110   // CHECK-64-DAG: [[CPTRGB:%.+]] = inttoptr i[[sz]] [[CVALGB]] to i8*
    111   // CHECK-64-DAG: store i8* [[CPTRGB]], i8** [[GEPGB:%.+]],
    112   // CHECK-32-DAG: store i8* bitcast (double* @Gb to i8*), i8** [[GEPGB:%.+]],
    113   // CHECK-DAG: [[GEPGB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    114 
    115   // CHECK-64-DAG: store double [[VALGC]], double* [[CONVGC:%.+]],
    116   // CHECK-64-DAG: [[CONVGC]] = bitcast i[[sz]]* [[CADDRGC:%.+]] to double*
    117   // CHECK-64-DAG: [[CVALGC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGC]],
    118   // CHECK-64-DAG: [[CPTRGC:%.+]] = inttoptr i[[sz]] [[CVALGC]] to i8*
    119   // CHECK-64-DAG: store i8* [[CPTRGC]], i8** [[GEPGC:%.+]],
    120   // CHECK-32-DAG: store i8* bitcast (double* @Gc to i8*), i8** [[GEPGC:%.+]],
    121   // CHECK-DAG: [[GEPGC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    122 
    123   // CHECK-64-DAG: store double [[VALGD]], double* [[CONVGD:%.+]],
    124   // CHECK-64-DAG: [[CONVGD]] = bitcast i[[sz]]* [[CADDRGD:%.+]] to double*
    125   // CHECK-64-DAG: [[CVALGD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGD]],
    126   // CHECK-64-DAG: [[CPTRGD:%.+]] = inttoptr i[[sz]] [[CVALGD]] to i8*
    127   // CHECK-64-DAG: store i8* [[CPTRGD]], i8** [[GEPGD:%.+]],
    128   // CHECK-32-DAG: store i8* bitcast (double* @Gd to i8*), i8** [[GEPGD:%.+]],
    129   // CHECK-DAG: [[GEPGD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    130 
    131   // CHECK: call i32 @__tgt_target
    132   // CHECK: call void [[OFFLOADF:@.+]](
    133   // Capture b, Gb, Sb, Gc, c, Sc, d, Gd, Sd
          // (nine captures, matching the [9 x i8*] argument array expected above).
    134   #pragma omp target if(Ga>0.0 && a>0 && Sa>0.0)
    135   {
    136     b += 1;
    137     Gb += 1.0;
    138     Sb += 1.0;
    139 
    140     // CHECK: define internal void [[OFFLOADF]]({{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}})
    141     // The parallel region only uses 3 captures.
    142     // CHECK:     call {{.*}}@__kmpc_fork_call(%ident_t* {{.+}}, i32 {{.+}}, void (i32*, i32*, ...)* bitcast ({{.*}}[[PARF:@.+]] to {{.*}}), {{.+}}* %{{.+}}, {{.+}}* %{{.+}}, {{.+}}* %{{.+}})
    143     // CHECK:     call void @.omp_outlined.(i32* %{{.+}}, i32* %{{.+}}, {{.+}}* %{{.+}}, {{.+}}* %{{.+}}, {{.+}}* %{{.+}})
    144     // Capture d, Gd, Sd
    145 
    146     // CHECK: define internal void [[PARF]](i32* noalias %{{.*}}, i32* noalias %{{.*}},
    147     #pragma omp parallel if(Gc>0.0 && c>0 && Sc>0.0)
    148     {
    149       d += 1;
    150       Gd += 1.0;
    151       Sd += 1.0;
    152     }
    153   }
    154   return a + b + c + d + (int)Sa + (int)Sb + (int)Sc + (int)Sd;
    155 }
    156 
    157 // CHECK: define {{.*}} @{{.*}}bar{{.*}}(
    158 // CHECK-SAME: i16 {{[^,]*}}[[A:%[^,]+]],
    159 // CHECK-SAME: i16 {{[^,]*}}[[B:%[^,]+]],
    160 // CHECK-SAME: i16 {{[^,]*}}[[C:%[^,]+]],
    161 // CHECK-SAME: i16 {{[^,]*}}[[D:%[^,]+]])
    162 // CHECK: [[LA:%.+]] = alloca i16
    163 // CHECK: [[LB:%.+]] = alloca i16
    164 // CHECK: [[LC:%.+]] = alloca i16
    165 // CHECK: [[LD:%.+]] = alloca i16
    166 int bar(short a, short b, short c, short d){
          // Scenario: the same target/parallel nest as in foo(), but enclosed in
          // an outer `parallel` region that has no clauses. The directives below
          // therefore expect a, b, c and d to be passed as i16* arguments to the
          // outlined function of the outer parallel region, and the values
          // captured by the target region to be loaded through those pointers.
          // The function-local statics here are initialised to 9.0 .. 12.0.
    167   static float Sa = 9.0;
    168   static float Sb = 10.0;
    169   static float Sc = 11.0;
    170   static float Sd = 12.0;
    171 
    172   // CHECK: call void {{.*}}@__kmpc_fork_call(%ident_t* {{.+}}, i32 {{.+}}, void (i32*, i32*, ...)* bitcast ({{.*}}[[PARF:@.+]] to {{.*}}), i16* %{{.+}}, i16* %{{.+}}, i16* %{{.+}}, i16* %{{.+}})
    173   // CHECK: define internal void [[PARF]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i16* dereferenceable(2) [[A:%.+]], i16* dereferenceable(2) [[B:%.+]], i16* dereferenceable(2) [[C:%.+]], i16* dereferenceable(2) [[D:%.+]])
    174   // Capture a, b, c, d
    175   // CHECK: [[ALLOCLA:%.+]] = alloca i16
    176   // CHECK: [[ALLOCLB:%.+]] = alloca i16
    177   // CHECK: [[ALLOCLC:%.+]] = alloca i16
    178   // CHECK: [[ALLOCLD:%.+]] = alloca i16
    179   // CHECK: [[LLA:%.+]] = load i16*, i16** [[ALLOCLA]],
    180   // CHECK: [[LLB:%.+]] = load i16*, i16** [[ALLOCLB]],
    181   // CHECK: [[LLC:%.+]] = load i16*, i16** [[ALLOCLC]],
    182   // CHECK: [[LLD:%.+]] = load i16*, i16** [[ALLOCLD]],
    183   #pragma omp parallel
    184   {
    185     // CHECK-DAG:    [[VALLB:%.+]] = load i16, i16* [[LLB]],
    186     // CHECK-64-DAG: [[VALGB:%.+]] = load double, double* [[GB]],
    187     // CHECK-DAG:    [[VALFB:%.+]] = load float, float* [[BB]],
    188     // CHECK-64-DAG: [[VALGC:%.+]] = load double, double* [[GC]],
    189     // CHECK-DAG:    [[VALLC:%.+]] = load i16, i16* [[LLC]],
    190     // CHECK-DAG:    [[VALFC:%.+]] = load float, float* [[BC]],
    191     // CHECK-DAG:    [[VALLD:%.+]] = load i16, i16* [[LLD]],
    192     // CHECK-64-DAG: [[VALGD:%.+]] = load double, double* [[GD]],
    193     // CHECK-DAG:    [[VALFD:%.+]] = load float, float* [[BD]],
    194 
    195     // 3 local vars being captured.
    196 
    197     // CHECK-DAG: store i16 [[VALLB]], i16* [[CONVLB:%.+]],
    198     // CHECK-DAG: [[CONVLB]] = bitcast i[[sz:64|32]]* [[CADDRLB:%.+]] to i16*
    199     // CHECK-DAG: [[CVALLB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLB]],
    200     // CHECK-DAG: [[CPTRLB:%.+]] = inttoptr i[[sz]] [[CVALLB]] to i8*
    201     // CHECK-DAG: store i8* [[CPTRLB]], i8** [[GEPLB:%.+]],
    202     // CHECK-DAG: [[GEPLB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    203 
    204     // CHECK-DAG: store i16 [[VALLC]], i16* [[CONVLC:%.+]],
    205     // CHECK-DAG: [[CONVLC]] = bitcast i[[sz]]* [[CADDRLC:%.+]] to i16*
    206     // CHECK-DAG: [[CVALLC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLC]],
    207     // CHECK-DAG: [[CPTRLC:%.+]] = inttoptr i[[sz]] [[CVALLC]] to i8*
    208     // CHECK-DAG: store i8* [[CPTRLC]], i8** [[GEPLC:%.+]],
    209     // CHECK-DAG: [[GEPLC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    210 
    211     // CHECK-DAG: store i16 [[VALLD]], i16* [[CONVLD:%.+]],
    212     // CHECK-DAG: [[CONVLD]] = bitcast i[[sz]]* [[CADDRLD:%.+]] to i16*
    213     // CHECK-DAG: [[CVALLD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLD]],
    214     // CHECK-DAG: [[CPTRLD:%.+]] = inttoptr i[[sz]] [[CVALLD]] to i8*
    215     // CHECK-DAG: store i8* [[CPTRLD]], i8** [[GEPLD:%.+]],
    216     // CHECK-DAG: [[GEPLD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    217 
    218     // 3 static vars being captured.
    219 
    220     // CHECK-DAG: store float [[VALFB]], float* [[CONVFB:%.+]],
    221     // CHECK-DAG: [[CONVFB]] = bitcast i[[sz]]* [[CADDRFB:%.+]] to float*
    222     // CHECK-DAG: [[CVALFB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFB]],
    223     // CHECK-DAG: [[CPTRFB:%.+]] = inttoptr i[[sz]] [[CVALFB]] to i8*
    224     // CHECK-DAG: store i8* [[CPTRFB]], i8** [[GEPFB:%.+]],
    225     // CHECK-DAG: [[GEPFB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    226 
    227     // CHECK-DAG: store float [[VALFC]], float* [[CONVFC:%.+]],
    228     // CHECK-DAG: [[CONVFC]] = bitcast i[[sz]]* [[CADDRFC:%.+]] to float*
    229     // CHECK-DAG: [[CVALFC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFC]],
    230     // CHECK-DAG: [[CPTRFC:%.+]] = inttoptr i[[sz]] [[CVALFC]] to i8*
    231     // CHECK-DAG: store i8* [[CPTRFC]], i8** [[GEPFC:%.+]],
    232     // CHECK-DAG: [[GEPFC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    233 
    234     // CHECK-DAG: store float [[VALFD]], float* [[CONVFD:%.+]],
    235     // CHECK-DAG: [[CONVFD]] = bitcast i[[sz]]* [[CADDRFD:%.+]] to float*
    236     // CHECK-DAG: [[CVALFD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFD]],
    237     // CHECK-DAG: [[CPTRFD:%.+]] = inttoptr i[[sz]] [[CVALFD]] to i8*
    238     // CHECK-DAG: store i8* [[CPTRFD]], i8** [[GEPFD:%.+]],
    239     // CHECK-DAG: [[GEPFD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    240 
    241     // 3 global vars being captured (Gb, Gc, Gd).
            // The 64-bit prefix expects each double to be loaded and packed into
            // the argument slot; the 32-bit prefix expects the global's address.
    242 
    243     // CHECK-64-DAG: store double [[VALGB]], double* [[CONVGB:%.+]],
    244     // CHECK-64-DAG: [[CONVGB]] = bitcast i[[sz]]* [[CADDRGB:%.+]] to double*
    245     // CHECK-64-DAG: [[CVALGB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGB]],
    246     // CHECK-64-DAG: [[CPTRGB:%.+]] = inttoptr i[[sz]] [[CVALGB]] to i8*
    247     // CHECK-64-DAG: store i8* [[CPTRGB]], i8** [[GEPGB:%.+]],
    248     // CHECK-32-DAG: store i8* bitcast (double* @Gb to i8*), i8** [[GEPGB:%.+]],
    249     // CHECK-DAG: [[GEPGB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    250 
    251     // CHECK-64-DAG: store double [[VALGC]], double* [[CONVGC:%.+]],
    252     // CHECK-64-DAG: [[CONVGC]] = bitcast i[[sz]]* [[CADDRGC:%.+]] to double*
    253     // CHECK-64-DAG: [[CVALGC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGC]],
    254     // CHECK-64-DAG: [[CPTRGC:%.+]] = inttoptr i[[sz]] [[CVALGC]] to i8*
    255     // CHECK-64-DAG: store i8* [[CPTRGC]], i8** [[GEPGC:%.+]],
    256     // CHECK-32-DAG: store i8* bitcast (double* @Gc to i8*), i8** [[GEPGC:%.+]],
    257     // CHECK-DAG: [[GEPGC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    258 
    259     // CHECK-64-DAG: store double [[VALGD]], double* [[CONVGD:%.+]],
    260     // CHECK-64-DAG: [[CONVGD]] = bitcast i[[sz]]* [[CADDRGD:%.+]] to double*
    261     // CHECK-64-DAG: [[CVALGD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGD]],
    262     // CHECK-64-DAG: [[CPTRGD:%.+]] = inttoptr i[[sz]] [[CVALGD]] to i8*
    263     // CHECK-64-DAG: store i8* [[CPTRGD]], i8** [[GEPGD:%.+]],
    264     // CHECK-32-DAG: store i8* bitcast (double* @Gd to i8*), i8** [[GEPGD:%.+]],
    265     // CHECK-DAG: [[GEPGD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    266 
    267     // CHECK: call i32 @__tgt_target
    268     // CHECK: call void [[OFFLOADF:@.+]](
    269     // Capture b, Gb, Sb, Gc, c, Sc, d, Gd, Sd
    270     #pragma omp target if(Ga>0.0 && a>0 && Sa>0.0)
    271     {
    272       b += 1;
    273       Gb += 1.0;
    274       Sb += 1.0;
    275 
    276       // CHECK: define internal void [[OFFLOADF]]({{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}})
    277       // CHECK: call void {{.*}}@__kmpc_fork_call(%ident_t* {{.+}}, i32 {{.+}}, void (i32*, i32*, ...)* bitcast ({{.*}}[[PARF:@.+]] to {{.*}})
    278 
    279       // CHECK: define internal void [[PARF]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, {{.+}}* dereferenceable({{.+}}) %{{.+}}, {{.+}}* dereferenceable({{.+}}) %{{.+}}, {{.+}}* dereferenceable({{.+}}) %{{.+}})
    280       // Capture d, Gd, Sd
    281       #pragma omp parallel if(Gc>0.0 && c>0 && Sc>0.0)
    282       {
    283         d += 1;
    284         Gd += 1.0;
    285         Sd += 1.0;
    286       }
    287     }
    288   }
    289   return a + b + c + d + (int)Sa + (int)Sb + (int)Sc + (int)Sd;
    290 }
    291 
    292 ///
    293 /// Tests with template functions.
    294 ///
    295 
    296 // CHECK: define {{.*}} @{{.*}}tbar2{{.*}}(
    297 
    298 // CHECK: define {{.*}} @{{.*}}tbar{{.*}}(
    299 // CHECK-SAME: i16 {{[^,]*}}[[A:%[^,]+]],
    300 // CHECK-SAME: i16 {{[^,]*}}[[B:%[^,]+]],
    301 // CHECK-SAME: i16 {{[^,]*}}[[C:%[^,]+]],
    302 // CHECK-SAME: i16 {{[^,]*}}[[D:%[^,]+]])
    303 // CHECK: [[LA:%.+]] = alloca i16
    304 // CHECK: [[LB:%.+]] = alloca i16
    305 // CHECK: [[LC:%.+]] = alloca i16
    306 // CHECK: [[LD:%.+]] = alloca i16
    307 template<typename T>
    308 int tbar(T a, T b, T c, T d){
          // Scenario: template variant of bar() -- an outer `parallel` region
          // enclosing a `target` region that in turn contains a `parallel` region.
          // The directives below expect i16 parameters, i.e. the instantiation
          // with T = short that tbar2() produces. The function-local statics here
          // are initialised to 17.0 .. 20.0.
    309   static float Sa = 17.0;
    310   static float Sb = 18.0;
    311   static float Sc = 19.0;
    312   static float Sd = 20.0;
    313 
    314   // CHECK: call void {{.*}}@__kmpc_fork_call(%ident_t* {{.+}}, i32 {{.+}}, void (i32*, i32*, ...)* bitcast ({{.*}}[[PARF:@.+]] to {{.*}}), i16* %{{.+}}, i16* %{{.+}}, i16* %{{.+}}, i16* %{{.+}})
    315   // CHECK: define internal void [[PARF]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i16* dereferenceable(2) [[A:%.+]], i16* dereferenceable(2) [[B:%.+]], i16* dereferenceable(2) [[C:%.+]], i16* dereferenceable(2) [[D:%.+]])
    316   // Capture a, b, c, d
    317   // CHECK: [[ALLOCLA:%.+]] = alloca i16
    318   // CHECK: [[ALLOCLB:%.+]] = alloca i16
    319   // CHECK: [[ALLOCLC:%.+]] = alloca i16
    320   // CHECK: [[ALLOCLD:%.+]] = alloca i16
    321   // CHECK: [[LLA:%.+]] = load i16*, i16** [[ALLOCLA]],
    322   // CHECK: [[LLB:%.+]] = load i16*, i16** [[ALLOCLB]],
    323   // CHECK: [[LLC:%.+]] = load i16*, i16** [[ALLOCLC]],
    324   // CHECK: [[LLD:%.+]] = load i16*, i16** [[ALLOCLD]],
    325   #pragma omp parallel
    326   {
    327     // CHECK-DAG:    [[VALLB:%.+]] = load i16, i16* [[LLB]],
    328     // CHECK-64-DAG: [[VALGB:%.+]] = load double, double* [[GB]],
    329     // CHECK-DAG:    [[VALFB:%.+]] = load float, float* [[TBB]],
    330     // CHECK-64-DAG: [[VALGC:%.+]] = load double, double* [[GC]],
    331     // CHECK-DAG:    [[VALLC:%.+]] = load i16, i16* [[LLC]],
    332     // CHECK-DAG:    [[VALFC:%.+]] = load float, float* [[TBC]],
    333     // CHECK-DAG:    [[VALLD:%.+]] = load i16, i16* [[LLD]],
    334     // CHECK-64-DAG: [[VALGD:%.+]] = load double, double* [[GD]],
    335     // CHECK-DAG:    [[VALFD:%.+]] = load float, float* [[TBD]],
    336 
    337     // 3 local vars being captured.
    338 
    339     // CHECK-DAG: store i16 [[VALLB]], i16* [[CONVLB:%.+]],
    340     // CHECK-DAG: [[CONVLB]] = bitcast i[[sz:64|32]]* [[CADDRLB:%.+]] to i16*
    341     // CHECK-DAG: [[CVALLB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLB]],
    342     // CHECK-DAG: [[CPTRLB:%.+]] = inttoptr i[[sz]] [[CVALLB]] to i8*
    343     // CHECK-DAG: store i8* [[CPTRLB]], i8** [[GEPLB:%.+]],
    344     // CHECK-DAG: [[GEPLB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    345 
    346     // CHECK-DAG: store i16 [[VALLC]], i16* [[CONVLC:%.+]],
    347     // CHECK-DAG: [[CONVLC]] = bitcast i[[sz]]* [[CADDRLC:%.+]] to i16*
    348     // CHECK-DAG: [[CVALLC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLC]],
    349     // CHECK-DAG: [[CPTRLC:%.+]] = inttoptr i[[sz]] [[CVALLC]] to i8*
    350     // CHECK-DAG: store i8* [[CPTRLC]], i8** [[GEPLC:%.+]],
    351     // CHECK-DAG: [[GEPLC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    352 
    353     // CHECK-DAG: store i16 [[VALLD]], i16* [[CONVLD:%.+]],
    354     // CHECK-DAG: [[CONVLD]] = bitcast i[[sz]]* [[CADDRLD:%.+]] to i16*
    355     // CHECK-DAG: [[CVALLD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRLD]],
    356     // CHECK-DAG: [[CPTRLD:%.+]] = inttoptr i[[sz]] [[CVALLD]] to i8*
    357     // CHECK-DAG: store i8* [[CPTRLD]], i8** [[GEPLD:%.+]],
    358     // CHECK-DAG: [[GEPLD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    359 
    360     // 3 static vars being captured.
    361 
    362     // CHECK-DAG: store float [[VALFB]], float* [[CONVFB:%.+]],
    363     // CHECK-DAG: [[CONVFB]] = bitcast i[[sz]]* [[CADDRFB:%.+]] to float*
    364     // CHECK-DAG: [[CVALFB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFB]],
    365     // CHECK-DAG: [[CPTRFB:%.+]] = inttoptr i[[sz]] [[CVALFB]] to i8*
    366     // CHECK-DAG: store i8* [[CPTRFB]], i8** [[GEPFB:%.+]],
    367     // CHECK-DAG: [[GEPFB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    368 
    369     // CHECK-DAG: store float [[VALFC]], float* [[CONVFC:%.+]],
    370     // CHECK-DAG: [[CONVFC]] = bitcast i[[sz]]* [[CADDRFC:%.+]] to float*
    371     // CHECK-DAG: [[CVALFC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFC]],
    372     // CHECK-DAG: [[CPTRFC:%.+]] = inttoptr i[[sz]] [[CVALFC]] to i8*
    373     // CHECK-DAG: store i8* [[CPTRFC]], i8** [[GEPFC:%.+]],
    374     // CHECK-DAG: [[GEPFC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    375 
    376     // CHECK-DAG: store float [[VALFD]], float* [[CONVFD:%.+]],
    377     // CHECK-DAG: [[CONVFD]] = bitcast i[[sz]]* [[CADDRFD:%.+]] to float*
    378     // CHECK-DAG: [[CVALFD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRFD]],
    379     // CHECK-DAG: [[CPTRFD:%.+]] = inttoptr i[[sz]] [[CVALFD]] to i8*
    380     // CHECK-DAG: store i8* [[CPTRFD]], i8** [[GEPFD:%.+]],
    381     // CHECK-DAG: [[GEPFD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    382 
    383     // 3 global vars being captured (Gb, Gc, Gd).
            // The 64-bit prefix expects each double to be loaded and packed into
            // the argument slot; the 32-bit prefix expects the global's address.
    384 
    385     // CHECK-64-DAG: store double [[VALGB]], double* [[CONVGB:%.+]],
    386     // CHECK-64-DAG: [[CONVGB]] = bitcast i[[sz]]* [[CADDRGB:%.+]] to double*
    387     // CHECK-64-DAG: [[CVALGB:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGB]],
    388     // CHECK-64-DAG: [[CPTRGB:%.+]] = inttoptr i[[sz]] [[CVALGB]] to i8*
    389     // CHECK-64-DAG: store i8* [[CPTRGB]], i8** [[GEPGB:%.+]],
    390     // CHECK-32-DAG: store i8* bitcast (double* @Gb to i8*), i8** [[GEPGB:%.+]],
    391     // CHECK-DAG: [[GEPGB]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    392 
    393     // CHECK-64-DAG: store double [[VALGC]], double* [[CONVGC:%.+]],
    394     // CHECK-64-DAG: [[CONVGC]] = bitcast i[[sz]]* [[CADDRGC:%.+]] to double*
    395     // CHECK-64-DAG: [[CVALGC:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGC]],
    396     // CHECK-64-DAG: [[CPTRGC:%.+]] = inttoptr i[[sz]] [[CVALGC]] to i8*
    397     // CHECK-64-DAG: store i8* [[CPTRGC]], i8** [[GEPGC:%.+]],
    398     // CHECK-32-DAG: store i8* bitcast (double* @Gc to i8*), i8** [[GEPGC:%.+]],
    399     // CHECK-DAG: [[GEPGC]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    400 
    401     // CHECK-64-DAG: store double [[VALGD]], double* [[CONVGD:%.+]],
    402     // CHECK-64-DAG: [[CONVGD]] = bitcast i[[sz]]* [[CADDRGD:%.+]] to double*
    403     // CHECK-64-DAG: [[CVALGD:%.+]] = load i[[sz]], i[[sz]]* [[CADDRGD]],
    404     // CHECK-64-DAG: [[CPTRGD:%.+]] = inttoptr i[[sz]] [[CVALGD]] to i8*
    405     // CHECK-64-DAG: store i8* [[CPTRGD]], i8** [[GEPGD:%.+]],
    406     // CHECK-32-DAG: store i8* bitcast (double* @Gd to i8*), i8** [[GEPGD:%.+]],
    407     // CHECK-DAG: [[GEPGD]] = getelementptr inbounds [9 x i8*], [9 x i8*]* %{{.+}}, i32 0, i32 {{[0-8]}}
    408 
    409     // CHECK: call i32 @__tgt_target
    410     // CHECK: call void [[OFFLOADF:@.+]](
    411     // Capture b, Gb, Sb, Gc, c, Sc, d, Gd, Sd
    412     #pragma omp target if(Ga>0.0 && a>0 && Sa>0.0)
    413     {
    414       b += 1;
    415       Gb += 1.0;
    416       Sb += 1.0;
    417 
    418       // CHECK: define internal void [[OFFLOADF]]({{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}}, {{.+}} {{.*}}%{{.+}})
    419       // CHECK: call void {{.*}}@__kmpc_fork_call(%ident_t* {{.+}}, i32 {{.+}}, void (i32*, i32*, ...)* bitcast ({{.*}}[[PARF:@.+]] to {{.*}})
    420 
    421       // CHECK: define internal void [[PARF]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, {{.+}}* dereferenceable({{.+}}) %{{.+}}, {{.+}}* dereferenceable({{.+}}) %{{.+}}, {{.+}}* dereferenceable({{.+}}) %{{.+}})
    422       // Capture d, Gd, Sd
    423       #pragma omp parallel if(Gc>0.0 && c>0 && Sc>0.0)
    424       {
    425         d += 1;
    426         Gd += 1.0;
    427         Sd += 1.0;
    428       }
    429     }
    430   }
    431   return a + b + c + d + (int)Sa + (int)Sb + (int)Sc + (int)Sd;
    432 }
    433 
    434 int tbar2(short a, short b, short c, short d){
          // Calls tbar() with short arguments, so T is deduced as short and the
          // template body is instantiated; returns tbar's result unchanged.
    435   return tbar(a, b, c, d);
    436 }
    437 
    438 #endif
    439