Home | History | Annotate | Download | only in OpenMP
      1 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
      2 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
      3 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
      4 // REQUIRES: x86-registered-target
      5 // expected-no-diagnostics
      6 #ifndef HEADER
      7 #define HEADER
      8 
      9 // CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
     10 // CHECK: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
     11 // CHECK-LABEL: define {{.*void}} @{{.*}}static_not_chunked{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
        // Codegen test: ordered worksharing loop with schedule(static) and no chunk
        // size. The FileCheck patterns interleaved with the code require the loop to
        // be lowered through the 32-bit signed dispatch entry points
        // (__kmpc_dispatch_init_4 / _next_4 / _fini_4) with schedule code 66, and
        // the ordered region to be bracketed by __kmpc_ordered / __kmpc_end_ordered.
        // NOTE(review): 66 is presumably kmp_ord_static in the runtime's sched_type
        // enum - confirm against kmp.h.
        //
        // a, b, c, d: float arrays indexed by the loop variable; a[i] receives
        // b[i] * c[i] * d[i]. Nothing is executed by this test (it only emits IR).
     12 void static_not_chunked(float *a, float *b, float *c, float *d) {
     13 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
     14   #pragma omp for schedule(static) ordered
        // The loop below runs i = 32000000, 31999993, ... while i > 33, which is
        // 4571424 iterations; the normalized bounds passed to dispatch_init are
        // therefore 0 .. 4571423 with stride 1 and chunk 1.
     15 // CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
     16 //
        // Outer loop: keep asking the runtime for the next range of iterations
        // until dispatch_next returns 0.
     17 // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]])
     18 // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
     19 // CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
     20 
     21 // Loop header
        // Inner loop: IV starts at the lower bound returned by the runtime and is
        // compared (signed <=) against the returned upper bound.
     22 // CHECK: [[O_LOOP1_BODY]]
     23 // CHECK: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
     24 // CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
     25 // CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]
     26 
     27 // CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
     28 // CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]]
     29 // CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
     30   for (int i = 32000000; i > 33; i += -7) {
     31 // CHECK: [[LOOP1_BODY]]
     32 // Start of body: calculate i from IV:
        // i = 32000000 - IV * 7 (the source loop counts down by 7).
     33 // CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]]
     34 // CHECK-NEXT: [[CALC_I_1:%.+]] = mul nsw i32 [[IV1_1]], 7
     35 // CHECK-NEXT: [[CALC_I_2:%.+]] = sub nsw i32 32000000, [[CALC_I_1]]
     36 // CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]
     37 
     38 // ... start of ordered region ...
     39 // CHECK-NEXT: call void @__kmpc_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
     40 // ... loop body ...
     41 // End of body: store into a[i]:
        // The store must not carry parallel-loop-access metadata, and the ordered
        // region must be closed immediately after it.
     42 // CHECK: store float [[RESULT:%.+]], float* {{%.+}}
     43 // CHECK-NOT: !llvm.mem.parallel_loop_access
     44 // CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
     45 // ... end of ordered region ...
     46     #pragma omp ordered
     47     a[i] = b[i] * c[i] * d[i];
        // Increment IV, then notify the runtime that this ordered iteration is
        // finished (dispatch_fini) before branching back to the loop header.
     48 // CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
     49 // CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
     50 // CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]
     51 // CHECK-NEXT: call void @__kmpc_dispatch_fini_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
     52 // CHECK-NEXT: br label %{{.+}}
     53   }
        // After both loops exit, the implicit barrier of the `for` construct is
        // emitted using the dedicated implicit-barrier location.
     54 // CHECK: [[LOOP1_END]]
     55 // CHECK: [[O_LOOP1_END]]
     56 // CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
     57 // CHECK: ret void
     58 }
     59 
     60 // CHECK-LABEL: define {{.*void}} @{{.*}}dynamic1{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
        // Codegen test: ordered worksharing loop with schedule(dynamic) and an
        // unsigned 64-bit loop variable. The FileCheck patterns require the
        // unsigned 64-bit dispatch entry points (__kmpc_dispatch_init_8u /
        // _next_8u / _fini_8u) with schedule code 67, an unsigned (ule) bound
        // comparison, and mul/add on the induction variable without the nsw flag.
        // NOTE(review): 67 is presumably kmp_ord_dynamic_chunked in the runtime's
        // sched_type enum - confirm against kmp.h.
        //
        // a, b, c, d: float arrays indexed by the loop variable; a[i] receives
        // b[i] * c[i] * d[i]. Nothing is executed by this test (it only emits IR).
     61 void dynamic1(float *a, float *b, float *c, float *d) {
     62 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
     63   #pragma omp for schedule(dynamic) ordered
        // The loop below runs i = 131071, 131198, ... while i < 2147483647, which
        // is 16908288 iterations; the normalized bounds are 0 .. 16908287 with
        // stride 1 and chunk 1.
     64 // CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 67, i64 0, i64 16908287, i64 1, i64 1)
     65 //
        // Outer loop: keep asking the runtime for the next range of iterations
        // until dispatch_next returns 0.
     66 // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
     67 // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
     68 // CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
     69 
     70 // Loop header
        // Inner loop: IV starts at the lower bound returned by the runtime and is
        // compared (unsigned <=) against the returned upper bound.
     71 // CHECK: [[O_LOOP1_BODY]]
     72 // CHECK: [[LB:%.+]] = load i64, i64* [[OMP_LB]]
     73 // CHECK-NEXT: store i64 [[LB]], i64* [[OMP_IV:[^,]+]]
     74 // CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]
     75 
     76 // CHECK-NEXT: [[UB:%.+]] = load i64, i64* [[OMP_UB]]
     77 // CHECK-NEXT: [[CMP:%.+]] = icmp ule i64 [[IV]], [[UB]]
     78 // CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
     79   for (unsigned long long i = 131071; i < 2147483647; i += 127) {
     80 // CHECK: [[LOOP1_BODY]]
     81 // Start of body: calculate i from IV:
        // i = 131071 + IV * 127.
     82 // CHECK: [[IV1_1:%.+]] = load i64, i64* [[OMP_IV]]
     83 // CHECK-NEXT: [[CALC_I_1:%.+]] = mul i64 [[IV1_1]], 127
     84 // CHECK-NEXT: [[CALC_I_2:%.+]] = add i64 131071, [[CALC_I_1]]
     85 // CHECK-NEXT: store i64 [[CALC_I_2]], i64* [[LC_I:.+]]
     86 
     87 // ... start of ordered region ...
     88 // CHECK-NEXT: call void @__kmpc_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
     89 // ... loop body ...
     90 // End of body: store into a[i]:
        // The store must not carry parallel-loop-access metadata, and the ordered
        // region must be closed immediately after it.
     91 // CHECK: store float [[RESULT:%.+]], float* {{%.+}}
     92 // CHECK-NOT: !llvm.mem.parallel_loop_access
     93 // CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
     94 // ... end of ordered region ...
        // This function uses the explicit `threads` clause on the ordered
        // directive; the patterns above are the same runtime calls as for the
        // clause-less form used in static_not_chunked.
     95     #pragma omp ordered threads
     96     a[i] = b[i] * c[i] * d[i];
     97 // CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}
     98 // CHECK-NEXT: [[ADD1_2:%.+]] = add i64 [[IV1_2]], 1
     99 // CHECK-NEXT: store i64 [[ADD1_2]], i64* [[OMP_IV]]
    100 
    101 // ... end iteration for ordered loop ...
    102 // CHECK-NEXT: call void @__kmpc_dispatch_fini_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
    103 // CHECK-NEXT: br label %{{.+}}
    104   }
        // After both loops exit, the implicit barrier of the `for` construct is
        // emitted using the dedicated implicit-barrier location.
    105 // CHECK: [[LOOP1_END]]
    106 // CHECK: [[O_LOOP1_END]]
    107 // CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
    108 // CHECK: ret void
    109 }
    110 
    111 // CHECK-LABEL: define {{.*void}} @{{.*}}test_auto{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
        // Codegen test: ordered worksharing loop with schedule(auto) and
        // collapse(2), where the outer loop's start value is not a compile-time
        // constant. The FileCheck patterns require the signed 64-bit dispatch
        // entry points (__kmpc_dispatch_init_8 / _next_8 / _fini_8) with schedule
        // code 70 and a last-iteration value that is computed at run time.
        // NOTE(review): 70 is presumably kmp_ord_auto in the runtime's sched_type
        // enum - confirm against kmp.h.
        //
        // a, b, c, d: float arrays indexed by the outer loop variable; a[i]
        // receives b[i] * c[i] * d[i]. Nothing is executed by this test (it only
        // emits IR).
    112 void test_auto(float *a, float *b, float *c, float *d) {
        // x is the inner loop's control variable (declared outside the loop); y
        // only supplies the run-time start value of the outer loop.
    113   unsigned int x = 0;
    114   unsigned int y = 0;
    115 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
    116   #pragma omp for schedule(auto) collapse(2) ordered
        // The upper bound is matched as an arbitrary value (LAST_ITER) rather than
        // a literal because the collapsed trip count depends on y.
    117 // CHECK: call void @__kmpc_dispatch_init_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 70, i64 0, i64 [[LAST_ITER:%[^,]+]], i64 1, i64 1)
    118 //
        // Outer loop: keep asking the runtime for the next range of iterations
        // until dispatch_next returns 0.
    119 // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
    120 // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
    121 // CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
    122 
    123 // Loop header
        // Inner loop: IV starts at the lower bound returned by the runtime and is
        // compared (signed <=) against the returned upper bound.
    124 // CHECK: [[O_LOOP1_BODY]]
    125 // CHECK: [[LB:%.+]] = load i64, i64* [[OMP_LB]]
    126 // CHECK-NEXT: store i64 [[LB]], i64* [[OMP_IV:[^,]+]]
    127 // CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]
    128 
    129 // CHECK-NEXT: [[UB:%.+]] = load i64, i64* [[OMP_UB]]
    130 // CHECK-NEXT: [[CMP:%.+]] = icmp sle i64 [[IV]], [[UB]]
    131 // CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
    132 // FIXME: When the iteration count of some nested loop is not a known constant,
    133 // we should pre-calculate it, like we do for the total number of iterations!
    134   for (char i = static_cast<char>(y); i <= '9'; ++i)
    135     for (x = 11; x > 0; --x) {
    136 // CHECK: [[LOOP1_BODY]]
    137 // Start of body: indices are calculated from IV:
        // Only the two stores are matched (an i8 store for i and an i32 store for
        // x); the arithmetic that derives them from IV is not pinned here.
    138 // CHECK: store i8 {{%[^,]+}}, i8* {{%[^,]+}}
    139 // CHECK: store i32 {{%[^,]+}}, i32* {{%[^,]+}}
    140 
    141 // ... start of ordered region ...
    142 // CHECK: call void @__kmpc_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
    143 // ... loop body ...
    144 // End of body: store into a[i]:
        // The store must not carry parallel-loop-access metadata, and the ordered
        // region must be closed immediately after it.
    145 // CHECK: store float [[RESULT:%.+]], float* {{%.+}}
    146 // CHECK-NOT: !llvm.mem.parallel_loop_access
    147 // CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
    148 // ... end of ordered region ...
    149     #pragma omp ordered
    150     a[i] = b[i] * c[i] * d[i];
    151 // CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}
    152 // CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i64 [[IV1_2]], 1
    153 // CHECK-NEXT: store i64 [[ADD1_2]], i64* [[OMP_IV]]
    154 
    155 // ... end iteration for ordered loop ...
    156 // CHECK-NEXT: call void @__kmpc_dispatch_fini_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
    157 // CHECK-NEXT: br label %{{.+}}
        // The brace below closes the inner `for (x ...)` body; the outer loop has
        // no braces of its own.
    158   }
        // After both loops exit, the implicit barrier of the `for` construct is
        // emitted using the dedicated implicit-barrier location.
    159 // CHECK: [[LOOP1_END]]
    160 // CHECK: [[O_LOOP1_END]]
    161 // CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
    162 // CHECK: ret void
    163 }
    164 
    165 // CHECK-LABEL: define {{.*void}} @{{.*}}runtime{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
        // Codegen test: ordered worksharing loop with schedule(runtime) and
        // collapse(2) over two loops with constant bounds. The FileCheck patterns
        // require the 32-bit signed dispatch entry points (__kmpc_dispatch_init_4
        // / _next_4 / _fini_4) with schedule code 69.
        // NOTE(review): 69 is presumably kmp_ord_runtime in the runtime's
        // sched_type enum - confirm against kmp.h.
        //
        // a, b, c, d: float arrays indexed by the outer loop variable; a[i]
        // receives b[i] * c[i] * d[i]. Nothing is executed by this test (it only
        // emits IR).
    166 void runtime(float *a, float *b, float *c, float *d) {
        // x is the inner loop's control variable (declared outside the loop).
    167   int x = 0;
    168 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
    169   #pragma omp for collapse(2) schedule(runtime) ordered
        // Collapsed iteration space: 10 values of i ('0'..'9') times 20 values of
        // x (-10..9) = 200 iterations, so the normalized bounds are 0 .. 199.
    170 // CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 69, i32 0, i32 199, i32 1, i32 1)
    171 //
        // Outer loop: keep asking the runtime for the next range of iterations
        // until dispatch_next returns 0.
    172 // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]])
    173 // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
    174 // CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
    175 
    176 // Loop header
        // Inner loop: IV starts at the lower bound returned by the runtime and is
        // compared (signed <=) against the returned upper bound.
    177 // CHECK: [[O_LOOP1_BODY]]
    178 // CHECK: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
    179 // CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
    180 // CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]
    181 
    182 // CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
    183 // CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]]
    184 // CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
    185   for (unsigned char i = '0' ; i <= '9'; ++i)
    186     for (x = -10; x < 10; ++x) {
    187 // CHECK: [[LOOP1_BODY]]
    188 // Start of body: indices are calculated from IV:
        // Only the two stores are matched (an i8 store for i and an i32 store for
        // x); the arithmetic that derives them from IV is not pinned here.
    189 // CHECK: store i8 {{%[^,]+}}, i8* {{%[^,]+}}
    190 // CHECK: store i32 {{%[^,]+}}, i32* {{%[^,]+}}
    191 
    192 // ... start of ordered region ...
    193 // CHECK: call void @__kmpc_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
    194 // ... loop body ...
    195 // End of body: store into a[i]:
        // The store must not carry parallel-loop-access metadata, and the ordered
        // region must be closed immediately after it.
    196 // CHECK: store float [[RESULT:%.+]], float* {{%.+}}
    197 // CHECK-NOT: !llvm.mem.parallel_loop_access
    198 // CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
    199 // ... end of ordered region ...
    200     #pragma omp ordered threads
    201     a[i] = b[i] * c[i] * d[i];
    202 // CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
    203 // CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
    204 // CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]
    205 
    206 // ... end iteration for ordered loop ...
    207 // CHECK-NEXT: call void @__kmpc_dispatch_fini_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
    208 // CHECK-NEXT: br label %{{.+}}
        // The brace below closes the inner `for (x ...)` body; the outer loop has
        // no braces of its own.
    209   }
        // After both loops exit, the implicit barrier of the `for` construct is
        // emitted using the dedicated implicit-barrier location.
    210 // CHECK: [[LOOP1_END]]
    211 // CHECK: [[O_LOOP1_END]]
    212 // CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
    213 // CHECK: ret void
    214 }
    215 
    216 float f[10];
        // f (above) is the global array that both loops in foo_simd write through.
        //
        // Codegen test for `#pragma omp ordered simd` nested in two different
        // loop constructs. In both loops the patterns require the store of 0.0 to
        // be followed directly by a call to an outlined function taking an i32*;
        // the patterns after this function require that outlined function to
        // store 1.0 and return.
        //
        // low, up: half-open iteration range [low, up) used by both loops. The
        // function itself does no bounds checking against the size of f; the
        // test only emits IR and never runs it.
    217 // CHECK-LABEL: foo_simd
    218 void foo_simd(int low, int up) {
        // Loop 1 (`omp simd`): both the store and the call to the outlined ordered
        // region must carry !llvm.mem.parallel_loop_access metadata.
    219   // CHECK: store float 0.000000e+00, float* %{{.+}}, align {{[0-9]+}}, !llvm.mem.parallel_loop_access !
    220   // CHECK-NEXT: call void [[CAP_FUNC:@.+]](i32* %{{.+}}) #{{[0-9]+}}, !llvm.mem.parallel_loop_access !
    221 #pragma omp simd
    222   for (int i = low; i < up; ++i) {
    223     f[i] = 0.0;
    224 #pragma omp ordered simd
    225     f[i] = 1.0;
    226   }
        // Loop 2 (`omp for simd ordered`): the same store/call pair is matched, but
        // these patterns stop before any metadata suffix, so they neither require
        // nor forbid it. CAP_FUNC is re-captured here and that later binding is
        // the one used by the patterns that follow this function.
    227   // CHECK: store float 0.000000e+00, float* %{{.+}}, align {{[0-9]+}}
    228   // CHECK-NEXT: call void [[CAP_FUNC:@.+]](i32* %{{.+}}) #{{[0-9]+}}
    229 #pragma omp for simd ordered
    230   for (int i = low; i < up; ++i) {
    231     f[i] = 0.0;
    232 #pragma omp ordered simd
    233     f[i] = 1.0;
    234   }
    235 }
    236 
    237 // CHECK: define internal void [[CAP_FUNC]](i32* dereferenceable({{[0-9]+}}) %{{.+}}) #
    238 // CHECK: store float 1.000000e+00, float* %{{.+}}, align
    239 // CHECK-NEXT: ret void
    240 
    241 #endif // HEADER
    242 
    243