Home | History | Annotate | Download | only in parallel
      1 // -*- C++ -*-
      2 
      3 // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
      4 //
      5 // This file is part of the GNU ISO C++ Library.  This library is free
      6 // software; you can redistribute it and/or modify it under the terms
      7 // of the GNU General Public License as published by the Free Software
      8 // Foundation; either version 3, or (at your option) any later
      9 // version.
     10 
     11 // This library is distributed in the hope that it will be useful, but
     12 // WITHOUT ANY WARRANTY; without even the implied warranty of
     13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14 // General Public License for more details.
     15 
     16 // Under Section 7 of GPL version 3, you are granted additional
     17 // permissions described in the GCC Runtime Library Exception, version
     18 // 3.1, as published by the Free Software Foundation.
     19 
     20 // You should have received a copy of the GNU General Public License and
     21 // a copy of the GCC Runtime Library Exception along with this program;
     22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     23 // <http://www.gnu.org/licenses/>.
     24 
     25 /** @file parallel/partition.h
     26  *  @brief Parallel implementation of std::partition(),
     27  *  std::nth_element(), and std::partial_sort().
     28  *  This file is a GNU parallel extension to the Standard C++ Library.
     29  */
     30 
     31 // Written by Johannes Singler and Felix Putze.
     32 
     33 #ifndef _GLIBCXX_PARALLEL_PARTITION_H
     34 #define _GLIBCXX_PARALLEL_PARTITION_H 1
     35 
     36 #include <parallel/basic_iterator.h>
     37 #include <parallel/sort.h>
     38 #include <parallel/random_number.h>
     39 #include <bits/stl_algo.h>
     40 #include <parallel/parallel.h>
     41 
     42 /** @brief Decide whether to declare certain variables volatile. */
     43 #define _GLIBCXX_VOLATILE volatile
     44 
     45 namespace __gnu_parallel
     46 {
     47 /** @brief Parallel implementation of std::partition.
     48   *  @param begin Begin iterator of input sequence to split.
     49   *  @param end End iterator of input sequence to split.
     50   *  @param pred Partition predicate, possibly including some kind of pivot.
     51   *  @param num_threads Maximum number of threads to use for this task.
     52   *  @return Number of elements not fulfilling the predicate. */
     53 template<typename RandomAccessIterator, typename Predicate>
     54   typename std::iterator_traits<RandomAccessIterator>::difference_type
     55   parallel_partition(RandomAccessIterator begin, RandomAccessIterator end,
     56                      Predicate pred, thread_index_t num_threads)
     57   {
     58     typedef std::iterator_traits<RandomAccessIterator> traits_type;
     59     typedef typename traits_type::value_type value_type;
     60     typedef typename traits_type::difference_type difference_type;
     61 
     62     difference_type n = end - begin;
     63 
     64     _GLIBCXX_CALL(n)
     65 
     66     const _Settings& __s = _Settings::get();
     67 
     68     // Shared.
     69     _GLIBCXX_VOLATILE difference_type left = 0, right = n - 1;
     70     _GLIBCXX_VOLATILE difference_type leftover_left, leftover_right;
     71     _GLIBCXX_VOLATILE difference_type leftnew, rightnew;
     72 
     73     bool* reserved_left = NULL, * reserved_right = NULL;
     74 
     75     difference_type chunk_size = __s.partition_chunk_size;
     76 
     77     omp_lock_t result_lock;
     78     omp_init_lock(&result_lock);
     79 
     80     //at least two chunks per thread
     81     if(right - left + 1 >= 2 * num_threads * chunk_size)
     82 #   pragma omp parallel num_threads(num_threads)
     83       {
     84 #       pragma omp single
     85           {
     86             num_threads = omp_get_num_threads();
     87             reserved_left = new bool[num_threads];
     88             reserved_right = new bool[num_threads];
     89 
     90             if (__s.partition_chunk_share > 0.0)
     91               chunk_size = std::max<difference_type>(__s.partition_chunk_size,
     92 				    (double)n * __s.partition_chunk_share
     93 						     / (double)num_threads);
     94             else
     95               chunk_size = __s.partition_chunk_size;
     96           }
     97 
     98         while (right - left + 1 >= 2 * num_threads * chunk_size)
     99           {
    100 #           pragma omp single
    101               {
    102                 difference_type num_chunks = (right - left + 1) / chunk_size;
    103 
    104                 for (int r = 0; r < num_threads; ++r)
    105                   {
    106                     reserved_left[r] = false;
    107                     reserved_right[r] = false;
    108                   }
    109                 leftover_left = 0;
    110                 leftover_right = 0;
    111               } //implicit barrier
    112 
    113             // Private.
    114             difference_type thread_left, thread_left_border,
    115                             thread_right, thread_right_border;
    116             thread_left = left + 1;
    117 
    118             // Just to satisfy the condition below.
    119             thread_left_border = thread_left - 1;
    120             thread_right = n - 1;
    121             thread_right_border = thread_right + 1;
    122 
    123             bool iam_finished = false;
    124             while (!iam_finished)
    125               {
    126                 if (thread_left > thread_left_border)
    127                   {
    128                     omp_set_lock(&result_lock);
    129                     if (left + (chunk_size - 1) > right)
    130                       iam_finished = true;
    131                     else
    132                       {
    133                         thread_left = left;
    134                         thread_left_border = left + (chunk_size - 1);
    135                         left += chunk_size;
    136                       }
    137                     omp_unset_lock(&result_lock);
    138                   }
    139 
    140                 if (thread_right < thread_right_border)
    141                   {
    142                     omp_set_lock(&result_lock);
    143                     if (left > right - (chunk_size - 1))
    144                       iam_finished = true;
    145                     else
    146                       {
    147                         thread_right = right;
    148                         thread_right_border = right - (chunk_size - 1);
    149                         right -= chunk_size;
    150                       }
    151                     omp_unset_lock(&result_lock);
    152                   }
    153 
    154                 if (iam_finished)
    155                   break;
    156 
    157                 // Swap as usual.
    158                 while (thread_left < thread_right)
    159                   {
    160                     while (pred(begin[thread_left])
    161                             && thread_left <= thread_left_border)
    162                       ++thread_left;
    163                     while (!pred(begin[thread_right])
    164                             && thread_right >= thread_right_border)
    165                       --thread_right;
    166 
    167                     if (thread_left > thread_left_border
    168                         || thread_right < thread_right_border)
    169                       // Fetch new chunk(s).
    170                       break;
    171 
    172                     std::swap(begin[thread_left], begin[thread_right]);
    173                     ++thread_left;
    174                     --thread_right;
    175                   }
    176               }
    177 
    178             // Now swap the leftover chunks to the right places.
    179             if (thread_left <= thread_left_border)
    180 #             pragma omp atomic
    181               ++leftover_left;
    182             if (thread_right >= thread_right_border)
    183 #             pragma omp atomic
    184               ++leftover_right;
    185 
    186 #           pragma omp barrier
    187 
    188 #           pragma omp single
    189               {
    190                 leftnew = left - leftover_left * chunk_size;
    191                 rightnew = right + leftover_right * chunk_size;
    192               }
    193 
    194 #           pragma omp barrier
    195 
    196             // <=> thread_left_border + (chunk_size - 1) >= leftnew
    197             if (thread_left <= thread_left_border
    198                 && thread_left_border >= leftnew)
    199               {
    200                 // Chunk already in place, reserve spot.
    201                 reserved_left[(left - (thread_left_border + 1)) / chunk_size]
    202                     = true;
    203               }
    204 
    205             // <=> thread_right_border - (chunk_size - 1) <= rightnew
    206             if (thread_right >= thread_right_border
    207                 && thread_right_border <= rightnew)
    208               {
    209                 // Chunk already in place, reserve spot.
    210                 reserved_right[((thread_right_border - 1) - right)
    211 			       / chunk_size] = true;
    212               }
    213 
    214 #           pragma omp barrier
    215 
    216             if (thread_left <= thread_left_border
    217                 && thread_left_border < leftnew)
    218               {
    219                 // Find spot and swap.
    220                 difference_type swapstart = -1;
    221                 omp_set_lock(&result_lock);
    222                 for (int r = 0; r < leftover_left; ++r)
    223                   if (!reserved_left[r])
    224                     {
    225                       reserved_left[r] = true;
    226                       swapstart = left - (r + 1) * chunk_size;
    227                       break;
    228                     }
    229                 omp_unset_lock(&result_lock);
    230 
    231 #if _GLIBCXX_ASSERTIONS
    232                 _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
    233 #endif
    234 
    235                 std::swap_ranges(begin + thread_left_border
    236 				 - (chunk_size - 1),
    237 				 begin + thread_left_border + 1,
    238 				 begin + swapstart);
    239               }
    240 
    241             if (thread_right >= thread_right_border
    242                 && thread_right_border > rightnew)
    243               {
    244                 // Find spot and swap
    245                 difference_type swapstart = -1;
    246                 omp_set_lock(&result_lock);
    247                 for (int r = 0; r < leftover_right; ++r)
    248                   if (!reserved_right[r])
    249                     {
    250                       reserved_right[r] = true;
    251                       swapstart = right + r * chunk_size + 1;
    252                       break;
    253                     }
    254                 omp_unset_lock(&result_lock);
    255 
    256 #if _GLIBCXX_ASSERTIONS
    257                 _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
    258 #endif
    259 
    260                 std::swap_ranges(begin + thread_right_border,
    261 				 begin + thread_right_border + chunk_size,
    262 				 begin + swapstart);
    263               }
    264 #if _GLIBCXX_ASSERTIONS
    265 #             pragma omp barrier
    266 
    267 #             pragma omp single
    268                 {
    269                   for (int r = 0; r < leftover_left; ++r)
    270                     _GLIBCXX_PARALLEL_ASSERT(reserved_left[r]);
    271                   for (int r = 0; r < leftover_right; ++r)
    272                     _GLIBCXX_PARALLEL_ASSERT(reserved_right[r]);
    273                 }
    274 
    275 #             pragma omp barrier
    276 #endif
    277 
    278 #             pragma omp barrier
    279 
    280               left = leftnew;
    281               right = rightnew;
    282           }
    283 #         pragma omp flush(left, right)
    284       } // end "recursion" //parallel
    285 
    286     difference_type final_left = left, final_right = right;
    287 
    288     while (final_left < final_right)
    289       {
    290         // Go right until key is geq than pivot.
    291         while (pred(begin[final_left]) && final_left < final_right)
    292           ++final_left;
    293 
    294         // Go left until key is less than pivot.
    295         while (!pred(begin[final_right]) && final_left < final_right)
    296           --final_right;
    297 
    298         if (final_left == final_right)
    299           break;
    300         std::swap(begin[final_left], begin[final_right]);
    301         ++final_left;
    302         --final_right;
    303       }
    304 
    305     // All elements on the left side are < piv, all elements on the
    306     // right are >= piv
    307     delete[] reserved_left;
    308     delete[] reserved_right;
    309 
    310     omp_destroy_lock(&result_lock);
    311 
    312     // Element "between" final_left and final_right might not have
    313     // been regarded yet
    314     if (final_left < n && !pred(begin[final_left]))
    315       // Really swapped.
    316       return final_left;
    317     else
    318       return final_left + 1;
    319   }
    320 
    321 /**
    322   *  @brief Parallel implementation of std::nth_element().
    323   *  @param begin Begin iterator of input sequence.
    324   *  @param nth Iterator of element that must be in position afterwards.
    325   *  @param end End iterator of input sequence.
    326   *  @param comp Comparator.
    327   */
    328 template<typename RandomAccessIterator, typename Comparator>
    329   void
    330   parallel_nth_element(RandomAccessIterator begin, RandomAccessIterator nth,
    331 		       RandomAccessIterator end, Comparator comp)
    332   {
    333     typedef std::iterator_traits<RandomAccessIterator> traits_type;
    334     typedef typename traits_type::value_type value_type;
    335     typedef typename traits_type::difference_type difference_type;
    336 
    337     _GLIBCXX_CALL(end - begin)
    338 
    339     RandomAccessIterator split;
    340     random_number rng;
    341 
    342     const _Settings& __s = _Settings::get();
    343     difference_type minimum_length = std::max<difference_type>(2,
    344         std::max(__s.nth_element_minimal_n, __s.partition_minimal_n));
    345 
    346     // Break if input range to small.
    347     while (static_cast<sequence_index_t>(end - begin) >= minimum_length)
    348       {
    349         difference_type n = end - begin;
    350 
    351         RandomAccessIterator pivot_pos = begin + rng(n);
    352 
    353         // Swap pivot_pos value to end.
    354         if (pivot_pos != (end - 1))
    355           std::swap(*pivot_pos, *(end - 1));
    356         pivot_pos = end - 1;
    357 
    358         // XXX Comparator must have first_value_type, second_value_type,
    359 	// result_type
    360         // Comparator == __gnu_parallel::lexicographic<S, int,
    361 	// __gnu_parallel::less<S, S> >
    362         // pivot_pos == std::pair<S, int>*
    363         // XXX binder2nd only for RandomAccessIterators??
    364         __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
    365 	  pred(comp, *pivot_pos);
    366 
    367         // Divide, leave pivot unchanged in last place.
    368         RandomAccessIterator split_pos1, split_pos2;
    369         split_pos1 = begin + parallel_partition(begin, end - 1, pred,
    370 						get_max_threads());
    371 
    372         // Left side: < pivot_pos; right side: >= pivot_pos
    373 
    374         // Swap pivot back to middle.
    375         if (split_pos1 != pivot_pos)
    376           std::swap(*split_pos1, *pivot_pos);
    377         pivot_pos = split_pos1;
    378 
    379         // In case all elements are equal, split_pos1 == 0
    380         if ((split_pos1 + 1 - begin) < (n >> 7)
    381 	    || (end - split_pos1) < (n >> 7))
    382           {
    383             // Very unequal split, one part smaller than one 128th
    384             // elements not strictly larger than the pivot.
    385             __gnu_parallel::unary_negate<__gnu_parallel::
    386 	      binder1st<Comparator, value_type, value_type, bool>, value_type>
    387 	      pred(__gnu_parallel::binder1st<Comparator, value_type,
    388 		   value_type, bool>(comp, *pivot_pos));
    389 
    390             // Find other end of pivot-equal range.
    391             split_pos2 = __gnu_sequential::partition(split_pos1 + 1,
    392 						     end, pred);
    393           }
    394         else
    395           // Only skip the pivot.
    396           split_pos2 = split_pos1 + 1;
    397 
    398         // Compare iterators.
    399         if (split_pos2 <= nth)
    400           begin = split_pos2;
    401         else if (nth < split_pos1)
    402           end = split_pos1;
    403         else
    404           break;
    405       }
    406 
    407     // Only at most _Settings::partition_minimal_n elements left.
    408     __gnu_sequential::nth_element(begin, nth, end, comp);
    409   }
    410 
    411 /** @brief Parallel implementation of std::partial_sort().
    412 *  @param begin Begin iterator of input sequence.
    413 *  @param middle Sort until this position.
    414 *  @param end End iterator of input sequence.
    415 *  @param comp Comparator. */
    416 template<typename RandomAccessIterator, typename Comparator>
    417   void
    418   parallel_partial_sort(RandomAccessIterator begin,
    419 			RandomAccessIterator middle,
    420 			RandomAccessIterator end, Comparator comp)
    421   {
    422     parallel_nth_element(begin, middle, end, comp);
    423     std::sort(begin, middle, comp);
    424   }
    425 
    426 } //namespace __gnu_parallel
    427 
    428 #undef _GLIBCXX_VOLATILE
    429 
    430 #endif /* _GLIBCXX_PARALLEL_PARTITION_H */
    431