Home | History | Annotate | Download | only in parallel
      1 // -*- C++ -*-
      2 
      3 // Copyright (C) 2007-2013 Free Software Foundation, Inc.
      4 //
      5 // This file is part of the GNU ISO C++ Library.  This library is free
      6 // software; you can redistribute it and/or modify it under the terms
      7 // of the GNU General Public License as published by the Free Software
      8 // Foundation; either version 3, or (at your option) any later
      9 // version.
     10 
     11 // This library is distributed in the hope that it will be useful, but
     12 // WITHOUT ANY WARRANTY; without even the implied warranty of
     13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14 // General Public License for more details.
     15 
     16 // Under Section 7 of GPL version 3, you are granted additional
     17 // permissions described in the GCC Runtime Library Exception, version
     18 // 3.1, as published by the Free Software Foundation.
     19 
     20 // You should have received a copy of the GNU General Public License and
     21 // a copy of the GCC Runtime Library Exception along with this program;
     22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     23 // <http://www.gnu.org/licenses/>.
     24 
     25 /** @file parallel/multiway_mergesort.h
     26  *  @brief Parallel multiway merge sort.
     27  *  This file is a GNU parallel extension to the Standard C++ Library.
     28  */
     29 
     30 // Written by Johannes Singler.
     31 
     32 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
     33 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
     34 
     35 #include <vector>
     36 
     37 #include <parallel/basic_iterator.h>
     38 #include <bits/stl_algo.h>
     39 #include <parallel/parallel.h>
     40 #include <parallel/multiway_merge.h>
     41 
     42 namespace __gnu_parallel
     43 {
     44   /** @brief Subsequence description. */
     45   template<typename _DifferenceTp>
     46     struct _Piece
     47     {
     48       typedef _DifferenceTp _DifferenceType;
     49 
     50       /** @brief Begin of subsequence. */
     51       _DifferenceType _M_begin;
     52 
     53       /** @brief End of subsequence. */
     54       _DifferenceType _M_end;
     55     };
     56 
     57   /** @brief Data accessed by all threads.
     58    *
     59    *  PMWMS = parallel multiway mergesort */
     60   template<typename _RAIter>
     61     struct _PMWMSSortingData
     62     {
     63       typedef std::iterator_traits<_RAIter> _TraitsType;
     64       typedef typename _TraitsType::value_type _ValueType;
     65       typedef typename _TraitsType::difference_type _DifferenceType;
     66 
     67       /** @brief Number of threads involved. */
     68       _ThreadIndex _M_num_threads;
     69 
     70       /** @brief Input __begin. */
     71       _RAIter _M_source;
     72 
     73       /** @brief Start indices, per thread. */
     74       _DifferenceType* _M_starts;
     75 
     76       /** @brief Storage in which to sort. */
     77       _ValueType** _M_temporary;
     78 
     79       /** @brief Samples. */
     80       _ValueType* _M_samples;
     81 
     82       /** @brief Offsets to add to the found positions. */
     83       _DifferenceType* _M_offsets;
     84 
     85       /** @brief Pieces of data to merge @c [thread][__sequence] */
     86       std::vector<_Piece<_DifferenceType> >* _M_pieces;
     87   };
     88 
     89   /**
     90    *  @brief Select _M_samples from a sequence.
     91    *  @param __sd Pointer to algorithm data. _Result will be placed in
     92    *  @c __sd->_M_samples.
     93    *  @param __num_samples Number of _M_samples to select.
     94    */
     95   template<typename _RAIter, typename _DifferenceTp>
     96     void
     97     __determine_samples(_PMWMSSortingData<_RAIter>* __sd,
     98 			_DifferenceTp __num_samples)
     99     {
    100       typedef std::iterator_traits<_RAIter> _TraitsType;
    101       typedef typename _TraitsType::value_type _ValueType;
    102       typedef _DifferenceTp _DifferenceType;
    103 
    104       _ThreadIndex __iam = omp_get_thread_num();
    105 
    106       _DifferenceType* __es = new _DifferenceType[__num_samples + 2];
    107 
    108       __equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
    109 		      __num_samples + 1, __es);
    110 
    111       for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
    112 	::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
    113 	    _ValueType(__sd->_M_source[__sd->_M_starts[__iam]
    114 				       + __es[__i + 1]]);
    115 
    116       delete[] __es;
    117     }
    118 
    119   /** @brief Split consistently. */
    120   template<bool __exact, typename _RAIter,
    121 	   typename _Compare, typename _SortingPlacesIterator>
    122     struct _SplitConsistently
    123     { };
    124 
    125   /** @brief Split by exact splitting. */
    126   template<typename _RAIter, typename _Compare,
    127 	   typename _SortingPlacesIterator>
    128     struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator>
    129     {
    130       void
    131       operator()(const _ThreadIndex __iam,
    132 		 _PMWMSSortingData<_RAIter>* __sd,
    133 		 _Compare& __comp,
    134 		 const typename
    135 		 std::iterator_traits<_RAIter>::difference_type
    136 		 __num_samples) const
    137       {
    138 #       pragma omp barrier
    139 
    140 	std::vector<std::pair<_SortingPlacesIterator,
    141 	                      _SortingPlacesIterator> >
    142 	  __seqs(__sd->_M_num_threads);
    143 	for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
    144 	  __seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
    145 				       __sd->_M_temporary[__s]
    146 				       + (__sd->_M_starts[__s + 1]
    147 					  - __sd->_M_starts[__s]));
    148 
    149 	std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads);
    150 
    151 	// if not last thread
    152 	if (__iam < __sd->_M_num_threads - 1)
    153 	  multiseq_partition(__seqs.begin(), __seqs.end(),
    154 			     __sd->_M_starts[__iam + 1], __offsets.begin(),
    155 			     __comp);
    156 
    157 	for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
    158 	  {
    159 	    // for each sequence
    160 	    if (__iam < (__sd->_M_num_threads - 1))
    161 	      __sd->_M_pieces[__iam][__seq]._M_end
    162 		= __offsets[__seq] - __seqs[__seq].first;
    163 	    else
    164 	      // very end of this sequence
    165 	      __sd->_M_pieces[__iam][__seq]._M_end =
    166 		__sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
    167 	  }
    168 
    169 #       pragma omp barrier
    170 
    171 	for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
    172 	  {
    173 	    // For each sequence.
    174 	    if (__iam > 0)
    175 	      __sd->_M_pieces[__iam][__seq]._M_begin =
    176 		__sd->_M_pieces[__iam - 1][__seq]._M_end;
    177 	    else
    178 	      // Absolute beginning.
    179 	      __sd->_M_pieces[__iam][__seq]._M_begin = 0;
    180 	  }
    181       }
    182   };
    183 
    184   /** @brief Split by sampling. */
    185   template<typename _RAIter, typename _Compare,
    186 	   typename _SortingPlacesIterator>
    187     struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator>
    188     {
    189       void
    190       operator()(const _ThreadIndex __iam,
    191 		 _PMWMSSortingData<_RAIter>* __sd,
    192 		 _Compare& __comp,
    193 		 const typename
    194 		 std::iterator_traits<_RAIter>::difference_type
    195 		 __num_samples) const
    196       {
    197 	typedef std::iterator_traits<_RAIter> _TraitsType;
    198 	typedef typename _TraitsType::value_type _ValueType;
    199 	typedef typename _TraitsType::difference_type _DifferenceType;
    200 
    201 	__determine_samples(__sd, __num_samples);
    202 
    203 #       pragma omp barrier
    204 
    205 #       pragma omp single
    206 	__gnu_sequential::sort(__sd->_M_samples,
    207 			       __sd->_M_samples
    208 			       + (__num_samples * __sd->_M_num_threads),
    209 			       __comp);
    210 
    211 #       pragma omp barrier
    212 
    213 	for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
    214 	  {
    215 	    // For each sequence.
    216 	    if (__num_samples * __iam > 0)
    217 	      __sd->_M_pieces[__iam][__s]._M_begin =
    218                 std::lower_bound(__sd->_M_temporary[__s],
    219 				 __sd->_M_temporary[__s]
    220 				 + (__sd->_M_starts[__s + 1]
    221 				    - __sd->_M_starts[__s]),
    222 				 __sd->_M_samples[__num_samples * __iam],
    223 				 __comp)
    224                 - __sd->_M_temporary[__s];
    225 	    else
    226 	      // Absolute beginning.
    227 	      __sd->_M_pieces[__iam][__s]._M_begin = 0;
    228 
    229 	    if ((__num_samples * (__iam + 1)) <
    230 		(__num_samples * __sd->_M_num_threads))
    231 	      __sd->_M_pieces[__iam][__s]._M_end =
    232                 std::lower_bound(__sd->_M_temporary[__s],
    233 				 __sd->_M_temporary[__s]
    234 				 + (__sd->_M_starts[__s + 1]
    235 				    - __sd->_M_starts[__s]),
    236 				 __sd->_M_samples[__num_samples * (__iam + 1)],
    237 				 __comp)
    238                 - __sd->_M_temporary[__s];
    239 	    else
    240 	      // Absolute end.
    241 	      __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1]
    242 						    - __sd->_M_starts[__s]);
    243 	  }
    244       }
    245   };
    246 
    247   template<bool __stable, typename _RAIter, typename _Compare>
    248     struct __possibly_stable_sort
    249     { };
    250 
    251   template<typename _RAIter, typename _Compare>
    252     struct __possibly_stable_sort<true, _RAIter, _Compare>
    253     {
    254       void operator()(const _RAIter& __begin,
    255 		      const _RAIter& __end, _Compare& __comp) const
    256       { __gnu_sequential::stable_sort(__begin, __end, __comp); }
    257     };
    258 
    259   template<typename _RAIter, typename _Compare>
    260     struct __possibly_stable_sort<false, _RAIter, _Compare>
    261     {
    262       void operator()(const _RAIter __begin,
    263 		      const _RAIter __end, _Compare& __comp) const
    264       { __gnu_sequential::sort(__begin, __end, __comp); }
    265     };
    266 
    267   template<bool __stable, typename Seq_RAIter,
    268 	   typename _RAIter, typename _Compare,
    269 	   typename DiffType>
    270     struct __possibly_stable_multiway_merge
    271     { };
    272 
    273   template<typename Seq_RAIter, typename _RAIter,
    274 	   typename _Compare, typename _DiffType>
    275     struct __possibly_stable_multiway_merge<true, Seq_RAIter,
    276 					    _RAIter, _Compare, _DiffType>
    277     {
    278       void operator()(const Seq_RAIter& __seqs_begin,
    279 		      const Seq_RAIter& __seqs_end,
    280 		      const _RAIter& __target,
    281 		      _Compare& __comp,
    282 		      _DiffType __length_am) const
    283       { stable_multiway_merge(__seqs_begin, __seqs_end, __target,
    284 			      __length_am, __comp, sequential_tag()); }
    285     };
    286 
    287   template<typename Seq_RAIter, typename _RAIter,
    288 	   typename _Compare, typename _DiffType>
    289     struct __possibly_stable_multiway_merge<false, Seq_RAIter,
    290 					    _RAIter, _Compare, _DiffType>
    291     {
    292       void operator()(const Seq_RAIter& __seqs_begin,
    293                       const Seq_RAIter& __seqs_end,
    294                       const _RAIter& __target,
    295                       _Compare& __comp,
    296                       _DiffType __length_am) const
    297       { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
    298 		       __comp, sequential_tag()); }
    299     };
    300 
    301   /** @brief PMWMS code executed by each thread.
    302    *  @param __sd Pointer to algorithm data.
    303    *  @param __comp Comparator.
    304    */
    305   template<bool __stable, bool __exact, typename _RAIter,
    306 	   typename _Compare>
    307     void
    308     parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
    309 			  _Compare& __comp)
    310     {
    311       typedef std::iterator_traits<_RAIter> _TraitsType;
    312       typedef typename _TraitsType::value_type _ValueType;
    313       typedef typename _TraitsType::difference_type _DifferenceType;
    314 
    315       _ThreadIndex __iam = omp_get_thread_num();
    316 
    317       // Length of this thread's chunk, before merging.
    318       _DifferenceType __length_local =
    319 	__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
    320 
    321       // Sort in temporary storage, leave space for sentinel.
    322 
    323       typedef _ValueType* _SortingPlacesIterator;
    324 
    325       __sd->_M_temporary[__iam] =
    326         static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
    327 						* (__length_local + 1)));
    328 
    329       // Copy there.
    330       std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
    331 			      __sd->_M_source + __sd->_M_starts[__iam]
    332 			      + __length_local,
    333 			      __sd->_M_temporary[__iam]);
    334 
    335       __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
    336         (__sd->_M_temporary[__iam],
    337 	 __sd->_M_temporary[__iam] + __length_local,
    338          __comp);
    339 
    340       // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
    341       // __sd->_M_temporary[__iam] + __length_local.
    342 
    343       // No barrier here: Synchronization is done by the splitting routine.
    344 
    345       _DifferenceType __num_samples =
    346         _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
    347       _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
    348         (__iam, __sd, __comp, __num_samples);
    349 
    350       // Offset from __target __begin, __length after merging.
    351       _DifferenceType __offset = 0, __length_am = 0;
    352       for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
    353 	{
    354 	  __length_am += (__sd->_M_pieces[__iam][__s]._M_end
    355 			  - __sd->_M_pieces[__iam][__s]._M_begin);
    356 	  __offset += __sd->_M_pieces[__iam][__s]._M_begin;
    357 	}
    358 
    359       typedef std::vector<
    360         std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
    361         _SeqVector;
    362       _SeqVector __seqs(__sd->_M_num_threads);
    363 
    364       for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
    365 	{
    366 	  __seqs[__s] =
    367 	    std::make_pair(__sd->_M_temporary[__s]
    368 			   + __sd->_M_pieces[__iam][__s]._M_begin,
    369 			   __sd->_M_temporary[__s]
    370 			   + __sd->_M_pieces[__iam][__s]._M_end);
    371 	}
    372 
    373       __possibly_stable_multiway_merge<
    374         __stable, typename _SeqVector::iterator,
    375 	_RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(),
    376 				     __sd->_M_source + __offset, __comp,
    377 				     __length_am);
    378 
    379 #     pragma omp barrier
    380 
    381       for (_DifferenceType __i = 0; __i < __length_local; ++__i)
    382 	__sd->_M_temporary[__iam][__i].~_ValueType();
    383       ::operator delete(__sd->_M_temporary[__iam]);
    384     }
    385 
    386   /** @brief PMWMS main call.
    387    *  @param __begin Begin iterator of sequence.
    388    *  @param __end End iterator of sequence.
    389    *  @param __comp Comparator.
    390    *  @param __num_threads Number of threads to use.
    391    */
    392   template<bool __stable, bool __exact, typename _RAIter,
    393            typename _Compare>
    394     void
    395     parallel_sort_mwms(_RAIter __begin, _RAIter __end,
    396 		       _Compare __comp,
    397 		       _ThreadIndex __num_threads)
    398     {
    399       _GLIBCXX_CALL(__end - __begin)
    400 
    401       typedef std::iterator_traits<_RAIter> _TraitsType;
    402       typedef typename _TraitsType::value_type _ValueType;
    403       typedef typename _TraitsType::difference_type _DifferenceType;
    404 
    405       _DifferenceType __n = __end - __begin;
    406 
    407       if (__n <= 1)
    408 	return;
    409 
    410       // at least one element per thread
    411       if (__num_threads > __n)
    412 	__num_threads = static_cast<_ThreadIndex>(__n);
    413 
    414       // shared variables
    415       _PMWMSSortingData<_RAIter> __sd;
    416       _DifferenceType* __starts;
    417       _DifferenceType __size;
    418 
    419 #     pragma omp parallel num_threads(__num_threads)
    420       {
    421         __num_threads = omp_get_num_threads(); //no more threads than requested
    422 
    423 #       pragma omp single
    424 	{
    425 	  __sd._M_num_threads = __num_threads;
    426 	  __sd._M_source = __begin;
    427 
    428 	  __sd._M_temporary = new _ValueType*[__num_threads];
    429 
    430 	  if (!__exact)
    431 	    {
    432 	      __size =
    433 		(_Settings::get().sort_mwms_oversampling * __num_threads - 1)
    434 		* __num_threads;
    435 	      __sd._M_samples = static_cast<_ValueType*>
    436 		(::operator new(__size * sizeof(_ValueType)));
    437 	    }
    438 	  else
    439 	    __sd._M_samples = 0;
    440 
    441 	  __sd._M_offsets = new _DifferenceType[__num_threads - 1];
    442 	  __sd._M_pieces
    443 	    = new std::vector<_Piece<_DifferenceType> >[__num_threads];
    444 	  for (_ThreadIndex __s = 0; __s < __num_threads; ++__s)
    445 	    __sd._M_pieces[__s].resize(__num_threads);
    446 	  __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
    447 
    448 	  _DifferenceType __chunk_length = __n / __num_threads;
    449 	  _DifferenceType __split = __n % __num_threads;
    450 	  _DifferenceType __pos = 0;
    451 	  for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
    452 	    {
    453 	      __starts[__i] = __pos;
    454 	      __pos += ((__i < __split)
    455 			? (__chunk_length + 1) : __chunk_length);
    456 	    }
    457 	  __starts[__num_threads] = __pos;
    458 	} //single
    459 
    460         // Now sort in parallel.
    461         parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
    462       } //parallel
    463 
    464       delete[] __starts;
    465       delete[] __sd._M_temporary;
    466 
    467       if (!__exact)
    468 	{
    469 	  for (_DifferenceType __i = 0; __i < __size; ++__i)
    470 	    __sd._M_samples[__i].~_ValueType();
    471 	  ::operator delete(__sd._M_samples);
    472 	}
    473 
    474       delete[] __sd._M_offsets;
    475       delete[] __sd._M_pieces;
    476     }
    477 
    478 } //namespace __gnu_parallel
    479 
    480 #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */
    481