1 // -*- C++ -*- 2 3 // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. 4 // 5 // This file is part of the GNU ISO C++ Library. This library is free 6 // software; you can redistribute it and/or modify it under the terms 7 // of the GNU General Public License as published by the Free Software 8 // Foundation; either version 3, or (at your option) any later 9 // version. 10 11 // This library is distributed in the hope that it will be useful, but 12 // WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 // General Public License for more details. 15 16 // Under Section 7 of GPL version 3, you are granted additional 17 // permissions described in the GCC Runtime Library Exception, version 18 // 3.1, as published by the Free Software Foundation. 19 20 // You should have received a copy of the GNU General Public License and 21 // a copy of the GCC Runtime Library Exception along with this program; 22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 // <http://www.gnu.org/licenses/>. 24 25 /** @file parallel/partial_sum.h 26 * @brief Parallel implementation of std::partial_sum(), i. e. prefix 27 * sums. 28 * This file is a GNU parallel extension to the Standard C++ Library. 29 */ 30 31 // Written by Johannes Singler. 32 33 #ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H 34 #define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1 35 36 #include <omp.h> 37 #include <new> 38 #include <bits/stl_algobase.h> 39 #include <parallel/parallel.h> 40 #include <parallel/numericfwd.h> 41 42 namespace __gnu_parallel 43 { 44 // Problem: there is no 0-element given. 45 46 /** @brief Base case prefix sum routine. 47 * @param begin Begin iterator of input sequence. 48 * @param end End iterator of input sequence. 49 * @param result Begin iterator of output sequence. 50 * @param bin_op Associative binary function. 51 * @param value Start value. Must be passed since the neutral 52 * element is unknown in general. 53 * @return End iterator of output sequence. */ 54 template<typename InputIterator, 55 typename OutputIterator, 56 typename BinaryOperation> 57 OutputIterator 58 parallel_partial_sum_basecase(InputIterator begin, InputIterator end, 59 OutputIterator result, BinaryOperation bin_op, 60 typename std::iterator_traits 61 <InputIterator>::value_type value) 62 { 63 if (begin == end) 64 return result; 65 66 while (begin != end) 67 { 68 value = bin_op(value, *begin); 69 *result = value; 70 ++result; 71 ++begin; 72 } 73 return result; 74 } 75 76 /** @brief Parallel partial sum implementation, two-phase approach, 77 no recursion. 78 * @param begin Begin iterator of input sequence. 79 * @param end End iterator of input sequence. 80 * @param result Begin iterator of output sequence. 81 * @param bin_op Associative binary function. 82 * @param n Length of sequence. 83 * @param num_threads Number of threads to use. 84 * @return End iterator of output sequence. 85 */ 86 template<typename InputIterator, 87 typename OutputIterator, 88 typename BinaryOperation> 89 OutputIterator 90 parallel_partial_sum_linear(InputIterator begin, InputIterator end, 91 OutputIterator result, BinaryOperation bin_op, 92 typename std::iterator_traits 93 <InputIterator>::difference_type n) 94 { 95 typedef std::iterator_traits<InputIterator> traits_type; 96 typedef typename traits_type::value_type value_type; 97 typedef typename traits_type::difference_type difference_type; 98 99 if (begin == end) 100 return result; 101 102 thread_index_t num_threads = 103 std::min<difference_type>(get_max_threads(), n - 1); 104 105 if (num_threads < 2) 106 { 107 *result = *begin; 108 return parallel_partial_sum_basecase( 109 begin + 1, end, result + 1, bin_op, *begin); 110 } 111 112 difference_type* borders; 113 value_type* sums; 114 115 const _Settings& __s = _Settings::get(); 116 117 # pragma omp parallel num_threads(num_threads) 118 { 119 # pragma omp single 120 { 121 num_threads = omp_get_num_threads(); 122 123 borders = new difference_type[num_threads + 2]; 124 125 if (__s.partial_sum_dilation == 1.0f) 126 equally_split(n, num_threads + 1, borders); 127 else 128 { 129 difference_type chunk_length = 130 ((double)n 131 / ((double)num_threads + __s.partial_sum_dilation)), 132 borderstart = n - num_threads * chunk_length; 133 borders[0] = 0; 134 for (int i = 1; i < (num_threads + 1); ++i) 135 { 136 borders[i] = borderstart; 137 borderstart += chunk_length; 138 } 139 borders[num_threads + 1] = n; 140 } 141 142 sums = static_cast<value_type*>(::operator new(sizeof(value_type) 143 * num_threads)); 144 OutputIterator target_end; 145 } //single 146 147 thread_index_t iam = omp_get_thread_num(); 148 if (iam == 0) 149 { 150 *result = *begin; 151 parallel_partial_sum_basecase(begin + 1, begin + borders[1], 152 result + 1, bin_op, *begin); 153 ::new(&(sums[iam])) value_type(*(result + borders[1] - 1)); 154 } 155 else 156 { 157 ::new(&(sums[iam])) 158 value_type(std::accumulate(begin + borders[iam] + 1, 159 begin + borders[iam + 1], 160 *(begin + borders[iam]), 161 bin_op, 162 __gnu_parallel::sequential_tag())); 163 } 164 165 # pragma omp barrier 166 167 # pragma omp single 168 parallel_partial_sum_basecase( 169 sums + 1, sums + num_threads, sums + 1, bin_op, sums[0]); 170 171 # pragma omp barrier 172 173 // Still same team. 174 parallel_partial_sum_basecase(begin + borders[iam + 1], 175 begin + borders[iam + 2], 176 result + borders[iam + 1], bin_op, 177 sums[iam]); 178 } //parallel 179 180 ::operator delete(sums); 181 delete[] borders; 182 183 return result + n; 184 } 185 186 /** @brief Parallel partial sum front-end. 187 * @param begin Begin iterator of input sequence. 188 * @param end End iterator of input sequence. 189 * @param result Begin iterator of output sequence. 190 * @param bin_op Associative binary function. 191 * @return End iterator of output sequence. */ 192 template<typename InputIterator, 193 typename OutputIterator, 194 typename BinaryOperation> 195 OutputIterator 196 parallel_partial_sum(InputIterator begin, InputIterator end, 197 OutputIterator result, BinaryOperation bin_op) 198 { 199 _GLIBCXX_CALL(begin - end) 200 201 typedef std::iterator_traits<InputIterator> traits_type; 202 typedef typename traits_type::value_type value_type; 203 typedef typename traits_type::difference_type difference_type; 204 205 difference_type n = end - begin; 206 207 switch (_Settings::get().partial_sum_algorithm) 208 { 209 case LINEAR: 210 // Need an initial offset. 211 return parallel_partial_sum_linear(begin, end, result, bin_op, n); 212 default: 213 // Partial_sum algorithm not implemented. 214 _GLIBCXX_PARALLEL_ASSERT(0); 215 return result + n; 216 } 217 } 218 } 219 220 #endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */ 221