libstdc++-v3/include/parallel/multiway_mergesort.h

   1 // -*- C++ -*-
   2
   3 // Copyright (C) 2007-2024 Free Software Foundation, Inc.
   4 //
   5 // This file is part of the GNU ISO C++ Library.  This library is free
   6 // software; you can redistribute it and/or modify it under the terms
   7 // of the GNU General Public License as published by the Free Software
   8 // Foundation; either version 3, or (at your option) any later
   9 // version.
  10
  11 // This library is distributed in the hope that it will be useful, but
  12 // WITHOUT ANY WARRANTY; without even the implied warranty of
  13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 // General Public License for more details.
  15
  16 // Under Section 7 of GPL version 3, you are granted additional
  17 // permissions described in the GCC Runtime Library Exception, version
  18 // 3.1, as published by the Free Software Foundation.
  19
  20 // You should have received a copy of the GNU General Public License and
  21 // a copy of the GCC Runtime Library Exception along with this program;
  22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  23 // <http://www.gnu.org/licenses/>.
  24
  25 /** @file parallel/multiway_mergesort.h
  26  *  @brief Parallel multiway merge sort.
  27  *  This file is a GNU parallel extension to the Standard C++ Library.
  28  */
  29
  30 // Written by Johannes Singler.
  31
  32 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
  33 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
  34
  35 #include <vector>
  36
  37 #include <parallel/basic_iterator.h>
  38 #include <bits/stl_algo.h>
  39 #include <parallel/parallel.h>
  40 #include <parallel/multiway_merge.h>
  41
  42 namespace __gnu_parallel
  43 {
  44   /** @brief Subsequence description. */
  45   template<typename _DifferenceTp>
  46     struct _Piece
  47     {
  48       typedef _DifferenceTp _DifferenceType;
  49
  50       /** @brief Begin of subsequence. */
  51       _DifferenceType _M_begin;
  52
  53       /** @brief End of subsequence. */
  54       _DifferenceType _M_end;
  55     };
  56
  57   /** @brief Data accessed by all threads.
  58    *
  59    *  PMWMS = parallel multiway mergesort */
  60   template<typename _RAIter>
  61     struct _PMWMSSortingData
  62     {
  63       typedef std::iterator_traits<_RAIter> _TraitsType;
  64       typedef typename _TraitsType::value_type _ValueType;
  65       typedef typename _TraitsType::difference_type _DifferenceType;
  66
  67       /** @brief Number of threads involved. */
  68       _ThreadIndex _M_num_threads;
  69
  70       /** @brief Input __begin. */
  71       _RAIter _M_source;
  72
  73       /** @brief Start indices, per thread. */
  74       _DifferenceType* _M_starts;
  75
  76       /** @brief Storage in which to sort. */
  77       _ValueType** _M_temporary;
  78
  79       /** @brief Samples. */
  80       _ValueType* _M_samples;
  81
  82       /** @brief Offsets to add to the found positions. */
  83       _DifferenceType* _M_offsets;
  84
  85       /** @brief Pieces of data to merge @c [thread][__sequence] */
  86       std::vector<_Piece<_DifferenceType> >* _M_pieces;
  87   };
  88
  89   /**
  90    *  @brief Select _M_samples from a sequence.
  91    *  @param __sd Pointer to algorithm data. _Result will be placed in
  92    *  @c __sd->_M_samples.
  93    *  @param __num_samples Number of _M_samples to select.
  94    */
  95   template<typename _RAIter, typename _DifferenceTp>
  96     void
  97     __determine_samples(_PMWMSSortingData<_RAIter>* __sd,
  98                         _DifferenceTp __num_samples)
  99     {
 100       typedef std::iterator_traits<_RAIter> _TraitsType;
 101       typedef typename _TraitsType::value_type _ValueType;
 102       typedef _DifferenceTp _DifferenceType;
 103
 104       _ThreadIndex __iam = omp_get_thread_num();
 105
 106       _DifferenceType* __es = new _DifferenceType[__num_samples + 2];
 107
 108       __equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
 109                       __num_samples + 1, __es);
 110
 111       for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
 112         ::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
 113             _ValueType(__sd->_M_source[__sd->_M_starts[__iam]
 114                                        + __es[__i + 1]]);
 115
 116       delete[] __es;
 117     }
 118
 119   /** @brief Split consistently. */
 120   template<bool __exact, typename _RAIter,
 121            typename _Compare, typename _SortingPlacesIterator>
 122     struct _SplitConsistently
 123     { };
 124
 125   /** @brief Split by exact splitting. */
 126   template<typename _RAIter, typename _Compare,
 127            typename _SortingPlacesIterator>
 128     struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator>
 129     {
 130       void
 131       operator()(const _ThreadIndex __iam,
 132                  _PMWMSSortingData<_RAIter>* __sd,
 133                  _Compare& __comp,
 134                  const typename
 135                  std::iterator_traits<_RAIter>::difference_type
 136                  __num_samples) const
 137       {
 138 #       pragma omp barrier
 139
 140         std::vector<std::pair<_SortingPlacesIterator,
 141                               _SortingPlacesIterator> >
 142           __seqs(__sd->_M_num_threads);
 143         for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
 144           __seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
 145                                        __sd->_M_temporary[__s]
 146                                        + (__sd->_M_starts[__s + 1]
 147                                           - __sd->_M_starts[__s]));
 148
 149         std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads);
 150
 151         // if not last thread
 152         if (__iam < __sd->_M_num_threads - 1)
 153           multiseq_partition(__seqs.begin(), __seqs.end(),
 154                              __sd->_M_starts[__iam + 1], __offsets.begin(),
 155                              __comp);
 156
 157         for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
 158           {
 159             // for each sequence
 160             if (__iam < (__sd->_M_num_threads - 1))
 161               __sd->_M_pieces[__iam][__seq]._M_end
 162                 = __offsets[__seq] - __seqs[__seq].first;
 163             else
 164               // very end of this sequence
 165               __sd->_M_pieces[__iam][__seq]._M_end =
 166                 __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
 167           }
 168
 169 #       pragma omp barrier
 170
 171         for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
 172           {
 173             // For each sequence.
 174             if (__iam > 0)
 175               __sd->_M_pieces[__iam][__seq]._M_begin =
 176                 __sd->_M_pieces[__iam - 1][__seq]._M_end;
 177             else
 178               // Absolute beginning.
 179               __sd->_M_pieces[__iam][__seq]._M_begin = 0;
 180           }
 181       }
 182   };
 183
 184   /** @brief Split by sampling. */
 185   template<typename _RAIter, typename _Compare,
 186            typename _SortingPlacesIterator>
 187     struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator>
 188     {
 189       void
 190       operator()(const _ThreadIndex __iam,
 191                  _PMWMSSortingData<_RAIter>* __sd,
 192                  _Compare& __comp,
 193                  const typename
 194                  std::iterator_traits<_RAIter>::difference_type
 195                  __num_samples) const
 196       {
 197         typedef std::iterator_traits<_RAIter> _TraitsType;
 198         typedef typename _TraitsType::value_type _ValueType;
 199         typedef typename _TraitsType::difference_type _DifferenceType;
 200
 201         __determine_samples(__sd, __num_samples);
 202
 203 #       pragma omp barrier
 204
 205 #       pragma omp single
 206         __gnu_sequential::sort(__sd->_M_samples,
 207                                __sd->_M_samples
 208                                + (__num_samples * __sd->_M_num_threads),
 209                                __comp);
 210
 211 #       pragma omp barrier
 212
 213         for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
 214           {
 215             // For each sequence.
 216             if (__num_samples * __iam > 0)
 217               __sd->_M_pieces[__iam][__s]._M_begin =
 218                 std::lower_bound(__sd->_M_temporary[__s],
 219                                  __sd->_M_temporary[__s]
 220                                  + (__sd->_M_starts[__s + 1]
 221                                     - __sd->_M_starts[__s]),
 222                                  __sd->_M_samples[__num_samples * __iam],
 223                                  __comp)
 224                 - __sd->_M_temporary[__s];
 225             else
 226               // Absolute beginning.
 227               __sd->_M_pieces[__iam][__s]._M_begin = 0;
 228
 229             if ((__num_samples * (__iam + 1)) <
 230                 (__num_samples * __sd->_M_num_threads))
 231               __sd->_M_pieces[__iam][__s]._M_end =
 232                 std::lower_bound(__sd->_M_temporary[__s],
 233                                  __sd->_M_temporary[__s]
 234                                  + (__sd->_M_starts[__s + 1]
 235                                     - __sd->_M_starts[__s]),
 236                                  __sd->_M_samples[__num_samples * (__iam + 1)],
 237                                  __comp)
 238                 - __sd->_M_temporary[__s];
 239             else
 240               // Absolute end.
 241               __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1]
 242                                                     - __sd->_M_starts[__s]);
 243           }
 244       }
 245   };
 246
 247   template<bool __stable, typename _RAIter, typename _Compare>
 248     struct __possibly_stable_sort
 249     { };
 250
 251   template<typename _RAIter, typename _Compare>
 252     struct __possibly_stable_sort<true, _RAIter, _Compare>
 253     {
 254       void operator()(const _RAIter& __begin,
 255                       const _RAIter& __end, _Compare& __comp) const
 256       { __gnu_sequential::stable_sort(__begin, __end, __comp); }
 257     };
 258
 259   template<typename _RAIter, typename _Compare>
 260     struct __possibly_stable_sort<false, _RAIter, _Compare>
 261     {
 262       void operator()(const _RAIter __begin,
 263                       const _RAIter __end, _Compare& __comp) const
 264       { __gnu_sequential::sort(__begin, __end, __comp); }
 265     };
 266
 267   template<bool __stable, typename _Seq_RAIter,
 268            typename _RAIter, typename _Compare,
 269            typename _DiffType>
 270     struct __possibly_stable_multiway_merge
 271     { };
 272
 273   template<typename _Seq_RAIter, typename _RAIter,
 274            typename _Compare, typename _DiffType>
 275     struct __possibly_stable_multiway_merge<true, _Seq_RAIter,
 276                                             _RAIter, _Compare, _DiffType>
 277     {
 278       void operator()(const _Seq_RAIter& __seqs_begin,
 279                       const _Seq_RAIter& __seqs_end,
 280                       const _RAIter& __target,
 281                       _Compare& __comp,
 282                       _DiffType __length_am) const
 283       { stable_multiway_merge(__seqs_begin, __seqs_end, __target,
 284                               __length_am, __comp, sequential_tag()); }
 285     };
 286
 287   template<typename _Seq_RAIter, typename _RAIter,
 288            typename _Compare, typename _DiffType>
 289     struct __possibly_stable_multiway_merge<false, _Seq_RAIter,
 290                                             _RAIter, _Compare, _DiffType>
 291     {
 292       void operator()(const _Seq_RAIter& __seqs_begin,
 293                       const _Seq_RAIter& __seqs_end,
 294                       const _RAIter& __target,
 295                       _Compare& __comp,
 296                       _DiffType __length_am) const
 297       { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
 298                        __comp, sequential_tag()); }
 299     };
 300
 301   /** @brief PMWMS code executed by each thread.
 302    *  @param __sd Pointer to algorithm data.
 303    *  @param __comp Comparator.
 304    */
 305   template<bool __stable, bool __exact, typename _RAIter,
 306            typename _Compare>
 307     void
 308     parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
 309                           _Compare& __comp)
 310     {
 311       typedef std::iterator_traits<_RAIter> _TraitsType;
 312       typedef typename _TraitsType::value_type _ValueType;
 313       typedef typename _TraitsType::difference_type _DifferenceType;
 314
 315       _ThreadIndex __iam = omp_get_thread_num();
 316
 317       // Length of this thread's chunk, before merging.
 318       _DifferenceType __length_local =
 319         __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
 320
 321       // Sort in temporary storage, leave space for sentinel.
 322
 323       typedef _ValueType* _SortingPlacesIterator;
 324
 325       __sd->_M_temporary[__iam] =
 326         static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
 327                                                 * (__length_local + 1)));
 328
 329       // Copy there.
 330       std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
 331                               __sd->_M_source + __sd->_M_starts[__iam]
 332                               + __length_local,
 333                               __sd->_M_temporary[__iam]);
 334
 335       __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
 336         (__sd->_M_temporary[__iam],
 337          __sd->_M_temporary[__iam] + __length_local,
 338          __comp);
 339
 340       // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
 341       // __sd->_M_temporary[__iam] + __length_local.
 342
 343       // No barrier here: Synchronization is done by the splitting routine.
 344
 345       _DifferenceType __num_samples =
 346         _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
 347       _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
 348         (__iam, __sd, __comp, __num_samples);
 349
 350       // Offset from __target __begin, __length after merging.
 351       _DifferenceType __offset = 0, __length_am = 0;
 352       for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
 353         {
 354           __length_am += (__sd->_M_pieces[__iam][__s]._M_end
 355                           - __sd->_M_pieces[__iam][__s]._M_begin);
 356           __offset += __sd->_M_pieces[__iam][__s]._M_begin;
 357         }
 358
 359       typedef std::vector<
 360         std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
 361         _SeqVector;
 362       _SeqVector __seqs(__sd->_M_num_threads);
 363
 364       for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
 365         {
 366           __seqs[__s] =
 367             std::make_pair(__sd->_M_temporary[__s]
 368                            + __sd->_M_pieces[__iam][__s]._M_begin,
 369                            __sd->_M_temporary[__s]
 370                            + __sd->_M_pieces[__iam][__s]._M_end);
 371         }
 372
 373       __possibly_stable_multiway_merge<
 374         __stable, typename _SeqVector::iterator,
 375         _RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(),
 376                                      __sd->_M_source + __offset, __comp,
 377                                      __length_am);
 378
 379 #     pragma omp barrier
 380
 381       for (_DifferenceType __i = 0; __i < __length_local; ++__i)
 382         __sd->_M_temporary[__iam][__i].~_ValueType();
 383       ::operator delete(__sd->_M_temporary[__iam]);
 384     }
 385
 386   /** @brief PMWMS main call.
 387    *  @param __begin Begin iterator of sequence.
 388    *  @param __end End iterator of sequence.
 389    *  @param __comp Comparator.
 390    *  @param __num_threads Number of threads to use.
 391    */
 392   template<bool __stable, bool __exact, typename _RAIter,
 393            typename _Compare>
 394     void
 395     parallel_sort_mwms(_RAIter __begin, _RAIter __end,
 396                        _Compare __comp,
 397                        _ThreadIndex __num_threads)
 398     {
 399       _GLIBCXX_CALL(__end - __begin)
 400
 401       typedef std::iterator_traits<_RAIter> _TraitsType;
 402       typedef typename _TraitsType::value_type _ValueType;
 403       typedef typename _TraitsType::difference_type _DifferenceType;
 404
 405       _DifferenceType __n = __end - __begin;
 406
 407       if (__n <= 1)
 408         return;
 409
 410       // at least one element per thread
 411       if (__num_threads > __n)
 412         __num_threads = static_cast<_ThreadIndex>(__n);
 413
 414       // shared variables
 415       _PMWMSSortingData<_RAIter> __sd;
 416       _DifferenceType* __starts;
 417       _DifferenceType __size;
 418
 419 #     pragma omp parallel num_threads(__num_threads)
 420       {
 421         __num_threads = omp_get_num_threads(); //no more threads than requested
 422
 423 #       pragma omp single
 424         {
 425           __sd._M_num_threads = __num_threads;
 426           __sd._M_source = __begin;
 427
 428           __sd._M_temporary = new _ValueType*[__num_threads];
 429
 430           if (!__exact)
 431             {
 432               __size =
 433                 (_Settings::get().sort_mwms_oversampling * __num_threads - 1)
 434                 * __num_threads;
 435               __sd._M_samples = static_cast<_ValueType*>
 436                 (::operator new(__size * sizeof(_ValueType)));
 437             }
 438           else
 439             __sd._M_samples = 0;
 440
 441           __sd._M_offsets = new _DifferenceType[__num_threads - 1];
 442           __sd._M_pieces
 443             = new std::vector<_Piece<_DifferenceType> >[__num_threads];
 444           for (_ThreadIndex __s = 0; __s < __num_threads; ++__s)
 445             __sd._M_pieces[__s].resize(__num_threads);
 446           __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
 447
 448           _DifferenceType __chunk_length = __n / __num_threads;
 449           _DifferenceType __split = __n % __num_threads;
 450           _DifferenceType __pos = 0;
 451           for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
 452             {
 453               __starts[__i] = __pos;
 454               __pos += ((__i < __split)
 455                         ? (__chunk_length + 1) : __chunk_length);
 456             }
 457           __starts[__num_threads] = __pos;
 458         } //single
 459
 460         // Now sort in parallel.
 461         parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
 462       } //parallel
 463
 464       delete[] __starts;
 465       delete[] __sd._M_temporary;
 466
 467       if (!__exact)
 468         {
 469           for (_DifferenceType __i = 0; __i < __size; ++__i)
 470             __sd._M_samples[__i].~_ValueType();
 471           ::operator delete(__sd._M_samples);
 472         }
 473
 474       delete[] __sd._M_offsets;
 475       delete[] __sd._M_pieces;
 476     }
 477
 478 } //namespace __gnu_parallel
 479
 480 #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */