3 // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 2, or (at your option) any later
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with this library; see the file COPYING. If not, write to
18 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
19 // MA 02111-1307, USA.
21 // As a special exception, you may use this file as part of a free
22 // software library without restriction. Specifically, if other files
23 // instantiate templates or use macros or inline functions from this
24 // file, or you compile this file and link it with other files to
25 // produce an executable, this file does not by itself cause the
26 // resulting executable to be covered by the GNU General Public
27 // License. This exception does not however invalidate any other
28 // reasons why the executable file might be covered by the GNU General
31 /** @file parallel/multiseq_selection.h
32 * @brief Functions to find elements of a certain global rank in
33 * multiple sorted sequences. Also serves for splitting such
36 * The algorithm description can be found in
38 * P. J. Varman, S. D. Scheufler, B. R. Iyer, and G. R. Ricard.
39 * Merging Multiple Lists on Hierarchical-Memory Multiprocessors.
40 * Journal of Parallel and Distributed Computing, 12(2):171–177, 1991.
42 * This file is a GNU parallel extension to the Standard C++ Library.
45 // Written by Johannes Singler.
47 #ifndef _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H
48 #define _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H 1
53 #include <bits/stl_algo.h>
55 #include <parallel/sort.h>
57 namespace __gnu_parallel
59 /** @brief Compare a pair of types lexicographically, ascending. */
60 template<typename T1
, typename T2
, typename Comparator
>
62 : public std::binary_function
<std::pair
<T1
, T2
>, std::pair
<T1
, T2
>, bool>
68 lexicographic(Comparator
& _comp
) : comp(_comp
) { }
71 operator()(const std::pair
<T1
, T2
>& p1
,
72 const std::pair
<T1
, T2
>& p2
) const
74 if (comp(p1
.first
, p2
.first
))
77 if (comp(p2
.first
, p1
.first
))
81 return p1
.second
< p2
.second
;
85 /** @brief Compare a pair of types lexicographically, descending. */
86 template<typename T1
, typename T2
, typename Comparator
>
87 class lexicographic_reverse
: public std::binary_function
<T1
, T2
, bool>
93 lexicographic_reverse(Comparator
& _comp
) : comp(_comp
) { }
96 operator()(const std::pair
<T1
, T2
>& p1
,
97 const std::pair
<T1
, T2
>& p2
) const
99 if (comp(p2
.first
, p1
.first
))
102 if (comp(p1
.first
, p2
.first
))
106 return p2
.second
< p1
.second
;
111 * @brief Splits several sorted sequences at a certain global rank,
112 * resulting in a splitting point for each sequence.
113 * The sequences are passed via a sequence of random-access
114 * iterator pairs, none of the sequences may be empty. If there
115 * are several equal elements across the split, the ones on the
116 * left side will be chosen from sequences with smaller number.
117 * @param begin_seqs Begin of the sequence of iterator pairs.
118 * @param end_seqs End of the sequence of iterator pairs.
119 * @param rank The global rank to partition at.
120 * @param begin_offsets A random-access sequence begin where the
121 * result will be stored in. Each element of the sequence is an
122 * iterator that points to the first element on the greater part of
123 * the respective sequence.
124 * @param comp The ordering functor, defaults to std::less<T>.
126 template<typename RanSeqs
, typename RankType
, typename RankIterator
,
129 multiseq_partition(RanSeqs begin_seqs
, RanSeqs end_seqs
,
131 RankIterator begin_offsets
,
132 Comparator comp
= std::less
<
133 typename
std::iterator_traits
<typename
134 std::iterator_traits
<RanSeqs
>::value_type::
135 first_type
>::value_type
>()) // std::less<T>
137 _GLIBCXX_CALL(end_seqs
- begin_seqs
)
139 typedef typename
std::iterator_traits
<RanSeqs
>::value_type::first_type
141 typedef typename
std::iterator_traits
<It
>::difference_type
143 typedef typename
std::iterator_traits
<It
>::value_type value_type
;
145 lexicographic
<value_type
, int, Comparator
> lcomp(comp
);
146 lexicographic_reverse
<value_type
, int, Comparator
> lrcomp(comp
);
148 // Number of sequences, number of elements in total (possibly
149 // including padding).
150 difference_type m
= std::distance(begin_seqs
, end_seqs
), N
= 0,
153 for (int i
= 0; i
< m
; i
++)
155 N
+= std::distance(begin_seqs
[i
].first
, begin_seqs
[i
].second
);
156 _GLIBCXX_PARALLEL_ASSERT(
157 std::distance(begin_seqs
[i
].first
, begin_seqs
[i
].second
) > 0);
162 for (int i
= 0; i
< m
; i
++)
163 begin_offsets
[i
] = begin_seqs
[i
].second
; // Very end.
168 _GLIBCXX_PARALLEL_ASSERT(m
!= 0);
169 _GLIBCXX_PARALLEL_ASSERT(N
!= 0);
170 _GLIBCXX_PARALLEL_ASSERT(rank
>= 0);
171 _GLIBCXX_PARALLEL_ASSERT(rank
< N
);
173 difference_type
* ns
= new difference_type
[m
];
174 difference_type
* a
= new difference_type
[m
];
175 difference_type
* b
= new difference_type
[m
];
178 ns
[0] = std::distance(begin_seqs
[0].first
, begin_seqs
[0].second
);
180 for (int i
= 0; i
< m
; i
++)
182 ns
[i
] = std::distance(begin_seqs
[i
].first
, begin_seqs
[i
].second
);
183 nmax
= std::max(nmax
, ns
[i
]);
188 // Pad all lists to this length, at least as long as any ns[i],
189 // equality iff nmax = 2^k - 1.
192 // From now on, including padding.
195 for (int i
= 0; i
< m
; i
++)
203 // 0 <= a[i] <= ns[i], 0 <= b[i] <= l
205 #define S(i) (begin_seqs[i].first)
207 // Initial partition.
208 std::vector
<std::pair
<value_type
, int> > sample
;
210 for (int i
= 0; i
< m
; i
++)
211 if (n
< ns
[i
]) //sequence long enough
212 sample
.push_back(std::make_pair(S(i
)[n
], i
));
213 __gnu_sequential::sort(sample
.begin(), sample
.end(), lcomp
);
215 for (int i
= 0; i
< m
; i
++) //conceptual infinity
216 if (n
>= ns
[i
]) //sequence too short, conceptual infinity
217 sample
.push_back(std::make_pair(S(i
)[0] /*dummy element*/, i
));
219 difference_type localrank
= rank
* m
/ N
;
222 for (j
= 0; j
< localrank
&& ((n
+ 1) <= ns
[sample
[j
].second
]); ++j
)
223 a
[sample
[j
].second
] += n
+ 1;
225 b
[sample
[j
].second
] -= n
+ 1;
227 // Further refinement.
232 int lmax_seq
= -1; // to avoid warning
233 const value_type
* lmax
= NULL
; // impossible to avoid the warning?
234 for (int i
= 0; i
< m
; i
++)
240 lmax
= &(S(i
)[a
[i
] - 1]);
245 // Max, favor rear sequences.
246 if (!comp(S(i
)[a
[i
] - 1], *lmax
))
248 lmax
= &(S(i
)[a
[i
] - 1]);
256 for (i
= 0; i
< m
; i
++)
258 difference_type middle
= (b
[i
] + a
[i
]) / 2;
259 if (lmax
&& middle
< ns
[i
] &&
260 lcomp(std::make_pair(S(i
)[middle
], i
),
261 std::make_pair(*lmax
, lmax_seq
)))
262 a
[i
] = std::min(a
[i
] + n
+ 1, ns
[i
]);
267 difference_type leftsize
= 0, total
= 0;
268 for (int i
= 0; i
< m
; i
++)
270 leftsize
+= a
[i
] / (n
+ 1);
271 total
+= l
/ (n
+ 1);
274 difference_type skew
= static_cast<difference_type
>
275 (static_cast<uint64
>(total
) * rank
/ N
- leftsize
);
279 // Move to the left, find smallest.
280 std::priority_queue
<std::pair
<value_type
, int>,
281 std::vector
<std::pair
<value_type
, int> >,
282 lexicographic_reverse
<value_type
, int, Comparator
> >
285 for (int i
= 0; i
< m
; i
++)
287 pq
.push(std::make_pair(S(i
)[b
[i
]], i
));
289 for (; skew
!= 0 && !pq
.empty(); --skew
)
291 int source
= pq
.top().second
;
294 a
[source
] = std::min(a
[source
] + n
+ 1, ns
[source
]);
297 if (b
[source
] < ns
[source
])
298 pq
.push(std::make_pair(S(source
)[b
[source
]], source
));
303 // Move to the right, find greatest.
304 std::priority_queue
<std::pair
<value_type
, int>,
305 std::vector
<std::pair
<value_type
, int> >,
306 lexicographic
<value_type
, int, Comparator
> > pq(lcomp
);
308 for (int i
= 0; i
< m
; i
++)
310 pq
.push(std::make_pair(S(i
)[a
[i
] - 1], i
));
312 for (; skew
!= 0; ++skew
)
314 int source
= pq
.top().second
;
321 pq
.push(std::make_pair(S(source
)[a
[source
] - 1], source
));
327 // a[i] == b[i] in most cases, except when a[i] has been clamped
328 // because of having reached the boundary
330 // Now return the result, calculate the offset.
332 // Compare the keys on both edges of the border.
334 // Maximum of left edge, minimum of right edge.
335 value_type
* maxleft
= NULL
;
336 value_type
* minright
= NULL
;
337 for (int i
= 0; i
< m
; i
++)
342 maxleft
= &(S(i
)[a
[i
] - 1]);
345 // Max, favor rear sequences.
346 if (!comp(S(i
)[a
[i
] - 1], *maxleft
))
347 maxleft
= &(S(i
)[a
[i
] - 1]);
353 minright
= &(S(i
)[b
[i
]]);
356 // Min, favor fore sequences.
357 if (comp(S(i
)[b
[i
]], *minright
))
358 minright
= &(S(i
)[b
[i
]]);
364 for (int i
= 0; i
< m
; i
++)
365 begin_offsets
[i
] = S(i
) + a
[i
];
374 * @brief Selects the element at a certain global rank from several
377 * The sequences are passed via a sequence of random-access
378 * iterator pairs, none of the sequences may be empty.
379 * @param begin_seqs Begin of the sequence of iterator pairs.
380 * @param end_seqs End of the sequence of iterator pairs.
381 * @param rank The global rank to partition at.
382 * @param offset The rank of the selected element in the global
383 * subsequence of elements equal to the selected element. If the
384 * selected element is unique, this number is 0.
385 * @param comp The ordering functor, defaults to std::less.
387 template<typename T
, typename RanSeqs
, typename RankType
,
390 multiseq_selection(RanSeqs begin_seqs
, RanSeqs end_seqs
, RankType rank
,
391 RankType
& offset
, Comparator comp
= std::less
<T
>())
393 _GLIBCXX_CALL(end_seqs
- begin_seqs
)
395 typedef typename
std::iterator_traits
<RanSeqs
>::value_type::first_type
397 typedef typename
std::iterator_traits
<It
>::difference_type
400 lexicographic
<T
, int, Comparator
> lcomp(comp
);
401 lexicographic_reverse
<T
, int, Comparator
> lrcomp(comp
);
403 // Number of sequences, number of elements in total (possibly
404 // including padding).
405 difference_type m
= std::distance(begin_seqs
, end_seqs
);
406 difference_type N
= 0;
407 difference_type nmax
, n
, r
;
409 for (int i
= 0; i
< m
; i
++)
410 N
+= std::distance(begin_seqs
[i
].first
, begin_seqs
[i
].second
);
412 if (m
== 0 || N
== 0 || rank
< 0 || rank
>= N
)
414 // Result undefined when there is no data or rank is outside bounds.
415 throw std::exception();
419 difference_type
* ns
= new difference_type
[m
];
420 difference_type
* a
= new difference_type
[m
];
421 difference_type
* b
= new difference_type
[m
];
424 ns
[0] = std::distance(begin_seqs
[0].first
, begin_seqs
[0].second
);
426 for (int i
= 0; i
< m
; ++i
)
428 ns
[i
] = std::distance(begin_seqs
[i
].first
, begin_seqs
[i
].second
);
429 nmax
= std::max(nmax
, ns
[i
]);
434 // Pad all lists to this length, at least as long as any ns[i],
435 // equality iff nmax = 2^k - 1
438 // From now on, including padding.
441 for (int i
= 0; i
< m
; ++i
)
449 // 0 <= a[i] <= ns[i], 0 <= b[i] <= l
451 #define S(i) (begin_seqs[i].first)
453 // Initial partition.
454 std::vector
<std::pair
<T
, int> > sample
;
456 for (int i
= 0; i
< m
; i
++)
458 sample
.push_back(std::make_pair(S(i
)[n
], i
));
459 __gnu_sequential::sort(sample
.begin(), sample
.end(),
460 lcomp
, sequential_tag());
462 // Conceptual infinity.
463 for (int i
= 0; i
< m
; i
++)
465 sample
.push_back(std::make_pair(S(i
)[0] /*dummy element*/, i
));
467 difference_type localrank
= rank
* m
/ N
;
470 for (j
= 0; j
< localrank
&& ((n
+ 1) <= ns
[sample
[j
].second
]); ++j
)
471 a
[sample
[j
].second
] += n
+ 1;
473 b
[sample
[j
].second
] -= n
+ 1;
475 // Further refinement.
480 const T
* lmax
= NULL
;
481 for (int i
= 0; i
< m
; ++i
)
486 lmax
= &(S(i
)[a
[i
] - 1]);
489 if (comp(*lmax
, S(i
)[a
[i
] - 1])) //max
490 lmax
= &(S(i
)[a
[i
] - 1]);
496 for (i
= 0; i
< m
; i
++)
498 difference_type middle
= (b
[i
] + a
[i
]) / 2;
499 if (lmax
&& middle
< ns
[i
] && comp(S(i
)[middle
], *lmax
))
500 a
[i
] = std::min(a
[i
] + n
+ 1, ns
[i
]);
505 difference_type leftsize
= 0, total
= 0;
506 for (int i
= 0; i
< m
; ++i
)
508 leftsize
+= a
[i
] / (n
+ 1);
509 total
+= l
/ (n
+ 1);
512 difference_type skew
= ((unsigned long long)total
* rank
/ N
517 // Move to the left, find smallest.
518 std::priority_queue
<std::pair
<T
, int>,
519 std::vector
<std::pair
<T
, int> >,
520 lexicographic_reverse
<T
, int, Comparator
> > pq(lrcomp
);
522 for (int i
= 0; i
< m
; ++i
)
524 pq
.push(std::make_pair(S(i
)[b
[i
]], i
));
526 for (; skew
!= 0 && !pq
.empty(); --skew
)
528 int source
= pq
.top().second
;
531 a
[source
] = std::min(a
[source
] + n
+ 1, ns
[source
]);
534 if (b
[source
] < ns
[source
])
535 pq
.push(std::make_pair(S(source
)[b
[source
]], source
));
540 // Move to the right, find greatest.
541 std::priority_queue
<std::pair
<T
, int>,
542 std::vector
<std::pair
<T
, int> >,
543 lexicographic
<T
, int, Comparator
> > pq(lcomp
);
545 for (int i
= 0; i
< m
; ++i
)
547 pq
.push(std::make_pair(S(i
)[a
[i
] - 1], i
));
549 for (; skew
!= 0; ++skew
)
551 int source
= pq
.top().second
;
558 pq
.push(std::make_pair(S(source
)[a
[source
] - 1], source
));
564 // a[i] == b[i] in most cases, except when a[i] has been clamped
565 // because of having reached the boundary
567 // Now return the result, calculate the offset.
569 // Compare the keys on both edges of the border.
571 // Maximum of left edge, minimum of right edge.
572 bool maxleftset
= false, minrightset
= false;
574 // Impossible to avoid the warning?
576 for (int i
= 0; i
< m
; ++i
)
582 maxleft
= S(i
)[a
[i
] - 1];
588 if (comp(maxleft
, S(i
)[a
[i
] - 1]))
589 maxleft
= S(i
)[a
[i
] - 1];
596 minright
= S(i
)[b
[i
]];
602 if (comp(S(i
)[b
[i
]], minright
))
603 minright
= S(i
)[b
[i
]];
608 // Minright is the splitter, in any case.
610 if (!maxleftset
|| comp(minright
, maxleft
))
612 // Good luck, everything is split unambiguously.
617 // We have to calculate an offset.
620 for (int i
= 0; i
< m
; ++i
)
622 difference_type lb
= std::lower_bound(S(i
), S(i
) + ns
[i
],
639 #endif /* _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H */