1 | /*! |
---|
2 | * \file |
---|
3 | * \brief Sorting functions |
---|
4 | * \author Tony Ottosson, Mark Dobossy and Adam Piatyszek |
---|
5 | * |
---|
6 | * ------------------------------------------------------------------------- |
---|
7 | * |
---|
8 | * IT++ - C++ library of mathematical, signal processing, speech processing, |
---|
9 | * and communications classes and functions |
---|
10 | * |
---|
11 | * Copyright (C) 1995-2007 (see AUTHORS file for a list of contributors) |
---|
12 | * |
---|
13 | * This program is free software; you can redistribute it and/or modify |
---|
14 | * it under the terms of the GNU General Public License as published by |
---|
15 | * the Free Software Foundation; either version 2 of the License, or |
---|
16 | * (at your option) any later version. |
---|
17 | * |
---|
18 | * This program is distributed in the hope that it will be useful, |
---|
19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
21 | * GNU General Public License for more details. |
---|
22 | * |
---|
23 | * You should have received a copy of the GNU General Public License |
---|
24 | * along with this program; if not, write to the Free Software |
---|
25 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
---|
26 | * |
---|
27 | * ------------------------------------------------------------------------- |
---|
28 | */ |
---|
29 | |
---|
30 | #ifndef SORT_H |
---|
31 | #define SORT_H |
---|
32 | |
---|
33 | #include <itpp/base/vec.h> |
---|
34 | #include <itpp/base/converters.h> |
---|
35 | #include <itpp/base/math/log_exp.h> |
---|
36 | |
---|
37 | |
---|
38 | namespace itpp { |
---|
39 | |
---|
40 | /*! |
---|
41 | * \brief Soring algoritms that can be used in a \a Sort class |
---|
42 | * |
---|
43 | * - Introsort (the default and the fastest method in most cases) |
---|
44 | * - Quicksort |
---|
45 | * - Heapsort |
---|
46 | * - Inertion Sort (suitable for very short vectors) |
---|
47 | */ |
---|
48 | enum SORTING_METHOD { INTROSORT = 0, QUICKSORT = 1, HEAPSORT = 2, |
---|
49 | INSERTSORT = 3 }; |
---|
50 | |
---|
51 | /*! |
---|
52 | * \brief Class for sorting of vectors |
---|
53 | * |
---|
54 | * A class which takes a vector, and sorts its values descending. There |
---|
55 | * are two types of sort: a normal sort (accessed via the Sort::sort() |
---|
56 | * function) which sorts the vector passed in the argument, and an index |
---|
57 | * sort (accessed via the Sort::sort_index() function) which leaves the |
---|
58 | * passed vector intact, but returns an index vector describing the sorted |
---|
59 | * order. |
---|
60 | * |
---|
61 | * The Sort class has four sorting methods implemented: |
---|
62 | * - Introsort [1,2]: It is a sorting algorithm developed by David Musser. |
---|
63 | * I starts as a quicksort, but switches to a heapsort in cases where |
---|
64 | * the recursion becomes too deep. Additionally, when sub vectors become |
---|
65 | * smaller than 16 elements, it switches to an insertion sort. Introsort |
---|
66 | * has a worst-case of \f$\Theta(n\log n)\f$ comparisons, bu has the |
---|
67 | * efficiency of the quick sort algorithm in cases where the data is |
---|
68 | * well conditioned for quicksort. |
---|
69 | * - Quicksort [3]: It is a comparison sorting algorithm that has an |
---|
70 | * average complexity of \f$\Theta(n\log n)\f$ comparisons. For most |
---|
71 | * data sets, the quicksort will be significantly more efficient than |
---|
72 | * this average. However for data sets not well suited to it, quicksort |
---|
73 | * may require as many as \f$\Theta(n^2)\f$ comparisons. Example of such |
---|
74 | * ill-suited data sets are those which are nearly in order, and data |
---|
75 | * sets with multiple elements of the same value. |
---|
76 | * - Heapsort [4]: It is a comparison sorting algorithm. While it is |
---|
77 | * usually seen to be slower than quicksort routines, its worst-case |
---|
78 | * requires only \f$\Theta(n\log n)\f$ comparisons. This makes it an |
---|
79 | * ideal quicksort replacement for data sets that that are |
---|
80 | * ill-conditioned for quicksorting. |
---|
81 | * - Insertion sort [5]: An insertion sort is a simple comparison sort, |
---|
82 | * which is widely considered to be one of the most efficient algorithms |
---|
83 | * for very small data sets (10-20 elements). |
---|
84 | * http://en.wikipedia.org/wiki/Insertion_sort |
---|
85 | * |
---|
86 | * References: |
---|
87 | * - [1] http://en.wikipedia.org/wiki/Introsort |
---|
88 | * - [2] http://www.cs.rpi.edu/~musser/gp/introsort.ps |
---|
89 | * - [3] http://en.wikipedia.org/wiki/Quicksort |
---|
90 | * - [4] http://en.wikipedia.org/wiki/Heapsort |
---|
91 | * - [5] http://en.wikipedia.org/wiki/Insertion_sort |
---|
92 | * |
---|
93 | * \author Tony Ottosson (Quicksort), Mark Dobossy (Introsort, Heapsort |
---|
94 | * and Insertion Sort) and Adam Piatyszek (Sort class design, code |
---|
95 | * clean-up) |
---|
96 | */ |
---|
97 | template<class T> |
---|
98 | class Sort { |
---|
99 | public: |
---|
100 | //! Constructor that sets Intro Sort method by default |
---|
101 | Sort(SORTING_METHOD method = INTROSORT): sort_method(method) {} |
---|
102 | |
---|
103 | //! Set sorting method |
---|
104 | void set_method(SORTING_METHOD method) { sort_method = method; } |
---|
105 | |
---|
106 | //! Get current sorting method |
---|
107 | SORTING_METHOD get_method() const { return sort_method; } |
---|
108 | |
---|
109 | /*! |
---|
110 | * \brief Sorting function of a subset of a vector \a data |
---|
111 | * |
---|
112 | * \param low Start index of a subvector to be sorted |
---|
113 | * \param high End index of a subvector to be sorted |
---|
114 | * \param data Data vector, in which a part of it is to be sorted |
---|
115 | */ |
---|
116 | void sort(int low, int high, Vec<T> &data); |
---|
117 | |
---|
118 | /*! |
---|
119 | * \brief Sorting function that returns a sorted index vector |
---|
120 | * |
---|
121 | * \param low Start index of a subvector to be sorted |
---|
122 | * \param high End index of a subvector to be sorted |
---|
123 | * \param data Data vector, in which a part of it is to be sorted |
---|
124 | */ |
---|
125 | ivec sort_index(int low, int high, const Vec<T> &data); |
---|
126 | |
---|
127 | /*! |
---|
128 | * \brief Introsort function of a subset of a vector \c data |
---|
129 | * |
---|
130 | * \param low Start index of a subvector to be sorted |
---|
131 | * \param high End index of a subvector to be sorted |
---|
132 | * \param max_depth Maximum recursion depth before switching to heap sort |
---|
133 | * recommended value: log2 of the length of the data vector |
---|
134 | * \param data Data vector, in which a part of it is to be sorted |
---|
135 | * |
---|
136 | * \note An introsort is not a stable sort (i.e. it may not maintain |
---|
137 | * the relative order of elements with equal value.) |
---|
138 | * \note This function uses recurence. |
---|
139 | */ |
---|
140 | void intro_sort(int low, int high, int max_depth, Vec<T> &data); |
---|
141 | |
---|
142 | /*! |
---|
143 | * \brief Introsort function, which returns a sorted index vector |
---|
144 | * |
---|
145 | * \param low Start index of a subvector to be sorted |
---|
146 | * \param high End index of a subvector to be sorted |
---|
147 | * \param max_depth Maximum recursion depth before switching to heap sort |
---|
148 | * recommended value: log2 of the length of the data vector |
---|
149 | * \param data Data vector, in which a part of it is to be sorted |
---|
150 | * |
---|
151 | * \note An Introsort is not a stable sort (i.e. it may not maintain |
---|
152 | * the relative order of elements with equal value.) |
---|
153 | * \note This function uses recurence. |
---|
154 | */ |
---|
155 | ivec intro_sort_index(int low, int high, int max_depth, |
---|
156 | const Vec<T> &data); |
---|
157 | |
---|
158 | private: |
---|
159 | SORTING_METHOD sort_method; |
---|
160 | |
---|
161 | void IntroSort(int low, int high, int max_depth, T data[]); |
---|
162 | void IntroSort_Index(int low, int high, int max_depth, int indexlist[], |
---|
163 | const T data[]); |
---|
164 | |
---|
165 | void QuickSort(int low, int high, T data[]); |
---|
166 | void QuickSort_Index(int low, int high, int indexlist[], const T data[]); |
---|
167 | |
---|
168 | void HeapSort(int low, int high, T data[]); |
---|
169 | void HeapSort_Index(int low, int high, int indexlist[], const T data[]); |
---|
170 | |
---|
171 | void InsertSort(int low, int high, T data[]); |
---|
172 | void InsertSort_Index(int low, int high, int indexlist[], const T data[]); |
---|
173 | }; |
---|
174 | |
---|
175 | |
---|
176 | /*! |
---|
177 | * \relates Vec |
---|
178 | * \brief Sort the \a data vector in increasing order |
---|
179 | * |
---|
180 | * \param data Vector to be sorted |
---|
181 | * \param method Sorting method: INTROSORT (default), QUICKSORT, HEAPSORT |
---|
182 | * or INSERTSORT |
---|
183 | */ |
---|
184 | template<class T> |
---|
185 | void sort(Vec<T> &data, SORTING_METHOD method = INTROSORT) |
---|
186 | { |
---|
187 | Sort<T> s(method); |
---|
188 | s.sort(0, data.size()-1, data); |
---|
189 | } |
---|
190 | |
---|
191 | /*! |
---|
192 | * \relates Vec |
---|
193 | * \brief Return an index vector corresponding to a sorted vector \a data |
---|
194 | * in increasing order |
---|
195 | * |
---|
196 | * \param data Vector for which to return a sorted index vector |
---|
197 | * \param method Sorting method: INTROSORT (default), QUICKSORT, HEAPSORT |
---|
198 | * or INSERTSORT |
---|
199 | */ |
---|
200 | template<class T> |
---|
201 | ivec sort_index(const Vec<T> &data, SORTING_METHOD method = INTROSORT) |
---|
202 | { |
---|
203 | Sort<T> s(method); |
---|
204 | return s.sort_index(0, data.size()-1, data); |
---|
205 | } |
---|
206 | |
---|
207 | |
---|
208 | // ---------------------------------------------------------------------- |
---|
209 | // Public functions for various sorting methods |
---|
210 | // ---------------------------------------------------------------------- |
---|
211 | |
---|
212 | template<class T> |
---|
213 | void Sort<T>::sort(int low, int high, Vec<T> &data) |
---|
214 | { |
---|
215 | int N = data.size(); |
---|
216 | |
---|
217 | it_assert((low >= 0) && (high > low) && (high < N), "Sort::sort(): " |
---|
218 | "low or high out of bounds"); |
---|
219 | |
---|
220 | switch (sort_method) { |
---|
221 | case INTROSORT: |
---|
222 | IntroSort(low, high, levels2bits(N), data._data()); |
---|
223 | break; |
---|
224 | case QUICKSORT: |
---|
225 | QuickSort(low, high, data._data()); |
---|
226 | break; |
---|
227 | case HEAPSORT: |
---|
228 | HeapSort(low, high, data._data()); |
---|
229 | break; |
---|
230 | case INSERTSORT: |
---|
231 | InsertSort(low, high, data._data()); |
---|
232 | break; |
---|
233 | default: |
---|
234 | it_error("Sort<T>::sort(): Unknown sorting method"); |
---|
235 | } |
---|
236 | } |
---|
237 | |
---|
238 | |
---|
239 | template<class T> |
---|
240 | ivec Sort<T>::sort_index(int low, int high, const Vec<T> &data) |
---|
241 | { |
---|
242 | int N = data.size(); |
---|
243 | |
---|
244 | it_assert((low >= 0) && (high > low) && (high < N), "Sort::sort(): " |
---|
245 | "low or high out of bounds"); |
---|
246 | |
---|
247 | ivec indexlist(N); |
---|
248 | for(int i = 0; i < N; ++i) { |
---|
249 | indexlist(i) = i; |
---|
250 | } |
---|
251 | |
---|
252 | switch (sort_method) { |
---|
253 | case INTROSORT: |
---|
254 | IntroSort_Index(low, high, levels2bits(N), indexlist._data(), |
---|
255 | data._data()); |
---|
256 | break; |
---|
257 | case QUICKSORT: |
---|
258 | QuickSort_Index(low, high, indexlist._data(), data._data()); |
---|
259 | break; |
---|
260 | case HEAPSORT: |
---|
261 | HeapSort_Index(low, high, indexlist._data(), data._data()); |
---|
262 | break; |
---|
263 | case INSERTSORT: |
---|
264 | InsertSort_Index(low, high, indexlist._data(), data._data()); |
---|
265 | break; |
---|
266 | default: |
---|
267 | it_error("Sort<T>::sort_index(): Unknown sorting method"); |
---|
268 | } |
---|
269 | |
---|
270 | return indexlist; |
---|
271 | } |
---|
272 | |
---|
273 | |
---|
274 | // INTRO SORT |
---|
275 | template<class T> |
---|
276 | void Sort<T>::intro_sort(int low, int high, int max_depth, Vec<T> &data) |
---|
277 | { |
---|
278 | it_assert((low >= 0) && (high > low) && (high < data.size()), |
---|
279 | "Sort::sort(): low or high out of bounds"); |
---|
280 | IntroSort(low, high, max_depth, data._data()); |
---|
281 | } |
---|
282 | |
---|
283 | // INTRO SORT INDEX |
---|
284 | template<class T> |
---|
285 | ivec Sort<T>::intro_sort_index(int low, int high, int max_depth, |
---|
286 | const Vec<T> &data) |
---|
287 | { |
---|
288 | int N = data.size(); |
---|
289 | it_assert((low >= 0) && (high > low) && (high < N), |
---|
290 | "Sort::sort(): low or high out of bounds"); |
---|
291 | |
---|
292 | ivec indexlist(N); |
---|
293 | for (int i = 0; i < N; ++i) { |
---|
294 | indexlist(i) = i; |
---|
295 | } |
---|
296 | |
---|
297 | IntroSort_Index(low, high, max_depth, indexlist._data(), data._data()); |
---|
298 | |
---|
299 | return indexlist; |
---|
300 | } |
---|
301 | |
---|
302 | |
---|
303 | // ---------------------------------------------------------------------- |
---|
304 | // Private functions for sorting methods |
---|
305 | // ---------------------------------------------------------------------- |
---|
306 | |
---|
307 | template<class T> |
---|
308 | void Sort<T>::IntroSort(int low, int high, int max_depth, T data[]) |
---|
309 | { |
---|
310 | if (high-low > 16) { |
---|
311 | max_depth--; |
---|
312 | if (max_depth == 0) { |
---|
313 | HeapSort(low, high, data); |
---|
314 | return; |
---|
315 | } |
---|
316 | |
---|
317 | if (high>low) { |
---|
318 | T a = data[low]; |
---|
319 | int plow = low; |
---|
320 | int phigh = high; |
---|
321 | T test = data[phigh]; |
---|
322 | while (plow < phigh) { |
---|
323 | if (test < a) { |
---|
324 | data[plow] = test; |
---|
325 | plow++; |
---|
326 | test = data[plow]; |
---|
327 | } else { |
---|
328 | data[phigh] = test; |
---|
329 | phigh--; |
---|
330 | test = data[phigh]; |
---|
331 | } |
---|
332 | } |
---|
333 | data[plow] = a; |
---|
334 | IntroSort(low,plow-1,max_depth,data); |
---|
335 | IntroSort(plow+1,high,max_depth,data); |
---|
336 | return; |
---|
337 | } |
---|
338 | } else { |
---|
339 | InsertSort(low, high, data); |
---|
340 | return; |
---|
341 | } |
---|
342 | } |
---|
343 | |
---|
344 | template<class T> |
---|
345 | void Sort<T>::IntroSort_Index(int low, int high, int max_depth, |
---|
346 | int indexlist[], const T data[]) |
---|
347 | { |
---|
348 | if (high-low > 16) { |
---|
349 | max_depth--; |
---|
350 | if (max_depth == 0) { |
---|
351 | HeapSort_Index(low, high, indexlist, data); |
---|
352 | return; |
---|
353 | } |
---|
354 | |
---|
355 | if (high > low) { |
---|
356 | int aindex = indexlist[low]; |
---|
357 | T a = data[aindex]; |
---|
358 | int plow = low; |
---|
359 | int phigh = high; |
---|
360 | int testindex = indexlist[phigh]; |
---|
361 | T test = data[testindex]; |
---|
362 | while (plow < phigh) { |
---|
363 | if (test < a) { |
---|
364 | indexlist[plow] = testindex; |
---|
365 | plow++; |
---|
366 | testindex = indexlist[plow]; |
---|
367 | test = data[testindex]; |
---|
368 | } else { |
---|
369 | indexlist[phigh] = testindex; |
---|
370 | phigh--; |
---|
371 | testindex = indexlist[phigh]; |
---|
372 | test = data[testindex]; |
---|
373 | } |
---|
374 | } |
---|
375 | indexlist[plow] = aindex; |
---|
376 | IntroSort_Index(low,plow-1,max_depth,indexlist,data); |
---|
377 | IntroSort_Index(plow+1,high,max_depth,indexlist,data); |
---|
378 | } |
---|
379 | } else { |
---|
380 | InsertSort_Index(low, high, indexlist, data); |
---|
381 | return; |
---|
382 | } |
---|
383 | } |
---|
384 | |
---|
385 | template <class T> |
---|
386 | void Sort<T>::QuickSort(int low, int high, T data[]) |
---|
387 | { |
---|
388 | if (high > low) { |
---|
389 | T a = data[low]; |
---|
390 | int plow = low; |
---|
391 | int phigh = high; |
---|
392 | T test = data[phigh]; |
---|
393 | while (plow < phigh) { |
---|
394 | if (test < a) { |
---|
395 | data[plow] = test; |
---|
396 | plow++; |
---|
397 | test = data[plow]; |
---|
398 | } else { |
---|
399 | data[phigh] = test; |
---|
400 | phigh--; |
---|
401 | test = data[phigh]; |
---|
402 | } |
---|
403 | } |
---|
404 | data[plow] = a; |
---|
405 | QuickSort(low,plow-1,data); |
---|
406 | QuickSort(plow+1,high,data); |
---|
407 | } |
---|
408 | } |
---|
409 | |
---|
410 | template<class T> |
---|
411 | void Sort<T>::QuickSort_Index(int low, int high, int indexlist[], |
---|
412 | const T data[]) |
---|
413 | { |
---|
414 | if (high > low) { |
---|
415 | int aindex = indexlist[low]; |
---|
416 | T a = data[aindex]; |
---|
417 | int plow = low; |
---|
418 | int phigh = high; |
---|
419 | int testindex = indexlist[phigh]; |
---|
420 | T test = data[testindex]; |
---|
421 | while (plow < phigh) { |
---|
422 | if (test < a) { |
---|
423 | indexlist[plow] = testindex; |
---|
424 | plow++; |
---|
425 | testindex = indexlist[plow]; |
---|
426 | test = data[testindex]; |
---|
427 | } else { |
---|
428 | indexlist[phigh] = testindex; |
---|
429 | phigh--; |
---|
430 | testindex = indexlist[phigh]; |
---|
431 | test = data[testindex]; |
---|
432 | } |
---|
433 | } |
---|
434 | indexlist[plow] = aindex; |
---|
435 | QuickSort_Index(low,plow-1,indexlist,data); |
---|
436 | QuickSort_Index(plow+1,high,indexlist,data); |
---|
437 | } |
---|
438 | } |
---|
439 | |
---|
440 | template<class T> |
---|
441 | void Sort<T>::HeapSort(int low, int high, T data[]) |
---|
442 | { |
---|
443 | int size=(high+1)-low; |
---|
444 | int i=size/2; |
---|
445 | T temp; |
---|
446 | while(1) { |
---|
447 | if (i > 0) |
---|
448 | temp = data[--i + low]; |
---|
449 | else { |
---|
450 | if (size-- == 0) |
---|
451 | break; |
---|
452 | temp = data[size + low]; |
---|
453 | data[size + low] = data[low]; |
---|
454 | } |
---|
455 | |
---|
456 | int parent = i; |
---|
457 | int child = i*2 + 1; |
---|
458 | |
---|
459 | while (child < size) { |
---|
460 | if (child + 1 < size && data[child + 1 + low] > data[child + low]) |
---|
461 | child++; |
---|
462 | if (data[child + low] > temp) { |
---|
463 | data[parent + low] = data[child + low]; |
---|
464 | parent = child; |
---|
465 | child = parent*2 + 1; |
---|
466 | } else |
---|
467 | break; |
---|
468 | } |
---|
469 | data[parent + low] = temp; |
---|
470 | } |
---|
471 | } |
---|
472 | |
---|
473 | template<class T> |
---|
474 | void Sort<T>::HeapSort_Index(int low, int high, int indexlist[], |
---|
475 | const T data[]) |
---|
476 | { |
---|
477 | int size=(high+1)-low; |
---|
478 | int i=size/2; |
---|
479 | |
---|
480 | while(1) { |
---|
481 | T tempValue; |
---|
482 | int tempIndex; |
---|
483 | if (i > 0) { |
---|
484 | i--; |
---|
485 | tempValue = data[indexlist[i + low]]; |
---|
486 | tempIndex = indexlist[i + low]; |
---|
487 | } else { |
---|
488 | if (size-- == 0) |
---|
489 | break; |
---|
490 | tempValue = data[indexlist[size + low]]; |
---|
491 | tempIndex = indexlist[size + low]; |
---|
492 | indexlist[size+low] = indexlist[low]; |
---|
493 | } |
---|
494 | |
---|
495 | int parent = i; |
---|
496 | int child = i*2 + 1; |
---|
497 | |
---|
498 | while (child < size) { |
---|
499 | if ((child + 1 < size) |
---|
500 | && data[indexlist[child + 1 + low]] > data[indexlist[child + low]]) |
---|
501 | child++; |
---|
502 | if (data[indexlist[child + low]] > tempValue) { |
---|
503 | indexlist[parent + low] = indexlist[child + low]; |
---|
504 | parent = child; |
---|
505 | child = parent*2 + 1; |
---|
506 | } else |
---|
507 | break; |
---|
508 | } |
---|
509 | indexlist[parent + low] = tempIndex; |
---|
510 | } |
---|
511 | } |
---|
512 | |
---|
513 | template<class T> |
---|
514 | void Sort<T>::InsertSort(int low, int high, T data[]) |
---|
515 | { |
---|
516 | for(int i = low+1; i <= high; i++) { |
---|
517 | T value = data[i]; |
---|
518 | int j; |
---|
519 | for (j = i - 1; j >= low && data[j] > value; j--) { |
---|
520 | data[j + 1] = data[j]; |
---|
521 | } |
---|
522 | data[j + 1] = value; |
---|
523 | } |
---|
524 | } |
---|
525 | |
---|
526 | template<class T> |
---|
527 | void Sort<T>::InsertSort_Index(int low, int high, int indexlist[], |
---|
528 | const T data[]) |
---|
529 | { |
---|
530 | for(int i = low + 1; i <= high; i++) { |
---|
531 | T value = data[indexlist[i]]; |
---|
532 | int tempIndex = indexlist[i]; |
---|
533 | int j; |
---|
534 | for (j = i - 1; j >= low && data[indexlist[j]] > value; j--) { |
---|
535 | indexlist[j + 1] = indexlist[j]; |
---|
536 | } |
---|
537 | indexlist[j + 1] = tempIndex; |
---|
538 | } |
---|
539 | } |
---|
540 | |
---|
541 | |
---|
542 | } // namespace itpp |
---|
543 | |
---|
544 | #endif // #ifndef SORT_H |
---|