1: /* Copyright (C) 1991,1992,1996,1997,1999,2004 Free Software Foundation, Inc.
2: This file is part of the GNU C Library.
3: Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
4:
5: The GNU C Library is free software; you can redistribute it and/or
6: modify it under the terms of the GNU Lesser General Public
7: License as published by the Free Software Foundation; either
8: version 2.1 of the License, or (at your option) any later version.
9:
10: The GNU C Library is distributed in the hope that it will be useful,
11: but WITHOUT ANY WARRANTY; without even the implied warranty of
12: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13: Lesser General Public License for more details.
14:
15: You should have received a copy of the GNU Lesser General Public
16: License along with the GNU C Library; if not, write to the Free
17: Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18: 02111-1307 USA. */
19:
20: /* If you consider tuning this algorithm, you should consult first:
21: Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
22: Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
23:
24: #include <alloca.h>
25: #include <limits.h>
26: #include <stdlib.h>
27: #include <string.h>
28:
29: /* Byte-wise swap two items of size SIZE. */
30: #define SWAP(a, b, size) \
31: do \
32: { \
33: register size_t __size = (size); \
34: register char *__a = (a), *__b = (b); \
35: do \
36: { \
37: char __tmp = *__a; \
38: *__a++ = *__b; \
39: *__b++ = __tmp; \
40: } while (--__size > 0); \
41: } while (0)
42:
43: /* Discontinue quicksort algorithm when partition gets below this size.
44: This particular magic number was chosen to work best on a Sun 4/260. */
45: #define MAX_THRESH 4
46:
47: /* Stack node declarations used to store unfulfilled partition obligations. */
48: typedef struct
49: {
50: char *lo;
51: char *hi;
52: } stack_node;
53:
54: /* The next 4 #defines implement a very fast in-line stack abstraction. */
55: /* The stack needs log (total_elements) entries (we could even subtract
56: log(MAX_THRESH)). Since total_elements has type size_t, we get as
57: upper bound for log (total_elements):
58: bits per byte (CHAR_BIT) * sizeof(size_t). */
59: #define STACK_SIZE (CHAR_BIT * sizeof(size_t))
60: #define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
61: #define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
62: #define STACK_NOT_EMPTY (stack < top)
63:
64:
65: /* Order size using quicksort. This implementation incorporates
66: four optimizations discussed in Sedgewick:
67:
68: 1. Non-recursive, using an explicit stack of pointer that store the
69: next array partition to sort. To save time, this maximum amount
70: of space required to store an array of SIZE_MAX is allocated on the
71: stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
72: only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
73: Pretty cheap, actually.
74:
75: 2. Chose the pivot element using a median-of-three decision tree.
76: This reduces the probability of selecting a bad pivot value and
77: eliminates certain extraneous comparisons.
78:
79: 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
80: insertion sort to order the MAX_THRESH items within each partition.
81: This is a big win, since insertion sort is faster for small, mostly
82: sorted array segments.
83:
84: 4. The larger of the two sub-partitions is always pushed onto the
85: stack first, with the algorithm then concentrating on the
86: smaller partition. This *guarantees* no more than log (total_elems)
87: stack size is needed (actually O(1) in this case)! */
88:
89: void
90: _quicksort (void *const pbase, size_t total_elems, size_t size,
91: __compar_fn_t cmp)
92: {
93: register char *base_ptr = (char *) pbase;
94:
95: const size_t max_thresh = MAX_THRESH * size;
96:
97: if (total_elems == 0)
98: /* Avoid lossage with unsigned arithmetic below. */
99: return;
100:
101: if (total_elems > MAX_THRESH)
102: {
103: char *lo = base_ptr;
104: char *hi = &lo[size * (total_elems - 1)];
105: stack_node stack[STACK_SIZE];
106: stack_node *top = stack;
107:
108: PUSH (NULL, NULL);
109:
110: while (STACK_NOT_EMPTY)
111: {
112: char *left_ptr;
113: char *right_ptr;
114:
115: /* Select median value from among LO, MID, and HI. Rearrange
116: LO and HI so the three values are sorted. This lowers the
117: probability of picking a pathological pivot value and
118: skips a comparison for both the LEFT_PTR and RIGHT_PTR in
119: the while loops. */
120:
121: char *mid = lo + size * ((hi - lo) / size >> 1);
122:
123: if ((*cmp) ((void *) mid, (void *) lo) < 0)
124: SWAP (mid, lo, size);
125: if ((*cmp) ((void *) hi, (void *) mid) < 0)
126: SWAP (mid, hi, size);
127: else
128: goto jump_over;
129: if ((*cmp) ((void *) mid, (void *) lo) < 0)
130: SWAP (mid, lo, size);
131: jump_over:;
132:
133: left_ptr = lo + size;
134: right_ptr = hi - size;
135:
136: /* Here's the famous ``collapse the walls'' section of quicksort.
137: Gotta like those tight inner loops! They are the main reason
138: that this algorithm runs much faster than others. */
139: do
140: {
141: while ((*cmp) ((void *) left_ptr, (void *) mid) < 0)
142: left_ptr += size;
143:
144: while ((*cmp) ((void *) mid, (void *) right_ptr) < 0)
145: right_ptr -= size;
146:
147: if (left_ptr < right_ptr)
148: {
149: SWAP (left_ptr, right_ptr, size);
150: if (mid == left_ptr)
151: mid = right_ptr;
152: else if (mid == right_ptr)
153: mid = left_ptr;
154: left_ptr += size;
155: right_ptr -= size;
156: }
157: else if (left_ptr == right_ptr)
158: {
159: left_ptr += size;
160: right_ptr -= size;
161: break;
162: }
163: }
164: while (left_ptr <= right_ptr);
165:
166: /* Set up pointers for next iteration. First determine whether
167: left and right partitions are below the threshold size. If so,
168: ignore one or both. Otherwise, push the larger partition's
169: bounds on the stack and continue sorting the smaller one. */
170:
171: if ((size_t) (right_ptr - lo) <= max_thresh)
172: {
173: if ((size_t) (hi - left_ptr) <= max_thresh)
174: /* Ignore both small partitions. */
175: POP (lo, hi);
176: else
177: /* Ignore small left partition. */
178: lo = left_ptr;
179: }
180: else if ((size_t) (hi - left_ptr) <= max_thresh)
181: /* Ignore small right partition. */
182: hi = right_ptr;
183: else if ((right_ptr - lo) > (hi - left_ptr))
184: {
185: /* Push larger left partition indices. */
186: PUSH (lo, right_ptr);
187: lo = left_ptr;
188: }
189: else
190: {
191: /* Push larger right partition indices. */
192: PUSH (left_ptr, hi);
193: hi = right_ptr;
194: }
195: }
196: }
197:
198: /* Once the BASE_PTR array is partially sorted by quicksort the rest
199: is completely sorted using insertion sort, since this is efficient
200: for partitions below MAX_THRESH size. BASE_PTR points to the beginning
201: of the array to sort, and END_PTR points at the very last element in
202: the array (*not* one beyond it!). */
203:
204: #define min(x, y) ((x) < (y) ? (x) : (y))
205:
206: {
207: char *const end_ptr = &base_ptr[size * (total_elems - 1)];
208: char *tmp_ptr = base_ptr;
209: char *thresh = min(end_ptr, base_ptr + max_thresh);
210: register char *run_ptr;
211:
212: /* Find smallest element in first threshold and place it at the
213: array's beginning. This is the smallest array element,
214: and the operation speeds up insertion sort's inner loop. */
215:
216: for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
217: if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr) < 0)
218: tmp_ptr = run_ptr;
219:
220: if (tmp_ptr != base_ptr)
221: SWAP (tmp_ptr, base_ptr, size);
222:
223: /* Insertion sort, running from left-hand-side up to right-hand-side. */
224:
225: run_ptr = base_ptr + size;
226: while ((run_ptr += size) <= end_ptr)
227: {
228: tmp_ptr = run_ptr - size;
229: while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr) < 0)
230: tmp_ptr -= size;
231:
232: tmp_ptr += size;
233: if (tmp_ptr != run_ptr)
234: {
235: char *trav;
236:
237: trav = run_ptr + size;
238: while (--trav >= run_ptr)
239: {
240: char c = *trav;
241: char *hi, *lo;
242:
243: for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
244: *hi = *lo;
245: *hi = c;
246: }
247: }
248: }
249: }
250: }