/* Copyright (C) 1991,1992,1996,1997,1999,2004 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Written by Douglas C. Schmidt (schmidt@ics.uci.edu).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

/* If you consider tuning this algorithm, you should consult first:
   Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
   Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993.  */

#include <alloca.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>

/* Comparison callback type.  glibc's <stdlib.h> provides it under the
   same guard macro; this fallback only takes effect with C libraries
   that do not.  */
#ifndef __COMPAR_FN_T
# define __COMPAR_FN_T
typedef int (*__compar_fn_t) (const void *, const void *);
#endif

/* Byte-wise swap two items of size SIZE.  SIZE must be nonzero: the
   loop body runs once before the counter is tested.  */
#define SWAP(a, b, size)						      \
  do									      \
    {									      \
      register size_t __size = (size);					      \
      register char *__a = (a), *__b = (b);				      \
      do								      \
	{								      \
	  char __tmp = *__a;						      \
	  *__a++ = *__b;						      \
	  *__b++ = __tmp;						      \
	} while (--__size > 0);						      \
    } while (0)

/* Discontinue quicksort algorithm when partition gets below this size.
   This particular magic number was chosen to work best on a Sun 4/260.  */
#define MAX_THRESH 4

/* Stack node declarations used to store unfulfilled partition obligations.  */
typedef struct
  {
    char *lo;
    char *hi;
  } stack_node;

/* The next 4 #defines implement a very fast in-line stack abstraction.  */
/* The stack needs log (total_elements) entries (we could even subtract
   log(MAX_THRESH)).  Since total_elements has type size_t, we get as
   upper bound for log (total_elements):
   bits per byte (CHAR_BIT) * sizeof(size_t).  */
#define STACK_SIZE	(CHAR_BIT * sizeof(size_t))
#define PUSH(low, high)	((void) ((top->lo = (low)), (top->hi = (high)), ++top))
#define	POP(low, high)	((void) (--top, (low = top->lo), (high = top->hi)))
#define	STACK_NOT_EMPTY	(stack < top)


/* Sort the TOTAL_ELEMS elements of SIZE bytes each that start at PBASE,
   in place, into the order defined by CMP.  CMP receives pointers to two
   elements; only whether its result is negative is examined.  Nothing is
   done (and CMP is never called) when TOTAL_ELEMS or SIZE is zero.

   This implementation incorporates four optimizations discussed in
   Sedgewick:

   1. Non-recursive, using an explicit stack of pointers that store the
      next array partition to sort.  To save time, the maximum amount
      of space required to store an array of SIZE_MAX is allocated on the
      stack.  Assuming a 32-bit (64 bit) integer for size_t, this needs
      only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
      Pretty cheap, actually.

   2. Choose the pivot element using a median-of-three decision tree.
      This reduces the probability of selecting a bad pivot value and
      eliminates certain extraneous comparisons.

   3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
      insertion sort to order the MAX_THRESH items within each partition.
      This is a big win, since insertion sort is faster for small, mostly
      sorted array segments.

   4. The larger of the two sub-partitions is always pushed onto the
      stack first, with the algorithm then concentrating on the
      smaller partition.  This *guarantees* no more than log (total_elems)
      stack size is needed (actually O(1) in this case)!  */

void
_quicksort (void *const pbase, size_t total_elems, size_t size,
	    __compar_fn_t cmp)
{
  register char *base_ptr = (char *) pbase;

  const size_t max_thresh = MAX_THRESH * size;

  if (total_elems == 0 || size == 0)
    /* Avoid lossage with unsigned arithmetic below.  A zero SIZE would
       additionally divide by zero when locating the pivot and would
       keep the pointer-stepping loops from ever advancing.  */
    return;

  if (total_elems > MAX_THRESH)
    {
      char *lo = base_ptr;
      char *hi = &lo[size * (total_elems - 1)];
      stack_node stack[STACK_SIZE];
      stack_node *top = stack;

      /* Sentinel entry: popping it empties the stack and ends the loop.  */
      PUSH (NULL, NULL);

      while (STACK_NOT_EMPTY)
        {
          char *left_ptr;
          char *right_ptr;

          /* Select median value from among LO, MID, and HI.  Rearrange
             LO and HI so the three values are sorted.  This lowers the
             probability of picking a pathological pivot value and
             skips a comparison for both the LEFT_PTR and RIGHT_PTR in
             the while loops.  */

          char *mid = lo + size * ((hi - lo) / size >> 1);

          if (cmp (mid, lo) < 0)
            SWAP (mid, lo, size);
          if (cmp (hi, mid) < 0)
            {
              SWAP (mid, hi, size);
              /* The element that just moved into MID may be smaller
                 than LO; recheck only in that case.  */
              if (cmp (mid, lo) < 0)
                SWAP (mid, lo, size);
            }

          left_ptr  = lo + size;
          right_ptr = hi - size;

          /* Here's the famous ``collapse the walls'' section of quicksort.
             Gotta like those tight inner loops!  They are the main reason
             that this algorithm runs much faster than others.  */
          do
            {
              while (cmp (left_ptr, mid) < 0)
                left_ptr += size;

              while (cmp (mid, right_ptr) < 0)
                right_ptr -= size;

              if (left_ptr < right_ptr)
                {
                  SWAP (left_ptr, right_ptr, size);
                  /* Keep MID pointing at the pivot value if the swap
                     just moved it.  */
                  if (mid == left_ptr)
                    mid = right_ptr;
                  else if (mid == right_ptr)
                    mid = left_ptr;
                  left_ptr += size;
                  right_ptr -= size;
                }
              else if (left_ptr == right_ptr)
                {
                  left_ptr += size;
                  right_ptr -= size;
                  break;
                }
            }
          while (left_ptr <= right_ptr);

          /* Set up pointers for next iteration.  First determine whether
             left and right partitions are below the threshold size.  If so,
             ignore one or both.  Otherwise, push the larger partition's
             bounds on the stack and continue sorting the smaller one.  */

          if ((size_t) (right_ptr - lo) <= max_thresh)
            {
              if ((size_t) (hi - left_ptr) <= max_thresh)
                /* Ignore both small partitions.  */
                POP (lo, hi);
              else
                /* Ignore small left partition.  */
                lo = left_ptr;
            }
          else if ((size_t) (hi - left_ptr) <= max_thresh)
            /* Ignore small right partition.  */
            hi = right_ptr;
          else if ((right_ptr - lo) > (hi - left_ptr))
            {
              /* Push larger left partition indices.  */
              PUSH (lo, right_ptr);
              lo = left_ptr;
            }
          else
            {
              /* Push larger right partition indices.  */
              PUSH (left_ptr, hi);
              hi = right_ptr;
            }
        }
    }

  /* Once the BASE_PTR array is partially sorted by quicksort the rest
     is completely sorted using insertion sort, since this is efficient
     for partitions below MAX_THRESH size.  BASE_PTR points to the beginning
     of the array to sort, and END_PTR points at the very last element in
     the array (*not* one beyond it!).  */

  {
    char *const end_ptr = &base_ptr[size * (total_elems - 1)];
    char *tmp_ptr = base_ptr;
    /* The smaller of END_PTR and BASE_PTR + MAX_THRESH, chosen by element
       count so that no pointer beyond a short array is formed here.  */
    char *const thresh = (total_elems - 1 < MAX_THRESH
                          ? end_ptr : base_ptr + max_thresh);
    register char *run_ptr;

    /* Find smallest element in first threshold and place it at the
       array's beginning.  This is the smallest array element,
       and the operation speeds up insertion sort's inner loop.  */

    for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
      if (cmp (run_ptr, tmp_ptr) < 0)
        tmp_ptr = run_ptr;

    if (tmp_ptr != base_ptr)
      SWAP (tmp_ptr, base_ptr, size);

    /* Insertion sort, running from left-hand-side up to right-hand-side.
       The first element is already the minimum, so it serves as a
       sentinel for the backwards scan and the second element needs no
       processing of its own.  */

    run_ptr = base_ptr + size;
    while ((run_ptr += size) <= end_ptr)
      {
        /* Walk back to the last element that is not greater than the
           one at RUN_PTR.  */
        tmp_ptr = run_ptr - size;
        while (cmp (run_ptr, tmp_ptr) < 0)
          tmp_ptr -= size;

        tmp_ptr += size;
        if (tmp_ptr != run_ptr)
          {
            char *trav;

            /* Rotate the elements in [TMP_PTR, RUN_PTR] right by one
               position, one byte column at a time, which drops the
               element from RUN_PTR into its slot at TMP_PTR.  */
            trav = run_ptr + size;
            while (--trav >= run_ptr)
              {
                char c = *trav;
                char *hi, *lo;

                for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
                  *hi = *lo;
                *hi = c;
              }
          }
      }
  }
}