
1: /* Malloc implementation for multiple threads without lock contention. 2: Copyright (C) 1996-2006, 2007 Free Software Foundation, Inc. 3: This file is part of the GNU C Library. 4: Contributed by Wolfram Gloger <wg@malloc.de> 5: and Doug Lea <dl@cs.oswego.edu>, 2001. 6: 7: The GNU C Library is free software; you can redistribute it and/or 8: modify it under the terms of the GNU Lesser General Public License as 9: published by the Free Software Foundation; either version 2.1 of the 10: License, or (at your option) any later version. 11: 12: The GNU C Library is distributed in the hope that it will be useful, 13: but WITHOUT ANY WARRANTY; without even the implied warranty of 14: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15: Lesser General Public License for more details. 16: 17: You should have received a copy of the GNU Lesser General Public 18: License along with the GNU C Library; see the file COPYING.LIB. If not, 19: write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 20: Boston, MA 02111-1307, USA. */ 21: 22: /* 23: This is a version (aka ptmalloc2) of malloc/free/realloc written by 24: Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger. 25: 26: * Version ptmalloc2-20011215 27: based on: 28: VERSION 2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee) 29: 30: * Quickstart 31: 32: In order to compile this implementation, a Makefile is provided with 33: the ptmalloc2 distribution, which has pre-defined targets for some 34: popular systems (e.g. "make posix" for Posix threads). All that is 35: typically required with regard to compiler flags is the selection of 36: the thread package via defining one out of USE_PTHREADS, USE_THR or 37: USE_SPROC. Check the thread-m.h file for what effects this has. 38: Many/most systems will additionally require USE_TSD_DATA_HACK to be 39: defined, so this is the default for "make posix". 40: 41: * Why use this malloc? 42: 43: This is not the fastest, most space-conserving, most portable, or 44: most tunable malloc ever written. However it is among the fastest 45: while also being among the most space-conserving, portable and tunable. 46: Consistent balance across these factors results in a good general-purpose 47: allocator for malloc-intensive programs. 48: 49: The main properties of the algorithms are: 50: * For large (>= 512 bytes) requests, it is a pure best-fit allocator, 51: with ties normally decided via FIFO (i.e. least recently used). 52: * For small (<= 64 bytes by default) requests, it is a caching 53: allocator, that maintains pools of quickly recycled chunks. 54: * In between, and for combinations of large and small requests, it does 55: the best it can trying to meet both goals at once. 56: * For very large requests (>= 128KB by default), it relies on system 57: memory mapping facilities, if supported. 58: 59: For a longer but slightly out of date high-level description, see 60: http://gee.cs.oswego.edu/dl/html/malloc.html 61: 62: You may already by default be using a C library containing a malloc 63: that is based on some version of this malloc (for example in 64: linux). You might still want to use the one in this file in order to 65: customize settings or to avoid overheads associated with library 66: versions. 67: 68: * Contents, described in more detail in "description of public routines" below. 69: 70: Standard (ANSI/SVID/...) functions: 71: malloc(size_t n); 72: calloc(size_t n_elements, size_t element_size); 73: free(Void_t* p); 74: realloc(Void_t* p, size_t n); 75: memalign(size_t alignment, size_t n); 76: valloc(size_t n); 77: mallinfo() 78: mallopt(int parameter_number, int parameter_value) 79: 80: Additional functions: 81: independent_calloc(size_t n_elements, size_t size, Void_t* chunks[]); 82: independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]); 83: pvalloc(size_t n); 84: cfree(Void_t* p); 85: malloc_trim(size_t pad); 86: malloc_usable_size(Void_t* p); 87: malloc_stats(); 88: 89: * Vital statistics: 90: 91: Supported pointer representation: 4 or 8 bytes 92: Supported size_t representation: 4 or 8 bytes 93: Note that size_t is allowed to be 4 bytes even if pointers are 8. 94: You can adjust this by defining INTERNAL_SIZE_T 95: 96: Alignment: 2 * sizeof(size_t) (default) 97: (i.e., 8 byte alignment with 4byte size_t). This suffices for 98: nearly all current machines and C compilers. However, you can 99: define MALLOC_ALIGNMENT to be wider than this if necessary. 100: 101: Minimum overhead per allocated chunk: 4 or 8 bytes 102: Each malloced chunk has a hidden word of overhead holding size 103: and status information. 104: 105: Minimum allocated size: 4-byte ptrs: 16 bytes (including 4 overhead) 106: 8-byte ptrs: 24/32 bytes (including, 4/8 overhead) 107: 108: When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte 109: ptrs but 4 byte size) or 24 (for 8/8) additional bytes are 110: needed; 4 (8) for a trailing size field and 8 (16) bytes for 111: free list pointers. Thus, the minimum allocatable size is 112: 16/24/32 bytes. 113: 114: Even a request for zero bytes (i.e., malloc(0)) returns a 115: pointer to something of the minimum allocatable size. 116: 117: The maximum overhead wastage (i.e., number of extra bytes 118: allocated than were requested in malloc) is less than or equal 119: to the minimum size, except for requests >= mmap_threshold that 120: are serviced via mmap(), where the worst case wastage is 2 * 121: sizeof(size_t) bytes plus the remainder from a system page (the 122: minimal mmap unit); typically 4096 or 8192 bytes. 123: 124: Maximum allocated size: 4-byte size_t: 2^32 minus about two pages 125: 8-byte size_t: 2^64 minus about two pages 126: 127: It is assumed that (possibly signed) size_t values suffice to 128: represent chunk sizes. `Possibly signed' is due to the fact 129: that `size_t' may be defined on a system as either a signed or 130: an unsigned type. The ISO C standard says that it must be 131: unsigned, but a few systems are known not to adhere to this. 132: Additionally, even when size_t is unsigned, sbrk (which is by 133: default used to obtain memory from system) accepts signed 134: arguments, and may not be able to handle size_t-wide arguments 135: with negative sign bit. Generally, values that would 136: appear as negative after accounting for overhead and alignment 137: are supported only via mmap(), which does not have this 138: limitation. 139: 140: Requests for sizes outside the allowed range will perform an optional 141: failure action and then return null. (Requests may also 142: also fail because a system is out of memory.) 143: 144: Thread-safety: thread-safe unless NO_THREADS is defined 145: 146: Compliance: I believe it is compliant with the 1997 Single Unix Specification 147: (See http://www.opennc.org). Also SVID/XPG, ANSI C, and probably 148: others as well. 149: 150: * Synopsis of compile-time options: 151: 152: People have reported using previous versions of this malloc on all 153: versions of Unix, sometimes by tweaking some of the defines 154: below. It has been tested most extensively on Solaris and 155: Linux. It is also reported to work on WIN32 platforms. 156: People also report using it in stand-alone embedded systems. 157: 158: The implementation is in straight, hand-tuned ANSI C. It is not 159: at all modular. (Sorry!) It uses a lot of macros. To be at all 160: usable, this code should be compiled using an optimizing compiler 161: (for example gcc -O3) that can simplify expressions and control 162: paths. (FAQ: some macros import variables as arguments rather than 163: declare locals because people reported that some debuggers 164: otherwise get confused.) 165: 166: OPTION DEFAULT VALUE 167: 168: Compilation Environment options: 169: 170: __STD_C derived from C compiler defines 171: WIN32 NOT defined 172: HAVE_MEMCPY defined 173: USE_MEMCPY 1 if HAVE_MEMCPY is defined 174: HAVE_MMAP defined as 1 175: MMAP_CLEARS 1 176: HAVE_MREMAP 0 unless linux defined 177: USE_ARENAS the same as HAVE_MMAP 178: malloc_getpagesize derived from system #includes, or 4096 if not 179: HAVE_USR_INCLUDE_MALLOC_H NOT defined 180: LACKS_UNISTD_H NOT defined unless WIN32 181: LACKS_SYS_PARAM_H NOT defined unless WIN32 182: LACKS_SYS_MMAN_H NOT defined unless WIN32 183: 184: Changing default word sizes: 185: 186: INTERNAL_SIZE_T size_t 187: MALLOC_ALIGNMENT MAX (2 * sizeof(INTERNAL_SIZE_T), 188: __alignof__ (long double)) 189: 190: Configuration and functionality options: 191: 192: USE_DL_PREFIX NOT defined 193: USE_PUBLIC_MALLOC_WRAPPERS NOT defined 194: USE_MALLOC_LOCK NOT defined 195: MALLOC_DEBUG NOT defined 196: REALLOC_ZERO_BYTES_FREES 1 197: MALLOC_FAILURE_ACTION errno = ENOMEM, if __STD_C defined, else no-op 198: TRIM_FASTBINS 0 199: 200: Options for customizing MORECORE: 201: 202: MORECORE sbrk 203: MORECORE_FAILURE -1 204: MORECORE_CONTIGUOUS 1 205: MORECORE_CANNOT_TRIM NOT defined 206: MORECORE_CLEARS 1 207: MMAP_AS_MORECORE_SIZE (1024 * 1024) 208: 209: Tuning options that are also dynamically changeable via mallopt: 210: 211: DEFAULT_MXFAST 64 212: DEFAULT_TRIM_THRESHOLD 128 * 1024 213: DEFAULT_TOP_PAD 0 214: DEFAULT_MMAP_THRESHOLD 128 * 1024 215: DEFAULT_MMAP_MAX 65536 216: 217: There are several other #defined constants and macros that you 218: probably don't want to touch unless you are extending or adapting malloc. */ 219: 220: /* 221: __STD_C should be nonzero if using ANSI-standard C compiler, a C++ 222: compiler, or a C compiler sufficiently close to ANSI to get away 223: with it. 224: */ 225: 226: #ifndef __STD_C 227: #if defined(__STDC__) || defined(__cplusplus) 228: #define __STD_C 1 229: #else 230: #define __STD_C 0 231: #endif 232: #endif /*__STD_C*/ 233: 234: 235: /* 236: Void_t* is the pointer type that malloc should say it returns 237: */ 238: 239: #ifndef Void_t 240: #if (__STD_C || defined(WIN32)) 241: #define Void_t void 242: #else 243: #define Void_t char 244: #endif 245: #endif /*Void_t*/ 246: 247: #if __STD_C 248: #include <stddef.h> /* for size_t */ 249: #include <stdlib.h> /* for getenv(), abort() */ 250: #else 251: #include <sys/types.h> 252: #endif 253: 254: #include <malloc-machine.h> 255: 256: #ifdef _LIBC 257: #include <stdio-common/_itoa.h> 258: #include <bits/wordsize.h> 259: #endif 260: 261: #ifdef __cplusplus 262: extern "C" { 263: #endif 264: 265: /* define LACKS_UNISTD_H if your system does not have a <unistd.h>. */ 266: 267: /* #define LACKS_UNISTD_H */ 268: 269: #ifndef LACKS_UNISTD_H 270: #include <unistd.h> 271: #endif 272: 273: /* define LACKS_SYS_PARAM_H if your system does not have a <sys/param.h>. */ 274: 275: /* #define LACKS_SYS_PARAM_H */ 276: 277: 278: #include <stdio.h> /* needed for malloc_stats */ 279: #include <errno.h> /* needed for optional MALLOC_FAILURE_ACTION */ 280: 281: /* For uintptr_t. */ 282: #include <stdint.h> 283: 284: /* For va_arg, va_start, va_end. */ 285: #include <stdarg.h> 286: 287: /* For writev and struct iovec. */ 288: #include <sys/uio.h> 289: /* For syslog. */ 290: #include <sys/syslog.h> 291: 292: /* For various dynamic linking things. */ 293: #include <dlfcn.h> 294: 295: 296: /* 297: Debugging: 298: 299: Because freed chunks may be overwritten with bookkeeping fields, this 300: malloc will often die when freed memory is overwritten by user 301: programs. This can be very effective (albeit in an annoying way) 302: in helping track down dangling pointers. 303: 304: If you compile with -DMALLOC_DEBUG, a number of assertion checks are 305: enabled that will catch more memory errors. You probably won't be 306: able to make much sense of the actual assertion errors, but they 307: should help you locate incorrectly overwritten memory. The checking 308: is fairly extensive, and will slow down execution 309: noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set 310: will attempt to check every non-mmapped allocated and free chunk in 311: the course of computing the summmaries. (By nature, mmapped regions 312: cannot be checked very much automatically.) 313: 314: Setting MALLOC_DEBUG may also be helpful if you are trying to modify 315: this code. The assertions in the check routines spell out in more 316: detail the assumptions and invariants underlying the algorithms. 317: 318: Setting MALLOC_DEBUG does NOT provide an automated mechanism for 319: checking that all accesses to malloced memory stay within their 320: bounds. However, there are several add-ons and adaptations of this 321: or other mallocs available that do this. 322: */ 323: 324: #if MALLOC_DEBUG 325: #include <assert.h> 326: #else 327: #undef assert 328: #define assert(x) ((void)0) 329: #endif 330: 331: 332: /* 333: INTERNAL_SIZE_T is the word-size used for internal bookkeeping 334: of chunk sizes. 335: 336: The default version is the same as size_t. 337: 338: While not strictly necessary, it is best to define this as an 339: unsigned type, even if size_t is a signed type. This may avoid some 340: artificial size limitations on some systems. 341: 342: On a 64-bit machine, you may be able to reduce malloc overhead by 343: defining INTERNAL_SIZE_T to be a 32 bit `unsigned int' at the 344: expense of not being able to handle more than 2^32 of malloced 345: space. If this limitation is acceptable, you are encouraged to set 346: this unless you are on a platform requiring 16byte alignments. In 347: this case the alignment requirements turn out to negate any 348: potential advantages of decreasing size_t word size. 349: 350: Implementors: Beware of the possible combinations of: 351: - INTERNAL_SIZE_T might be signed or unsigned, might be 32 or 64 bits, 352: and might be the same width as int or as long 353: - size_t might have different width and signedness as INTERNAL_SIZE_T 354: - int and long might be 32 or 64 bits, and might be the same width 355: To deal with this, most comparisons and difference computations 356: among INTERNAL_SIZE_Ts should cast them to unsigned long, being 357: aware of the fact that casting an unsigned int to a wider long does 358: not sign-extend. (This also makes checking for negative numbers 359: awkward.) Some of these casts result in harmless compiler warnings 360: on some systems. 361: */ 362: 363: #ifndef INTERNAL_SIZE_T 364: #define INTERNAL_SIZE_T size_t 365: #endif 366: 367: /* The corresponding word size */ 368: #define SIZE_SZ (sizeof(INTERNAL_SIZE_T)) 369: 370: 371: /* 372: MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks. 373: It must be a power of two at least 2 * SIZE_SZ, even on machines 374: for which smaller alignments would suffice. It may be defined as 375: larger than this though. Note however that code and data structures 376: are optimized for the case of 8-byte alignment. 377: */ 378: 379: 380: #ifndef MALLOC_ALIGNMENT 381: /* XXX This is the correct definition. It differs from 2*SIZE_SZ only on 382: powerpc32. For the time being, changing this is causing more 383: compatibility problems due to malloc_get_state/malloc_set_state than 384: will returning blocks not adequately aligned for long double objects 385: under -mlong-double-128. 386: 387: #define MALLOC_ALIGNMENT (2 * SIZE_SZ < __alignof__ (long double) \ 388: ? __alignof__ (long double) : 2 * SIZE_SZ) 389: */ 390: #define MALLOC_ALIGNMENT (2 * SIZE_SZ) 391: #endif 392: 393: /* The corresponding bit mask value */ 394: #define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1) 395: 396: 397: 398: /* 399: REALLOC_ZERO_BYTES_FREES should be set if a call to 400: realloc with zero bytes should be the same as a call to free. 401: This is required by the C standard. Otherwise, since this malloc 402: returns a unique pointer for malloc(0), so does realloc(p, 0). 403: */ 404: 405: #ifndef REALLOC_ZERO_BYTES_FREES 406: #define REALLOC_ZERO_BYTES_FREES 1 407: #endif 408: 409: /* 410: TRIM_FASTBINS controls whether free() of a very small chunk can 411: immediately lead to trimming. Setting to true (1) can reduce memory 412: footprint, but will almost always slow down programs that use a lot 413: of small chunks. 414: 415: Define this only if you are willing to give up some speed to more 416: aggressively reduce system-level memory footprint when releasing 417: memory in programs that use many small chunks. You can get 418: essentially the same effect by setting MXFAST to 0, but this can 419: lead to even greater slowdowns in programs using many small chunks. 420: TRIM_FASTBINS is an in-between compile-time option, that disables 421: only those chunks bordering topmost memory from being placed in 422: fastbins. 423: */ 424: 425: #ifndef TRIM_FASTBINS 426: #define TRIM_FASTBINS 0 427: #endif 428: 429: 430: /* 431: USE_DL_PREFIX will prefix all public routines with the string 'dl'. 432: This is necessary when you only want to use this malloc in one part 433: of a program, using your regular system malloc elsewhere. 434: */ 435: 436: /* #define USE_DL_PREFIX */ 437: 438: 439: /* 440: Two-phase name translation. 441: All of the actual routines are given mangled names. 442: When wrappers are used, they become the public callable versions. 443: When DL_PREFIX is used, the callable names are prefixed. 444: */ 445: 446: #ifdef USE_DL_PREFIX 447: #define public_cALLOc dlcalloc 448: #define public_fREe dlfree 449: #define public_cFREe dlcfree 450: #define public_mALLOc dlmalloc 451: #define public_mEMALIGn dlmemalign 452: #define public_rEALLOc dlrealloc 453: #define public_vALLOc dlvalloc 454: #define public_pVALLOc dlpvalloc 455: #define public_mALLINFo dlmallinfo 456: #define public_mALLOPt dlmallopt 457: #define public_mTRIm dlmalloc_trim 458: #define public_mSTATs dlmalloc_stats 459: #define public_mUSABLe dlmalloc_usable_size 460: #define public_iCALLOc dlindependent_calloc 461: #define public_iCOMALLOc dlindependent_comalloc 462: #define public_gET_STATe dlget_state 463: #define public_sET_STATe dlset_state 464: #else /* USE_DL_PREFIX */ 465: #ifdef _LIBC 466: 467: /* Special defines for the GNU C library. */ 468: #define public_cALLOc __libc_calloc 469: #define public_fREe __libc_free 470: #define public_cFREe __libc_cfree 471: #define public_mALLOc __libc_malloc 472: #define public_mEMALIGn __libc_memalign 473: #define public_rEALLOc __libc_realloc 474: #define public_vALLOc __libc_valloc 475: #define public_pVALLOc __libc_pvalloc 476: #define public_mALLINFo __libc_mallinfo 477: #define public_mALLOPt __libc_mallopt 478: #define public_mTRIm __malloc_trim 479: #define public_mSTATs __malloc_stats 480: #define public_mUSABLe __malloc_usable_size 481: #define public_iCALLOc __libc_independent_calloc 482: #define public_iCOMALLOc __libc_independent_comalloc 483: #define public_gET_STATe __malloc_get_state 484: #define public_sET_STATe __malloc_set_state 485: #define malloc_getpagesize __getpagesize() 486: #define open __open 487: #define mmap __mmap 488: #define munmap __munmap 489: #define mremap __mremap 490: #define mprotect __mprotect 491: #define MORECORE (*__morecore) 492: #define MORECORE_FAILURE 0 493: 494: Void_t * __default_morecore (ptrdiff_t); 495: Void_t *(*__morecore)(ptrdiff_t) = __default_morecore; 496: 497: #else /* !_LIBC */ 498: #define public_cALLOc calloc 499: #define public_fREe free 500: #define public_cFREe cfree 501: #define public_mALLOc malloc 502: #define public_mEMALIGn memalign 503: #define public_rEALLOc realloc 504: #define public_vALLOc valloc 505: #define public_pVALLOc pvalloc 506: #define public_mALLINFo mallinfo 507: #define public_mALLOPt mallopt 508: #define public_mTRIm malloc_trim 509: #define public_mSTATs malloc_stats 510: #define public_mUSABLe malloc_usable_size 511: #define public_iCALLOc independent_calloc 512: #define public_iCOMALLOc independent_comalloc 513: #define public_gET_STATe malloc_get_state 514: #define public_sET_STATe malloc_set_state 515: #endif /* _LIBC */ 516: #endif /* USE_DL_PREFIX */ 517: 518: #ifndef _LIBC 519: #define __builtin_expect(expr, val) (expr) 520: 521: #define fwrite(buf, size, count, fp) _IO_fwrite (buf, size, count, fp) 522: #endif 523: 524: /* 525: HAVE_MEMCPY should be defined if you are not otherwise using 526: ANSI STD C, but still have memcpy and memset in your C library 527: and want to use them in calloc and realloc. Otherwise simple 528: macro versions are defined below. 529: 530: USE_MEMCPY should be defined as 1 if you actually want to 531: have memset and memcpy called. People report that the macro 532: versions are faster than libc versions on some systems. 533: 534: Even if USE_MEMCPY is set to 1, loops to copy/clear small chunks 535: (of <= 36 bytes) are manually unrolled in realloc and calloc. 536: */ 537: 538: #define HAVE_MEMCPY 539: 540: #ifndef USE_MEMCPY 541: #ifdef HAVE_MEMCPY 542: #define USE_MEMCPY 1 543: #else 544: #define USE_MEMCPY 0 545: #endif 546: #endif 547: 548: 549: #if (__STD_C || defined(HAVE_MEMCPY)) 550: 551: #ifdef _LIBC 552: # include <string.h> 553: #else 554: #ifdef WIN32 555: /* On Win32 memset and memcpy are already declared in windows.h */ 556: #else 557: #if __STD_C 558: void* memset(void*, int, size_t); 559: void* memcpy(void*, const void*, size_t); 560: #else 561: Void_t* memset(); 562: Void_t* memcpy(); 563: #endif 564: #endif 565: #endif 566: #endif 567: 568: /* 569: MALLOC_FAILURE_ACTION is the action to take before "return 0" when 570: malloc fails to be able to return memory, either because memory is 571: exhausted or because of illegal arguments. 572: 573: By default, sets errno if running on STD_C platform, else does nothing. 574: */ 575: 576: #ifndef MALLOC_FAILURE_ACTION 577: #if __STD_C 578: #define MALLOC_FAILURE_ACTION \ 579: errno = ENOMEM; 580: 581: #else 582: #define MALLOC_FAILURE_ACTION 583: #endif 584: #endif 585: 586: /* 587: