diff options
Diffstat (limited to 'common/dlmalloc.c')
-rw-r--r-- | common/dlmalloc.c | 459 |
1 files changed, 458 insertions, 1 deletions
diff --git a/common/dlmalloc.c b/common/dlmalloc.c index 3f7a0970c8..67278ccc91 100644 --- a/common/dlmalloc.c +++ b/common/dlmalloc.c @@ -1,10 +1,467 @@ +#include <config.h> #include <malloc.h> -#include <config.h> #include <stdio.h> #include <module.h> +#ifndef DEFAULT_TRIM_THRESHOLD +#define DEFAULT_TRIM_THRESHOLD (128 * 1024) +#endif + +/* + M_TRIM_THRESHOLD is the maximum amount of unused top-most memory + to keep before releasing via malloc_trim in free(). + + Automatic trimming is mainly useful in long-lived programs. + Because trimming via sbrk can be slow on some systems, and can + sometimes be wasteful (in cases where programs immediately + afterward allocate more large chunks) the value should be high + enough so that your overall system performance would improve by + releasing. + + The trim threshold and the mmap control parameters (see below) + can be traded off with one another. Trimming and mmapping are + two different ways of releasing unused memory back to the + system. Between these two, it is often possible to keep + system-level demands of a long-lived program down to a bare + minimum. For example, in one test suite of sessions measuring + the XF86 X server on Linux, using a trim threshold of 128K and a + mmap threshold of 192K led to near-minimal long term resource + consumption. + + If you are using this malloc in a long-lived program, it should + pay to experiment with these values. As a rough guide, you + might set to a value close to the average size of a process + (program) running on your system. Releasing this much memory + would allow such a process to run in memory. Generally, it's + worth it to tune for trimming rather tham memory mapping when a + program undergoes phases where several large chunks are + allocated and released in ways that can reuse each other's + storage, perhaps mixed with phases where there are no such + chunks at all. And in well-behaved long-lived programs, + controlling release of large blocks via trimming versus mapping + is usually faster. + + However, in most programs, these parameters serve mainly as + protection against the system-level effects of carrying around + massive amounts of unneeded memory. Since frequent calls to + sbrk, mmap, and munmap otherwise degrade performance, the default + parameters are set to relatively high values that serve only as + safeguards. + + The default trim value is high enough to cause trimming only in + fairly extreme (by current memory consumption standards) cases. + It must be greater than page size to have any useful effect. To + disable trimming completely, you can set to (unsigned long)(-1); + + +*/ + + +#ifndef DEFAULT_TOP_PAD +#define DEFAULT_TOP_PAD (0) +#endif + +/* + M_TOP_PAD is the amount of extra `padding' space to allocate or + retain whenever sbrk is called. It is used in two ways internally: + + * When sbrk is called to extend the top of the arena to satisfy + a new malloc request, this much padding is added to the sbrk + request. + + * When malloc_trim is called automatically from free(), + it is used as the `pad' argument. + + In both cases, the actual amount of padding is rounded + so that the end of the arena is always a system page boundary. + + The main reason for using padding is to avoid calling sbrk so + often. Having even a small pad greatly reduces the likelihood + that nearly every malloc request during program start-up (or + after trimming) will invoke sbrk, which needlessly wastes + time. + + Automatic rounding-up to page-size units is normally sufficient + to avoid measurable overhead, so the default is 0. However, in + systems where sbrk is relatively slow, it can pay to increase + this value, at the expense of carrying around more memory than + the program needs. + +*/ + + +#ifndef DEFAULT_MMAP_THRESHOLD +#define DEFAULT_MMAP_THRESHOLD (128 * 1024) +#endif + +/* + + M_MMAP_THRESHOLD is the request size threshold for using mmap() + to service a request. Requests of at least this size that cannot + be allocated using already-existing space will be serviced via mmap. + (If enough normal freed space already exists it is used instead.) + + Using mmap segregates relatively large chunks of memory so that + they can be individually obtained and released from the host + system. A request serviced through mmap is never reused by any + other request (at least not directly; the system may just so + happen to remap successive requests to the same locations). + + Segregating space in this way has the benefit that mmapped space + can ALWAYS be individually released back to the system, which + helps keep the system level memory demands of a long-lived + program low. Mapped memory can never become `locked' between + other chunks, as can happen with normally allocated chunks, which + menas that even trimming via malloc_trim would not release them. + + However, it has the disadvantages that: + + 1. The space cannot be reclaimed, consolidated, and then + used to service later requests, as happens with normal chunks. + 2. It can lead to more wastage because of mmap page alignment + requirements + 3. It causes malloc performance to be more dependent on host + system memory management support routines which may vary in + implementation quality and may impose arbitrary + limitations. Generally, servicing a request via normal + malloc steps is faster than going through a system's mmap. + + All together, these considerations should lead you to use mmap + only for relatively large requests. + + +*/ + + +#ifndef DEFAULT_MMAP_MAX +#define DEFAULT_MMAP_MAX (0) +#endif + +/* + M_MMAP_MAX is the maximum number of requests to simultaneously + service using mmap. This parameter exists because: + + 1. Some systems have a limited number of internal tables for + use by mmap. + 2. In most systems, overreliance on mmap can degrade overall + performance. + 3. If a program allocates many large regions, it is probably + better off using normal sbrk-based allocation routines that + can reclaim and reallocate normal heap memory. Using a + small value allows transition into this mode after the + first few allocations. + + Setting to 0 disables all use of mmap. If HAVE_MMAP is not set, + the default value is 0, and attempts to set it to non-zero values + in mallopt will fail. +*/ + +/* + INTERNAL_SIZE_T is the word-size used for internal bookkeeping + of chunk sizes. On a 64-bit machine, you can reduce malloc + overhead by defining INTERNAL_SIZE_T to be a 32 bit `unsigned int' + at the expense of not being able to handle requests greater than + 2^31. This limitation is hardly ever a concern; you are encouraged + to set this. However, the default version is the same as size_t. +*/ + +#ifndef INTERNAL_SIZE_T +#define INTERNAL_SIZE_T size_t +#endif + +/* + REALLOC_ZERO_BYTES_FREES should be set if a call to + realloc with zero bytes should be the same as a call to free. + Some people think it should. Otherwise, since this malloc + returns a unique pointer for malloc(0), so does realloc(p, 0). +*/ + + +/* #define REALLOC_ZERO_BYTES_FREES */ + +/* + Debugging: + + Because freed chunks may be overwritten with link fields, this + malloc will often die when freed memory is overwritten by user + programs. This can be very effective (albeit in an annoying way) + in helping track down dangling pointers. + + If you compile with -DDEBUG, a number of assertion checks are + enabled that will catch more memory errors. You probably won't be + able to make much sense of the actual assertion errors, but they + should help you locate incorrectly overwritten memory. The + checking is fairly extensive, and will slow down execution + noticeably. Calling malloc_stats or mallinfo with DEBUG set will + attempt to check every non-mmapped allocated and free chunk in the + course of computing the summmaries. (By nature, mmapped regions + cannot be checked very much automatically.) + + Setting DEBUG may also be helpful if you are trying to modify + this code. The assertions in the check routines spell out in more + detail the assumptions and invariants underlying the algorithms. + +*/ + +#ifdef DEBUG +/* #include <assert.h> */ +#define assert(x) ((void)0) +#else +#define assert(x) ((void)0) +#endif + +/* + HAVE_MEMCPY should be defined if you are not otherwise using + ANSI STD C, but still have memcpy and memset in your C library + and want to use them in calloc and realloc. Otherwise simple + macro versions are defined here. + + USE_MEMCPY should be defined as 1 if you actually want to + have memset and memcpy called. People report that the macro + versions are often enough faster than libc versions on many + systems that it is better to use them. + +*/ + +#define HAVE_MEMCPY +#define USE_MEMCPY 1 + +#if (__STD_C || defined(HAVE_MEMCPY)) + +#if __STD_C +void* memset(void*, int, size_t); +void* memcpy(void*, const void*, size_t); +#else +Void_t* memset(); +Void_t* memcpy(); +#endif +#endif + +#if USE_MEMCPY + +/* The following macros are only invoked with (2n+1)-multiples of + INTERNAL_SIZE_T units, with a positive integer n. This is exploited + for fast inline execution when n is small. */ + +#define MALLOC_ZERO(charp, nbytes) \ +do { \ + INTERNAL_SIZE_T mzsz = (nbytes); \ + if(mzsz <= 9*sizeof(mzsz)) { \ + INTERNAL_SIZE_T* mz = (INTERNAL_SIZE_T*) (charp); \ + if(mzsz >= 5*sizeof(mzsz)) { *mz++ = 0; \ + *mz++ = 0; \ + if(mzsz >= 7*sizeof(mzsz)) { *mz++ = 0; \ + *mz++ = 0; \ + if(mzsz >= 9*sizeof(mzsz)) { *mz++ = 0; \ + *mz++ = 0; }}} \ + *mz++ = 0; \ + *mz++ = 0; \ + *mz = 0; \ + } else memset((charp), 0, mzsz); \ +} while(0) + +#define MALLOC_COPY(dest,src,nbytes) \ +do { \ + INTERNAL_SIZE_T mcsz = (nbytes); \ + if(mcsz <= 9*sizeof(mcsz)) { \ + INTERNAL_SIZE_T* mcsrc = (INTERNAL_SIZE_T*) (src); \ + INTERNAL_SIZE_T* mcdst = (INTERNAL_SIZE_T*) (dest); \ + if(mcsz >= 5*sizeof(mcsz)) { *mcdst++ = *mcsrc++; \ + *mcdst++ = *mcsrc++; \ + if(mcsz >= 7*sizeof(mcsz)) { *mcdst++ = *mcsrc++; \ + *mcdst++ = *mcsrc++; \ + if(mcsz >= 9*sizeof(mcsz)) { *mcdst++ = *mcsrc++; \ + *mcdst++ = *mcsrc++; }}} \ + *mcdst++ = *mcsrc++; \ + *mcdst++ = *mcsrc++; \ + *mcdst = *mcsrc ; \ + } else memcpy(dest, src, mcsz); \ +} while(0) + +#else /* !USE_MEMCPY */ + +/* Use Duff's device for good zeroing/copying performance. */ + +#define MALLOC_ZERO(charp, nbytes) \ +do { \ + INTERNAL_SIZE_T* mzp = (INTERNAL_SIZE_T*)(charp); \ + long mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T), mcn; \ + if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; } \ + switch (mctmp) { \ + case 0: for(;;) { *mzp++ = 0; \ + case 7: *mzp++ = 0; \ + case 6: *mzp++ = 0; \ + case 5: *mzp++ = 0; \ + case 4: *mzp++ = 0; \ + case 3: *mzp++ = 0; \ + case 2: *mzp++ = 0; \ + case 1: *mzp++ = 0; if(mcn <= 0) break; mcn--; } \ + } \ +} while(0) + +#define MALLOC_COPY(dest,src,nbytes) \ +do { \ + INTERNAL_SIZE_T* mcsrc = (INTERNAL_SIZE_T*) src; \ + INTERNAL_SIZE_T* mcdst = (INTERNAL_SIZE_T*) dest; \ + long mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T), mcn; \ + if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; } \ + switch (mctmp) { \ + case 0: for(;;) { *mcdst++ = *mcsrc++; \ + case 7: *mcdst++ = *mcsrc++; \ + case 6: *mcdst++ = *mcsrc++; \ + case 5: *mcdst++ = *mcsrc++; \ + case 4: *mcdst++ = *mcsrc++; \ + case 3: *mcdst++ = *mcsrc++; \ + case 2: *mcdst++ = *mcsrc++; \ + case 1: *mcdst++ = *mcsrc++; if(mcn <= 0) break; mcn--; } \ + } \ +} while(0) + +#endif + +/* + + Special defines for linux libc + + Except when compiled using these special defines for Linux libc + using weak aliases, this malloc is NOT designed to work in + multithreaded applications. No semaphores or other concurrency + control are provided to ensure that multiple malloc or free calls + don't run at the same time, which could be disasterous. A single + semaphore could be used across malloc, realloc, and free (which is + essentially the effect of the linux weak alias approach). It would + be hard to obtain finer granularity. + +*/ + + +#ifdef INTERNAL_LINUX_C_LIB + +#if __STD_C + +Void_t * __default_morecore_init (ptrdiff_t); +Void_t *(*__morecore)(ptrdiff_t) = __default_morecore_init; + +#else + +Void_t * __default_morecore_init (); +Void_t *(*__morecore)() = __default_morecore_init; + +#endif + +#define MORECORE (*__morecore) +#define MORECORE_FAILURE 0 +#define MORECORE_CLEARS 1 + +#else /* INTERNAL_LINUX_C_LIB */ + +#if __STD_C +extern Void_t* sbrk(ptrdiff_t); +#else +extern Void_t* sbrk(); +#endif + +#ifndef MORECORE +#define MORECORE sbrk +#endif + +#ifndef MORECORE_FAILURE +#define MORECORE_FAILURE -1 +#endif + +#ifndef MORECORE_CLEARS +#define MORECORE_CLEARS 1 +#endif + +#endif /* INTERNAL_LINUX_C_LIB */ + +/* + Define HAVE_MMAP to optionally make malloc() use mmap() to + allocate very large blocks. These will be returned to the + operating system immediately after a free(). +*/ + +#define HAVE_MMAP 0 /* Not available for U-Boot */ + +/* + Define HAVE_MREMAP to make realloc() use mremap() to re-allocate + large blocks. This is currently only possible on Linux with + kernel versions newer than 1.3.77. +*/ + +#undef HAVE_MREMAP /* Not available for U-Boot */ + +/* + + This version of malloc supports the standard SVID/XPG mallinfo + routine that returns a struct containing the same kind of + information you can get from malloc_stats. It should work on + any SVID/XPG compliant system that has a /usr/include/malloc.h + defining struct mallinfo. (If you'd like to install such a thing + yourself, cut out the preliminary declarations as described above + and below and save them in a malloc.h file. But there's no + compelling reason to bother to do this.) + + The main declaration needed is the mallinfo struct that is returned + (by-copy) by mallinfo(). The SVID/XPG malloinfo struct contains a + bunch of fields, most of which are not even meaningful in this + version of malloc. Some of these fields are are instead filled by + mallinfo() with other numbers that might possibly be of interest. + + HAVE_USR_INCLUDE_MALLOC_H should be set if you have a + /usr/include/malloc.h file that includes a declaration of struct + mallinfo. If so, it is included; else an SVID2/XPG2 compliant + version is declared below. These must be precisely the same for + mallinfo() to work. + +*/ + +/* #define HAVE_USR_INCLUDE_MALLOC_H */ + + +/* SVID2/XPG mallinfo structure */ + +struct mallinfo { + int arena; /* total space allocated from system */ + int ordblks; /* number of non-inuse chunks */ + int smblks; /* unused -- always zero */ + int hblks; /* number of mmapped regions */ + int hblkhd; /* total space in mmapped regions */ + int usmblks; /* unused -- always zero */ + int fsmblks; /* unused -- always zero */ + int uordblks; /* total allocated space */ + int fordblks; /* total non-inuse space */ + int keepcost; /* top-most, releasable (via malloc_trim) space */ +}; + +/* SVID2/XPG mallopt options */ + +#define M_MXFAST 1 /* UNUSED in this malloc */ +#define M_NLBLKS 2 /* UNUSED in this malloc */ +#define M_GRAIN 3 /* UNUSED in this malloc */ +#define M_KEEP 4 /* UNUSED in this malloc */ + + +/* mallopt options that actually do something */ + +#define M_TRIM_THRESHOLD -1 +#define M_TOP_PAD -2 +#define M_MMAP_THRESHOLD -3 +#define M_MMAP_MAX -4 + +/* + Access to system page size. To the extent possible, this malloc + manages memory from the system in page-size units. + + The following mechanics for getpagesize were adapted from + bsd/gnu getpagesize.h +*/ + +#define malloc_getpagesize 4096 + + /* Emulation of sbrk for WIN32 All code within the ifdef WIN32 is untested by me. |