Improving Scalability of Multithreaded Dynamic Memory Allocation by Greg Nakhimovsky Listing One % more test1.c #include #include #include main() { char *str; int i; char buf[128]; for (i=0; i<1000000; i++) str = (char *)malloc(8); sprintf(buf, "/usr/proc/bin/pmap -x %ld " "| /bin/egrep 'heap|Kbytes'", getpid()); system(buf); } % cc test1.c % a.out Address Kbytes Resident Shared Private Permissions Mapped File 00022000 15742 15752 - 15752 read/write/exec [ heap ] Listing Two /* Originally written by David Palmer, PTC. Modified by Greg Nakhimovsky, Sun Microsystems. This program helps measure scalability of an MT-hot malloc. The single-threaded (ST) version calls function pound() ncpus times in sequence. The multi-threaded (MT) versions call pound() from the main thread, plus create (ncpus-1) additional bound threads which run the same routine pound() in parallel. In the MT versions, function pound() allocates all of its dynamic memory separately in each thread. Therefore, the memory consumption of this program is approximately ncpus times greater than that of the single-threaded version. Build the ST version: cc -xtarget=ultra -xO4 -DST -o single_threaded mt_hot_malloc_test.c Build the multithreaded version using Solaris malloc(3C): cc -xtarget=ultra -xO4 -DMT -mt -o mt_sys mt_hot_malloc_test.c \ -lpthread Build the MT version using mt_hot malloc: cc -xtarget=ultra -xO4 -DMT -mt -o mt_hot mt_hot_malloc_test.c \ mt_hot_malloc.o -lpthread Run: single_threaded mt_sys mt_hot ncpus is the number of available CPUs (determined dynamically). Function pound() executes a loop each iteration of which allocates many blocks of memory, fills them with data, then frees some of that memory and reallocates it for blocks with different sizes. All this is repeated "iterations" times. */ #include #include #include #include #include #include #define MAX_BLOCKS 80 #define SIZE_BLOCK 80 #define BIG_BLOCK 100 static int iterations; static void *pound(); main(int argc, char *argv[]) { pthread_attr_t attr; pthread_t pthr_ids[64]; int i; int num_main; int ncpus; hrtime_t start, stop; if (argc > 1) iterations = atoi(argv[1]); else iterations = 5000; ncpus = sysconf(_SC_NPROCESSORS_ONLN); printf("Detected %d CPUs\n", ncpus); start = gethrtime(); #ifdef MT if(pthread_attr_init(&attr)) { printf("Error: pthread_attr_init() failed\n"); exit(1); } /* make it a bound thread */ if(pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM)) { printf("Error: pthread_attr_setscope() failed\n"); exit(1); } for( i = 0; i 0; i -= 2) /* Allocate odd numbered blocks */ { blocks[i] = (char *)malloc(SIZE_BLOCK - i); for( j = 0; j < SIZE_BLOCK - i; j++ ) blocks[i][j] = lastc - i; } for( i = MAX_BLOCKS-1; i > 0; i -= 2 ) /* Reallocate odd numbered blocks */ { blocks[i] = (char *)realloc(blocks[i], SIZE_BLOCK + i); for( j = 0; j < SIZE_BLOCK + i; j++ ) /* Fill with data */ blocks[i][j] = '!' + i; } for( i = 1; i <= BIG_BLOCK; i++ ) /* Reallocate one big block */ { blocks[MAX_BLOCKS-1] = (char *)realloc(blocks[MAX_BLOCKS-1],SIZE_BLOCK * i); for( j = 0; j < SIZE_BLOCK * i; j++ ) blocks[MAX_BLOCKS-1][j] = lastc; } for( i = 0; i < MAX_BLOCKS; i++ ) /* Free all blocks */ { free(blocks[i]); blocks[i] = NULL; } for( i = 0; i < MAX_BLOCKS; i++ ) /* Free NULL poin */ free(blocks[i]); } }