/****************************************************************** * CS61C CACHE LAB * * Using this program, on as many different kinds of computers as * * possible, investigate these cache parameters: * * -- total cache size * * -- cache width * * -- cache replacement policy * * * * Make sure you compile this code without optimizations on * * otherwise, the overhead calculations will not be correct * * * * ChangeLog: * * 2004.03.20: * * Heavily rewritten by Jeremy Huddleston * * Changed CACHE* names to ARRAY* names * * Using (1 << X) notation for sizes of 2^X * * #ifndef around CLK_TCK since it's set by POSIX headers * * broke up main() into sub functions * * do everything except output using clock ticks * ******************************************************************/ #include #include #include #include /* You can undefine this if you want to check the r or w speeds, but it needs some * tweaking to fix the averaging... */ #define RW_ONLY /* Note that (1 << X) is 2^X ... the MIN/MAX macros below MUST be powers of 2. */ #define ARRAY_MIN (1 << 10) /* smallest cache (in words) */ #define ARRAY_MAX (1 << 20) /* largest cache */ #define STRIDE_MIN (1 << 0) /* smallest stride (in words) */ #define STRIDE_MAX (1 << 10) /* largest stride */ #define SAMPLE 10000000 /* Approximately how many accesses to the array do we want * to make in our analysis. Larger generates better * statistics. Adjust for speed on your system. This * number is by no means exact. We always do at-least this */ #ifndef CLK_TCK /* POSIX systems already provide this */ #define CLK_TCK 100 /* number clock ticks per micro-second (ie. the MHz of the core)*/ #endif /* This macro creates a function which iterates through the passed array of size asize striding * by stride. We return the number of clock ticks it takes to perform the operation. */ #define ARRAY_ACCESS_FUN(func_name, vars, operation) \ static unsigned func_name(unsigned array[], unsigned asize, unsigned stride, unsigned steps) { \ vars; \ unsigned register i, j; \ unsigned utime; \ struct tms rusage; \ \ /* Initialize the time count */ \ times (&rusage); \ utime = rusage.tms_utime; \ \ /* Iterate through our array */ \ for(j=0; j < steps; j++) { \ for(i=0; i < asize; i += stride) { \ operation; \ } \ } \ \ /* Calculate the final time */ \ times (&rusage);\ return (rusage.tms_utime - utime); \ } /* Make some functions using that macro */ ARRAY_ACCESS_FUN(rArray, unsigned register temp, temp = array[i]) ARRAY_ACCESS_FUN(rArrayOverhead, unsigned register temp, temp = i) ARRAY_ACCESS_FUN(wArray, , array[i] = i) ARRAY_ACCESS_FUN(wArrayOverhead, , ) ARRAY_ACCESS_FUN(rwArray, , array[i] = array[i]) ARRAY_ACCESS_FUN(rwArrayOverhead, , ) /* array going to stride through */ unsigned array[ARRAY_MAX]; /* And... begin... */ int main () { int asize, stride, step, steps, accesses; unsigned rtime, wtime, rwtime; double rsec, wsec, rwsec; printf("CLK_TCK: %d\n", CLK_TCK); for (asize = ARRAY_MIN; asize <= ARRAY_MAX; asize = asize << 1) { /* Print an extra \n here to separate the groups with the same array size */ printf("\n"); for (stride = STRIDE_MIN; stride <= STRIDE_MAX && stride <= asize; stride = stride << 1) { /* How many total accesses do we do on each iteration */ accesses = asize / stride; steps = 1 + ( SAMPLE / accesses ); #ifndef RW_ONLY /**************** FIRST READ ****************/ rtime = 0; /* Do it once to initialize the cache before we start timing */ rArray(array, asize, stride, 1); /* We want to average the overhead too... */ rtime += rArray(array, asize, stride, steps) - rArrayOverhead(array, asize, stride, steps); /* Calculate our time */ rsec = (double) rtime * 1e9 / ((double)CLK_TCK * steps * accesses); /**************** NOW WRITE ****************/ wtime = 0; /* Do it once to initialize the cache before we start timing */ wArray(array, asize, stride, 1); /* We want to average the overhead too... */ wtime += wArray(array, asize, stride, steps) - wArrayOverhead(array, asize, stride, steps); /* Calculate our time */ wsec = (double) rwtime * 1e9 / ((double)CLK_TCK * steps * accesses); #endif /**************** NOW READ/WRITE ****************/ rwtime = 0; /* Do it once to initialize the cache before we start timing */ rwArray(array, asize, stride, 1); /* We want to average the overhead too... */ rwtime += rwArray(array, asize, stride, steps) - rwArrayOverhead(array, asize, stride, steps); /* Calculate our time */ rwsec = (double) rwtime * 1e9 / ((double)CLK_TCK * steps * accesses); /**************** PRINT RESULTS ****************/ #ifdef RW_ONLY printf("Size (bytes): %7d Stride (bytes): %4d read+write: %4.0f ns\n", asize * sizeof (int), stride * sizeof (int), rwsec); #else printf("Size (bytes): %7d Stride (bytes): %4d read: %4.0f ns write: %4.0f ns read+write: %4.0f ns\n", asize * sizeof (int), stride * sizeof (int), rsec, wsec, rwsec); #endif } } }