/***********************************************************************
 Purpose: Flush the L1 data cache, then flash-invalidate and disable it.

 This algorithm flushes the data cache by allocating every block in the
 cache, using the dcbz instruction (one dcbz per block). The allocated
 blocks can correspond to any unused area of memory; in this case an
 area carved out below the stack pointer. (Obviously, this area of
 memory must be "nice," i.e. not cache-inhibited or write-through.)
 Because every block in the cache is allocated, the cache's LRU
 (least-recently-used) algorithm causes any modified blocks to be
 flushed out to memory. The finish flash-invalidates the data cache and
 disables it.

 Using dcbz is preferable to using a load instruction because no data
 actually needs to be loaded in from memory. Note that, because the
 algorithm ends with a flash-invalidate of the cache, none of the
 allocated memory region is necessarily modified in memory (unless
 coherency mechanisms are in place).

 This algorithm does not flush the instruction cache because the
 instruction cache can not contain modified data. If the instruction
 cache is to be disabled along with the data cache, always remember to
 flash invalidate it (and the data cache, if applicable) whenever it is
 re-enabled to ensure stale information is not accidentally retained.

 Register usage:
   In:       r1 = stack pointer (must have room for the cache size
                  plus one block plus 16 bytes)
   Saved:    r13, r14, r15, r16 (stored on the stack and restored)
   Clobbers: CTR, HID0[DCE, DCFI] (and HID0[DCFA] is set then cleared
             on MPC740/MPC750)
   Exit:     rfi (see the note at the end of the routine)
***************************************************************************/

/* in case your compiler doesn't understand r numbers.... */
#define r00 0
#define r0 0
#define r1 1
#define r2 2
#define r12 12
#define r13 13
#define r14 14
#define r15 15
#define r16 16
#define hid0 1008

/* HID0 bit masks used by this routine */
#define DCFLUSH_HID0_DCE      0x4000    /* data cache enable */
#define DCFLUSH_HID0_DCFI     0x0400    /* data cache flash invalidate */
#define DCFLUSH_HID0_DCFA     0x0040    /* data cache flush assist (740/750) */
#define DCFLUSH_HID0_DCE_DCFI 0x4400    /* DCE and DCFI together */

/*
 Here, the USER needs to #define the appropriate variable for the
 processor they're using.
 Options: MPC603, MPC603e, MPC603ev, MPC604, MPC604e, MPC740, MPC750.
 We'll define MPC603e as a default, change if necessary.
*/
#define MPC603e 1

/*
 NUM_SETS   is the number of sets in the L1 cache
 NUM_WAYS   is the associativity of the cache, which is also the number
            of blocks per set
 BLOCK_SIZE is the size of a cache block in bytes (must be a power of
            two; it is used to build an alignment mask below)
*/
#define BLOCK_SIZE 32

#if defined(MPC603)
#define NUM_SETS 128
#define NUM_WAYS 2
#elif defined(MPC603e) || defined(MPC603ev)
#define NUM_SETS 128
#define NUM_WAYS 4
#elif defined(MPC604)
#define NUM_SETS 128
#define NUM_WAYS 4
#elif defined(MPC604e)
#define NUM_SETS 256
#define NUM_WAYS 4
#elif defined(MPC740) || defined(MPC750)
#define NUM_SETS 128
#define NUM_WAYS 8
#else
#error Processor type not defined
#endif

/* ***************************** IMPORTANT ****************************
 Cache flushing should be done with exceptions disabled so that the LRU
 is not disturbed by execution of an exception handler. Please make
 sure you have exceptions disabled before executing this code.

 Also, the memory being used for dcbz should not be cache-inhibited or
 write-through. The cache **must be on** before running this code or an
 alignment exception will be generated.

 Also, r1 is assumed to point to the stack. Make sure you have
 sufficient stack space....
************************************************************************/

data_cache_flush_and_disable:

/*
 The CTR register is loaded with NUM_SETS*NUM_WAYS (the number of blocks
 in the cache) and the dcbz address advances by BLOCK_SIZE on every
 iteration.

 For this algorithm to work, it relies on being able to zero out an
 area of memory equal to the size of the cache. Additionally, since
 dcbz zeros out the whole BLOCK_SIZE-aligned block containing its
 effective address, one extra block is reserved and the start pointer is
 rounded up to a block boundary so that neither the saved registers
 above the region nor anything below the stack pointer is touched.
*/

        /* Create an area on the stack where all regs touched can be saved */
        subi    r1,r1,0x0010
        stw     r13,0x0000(r1)
        stw     r14,0x0004(r1)
        stw     r15,0x0008(r1)
        stw     r16,0x000C(r1)

        li      r13,NUM_SETS
        li      r14,NUM_WAYS
        mullw   r15,r13,r14             /* number of times through dcbz loop */
        mulli   r16,r15,BLOCK_SIZE      /* r16 = cache size in bytes */
        addi    r16,r16,BLOCK_SIZE      /* plus one block of alignment slack */
        subf    r1,r16,r1               /* stack area allocated for dcbz region */

        /* r13 = first block boundary at or above the new stack pointer,
           so the dcbz region lies entirely inside the area just reserved */
        addi    r13,r1,BLOCK_SIZE-1
        li      r14,-BLOCK_SIZE         /* mask that clears the block-offset bits */
        and     r13,r13,r14

        mtctr   r15                     /* initialize ctr for use in bdnz */

/*
 if the dcbz causes an allocation to a block which has been previously
 marked modified AND the address doesn't match the tag address, that LRU
 modified data will be cast out, effectively having been flushed
*/

#if defined(MPC740) || defined(MPC750)
        /* on a 740/750 (same cache geometry selected above), set the
           DCFA bit to assist the flush */
        mfspr   r15,hid0
        ori     r15,r15,DCFLUSH_HID0_DCFA
        mtspr   hid0,r15
#endif

loop:
        dcbz    r0,r13                  /* rA = 0: effective address is r13 */
        addi    r13,r13,BLOCK_SIZE
        bdnz    loop

#if defined(MPC740) || defined(MPC750)
        /* clear DCFA bit; andc avoids having to build a 32-bit AND mask */
        mfspr   r15,hid0
        li      r13,DCFLUSH_HID0_DCFA
        andc    r15,r15,r13
        mtspr   hid0,r15
#endif

        /* now that the entire cache has been flushed, invalidate and
           disable it */
        mfspr   r15,hid0
        ori     r15,r15,DCFLUSH_HID0_DCFI   /* set flash invalidate bit */
        sync                            /* let castouts from the loop finish
                                           before the cache contents are dropped */
        mtspr   hid0,r15

        li      r14,DCFLUSH_HID0_DCE_DCFI
        andc    r15,r15,r14             /* disable cache, clear invalidate bit */
        sync                            /* sync required prior to modifying hid0 DCE */
        mtspr   hid0,r15

        /* remove the scratch space from the stack. This must be the
           register form "add": "addi" would treat its last operand as
           an immediate and corrupt the stack pointer. */
        add     r1,r1,r16

        lwz     r13,0x0000(r1)
        lwz     r14,0x0004(r1)
        lwz     r15,0x0008(r1)
        lwz     r16,0x000C(r1)
        addi    r1,r1,0x0010            /* pop saved registers off of the stack */

        /* exit this code.... depending on how you use this piece of
           code, a blr or b or something else might be more appropriate */
        rfi
/* Bulow */