flush_disable_cache.txt

(6 KB) Pobierz
/***********************************************************************

    Purpose:
       Flush the L1 data cache.

This algorithm flushes the data cache by allocating every block in the cache,
using the dcbz instruction (one dcbz per block).  The allocated blocks can
correspond to any unused area of memory; in this case the area indicated
by the stack pointer.  (Obviously, this area of memory must be "nice," i.e. not
cache-inhibited or write-through.)  Because every block in the cache is allocated,
the cache's LRU (least-recently-used) algorithm causes any modified blocks to be 
flushed out to memory.  The routine finishes by flash-invalidating the data cache
and disabling it.

Using dcbz is preferable to using a load instruction because no data actually
needs to be loaded in from memory.  Note that, because the algorithm ends
with a flash-invalidate of the cache, none of the allocated memory region
is necessarily modified in memory (unless coherency mechanisms are in place).

This algorithm does not flush the instruction cache because the instruction 
cache can not contain modified data.  If the instruction cache is to be disabled
along with the data cache, always remember to flash invalidate it (and the data
cache, if applicable) whenever it is re-enabled to ensure stale information is
not accidentally retained. 
        
***************************************************************************/

/*
 * Numeric aliases for assemblers that do not recognise the rN register
 * names.  Each alias expands to the bare register (or SPR) number.
 */
#define r00   0
#define r0    0
#define r1    1         /* used as the stack pointer by the code below */
#define r2    2
#define r12   12
#define r13   13
#define r14   14
#define r15   15
#define r16   16
#define hid0  1008      /* SPR number passed to mfspr/mtspr for HID0 */

/* Here, the USER needs to #define the appropriate variable for the
   processor they're using.  Options:    MPC603, MPC603e, MPC603ev,
   MPC604, MPC604e, MPC740, MPC750.

   The selection may be made by editing this file or on the assembler /
   preprocessor command line (e.g. -DMPC750).  MPC603e is used as the
   default ONLY when no processor has been selected; defining it
   unconditionally would make the 603e branch of the geometry selection
   win over an externally selected MPC604, MPC604e, MPC740 or MPC750.
*/
#if !defined(MPC603)  && !defined(MPC603e) && !defined(MPC603ev) && \
    !defined(MPC604)  && !defined(MPC604e) && \
    !defined(MPC740)  && !defined(MPC750)
#define MPC603e 1
#endif


/*
 * L1 data cache geometry for the selected processor.
 *
 *   BLOCK_SIZE  bytes in one cache block
 *   NUM_SETS    number of sets in the L1 cache
 *   NUM_WAYS    associativity of the cache, i.e. blocks per set
 *
 * The first matching processor macro wins; if none is defined the
 * build stops with an error.
 */

#define BLOCK_SIZE  32

#if defined(MPC603)
#  define NUM_SETS  128
#  define NUM_WAYS  2
#elif defined(MPC603e) || defined(MPC603ev) || defined(MPC604)
#  define NUM_SETS  128
#  define NUM_WAYS  4
#elif defined(MPC604e)
#  define NUM_SETS  256
#  define NUM_WAYS  4
#elif defined(MPC740) || defined(MPC750)
#  define NUM_SETS  128
#  define NUM_WAYS  8
#else
#  error Processor type not defined
#endif

        /* ***************************** IMPORTANT  ****************************
           Cache flushing should be done with exceptions disabled so that the LRU is
           not disturbed by execution of an exception handler.  Please make sure you 
           have exceptions disabled before executing this code.

           Also, the memory being used for dcbz should not be cache-inhibited or 
           write-through.

           The cache **must be on** before running this code or an alignment 
           exception will be generated.  Also, r1 is assumed to point to the stack.
           Make sure you have sufficient stack space....
        
         ************************************************************************/
                
data_cache_flush_and_disable:   
        /* Flush the L1 data cache, then flash-invalidate and disable it.

           Entry requirements (see the IMPORTANT note in this file):
             - exceptions disabled, data cache enabled
             - r1 points to a stack in cacheable, copy-back memory with at
               least 16 + (NUM_SETS*NUM_WAYS + 1)*BLOCK_SIZE bytes free

           Register usage (r13-r16 are saved on entry and restored on exit):
             r13  address of the block currently being dcbz'ed
                  (also used as a scratch mask on the 740/750)
             r14  scratch: NUM_WAYS, later the HID0 clear mask
             r15  scratch: dcbz loop count, later the HID0 value
             r16  size in bytes of the dcbz scratch area on the stack

           The CTR register is loaded with NUM_SETS*NUM_WAYS and the dcbz
           address is incremented by BLOCK_SIZE on every iteration. */

        /* BLOCK_SIZE = number of bytes in one cache block */
        /* NUM_WAYS   = number of blocks in one cache set */
        /* NUM_SETS   = number of sets in the cache */

        /* For this algorithm to work, it relies on being able to zero out 
        an area of memory equal to the size of the cache.  Additionally, since
        dcbz zeros out the whole 32-byte-aligned block containing the address,
        one extra BLOCK_SIZE of slack is allocated so that valid data above
        the scratch area is not inadvertently destroyed */

        /* Create an area on the stack where all regs touched can be saved */   
        subi  r1,r1,0x0010   
        stw   r13,0x0000(r1)   
        stw   r14,0x0004(r1)   
        stw   r15,0x0008(r1)   
        stw   r16,0x000C(r1)   

        li    r13,NUM_SETS      
        li    r14,NUM_WAYS      
        mullw r15,r13,r14               /* number of times through dcbz loop */
        mulli r16,r15,BLOCK_SIZE        /* bytes needed to cover the whole cache */
        addi  r16,r16,BLOCK_SIZE        /* plus one block of alignment slack */
        subf  r1,r16,r1                 /* stack area allocated for dcbz region */
        mr    r13,r1                    /* make a copy of the stack pointer location */

        mtctr r15                       /* initialize ctr for use in bdnz */


        /* if the dcbz causes an allocation to a block which has been previously marked
        modified AND the address doesn't match the tag address, that LRU modified data
        will be cast out, effectively having been flushed */

#if defined(MPC740) || defined(MPC750)
        /* on a 740/750 (same cache geometry, see NUM_WAYS selection), we need
           to set the DCFA bit to assist the flush */
        mfspr   r15, hid0
        ori     r15,r15,0x0040
        mtspr   hid0, r15
#endif          

loop:                   
        dcbz  r0,r13                    /* allocate (zero) one block; r0 operand means base 0 */
        addi  r13,r13,BLOCK_SIZE
        bdnz  loop              

        sync                    /* let all castouts caused by the loop complete
                                   before HID0 is touched */

#if defined(MPC740) || defined(MPC750)
        /* clear DCFA bit */
        mfspr   r15,hid0                
        addis   r13,0,0xffff    /* do this in 2 steps because some compilers gripe if you
                                   try to do it in one step using addi and relying on sign extension */
        ori     r13,r13,0xffbf
        and     r15,r15,r13
        mtspr   hid0,r15
#endif
        
        /* now that the entire cache has been flushed,
           invalidate and disable it */
        mfspr   r15,hid0
        ori     r15,r15,0x0400  /* set flash invalidate bit */
        mtspr   hid0, r15

        addis   r14,0,0xffff
        ori     r14,r14,0xbbff  /* disable cache, clear invalidate bit */
        and     r15,r15,r14     
        sync                    /* sync required prior to modifying hid0 DCE */
        mtspr   hid0,r15
                        
        /* r16 still holds the size of the dcbz scratch area.  This must be the
           register-register form "add": "addi" would treat the last operand
           as an immediate and leave r1 pointing at garbage. */
        add   r1,r16,r1       /* remove the scratch space from the stack */
        lwz   r13,0x0000(r1)    
        lwz   r14,0x0004(r1)    
        lwz   r15,0x0008(r1)    
        lwz   r16,0x000C(r1)    
        addi  r1,r1,0x0010    /* pop saved registers off of the stack */

        /* exit this code.... depending on how you use this piece of code, a blr or b
           or something else might be more appropriate */
        rfi                     

Zgłoś jeśli naruszono regulamin