diff options
Diffstat (limited to 'libgloss/mips/vr5xxx.S')
-rw-r--r-- | libgloss/mips/vr5xxx.S | 457 |
1 files changed, 457 insertions, 0 deletions
/*
 * vr5xxx.S -- CPU specific support routines
 *
 * Copyright (c) 1999 Cygnus Solutions
 *
 * The authors hereby grant permission to use, copy, modify, distribute,
 * and license this software and its documentation for any purpose, provided
 * that existing copyright notices are retained in all copies and that this
 * notice is included verbatim in any distributions. No written agreement,
 * license, or royalty fee is required for any of the authorized uses.
 * Modifications to this software may be copyrighted by their authors
 * and need not follow the licensing terms described here, provided that
 * the new terms are clearly indicated on the first page of each file where
 * they apply.
 */

/* This file cloned from vr4300.S by dlindsay@cygnus.com
 * and recoded to suit Vr5432 and Vr5000.
 * Should be no worse for Vr43{00,05,10}.
 * Specifically, __cpu_flush() has been changed (a) to allow for the hardware
 * difference (in set associativity) between the Vr5432 and Vr5000,
 * and (b) to flush the optional secondary cache of the Vr5000.
 */

/* Processor Revision Identifier (PRID) Register: Implementation Numbers */
#define IMPL_VR5432		0x54
/* Width mask of the PRID implementation field once shifted down by PR_IMP
 * (the field is 8 bits wide: PRID bits 8..15). */
#define VR5XXX_IMPL_MASK	0xff

/* Cache Constants not determinable dynamically */
#define VR5000_2NDLINE	32		/* secondary cache line size */
#define VR5432_LINE	32		/* I,Dcache line sizes */
#define VR5432_SIZE	(16*1024)	/* I,Dcache half-size */


#ifndef __mips64
	.set mips3
#endif
#ifdef __mips16
/* This file contains 32 bit assembly code.  */
	.set nomips16
#endif

#include "regs.S"

	.text
	.align	2

/*
 * void __cpu_timer_poll (unsigned int ticks)
 *
 * Busy-wait for at least TICKS PClock cycles by polling the CP0 Count
 * register ($9).
 *
 * Taken from "R4300 Preliminary RISC Processor Specification
 * Revision 2.0 January 1995" page 39: "The Count
 * register... increments at a constant rate... at one-half the
 * PClock speed."
 * We can use this fact to provide small polled delays.
 *
 * In:       a0 = (unsigned int) number of PClock ticks to wait for
 * Out:      void
 * Clobbers: a0, v0
 */
	.globl	__cpu_timer_poll
	.ent	__cpu_timer_poll
__cpu_timer_poll:
	.set	noreorder

	# The Vr4300 counter updates at half PClock, so divide by 2 to
	# get counter delta.  bnezl is a branch-likely: the srl in its
	# delay slot only executes when the branch is taken.
	bnezl	a0, 1f		# continue when delta non-zero
	srl	a0, a0, 1	# divide ticks by 2		{DELAY SLOT}
	# Zero ticks requested: perform a quick return to the caller.
	j	ra
	nop			#				{DELAY SLOT}
1:
	mfc0	v0, $9		# C0_COUNT: get current counter value
	nop
	nop
	# We cannot just do the simple test, of adding our delta onto
	# the current value (ignoring overflow) and then checking for
	# equality. The counter is incrementing every two PClocks,
	# which means the counter value can change between
	# instructions, making it hard to sample at the exact value
	# desired.

	# However, we do know that our entry delta value is less than
	# half the number space (since we divide by 2 on entry). This
	# means we can use a difference in signs to indicate timer
	# overflow.
	addu	a0, v0, a0	# a0 = end value (unsigned add, ignore overflow)
	# We now have our end value (which will have been
	# sign-extended to fill the 64bit register value).
2:
	# get current counter value:
	mfc0	v0, $9		# C0_COUNT
	nop
	nop
	# This is an unsigned 32bit subtraction:
	subu	v0, a0, v0	# delta = (end - now)
	bgtzl	v0, 2b		# looping back is most likely
	nop			#				{DELAY SLOT}
	# We have now been delayed (in the foreground) for AT LEAST
	# the required number of counter ticks.
	j	ra		# return to caller
	nop			#				{DELAY SLOT}
	.set	reorder
	.end	__cpu_timer_poll

/*
 * void __cpu_flush (void)
 *
 * Flush the processor caches to memory: write back and invalidate the
 * primary D-cache, then the optional secondary cache (Vr5000 only),
 * then invalidate the primary I-cache.  A Vr5432 (detected through the
 * PRID implementation field) takes a dedicated path with hardwired
 * cache geometry.
 *
 * In:       void
 * Out:      void
 * Clobbers: a0, a1, a2, a3, t0, t1, t2, t3
 */
	.globl	__cpu_flush
	.ent	__cpu_flush
__cpu_flush:
	.set	noreorder
	# NOTE: The Vr4300 and Vr5432 *CANNOT* have any secondary cache.
	# On those, SC (bit 17 of CONFIG register) is hard-wired to 1,
	# except that email from Dennis_Han@el.nec.com says that old
	# versions of the Vr5432 incorrectly hard-wired this bit to 0.
	# The Vr5000 has an optional direct-mapped secondary cache,
	# and the SC bit correctly indicates this.

	# So, for the 4300 and 5432 we want to just
	# flush the primary Data and Instruction caches.
	# For the 5000 it is desired to flush the secondary cache too.
	# There is an operation difference worth noting.
	# The 4300 and 5000 primary caches use VA bit 14 to choose cache set,
	# whereas 5432 primary caches use VA bit 0.

	# This code interprets the relevant Config register bits as
	# much as possible, except for the 5432.
	# The code therefore has some portability.
	# However, the associativity issues mean you should not just assume
	# that this code works anywhere. Also, the secondary cache set
	# size is hardwired, since the 5000 series does not define codes
	# for variant sizes.

	# Note: this version of the code flushes D$ before I$.
	# It is difficult to construct a case where that matters,
	# but it cannot hurt.

	mfc0	a0, C0_PRID	# a0 = Processor Revision register
	nop			# dlindsay: unclear why the nops, but
	nop			# vr4300.S had such so I do too.
	srl	a2, a0, PR_IMP	# want bits 8..15
	# Keep exactly the 8-bit implementation field.  (This mask used to
	# be written 0x255, which is not an 8-bit mask and let other
	# implementation numbers compare equal to IMPL_VR5432.)
	andi	a2, a2, VR5XXX_IMPL_MASK # mask: now a2 = Implementation # field
	li	a1, IMPL_VR5432
	beq	a1, a2, 8f	# use Vr5432-specific flush algorithm
	nop			#				{DELAY SLOT}

	# Non-Vr5432 version of the code.
	# (The distinctions being: CONFIG is truthful about secondary cache,
	# and we act as though the primary Icache and Dcache are direct mapped.)

	mfc0	t0, C0_CONFIG	# t0 = CONFIG register
	nop
	nop
	li	a1, 1		# a1=1, a useful constant

	srl	a2, t0, CR_IC	# want IC field of CONFIG
	andi	a2, a2, 0x7	# mask: now a2= code for Icache size
	add	a2, a2, 12	# +12
	sllv	a2, a1, a2	# a2=primary instruction cache size in bytes

	srl	a3, t0, CR_DC	# DC field of CONFIG
	andi	a3, a3, 0x7	# mask: now a3= code for Dcache size
	add	a3, a3, 12	# +12
	sllv	a3, a1, a3	# a3=primary data cache size in bytes

	li	t2, (1 << CR_IB)	# t2=mask over IB boolean
	and	t2, t2, t0	# test IB field of CONFIG register value
	beqz	t2, 1f		#
	li	a1, 16		# 16 bytes (branch shadow: always loaded.)
	li	a1, 32		# IB non-zero, then 32 bytes
1:

	li	t2, (1 << CR_DB)	# t2=mask over DB boolean
	and	t2, t2, t0	# test DB field of CONFIG register value
	beqz	t2, 2f		#
	li	a0, 16		# 16 bytes (branch shadow: always loaded.)
	li	a0, 32		# DB non-zero, then 32 bytes
2:
	lui	t1, ((K0BASE >> 16) & 0xFFFF)
	ori	t1, t1, (K0BASE & 0xFFFF)

	# At this point,
	# a0 = primary Dcache line size in bytes
	# a1 = primary Icache line size in bytes
	# a2 = primary Icache size in bytes
	# a3 = primary Dcache size in bytes
	# t0 = CONFIG value
	# t1 = a round unmapped cached base address (we are in kernel mode)
	# t2,t3 scratch

	addi	t3, t1, 0	# t3=t1=start address for any cache
	add	t2, t3, a3	# t2=end address+1 of Dcache
	sub	t2, t2, a0	# t2=address of last line in Dcache
3:
	cache	INDEX_WRITEBACK_INVALIDATE_D,0(t3)
	bne	t3, t2, 3b	#
	addu	t3, a0		# (delay slot) increment by Dcache line size


	# Now check CONFIG to see whether there is a secondary cache
	lui	t2, (1 << (CR_SC-16))	# t2=mask over SC boolean
	and	t2, t2, t0	# test SC in CONFIG
	bnez	t2, 6f		# SC set: no secondary cache
	nop			# explicit delay slot (previously the srl
				# below ran here by accident)

	# There is a secondary cache. Find out its sizes.

	srl	t3, t0, CR_SS	# want SS field of CONFIG
	andi	t3, t3, 0x3	# mask: now t3= code for cache size.
	beqz	t3, 4f
	lui	a3, ((512*1024)>>16)	# a3= 512K, code was 0	{DELAY SLOT}
	addu	t3, -1		# decrement code
	beqz	t3, 4f
	lui	a3, ((1024*1024)>>16)	# a3= 1 M, code 1	{DELAY SLOT}
	addu	t3, -1		# decrement code
	beqz	t3, 4f
	lui	a3, ((2*1024*1024)>>16)	# a3= 2 M, code 2	{DELAY SLOT}
	j	6f		# no secondary cache, code 3
	nop			# explicit delay slot (previously the li
				# at label 4 ran here by accident)

4:	# a3 = secondary cache size in bytes
	li	a0, VR5000_2NDLINE	# no codes assigned for other than 32

	# At this point,
	# a0 = secondary cache line size in bytes
	# a1 = primary Icache line size in bytes
	# a2 = primary Icache size in bytes
	# a3 = secondary cache size in bytes
	# t1 = a round unmapped cached base address (we are in kernel mode)
	# t2,t3 scratch

	addi	t3, t1, 0	# t3=t1=start address for any cache
	add	t2, t3, a3	# t2=end address+1 of secondary cache
	sub	t2, t2, a0	# t2=address of last line in secondary cache
5:
	cache	INDEX_WRITEBACK_INVALIDATE_SD,0(t3)
	bne	t3, t2, 5b
	addu	t3, a0		# (delay slot) increment by line size


6:	# Any optional secondary cache done.  Now do I-cache and return.

	# At this point,
	# a1 = primary Icache line size in bytes
	# a2 = primary Icache size in bytes
	# t1 = a round unmapped cached base address (we are in kernel mode)
	# t2,t3 scratch

	add	t2, t1, a2	# t2=end address+1 of Icache
	sub	t2, t2, a1	# t2=address of last line in Icache
7:
	cache	INDEX_INVALIDATE_I,0(t1)
	bne	t1, t2, 7b
	addu	t1, a1		# (delay slot) increment by Icache line size

	j	ra		# return to the caller
	nop			#				{DELAY SLOT}

8:

	# Vr5432 version of the cpu_flush code.
	# (The distinctions being: CONFIG can not be trusted about secondary
	# cache (which does not exist). The primary caches use Virtual
	# Address Bit 0 to control set selection.)

	# Code does not consult CONFIG about cache sizes: knows the
	# hardwired sizes.  Since both I and D have the same size and
	# line size, uses a merged loop.

	li	a0, VR5432_LINE
	li	a1, VR5432_SIZE
	lui	t1, ((K0BASE >> 16) & 0xFFFF)
	ori	t1, t1, (K0BASE & 0xFFFF)

	# a0 = cache line size in bytes
	# a1 = 1/2 cache size in bytes
	# t1 = a round unmapped cached base address (we are in kernel mode)

	add	t2, t1, a1	# t2=end address+1
	sub	t2, t2, a0	# t2=address of last line to visit

9:
	cache	INDEX_WRITEBACK_INVALIDATE_D,0(t1)	# set 0
	cache	INDEX_WRITEBACK_INVALIDATE_D,1(t1)	# set 1
	cache	INDEX_INVALIDATE_I,0(t1)	# set 0
	cache	INDEX_INVALIDATE_I,1(t1)	# set 1
	bne	t1, t2, 9b
	addu	t1, a0		# (delay slot) increment by line size

	j	ra		# return to the caller
	nop			#				{DELAY SLOT}
	.set	reorder
	.end	__cpu_flush

	# NOTE: This variable should *NOT* be addressed relative to
	# the $gp register since this code is executed before $gp is
	# initialised... hence we leave it in the text area. This will
	# cause problems should this routine ever be ROMmed:

	.globl	__buserr_cnt
__buserr_cnt:
	.word	0		# number of bus errors counted by __buserr
	.align	3
__k1_save:			# 8-byte slot: __buserr parks k1 here (sd/ld)
	.word	0
	.word	0
	.align	2

/*
 * __buserr -- exception handler body reached through __exception_code.
 *
 * Reads the ExcCode field of the CP0 Cause register.  For ExcCode 7
 * (data bus error in the MIPS Cause encoding) it increments
 * __buserr_cnt, advances EPC by 4 to skip the offending instruction and
 * returns with eret.  Any other exception is forwarded to the handler
 * code saved in __previous.
 *
 * Uses only k0/k1; k1 is preserved through __k1_save.
 *
 * NOTE(review): EPC is always advanced by 4; a fault taken in a branch
 * delay slot does not appear to be handled specially -- confirm whether
 * that case matters to callers.
 */
	.ent	__buserr
	.globl	__buserr
__buserr:
	.set	noat
	.set	noreorder
	# k0 and k1 available for use:
	mfc0	k0,C0_CAUSE
	nop
	nop
	andi	k0,k0,0x7c	# isolate ExcCode (Cause bits 2..6)
	sub	k0,k0,7 << 2	# zero exactly when ExcCode == 7
	beq	k0,$0,__buserr_do
	nop			#				{DELAY SLOT}
	# Not a bus error: call the previous handler
	la	k0,__previous
	jr	k0
	nop			#				{DELAY SLOT}
	#
__buserr_do:
	# The cause was checked above, so this is a bus error.
	la	k0,__k1_save
	sd	k1,0(k0)	# park k1 while it serves as a pointer
	#
	la	k1,__buserr_cnt
	lw	k0,0(k1)	# increment counter
	addu	k0,1
	sw	k0,0(k1)
	#
	la	k0,__k1_save
	ld	k1,0(k0)	# restore k1
	#
	mfc0	k0,C0_EPC
	nop
	nop
	addu	k0,k0,4		# skip offending instruction
	mtc0	k0,C0_EPC	# update EPC
	nop
	nop
	eret
	# (disabled alternative return sequence kept for reference:)
	#	j	k0
	#	rfe
	.set	reorder
	.set	at
	.end	__buserr

	# Trampoline template.  __default_buserr_handler copies the words
	# between __exception_code and __exception_code_end over the
	# exception vector so that exceptions jump to __buserr.
__exception_code:
	.set	noreorder
	lui	k0,%hi(__buserr)
	daddiu	k0,k0,%lo(__buserr)
	jr	k0
	nop			#				{DELAY SLOT}
	.set	reorder
__exception_code_end:

	.data
	# Copy of the vector contents that were in place before
	# __default_buserr_handler installed the trampoline; same size
	# as the trampoline itself.
__previous:
	.space	(__exception_code_end - __exception_code)
	# This subtracting two addresses is working
	# but is not guaranteed to continue working.
	# The assembler reserves the right to put these
	# two labels into different frags, and then
	# cannot take their difference.

	.text

/*
 * void __default_buserr_handler (void)
 *
 * Attach our simple bus error handler: save the words currently at the
 * general exception vector into __previous, overwrite them with the
 * __exception_code trampoline, and clear __buserr_cnt.
 *
 * The vector base is 0x80000000 when SR_BEV is clear in the Status
 * register and 0xbfc00200 when it is set; the vector lives at
 * base + 0x180.
 *
 * In:       void
 * Out:      void
 * Clobbers: a0, a1, a2, a3, v0
 *
 * NOTE(review): no cache flush is performed here after the vector is
 * rewritten -- presumably callers run __cpu_flush themselves; confirm.
 */
	.ent	__default_buserr_handler
	.globl	__default_buserr_handler
__default_buserr_handler:
	.set	noreorder
	mfc0	a0,C0_SR
	nop
	li	a1,SR_BEV
	and	a1,a1,a0
	beq	a1,$0,baseaddr
	lui	a0,0x8000	# delay slot: base when BEV is clear
	lui	a0,0xbfc0	# BEV set: bootstrap vectors
	daddiu	a0,a0,0x0200
baseaddr:
	daddiu	a0,a0,0x0180
	# a0 = base vector table address
	la	a1,__exception_code_end
	la	a2,__exception_code
	subu	a1,a1,a2	# a1 = trampoline length in bytes
	la	a3,__previous
	# there must be a better way of doing this????
copyloop:
	lw	v0,0(a0)	# save the existing vector word ...
	sw	v0,0(a3)	# ... into __previous
	lw	v0,0(a2)	# fetch the trampoline word ...
	sw	v0,0(a0)	# ... and install it in the vector
	daddiu	a0,a0,4
	daddiu	a2,a2,4
	daddiu	a3,a3,4
	subu	a1,a1,4
	bne	a1,$0,copyloop
	nop			#				{DELAY SLOT}
	la	a0,__buserr_cnt
	sw	$0,0(a0)	# start counting from zero
	j	ra
	nop			#				{DELAY SLOT}
	.set	reorder
	.end	__default_buserr_handler

/*
 * void __restore_buserr_handler (void)
 *
 * Restore original (monitor) bus error handler: copy the words saved in
 * __previous back over the exception vector (same vector address
 * selection as __default_buserr_handler).
 *
 * In:       void
 * Out:      void
 * Clobbers: a0, a1, a3, v0
 */
	.ent	__restore_buserr_handler
	.globl	__restore_buserr_handler
__restore_buserr_handler:
	.set	noreorder
	mfc0	a0,C0_SR
	nop
	li	a1,SR_BEV
	and	a1,a1,a0
	beq	a1,$0,res_baseaddr
	lui	a0,0x8000	# delay slot: base when BEV is clear
	lui	a0,0xbfc0	# BEV set: bootstrap vectors
	daddiu	a0,a0,0x0200
res_baseaddr:
	daddiu	a0,a0,0x0180
	# a0 = base vector table address
	la	a1,__exception_code_end
	la	a3,__exception_code
	subu	a1,a1,a3	# a1 = number of bytes to copy back
	la	a3,__previous
	# there must be a better way of doing this????
res_copyloop:
	lw	v0,0(a3)
	sw	v0,0(a0)
	daddiu	a0,a0,4
	daddiu	a3,a3,4
	subu	a1,a1,4
	bne	a1,$0,res_copyloop
	nop			#				{DELAY SLOT}
	j	ra
	nop			#				{DELAY SLOT}
	.set	reorder
	.end	__restore_buserr_handler

/*
 * unsigned int __buserr_count (void)
 *
 * Return the current value of __buserr_cnt, the number of bus errors
 * counted by __buserr since __default_buserr_handler last reset it.
 *
 * In:       void
 * Out:      v0 = (unsigned int) __buserr_cnt
 * Clobbers: v0
 */
	.ent	__buserr_count
	.globl	__buserr_count
__buserr_count:
	.set	noreorder
	la	v0,__buserr_cnt
	lw	v0,0(v0)
	j	ra
	nop			#				{DELAY SLOT}
	.set	reorder
	.end	__buserr_count

/* EOF vr5xxx.S */