diff options
author | Andre Przywara <osp@andrep.de> | 2022-01-20 04:14:54 +0300 |
---|---|---|
committer | Andre Przywara <osp@andrep.de> | 2022-03-06 03:47:36 +0300 |
commit | 0b49f88acf80ca57d3b82a49e21bbd26ac59a469 (patch) | |
tree | 71f89fa7e9bacf1699edb1d39b886ef463660db8 | |
parent | d5f4fd1e12e9b239b2aca0ab56c341476378aef8 (diff) |
fel: thunks: Fix fel-to-spl-thunk to be ARMv5TE compatible
Currently the thunk we upload into the SRAM is using DSB and ISB
instructions, which were introduced in ARMv7. Also it relies on
movw/movt pairs, which became available in ARMv6T2.
The Allwinner F1Cx00 SoCs are using an ARMv5TE compliant core, so they
do not implement these instructions.
Change the code to be ARMv5TE compliant, so it can run on all relevant
Allwinner ARM cores:
- One movw is only used to load a mask for testing two bits; replace it
with a tst/tsteq sequence, which avoids loading the mask at all.
- The other movw/movt pairs get replaced with ldr instructions that load
from literal storage at the end of the code (from Icenowy).
- The DSB and ISB get replaced with their CP15 MCR counterparts. Those
are deprecated in ARMv7, but still work when the CP15BEN bit is set
in SCTLR. We check for this in fel.c (from Icenowy). The CP15 ISB is
not implemented on the ARM926, so make the ISB conditional. A simple
branch takes care of the desired pipeline flush for the old SoC.
Also remove the rather pointless Ruby prolog that generates the header
file. We have a less awkward version of this in the Makefile, and need
that for the other thunks there anyway, so it's just duplicated code.
Embedding a header generator in Ruby in an assembly file is a cute
gimmick, but serves no purpose anymore.
This is based on work by Icenowy, who put a similar solution in a
separate file.
Originally-by: Icenowy Zheng <icenowy@aosc.io>
Signed-off-by: Andre Przywara <osp@andrep.de>
-rw-r--r-- | thunks/Makefile | 2 | ||||
-rw-r--r-- | thunks/fel-to-spl-thunk.S | 95 | ||||
-rw-r--r-- | thunks/fel-to-spl-thunk.h | 91 |
3 files changed, 85 insertions, 103 deletions
diff --git a/thunks/Makefile b/thunks/Makefile index 9ba6635..891ca1f 100644 --- a/thunks/Makefile +++ b/thunks/Makefile @@ -27,7 +27,7 @@ AWK_O_TO_H := LC_ALL=C awk -f objdump_to_h.awk # The SPL thunk requires a different output format. The "style" variable for # awk controls this, and causes the htole32() conversion to be omitted. fel-to-spl-thunk.h: fel-to-spl-thunk.S FORCE - $(AS) -o $(subst .S,.o,$<) $< + $(AS) -o $(subst .S,.o,$<) -march=armv5te $< $(OBJDUMP) -d $(subst .S,.o,$<) | $(AWK_O_TO_H) -v style=old > $@ $(THUNKS): %.h: %.S FORCE diff --git a/thunks/fel-to-spl-thunk.S b/thunks/fel-to-spl-thunk.S index 987a02a..577dd73 100644 --- a/thunks/fel-to-spl-thunk.S +++ b/thunks/fel-to-spl-thunk.S @@ -21,45 +21,7 @@ * DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ -/* Usage instructions: "ruby -x fel-to-spl-thunk.S > fel-to-spl-thunk.h" */ -/*************************************************************************/ - -/* Open a comment for gas. - - Do not close the comment until after the Ruby code terminator (__END__). - Write the '*' '/' sequence of characters as "\x2a/" in string literals to - avoid doing so. 
- -#!/usr/bin/env ruby - -def tool_exists(tool_name) - `which #{tool_name} > /dev/null 2>&1` - return $?.to_i == 0 -end - -toolchains = [ - "arm-none-eabi-", - "arm-linux-gnueabihf-", - "arm-none-linux-gnueabi-", - "armv7a-hardfloat-linux-gnueabi-", -] - -toolchain = toolchains.find { |toolchain| tool_exists("#{toolchain}as") } -abort "Can't find any ARM crosscompiler\n" unless toolchain - -system("#{toolchain}as -o #{$PROGRAM_NAME}.o #{$PROGRAM_NAME}") -exit($?.to_i) if $?.to_i != 0 - -`#{toolchain}objdump -d #{$PROGRAM_NAME}.o`.each_line {|l| - next unless l =~ /(\h+)\:\s+(\h+)\s+(\S+)\s+([^;]*)/ - printf("\t0x%s, /* %8s: %-10s %-28s \x2a/\n", $2, $1, $3, $4.strip) -} - -__END__ -*/ - -/*************************************************************************/ +.arm BUF1 .req r0 BUF2 .req r1 @@ -75,14 +37,7 @@ entry_point: b setup_stack stack_begin: - nop - nop - nop - nop - nop - nop - nop - nop + .space 32, 0xff stack_end: nop @@ -118,15 +73,14 @@ setup_stack: /* Save the original SP, LR and CPSR to stack */ /* Check if the instructions or data cache is enabled */ mrc p15, 0, TMP1, c1, c0, 0 - movw TMP2, #((1 << 12) | (1 << 2)) - tst TMP1, TMP2 + tst TMP1, #(1 << 2) + tsteq TMP1, #(1 << 12) bne cache_is_unsupported bl swap_all_buffers verify_checksum: - movw CHECKSUM, #0x6c39 - movt CHECKSUM, #0x5f0a + ldr CHECKSUM, checksum_seed mov BUF1, SPL_ADDR ldr FULLSIZE, [BUF1, #16] check_next_word: @@ -140,13 +94,27 @@ check_next_word: bne checksum_is_bad /* Change 'eGON.BT0' -> 'eGON.FEL' */ - movw TMP1, (('F' << 8) + '.') - movt TMP1, (('L' << 8) + 'E') + ldr TMP1, egon_fel_str str TMP1, [SPL_ADDR, #8] - /* Call the SPL code */ - dsb - isb + /* + * Call the SPL code, but before that make sure the CPU sees the + * recently uploaded code. This requires a DSB and ISB. + * The "dsb" and "isb" *instructions* are not available in ARMv5TE, + * but at least for DSB we can use the CP15 register encoding. 
This + * works for ARMv7 and v8 as well, because we have checked our SCTLR + * before (in fel.c), so we know that CP15BEN is set. + * The ARM926 core does not implement ISB, instead the TRM recommends + * just a branch to achieve the same "flush the pipeline" effect. + * As just this is not sufficient for later cores, check the MIDR + * register, and do the ISB only for ARMv6 or later. + * The input register for the CP15 instruction is ignored. + */ + mcr p15, 0, TMP1, c7, c10, 4 /* CP15DSB */ + mrc p15, 0, TMP1, c0, c0, 0 /* read MIDR */ + and TMP1, TMP1, #(0xf << 16) /* architecture */ + cmp TMP1, #(0x6 << 16) /* ARMv5TEJ */ + mcrgt p15, 0, TMP1, c7, c5, 4 /* CP15ISB, if > ARMv5TEJ */ blx SPL_ADDR /* Return back to FEL */ @@ -154,15 +122,13 @@ check_next_word: cache_is_unsupported: /* Bail out if cache is enabled and change 'eGON.BT0' -> 'eGON.???' */ - movw TMP1, (('?' << 8) + '.') - movt TMP1, (('?' << 8) + '?') + ldr TMP1, cache_enabled_str str TMP1, [SPL_ADDR, #8] b return_to_fel_noswap checksum_is_bad: /* The checksum test failed, so change 'eGON.BT0' -> 'eGON.BAD' */ - movw TMP1, (('B' << 8) + '.') - movt TMP1, (('D' << 8) + 'A') + ldr TMP1, checksum_failed_str str TMP1, [SPL_ADDR, #8] return_to_fel: @@ -173,6 +139,15 @@ return_to_fel_noswap: ldr sp, [sp] bx lr +checksum_seed: + .word 0x5f0a6c39 +egon_fel_str: + .ascii ".FEL" +cache_enabled_str: + .ascii ".???" 
+checksum_failed_str: + .ascii ".BAD" + appended_data: /* * The appended data uses the following format: diff --git a/thunks/fel-to-spl-thunk.h b/thunks/fel-to-spl-thunk.h index 286ba53..47fe276 100644 --- a/thunks/fel-to-spl-thunk.h +++ b/thunks/fel-to-spl-thunk.h @@ -1,18 +1,18 @@ /* <entry_point>: */ 0xea000015, /* 0: b 5c <setup_stack> */ /* <stack_begin>: */ - 0xe1a00000, /* 4: nop */ - 0xe1a00000, /* 8: nop */ - 0xe1a00000, /* c: nop */ - 0xe1a00000, /* 10: nop */ - 0xe1a00000, /* 14: nop */ - 0xe1a00000, /* 18: nop */ - 0xe1a00000, /* 1c: nop */ - 0xe1a00000, /* 20: nop */ + 0xffffffff, /* 4: .word 0xffffffff */ + 0xffffffff, /* 8: .word 0xffffffff */ + 0xffffffff, /* c: .word 0xffffffff */ + 0xffffffff, /* 10: .word 0xffffffff */ + 0xffffffff, /* 14: .word 0xffffffff */ + 0xffffffff, /* 18: .word 0xffffffff */ + 0xffffffff, /* 1c: .word 0xffffffff */ + 0xffffffff, /* 20: .word 0xffffffff */ /* <stack_end>: */ 0xe1a00000, /* 24: nop */ /* <swap_all_buffers>: */ - 0xe28f40dc, /* 28: add r4, pc, #220 */ + 0xe28f40e8, /* 28: add r4, pc, #232 */ /* <swap_next_buffer>: */ 0xe4940004, /* 2c: ldr r0, [r4], #4 */ 0xe4941004, /* 30: ldr r1, [r4], #4 */ @@ -28,7 +28,7 @@ 0x1afffff9, /* 54: bne 40 <swap_next_word> */ 0xeafffff3, /* 58: b 2c <swap_next_buffer> */ /* <setup_stack>: */ - 0xe59f80a4, /* 5c: ldr r8, [pc, #164] */ + 0xe59f80b0, /* 5c: ldr r8, [pc, #176] */ 0xe24f0044, /* 60: sub r0, pc, #68 */ 0xe520d004, /* 64: str sp, [r0, #-4]! 
*/ 0xe1a0d000, /* 68: mov sp, r0 */ @@ -37,43 +37,50 @@ 0xe38220c0, /* 74: orr r2, r2, #192 */ 0xe121f002, /* 78: msr CPSR_c, r2 */ 0xee112f10, /* 7c: mrc 15, 0, r2, cr1, cr0, {0} */ - 0xe3013004, /* 80: movw r3, #4100 */ - 0xe1120003, /* 84: tst r2, r3 */ - 0x1a000012, /* 88: bne d8 <cache_is_unsupported> */ + 0xe3120004, /* 80: tst r2, #4 */ + 0x03120a01, /* 84: tsteq r2, #4096 */ + 0x1a000013, /* 88: bne dc <cache_is_unsupported> */ 0xebffffe5, /* 8c: bl 28 <swap_all_buffers> */ /* <verify_checksum>: */ - 0xe3067c39, /* 90: movw r7, #27705 */ - 0xe3457f0a, /* 94: movt r7, #24330 */ - 0xe1a00008, /* 98: mov r0, r8 */ - 0xe5905010, /* 9c: ldr r5, [r0, #16] */ + 0xe59f706c, /* 90: ldr r7, [pc, #108] */ + 0xe1a00008, /* 94: mov r0, r8 */ + 0xe5905010, /* 98: ldr r5, [r0, #16] */ /* <check_next_word>: */ - 0xe4902004, /* a0: ldr r2, [r0], #4 */ - 0xe2555004, /* a4: subs r5, r5, #4 */ - 0xe0877002, /* a8: add r7, r7, r2 */ - 0x1afffffb, /* ac: bne a0 <check_next_word> */ - 0xe598200c, /* b0: ldr r2, [r8, #12] */ - 0xe0577082, /* b4: subs r7, r7, r2, lsl #1 */ - 0x1a00000a, /* b8: bne e8 <checksum_is_bad> */ - 0xe304262e, /* bc: movw r2, #17966 */ - 0xe3442c45, /* c0: movt r2, #19525 */ - 0xe5882008, /* c4: str r2, [r8, #8] */ - 0xf57ff04f, /* c8: dsb sy */ - 0xf57ff06f, /* cc: isb sy */ - 0xe12fff38, /* d0: blx r8 */ - 0xea000006, /* d4: b f4 <return_to_fel> */ + 0xe4902004, /* 9c: ldr r2, [r0], #4 */ + 0xe2555004, /* a0: subs r5, r5, #4 */ + 0xe0877002, /* a4: add r7, r7, r2 */ + 0x1afffffb, /* a8: bne 9c <check_next_word> */ + 0xe598200c, /* ac: ldr r2, [r8, #12] */ + 0xe0577082, /* b0: subs r7, r7, r2, lsl #1 */ + 0x1a00000b, /* b4: bne e8 <checksum_is_bad> */ + 0xe59f2048, /* b8: ldr r2, [pc, #72] */ + 0xe5882008, /* bc: str r2, [r8, #8] */ + 0xee072f9a, /* c0: mcr 15, 0, r2, cr7, cr10, {4} */ + 0xee102f10, /* c4: mrc 15, 0, r2, cr0, cr0, {0} */ + 0xe202280f, /* c8: and r2, r2, #983040 */ + 0xe3520806, /* cc: cmp r2, #393216 */ + 0xce072f95, /* d0: mcrgt 15, 0, 
r2, cr7, cr5, {4} */ + 0xe12fff38, /* d4: blx r8 */ + 0xea000004, /* d8: b f0 <return_to_fel> */ /* <cache_is_unsupported>: */ - 0xe3032f2e, /* d8: movw r2, #16174 */ - 0xe3432f3f, /* dc: movt r2, #16191 */ + 0xe59f2028, /* dc: ldr r2, [pc, #40] */ 0xe5882008, /* e0: str r2, [r8, #8] */ - 0xea000003, /* e4: b f8 <return_to_fel_noswap> */ + 0xea000002, /* e4: b f4 <return_to_fel_noswap> */ /* <checksum_is_bad>: */ - 0xe304222e, /* e8: movw r2, #16942 */ - 0xe3442441, /* ec: movt r2, #17473 */ - 0xe5882008, /* f0: str r2, [r8, #8] */ + 0xe59f2020, /* e8: ldr r2, [pc, #32] */ + 0xe5882008, /* ec: str r2, [r8, #8] */ /* <return_to_fel>: */ - 0xebffffcb, /* f4: bl 28 <swap_all_buffers> */ + 0xebffffcc, /* f0: bl 28 <swap_all_buffers> */ /* <return_to_fel_noswap>: */ - 0xe8bd4004, /* f8: pop {r2, lr} */ - 0xe121f002, /* fc: msr CPSR_c, r2 */ - 0xe59dd000, /* 100: ldr sp, [sp] */ - 0xe12fff1e, /* 104: bx lr */ + 0xe8bd4004, /* f4: pop {r2, lr} */ + 0xe121f002, /* f8: msr CPSR_c, r2 */ + 0xe59dd000, /* fc: ldr sp, [sp] */ + 0xe12fff1e, /* 100: bx lr */ + /* <checksum_seed>: */ + 0x5f0a6c39, /* 104: .word 0x5f0a6c39 */ + /* <egon_fel_str>: */ + 0x4c45462e, /* 108: .word 0x4c45462e */ + /* <cache_enabled_str>: */ + 0x3f3f3f2e, /* 10c: .word 0x3f3f3f2e */ + /* <checksum_failed_str>: */ + 0x4441422e, /* 110: .word 0x4441422e */ |