Commit 7252ae1b authored by NiteHawk's avatar NiteHawk Committed by GitHub
Browse files

Merge pull request #94 from n1tehawk/20170126_thunks

Move thunk code to a dedicated subdirectory, implement fel_clrsetbits_le32()
parents e125a9da 0858b710
......@@ -52,9 +52,12 @@ MISC_TOOLS = phoenix_info sunxi-nand-image-builder
# Note: To use this target, set/adjust CROSS_COMPILE and MKSUNXIBOOT if needed
BINFILES = jtag-loop.sunxi fel-sdboot.sunxi uart0-helloworld-sdboot.sunxi
CROSS_COMPILE ?= arm-none-eabi-
CROSS_CC ?= $(CROSS_COMPILE)gcc
MKSUNXIBOOT ?= mksunxiboot
PATH_DIRS := $(shell echo $$PATH | sed -e 's/:/ /g')
# Try to guess a suitable default ARM cross toolchain
CROSS_DEFAULT := arm-none-eabi-
CROSS_COMPILE ?= $(or $(shell find $(PATH_DIRS) -executable -name 'arm*-gcc' -printf '%f\t' | cut -f 1 | sed -e 's/-gcc/-/'),$(CROSS_DEFAULT))
CROSS_CC ?= $(CROSS_COMPILE)gcc
DESTDIR ?=
PREFIX ?= /usr/local
......@@ -128,7 +131,7 @@ PROGRESS := progress.c progress.h
SOC_INFO := soc_info.c soc_info.h
FEL_LIB := fel_lib.c fel_lib.h
sunxi-fel: fel.c fel-to-spl-thunk.h $(PROGRESS) $(SOC_INFO) $(FEL_LIB)
sunxi-fel: fel.c thunks/fel-to-spl-thunk.h $(PROGRESS) $(SOC_INFO) $(FEL_LIB)
$(CC) $(HOST_CFLAGS) $(LIBUSB_CFLAGS) $(LDFLAGS) -o $@ $(filter %.c,$^) $(LIBS) $(LIBUSB_LIBS)
sunxi-nand-part: nand-part-main.c nand-part.c nand-part-a10.h nand-part-a20.h
......@@ -173,6 +176,11 @@ boot_head_sun5i.elf: boot_head.S boot_head.lds
sunxi-bootinfo: bootinfo.c
# "preprocessed" .h files for inclusion of ARM thunk code
# Use $(MAKE) for the recursive invocation, so that command line flags
# (e.g. -j, -n) and the actual make binary in use carry over to the sub-make.
headers:
	$(MAKE) -C thunks/ CROSS_COMPILE=$(CROSS_COMPILE)
# target tools
TARGET_CFLAGS = $(DEFAULT_CFLAGS) -static $(CFLAGS)
sunxi-meminfo: meminfo.c
......
......@@ -112,8 +112,9 @@ and our default target (when simply using `make`).
* `make target-tools`
builds tools that are intended for the target (Allwinner SoC), using a
cross-compiler. The toolchain prefix *CROSS_COMPILE* defaults to `arm-none-eabi-`,
adjust it if needed.
cross-compiler. The Makefile tries to auto-detect a suitable toolchain
prefix, and falls back to `arm-none-eabi-` if none is found.
If needed, you may override this by explicitly setting *CROSS_COMPILE*.
<br>_Hint:_ When compiling 'natively' on the target platform you may
simply use an empty toolchain prefix here (`make target-tools CROSS_COMPILE=`
or `make all CROSS_COMPILE=`).
......
......@@ -216,7 +216,7 @@ void aw_fel_fill(feldev_handle *dev, uint32_t offset, size_t size, unsigned char
}
static uint32_t fel_to_spl_thunk[] = {
#include "fel-to-spl-thunk.h"
#include "thunks/fel-to-spl-thunk.h"
};
#define DRAM_BASE 0x40000000
......
......@@ -484,6 +484,31 @@ void fel_memmove(feldev_handle *dev,
fel_memcpy_up(dev, dst_addr, src_addr, size);
}
/*
 * Read-modify-write of a single 32-bit word on the target device: the bits
 * selected by "clrbits" are cleared first, then those in "setbits" are set.
 *
 * The operation is carried out by a small ARM routine, which gets uploaded
 * to the SoC-specific scratch address and is executed there.
 */
void fel_clrsetbits_le32(feldev_handle *dev,
			 uint32_t addr, uint32_t clrbits, uint32_t setbits)
{
	/* machine code, directly followed by the three operand words it loads */
	uint32_t thunk[] = {
		htole32(0xe59f0018), /*  0: ldr r0, [pc, #24]  (addr)    */
		htole32(0xe5901000), /*  4: ldr r1, [r0]                 */
		htole32(0xe59f2014), /*  8: ldr r2, [pc, #20]  (clrbits) */
		htole32(0xe1c11002), /*  c: bic r1, r1, r2               */
		htole32(0xe59f2010), /* 10: ldr r2, [pc, #16]  (setbits) */
		htole32(0xe1811002), /* 14: orr r1, r1, r2               */
		htole32(0xe5801000), /* 18: str r1, [r0]                 */
		htole32(0xe12fff1e), /* 1c: bx lr                        */
		htole32(addr),       /* 20: word address to modify       */
		htole32(clrbits),    /* 24: mask of bits to clear        */
		htole32(setbits),    /* 28: mask of bits to set          */
	};
	const uint32_t scratch = dev->soc_info->scratch_addr;

	aw_fel_write(dev, thunk, scratch, sizeof thunk);
	aw_fel_execute(dev, scratch);
}
/*
* Memory access to the SID (root) keys proved to be unreliable for certain
* SoCs. This function uses an alternative, register-based approach to retrieve
......
......@@ -70,6 +70,13 @@ void fel_writel_n(feldev_handle *dev, uint32_t addr, uint32_t *src, size_t count
void fel_memmove(feldev_handle *dev,
uint32_t dst_addr, uint32_t src_addr, size_t size);
/*
 * Bitwise manipulation of the 32-bit word at "addr" on the target device:
 * the bits given in "clrbits" get cleared, then those in "setbits" get set.
 */
void fel_clrsetbits_le32(feldev_handle *dev,
uint32_t addr, uint32_t clrbits, uint32_t setbits);
/* Convenience wrapper: only clear the bits given in "value" */
#define fel_clrbits_le32(dev, addr, value) \
fel_clrsetbits_le32(dev, addr, value, 0)
/* Convenience wrapper: only set the bits given in "value" */
#define fel_setbits_le32(dev, addr, value) \
fel_clrsetbits_le32(dev, addr, 0, value)
/* retrieve SID root key */
bool fel_get_sid_root_key(feldev_handle *dev, uint32_t *result,
bool force_workaround);
......
#
# build "preprocessed" .h files for inclusion of ARM scratch code
#

SPL_THUNK := fel-to-spl-thunk.h
THUNKS := clrsetbits.h
THUNKS += memcpy.h
THUNKS += readl_writel.h
THUNKS += rmr-thunk.h
THUNKS += sid_read_root.h

# Neither name refers to a real file. Declaring them phony keeps the build
# working (and the rebuild forced) even if a file called "all" or "FORCE"
# happens to exist in this directory.
.PHONY: all FORCE

all: $(SPL_THUNK) $(THUNKS)
	# clean up object files afterwards
	rm -f *.o

# This empty prerequisite enforces a rebuild of all the headers on every run
FORCE:

# If not specified explicitly: try to guess a suitable ARM toolchain prefix
PATH_DIRS := $(shell echo $$PATH | sed -e 's/:/ /g')
CROSS_COMPILE ?= $(shell find $(PATH_DIRS) -executable -name 'arm*-gcc' -printf '%f\t' | cut -f 1 | sed -e 's/-gcc/-/')

AS := $(CROSS_COMPILE)as
OBJDUMP := $(CROSS_COMPILE)objdump

AWK_O_TO_H := awk -f objdump_to_h.awk

# The SPL thunk requires a different output format. The "style" variable for
# awk controls this, and causes the htole32() conversion to be omitted.
fel-to-spl-thunk.h: fel-to-spl-thunk.S FORCE
	$(AS) -o $(<:.S=.o) $<
	$(OBJDUMP) -d $(<:.S=.o) | $(AWK_O_TO_H) -v style=old > $@

# All other thunks: assemble, disassemble, and convert to a list of
# htole32(...) initializers.
$(THUNKS): %.h: %.S FORCE
	$(AS) -o $(<:.S=.o) $<
	$(OBJDUMP) -d $(<:.S=.o) | $(AWK_O_TO_H) > $@
# thunks/README.md
This directory contains assembly sources for ARM [thunk] code, and
a corresponding _Makefile_. The idea is that the resulting binary routines
can be transferred to a suitable target device and then executed 'remotely',
usually via `sunxi-fel`.
Normally you don't need to change or (re)build anything within this folder.
Currently our main build process (via the parent directory's _Makefile_)
only includes `fel-to-spl-thunk.h` directly. Other _.h_ files are provided
**just for reference**. The main purpose of this folder is simply keeping
track of _.S_ sources, to help with possible future maintenance of the
various code snippets.
Please note that any files lacking explicit license information are intended
to be covered by the project's [overall license](../LICENSE.md) (GPLv2).
[thunk]: https://en.wikipedia.org/wiki/Thunk#Interoperability
/*
 * Thunk code to assist with bitwise operations (set/clear) via FEL
 *
 * Performs a read-modify-write of the 32-bit word at "addr":
 *   value = (value & ~clrbits) | setbits
 * The three parameter words following the code are placeholders here; the
 * host side supplies the actual values when uploading the routine.
 */
fel_clrsetbits_le32:
ldr r0, 1f /* address */
ldr r1, [r0] /* load value */
ldr r2, 2f /* clrbits mask */
bic r1, r2 /* clear bits: r1 &= ~r2 */
ldr r2, 3f /* setbits mask */
orr r1, r2 /* set bits (logical "or") */
str r1, [r0] /* store result */
bx lr /* return to caller */
1: .word 0 /* addr */
2: .word 0 /* clrbits (= bits to clear) */
3: .word 0 /* setbits (= bits to set) */
/* <fel_clrsetbits_le32>: */
htole32(0xe59f0018), /* 0: ldr r0, [pc, #24] */
htole32(0xe5901000), /* 4: ldr r1, [r0] */
htole32(0xe59f2014), /* 8: ldr r2, [pc, #20] */
htole32(0xe1c11002), /* c: bic r1, r1, r2 */
htole32(0xe59f2010), /* 10: ldr r2, [pc, #16] */
htole32(0xe1811002), /* 14: orr r1, r1, r2 */
htole32(0xe5801000), /* 18: str r1, [r0] */
htole32(0xe12fff1e), /* 1c: bx lr */
/* <entry_point>: */
0xea000015, /* 0: b 5c <setup_stack> */
/* <stack_begin>: */
0xe1a00000, /* 4: nop */
0xe1a00000, /* 8: nop */
0xe1a00000, /* c: nop */
......@@ -7,13 +9,17 @@
0xe1a00000, /* 18: nop */
0xe1a00000, /* 1c: nop */
0xe1a00000, /* 20: nop */
/* <stack_end>: */
0xe1a00000, /* 24: nop */
/* <swap_all_buffers>: */
0xe28f40dc, /* 28: add r4, pc, #220 */
/* <swap_next_buffer>: */
0xe4940004, /* 2c: ldr r0, [r4], #4 */
0xe4941004, /* 30: ldr r1, [r4], #4 */
0xe4946004, /* 34: ldr r6, [r4], #4 */
0xe3560000, /* 38: cmp r6, #0 */
0x012fff1e, /* 3c: bxeq lr */
/* <swap_next_word>: */
0xe5902000, /* 40: ldr r2, [r0] */
0xe5913000, /* 44: ldr r3, [r1] */
0xe2566004, /* 48: subs r6, r6, #4 */
......@@ -21,6 +27,7 @@
0xe4803004, /* 50: str r3, [r0], #4 */
0x1afffff9, /* 54: bne 40 <swap_next_word> */
0xeafffff3, /* 58: b 2c <swap_next_buffer> */
/* <setup_stack>: */
0xe59f80a4, /* 5c: ldr r8, [pc, #164] */
0xe24f0044, /* 60: sub r0, pc, #68 */
0xe520d004, /* 64: str sp, [r0, #-4]! */
......@@ -34,10 +41,12 @@
0xe1120003, /* 84: tst r2, r3 */
0x1a000012, /* 88: bne d8 <cache_is_unsupported> */
0xebffffe5, /* 8c: bl 28 <swap_all_buffers> */
/* <verify_checksum>: */
0xe3067c39, /* 90: movw r7, #27705 */
0xe3457f0a, /* 94: movt r7, #24330 */
0xe1a00008, /* 98: mov r0, r8 */
0xe5905010, /* 9c: ldr r5, [r0, #16] */
/* <check_next_word>: */
0xe4902004, /* a0: ldr r2, [r0], #4 */
0xe2555004, /* a4: subs r5, r5, #4 */
0xe0877002, /* a8: add r7, r7, r2 */
......@@ -52,14 +61,18 @@
0xf57ff06f, /* cc: isb sy */
0xe12fff38, /* d0: blx r8 */
0xea000006, /* d4: b f4 <return_to_fel> */
/* <cache_is_unsupported>: */
0xe3032f2e, /* d8: movw r2, #16174 */
0xe3432f3f, /* dc: movt r2, #16191 */
0xe5882008, /* e0: str r2, [r8, #8] */
0xea000003, /* e4: b f8 <return_to_fel_noswap> */
/* <checksum_is_bad>: */
0xe304222e, /* e8: movw r2, #16942 */
0xe3442441, /* ec: movt r2, #17473 */
0xe5882008, /* f0: str r2, [r8, #8] */
/* <return_to_fel>: */
0xebffffcb, /* f4: bl 28 <swap_all_buffers> */
/* <return_to_fel_noswap>: */
0xe8bd4004, /* f8: pop {r2, lr} */
0xe121f002, /* fc: msr CPSR_c, r2 */
0xe59dd000, /* 100: ldr sp, [sp] */
......
/*
 * copy "upwards", increasing destination and source addresses
 *
 * The parameters (dst_addr, src_addr, byte count) are taken from the three
 * words following the code. If source and destination share the same
 * alignment, single bytes are copied until the source is word-aligned, then
 * whole words, and finally any remaining bytes. Otherwise the entire copy
 * is done byte-wise.
 */
fel_memcpy_up:
	ldr	r0, 1f		/* dst_addr */
	ldr	r1, 2f		/* src_addr */
	ldr	r2, 3f		/* bytes */
	sub	r3, r1, r0	/* r3 = src - dst */
	tst	r3, #3		/* same alignment (low two bits equal)? */
	bne	copyup_tail	/* no: word access not possible, copy byte-wise */
copyup_head:
	tst	r1, #3		/* word boundary? */
	beq	copyup_loop
	/*
	 * Decrement and test the byte count BEFORE copying. Copying first
	 * would transfer one byte too many whenever the count is exhausted
	 * before the source reaches a word boundary (including bytes == 0).
	 * The instruction count is the same as with the copy-first ordering,
	 * so the offsets of the parameter words do not change.
	 */
	subs	r2, #1		/* r2 -= 1 */
	bxmi	lr		/* early return on small byte count (r2 < 0) */
	ldrb	r3, [r1], #1	/* load and post-inc */
	strb	r3, [r0], #1	/* store and post-inc */
	b	copyup_head
copyup_loop:
	subs	r2, #4		/* r2 -= 4 */
	ldrpl	r3, [r1], #4	/* load and post-inc */
	strpl	r3, [r0], #4	/* store and post-inc */
	bpl	copyup_loop	/* while (r2 >= 0) */
	add	r2, #4		/* r2 = remaining byte count */
copyup_tail:
	subs	r2, #1		/* r2 -= 1 */
	bxmi	lr		/* return on (r2 < 0) */
	ldrb	r3, [r1], #1	/* load and post-inc */
	strb	r3, [r0], #1	/* store and post-inc */
	b	copyup_tail

1:	.word	0	/* dst_addr */
2:	.word	0	/* src_addr */
3:	.word	0	/* bytes */
/*
 * copy "downwards", using base-relative indexing
 *
 * The parameters (dst_addr, src_addr, byte count) are taken from the three
 * words following the code. r2 serves both as the remaining byte count and
 * as the index relative to the base addresses in r0/r1, so data is copied
 * starting at the highest offset and proceeding towards offset 0.
 */
fel_memcpy_down:
ldr r0, 1f /* dst_addr */
ldr r1, 2f /* src_addr */
ldr r2, 3f /* bytes */
sub r3, r0, r1 /* r3 = dst - src */
tst r3, #3 /* do dst and src share the same alignment (low two bits)? */
bne copydn_tail /* different alignment, copy everything byte-wise */
copydn_head:
add r3, r1, r2 /* r3 = r1 + r2, for alignment check */
tst r3, #3 /* word boundary? */
beq copydn_loop
subs r2, #1 /* r2 -= 1 */
bxmi lr /* early return on small byte count (r2 < 0) */
ldrb r3, [r1, r2] /* load byte */
strb r3, [r0, r2] /* store byte */
b copydn_head
copydn_loop:
subs r2, #4 /* r2 -= 4 */
ldrpl r3, [r1, r2] /* load word */
strpl r3, [r0, r2] /* store word */
bpl copydn_loop /* while (r2 >= 0) */
add r2, #4 /* r2 = remaining byte count */
copydn_tail:
subs r2, #1 /* r2 -= 1 */
bxmi lr /* return on (r2 < 0) */
ldrb r3, [r1, r2] /* load byte */
strb r3, [r0, r2] /* store byte */
b copydn_tail
1: .word 0 /* dst_addr */
2: .word 0 /* src_addr */
3: .word 0 /* bytes */
/* <fel_memcpy_up>: */
htole32(0xe59f0054), /* 0: ldr r0, [pc, #84] */
htole32(0xe59f1054), /* 4: ldr r1, [pc, #84] */
htole32(0xe59f2054), /* 8: ldr r2, [pc, #84] */
htole32(0xe0413000), /* c: sub r3, r1, r0 */
htole32(0xe3130003), /* 10: tst r3, #3 */
htole32(0x1a00000b), /* 14: bne 48 <copyup_tail> */
/* <copyup_head>: */
htole32(0xe3110003), /* 18: tst r1, #3 */
htole32(0x0a000004), /* 1c: beq 34 <copyup_loop> */
htole32(0xe4d13001), /* 20: ldrb r3, [r1], #1 */
htole32(0xe4c03001), /* 24: strb r3, [r0], #1 */
htole32(0xe2522001), /* 28: subs r2, r2, #1 */
htole32(0x5afffff9), /* 2c: bpl 18 <copyup_head> */
htole32(0xe12fff1e), /* 30: bx lr */
/* <copyup_loop>: */
htole32(0xe2522004), /* 34: subs r2, r2, #4 */
htole32(0x54913004), /* 38: ldrpl r3, [r1], #4 */
htole32(0x54803004), /* 3c: strpl r3, [r0], #4 */
htole32(0x5afffffb), /* 40: bpl 34 <copyup_loop> */
htole32(0xe2822004), /* 44: add r2, r2, #4 */
/* <copyup_tail>: */
htole32(0xe2522001), /* 48: subs r2, r2, #1 */
htole32(0x412fff1e), /* 4c: bxmi lr */
htole32(0xe4d13001), /* 50: ldrb r3, [r1], #1 */
htole32(0xe4c03001), /* 54: strb r3, [r0], #1 */
htole32(0xeafffffa), /* 58: b 48 <copyup_tail> */
/* <fel_memcpy_down>: */
htole32(0xe59f0058), /* 68: ldr r0, [pc, #88] */
htole32(0xe59f1058), /* 6c: ldr r1, [pc, #88] */
htole32(0xe59f2058), /* 70: ldr r2, [pc, #88] */
htole32(0xe0403001), /* 74: sub r3, r0, r1 */
htole32(0xe3130003), /* 78: tst r3, #3 */
htole32(0x1a00000c), /* 7c: bne b4 <copydn_tail> */
/* <copydn_head>: */
htole32(0xe0813002), /* 80: add r3, r1, r2 */
htole32(0xe3130003), /* 84: tst r3, #3 */
htole32(0x0a000004), /* 88: beq a0 <copydn_loop> */
htole32(0xe2522001), /* 8c: subs r2, r2, #1 */
htole32(0x412fff1e), /* 90: bxmi lr */
htole32(0xe7d13002), /* 94: ldrb r3, [r1, r2] */
htole32(0xe7c03002), /* 98: strb r3, [r0, r2] */
htole32(0xeafffff7), /* 9c: b 80 <copydn_head> */
/* <copydn_loop>: */
htole32(0xe2522004), /* a0: subs r2, r2, #4 */
htole32(0x57913002), /* a4: ldrpl r3, [r1, r2] */
htole32(0x57803002), /* a8: strpl r3, [r0, r2] */
htole32(0x5afffffb), /* ac: bpl a0 <copydn_loop> */
htole32(0xe2822004), /* b0: add r2, r2, #4 */
/* <copydn_tail>: */
htole32(0xe2522001), /* b4: subs r2, r2, #1 */
htole32(0x412fff1e), /* b8: bxmi lr */
htole32(0xe7d13002), /* bc: ldrb r3, [r1, r2] */
htole32(0xe7c03002), /* c0: strb r3, [r0, r2] */
htole32(0xeafffffa), /* c4: b b4 <copydn_tail> */
# Converts "objdump -d" output into a list of C initializers, one per
# instruction word, with the disassembly kept as a trailing comment.
# Setting the variable style=old omits the htole32() wrapper and uses a
# wider column layout.
#
# Only POSIX bracket expressions are used in the patterns below: the \w and
# \s operators are gawk extensions, and other awk implementations do not
# necessarily treat them as character classes.

# labels
/[[:xdigit:]]+ <[_[:alnum:]]+>:/ {
	# (Note: using $0 instead of $2 would also include the address)
	if (style == "old")
		printf "\t/* %s */\n", $2
	else
		printf "\t\t/* %s */\n", $2
}

# disassembly lines
/[[:xdigit:]]+:/ {
	# $1 = address, $2 = instruction word, $3 = mnemonic
	if (style == "old")
		printf "\t0x%s, /* %9s %-10s", $2, $1, $3
	else
		printf "\t\thtole32(0x%s), /* %5s %-5s", $2, $1, $3

	for (i = 4; i <= NF; i++)
		if ($i == ";") {
			# strip comment (anything after and including ';')
			NF = i - 1
			break
		}

	# clear $1 to $3, which re-calculates $0 (= remainder of line)
	$3 = ""
	$2 = ""
	$1 = ""
	gsub(/^[[:space:]]+/, "")	# strip leading whitespace

	if (style == "old")
		printf " %-28s */\n", $0
	else
		printf " %-23s */\n", $0
}
/*
 * Thunk code for buffered 'long' (i.e. 32-bit) read and write operations
 */
/*
 * Upper limit for the number of words per invocation. The data buffer of
 * each routine starts 12 words after its entry point (10 instructions, plus
 * the address and count words). 0x100 is presumably the total number of
 * words available at the scratch address - TODO confirm.
 */
.equ MAX_WORDS, 0x100 - 12
/*
 * fel_readl_n: copy "read_count" words (at most MAX_WORDS), starting at
 * "read_addr", into the buffer that begins at "read_data".
 */
fel_readl_n:
ldr r0, 1f /* read_addr */
adr r1, 3f /* read_data */
ldr r2, 2f /* read_count */
/* limit word count to a maximum value */
cmp r2, #MAX_WORDS
movgt r2, #MAX_WORDS /* (signed comparison) */
read_loop:
subs r2, #1 /* r2 -= 1 */
bxmi lr /* return on (r2 < 0), i.e. all words done */
ldr r3, [r0], #4 /* load word from source address, post-inc */
str r3, [r1], #4 /* store word into buffer, post-inc */
b read_loop
1: .word 0 /* read_addr */
2: .word 0 /* read_count */
3: .word 0 /* read_data */
/*
 * fel_writel_n: copy "write_count" words (at most MAX_WORDS) from the buffer
 * that begins at "write_data" to consecutive words starting at "write_addr".
 */
fel_writel_n:
ldr r0, 1f /* write_addr */
adr r1, 3f /* write_data */
ldr r2, 2f /* write_count */
/* limit word count to a maximum value */
cmp r2, #MAX_WORDS
movgt r2, #MAX_WORDS /* (signed comparison) */
write_loop:
subs r2, #1 /* r2 -= 1 */
bxmi lr /* return on (r2 < 0), i.e. all words done */
ldr r3, [r1], #4 /* load word from buffer, post-inc */
str r3, [r0], #4 /* store word to destination address, post-inc */
b write_loop
1: .word 0 /* write_addr */
2: .word 0 /* write_count */
3: .word 0 /* write_data */
/* <fel_readl_n>: */
htole32(0xe59f0020), /* 0: ldr r0, [pc, #32] */
htole32(0xe28f1024), /* 4: add r1, pc, #36 */
htole32(0xe59f201c), /* 8: ldr r2, [pc, #28] */
htole32(0xe35200f4), /* c: cmp r2, #244 */
htole32(0xc3a020f4), /* 10: movgt r2, #244 */
/* <read_loop>: */
htole32(0xe2522001), /* 14: subs r2, r2, #1 */
htole32(0x412fff1e), /* 18: bxmi lr */
htole32(0xe4903004), /* 1c: ldr r3, [r0], #4 */
htole32(0xe4813004), /* 20: str r3, [r1], #4 */
htole32(0xeafffffa), /* 24: b 14 <read_loop> */
/* <fel_writel_n>: */
htole32(0xe59f0020), /* 34: ldr r0, [pc, #32] */
htole32(0xe28f1024), /* 38: add r1, pc, #36 */
htole32(0xe59f201c), /* 3c: ldr r2, [pc, #28] */
htole32(0xe35200f4), /* 40: cmp r2, #244 */
htole32(0xc3a020f4), /* 44: movgt r2, #244 */
/* <write_loop>: */
htole32(0xe2522001), /* 48: subs r2, r2, #1 */
htole32(0x412fff1e), /* 4c: bxmi lr */
htole32(0xe4913004), /* 50: ldr r3, [r1], #4 */
htole32(0xe4803004), /* 54: str r3, [r0], #4 */
htole32(0xeafffffa), /* 58: b 48 <write_loop> */
/*
 * Request AArch32/AArch64 warm reset, using RVBAR and Reset Management Register
 *
 * Stores the desired entry point at the address given by "rvbar_reg", then
 * ORs "rmr_mode" into the RMR value (accessed via CP15: c12, c0, 2) and
 * finally idles in a wfi loop. The three parameter words following the code
 * are placeholders here.
 */
rmr_request:
ldr r0, 1f /* RVBAR register address */
ldr r1, 2f /* desired entry point (reset vector) */
str r1, [r0] /* store entry point to the RVBAR address */
dsb
isb /* make sure we write the address */
ldr r1, 3f /* RMR mode: bit 1 = RR, bit 0 = AA64 */
mrc p15, 0, r0, c12, c0, 2 /* read RMR */
orr r0, r0, r1 /* request warm reset (according to rmr_mode) */
mcr p15, 0, r0, c12, c0, 2 /* write RMR, trigger reset */
isb
0:
wfi /* idle; presumably the reset takes effect here */
b 0b /* loop */
1: .word 0 /* rvbar_reg */
2: .word 0 /* entry_point */
3: .word 0 /* rmr_mode (2 = AArch32, 3 = AArch64) */
/* <rmr_request>: */
htole32(0xe59f0028), /* 0: ldr r0, [pc, #40] */
htole32(0xe59f1028), /* 4: ldr r1, [pc, #40] */
htole32(0xe5801000), /* 8: str r1, [r0] */
htole32(0xf57ff04f), /* c: dsb sy */
htole32(0xf57ff06f), /* 10: isb sy */
htole32(0xe59f101c), /* 14: ldr r1, [pc, #28] */
htole32(0xee1c0f50), /* 18: mrc 15, 0, r0, cr12, cr0, {2} */
htole32(0xe1800001), /* 1c: orr r0, r0, r1 */
htole32(0xee0c0f50), /* 20: mcr 15, 0, r0, cr12, cr0, {2} */
htole32(0xf57ff06f), /* 24: isb sy */
htole32(0xe320f003), /* 28: wfi */
htole32(0xeafffffd), /* 2c: b 28 <rmr_request+0x28> */
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment