Commit 1627b137 authored by Siarhei Siamashka
Browse files

fel: New command for loading U-Boot SPL binaries in eGON format



Now it is possible to load and execute the same U-Boot SPL,
as used for booting from SD cards. Just a different delivery
method (a USB OTG cable instead of an SD card) for handling
exactly the same content.

The only argument for this new command is the name of the SPL
binary file (with an eGON header generated by the 'mksunxiboot'
tool). Now the 'fel' tool can be run as:

    fel spl u-boot-sunxi-with-spl.bin

Before this change, the SPL was only able to use the memory between
addresses 0x2000 and ~0x5D00, totalling to something like ~15 KiB.
This is the biggest contiguous area in SRAM, which is not used
by the FEL code from the BROM. Unfortunately, it is rather small.
And also the unusual starting offset was making it difficult to
use the same SPL binary for booting from the SD card and via FEL.

There are surely more unused parts of SRAM, but they are scattered
across multiple locations, primarily because the FEL code from the
BROM sets up two stacks at inconvenient locations (the IRQ handler
stack at 0x2000, and a regular stack at 0x7000). Essentially, the
problem to solve here is to ensure a sufficiently large and consistent
SRAM address space for the SPL without any potentially SoC specific
holes in the case of booting over USB via FEL.

This is achieved by injecting special entry/exit thunk code, which
is moving the data in SRAM to provide a contiguous space for the SPL
at the beginning of SRAM, while still preserving the data from
the BROM elsewhere. When the SPL tries to return control back to the
FEL code in the BROM, the thunk code moves the data back to its
original place. Additionally, the eGON checksum is verified to
ensure that no data corruption has happened due to some unexpected
clash with the FEL protocol code from the BROM.

So the thunk code takes care of the address space allocation ugliness
and provides the U-Boot SPL with a somewhat nicer abstraction.
Now the FEL booted SPL on A10/A13/A20/A31 can use up to 32 KiB of
SRAM because the BROM data is saved to a different SRAM section.
There is also generic code, which does not rely on extra SRAM
sections, but just glues together the unused free space from
both BROM FEL stacks to provide something like ~21 KiB to the SPL.
Signed-off-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
parent 91949d62
/*
* Copyright © 2015 Siarhei Siamashka <siarhei.siamashka@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/*************************************************************************/
/* Usage instructions: "ruby -x fel-to-spl-thunk.S > fel-to-spl-thunk.h" */
/*************************************************************************/
#if 0
#!/usr/bin/env ruby

# Generator script, started via "ruby -x". It assembles this very file with
# the first ARM cross-compiler found in PATH and prints every disassembled
# instruction on stdout as one line of a C array initializer.

# Returns true when 'tool_name' can be found in PATH.
def tool_exists(tool_name)
    `which #{tool_name} > /dev/null 2>&1`
    return $?.success?
end

# Toolchain prefixes to probe, in order of preference.
toolchains = [
    "arm-none-eabi-",
    "arm-linux-gnueabihf-",
    "arm-none-linux-gnueabi-",
    "armv7a-hardfloat-linux-gnueabi-",
]

toolchain = toolchains.find { |toolchain| tool_exists("#{toolchain}gcc") }
abort "Can't find any ARM crosscompiler\n" unless toolchain

system("#{toolchain}gcc -o #{$PROGRAM_NAME}.o -c #{$PROGRAM_NAME}")
# Process::Status#to_i returns the raw, platform dependent wait status and
# not the exit code. A failed compiler run typically shows up there as 256,
# which gets truncated to exit code 0 when handed to exit. Propagate the
# decoded exit code instead, so that a failed build is reported as a failure.
exit($?.exitstatus || 1) unless $?.success?

# Turn every "address: opcode mnemonic operands" line of the disassembly
# into "0x<opcode>, /* <address>: <mnemonic> <operands> */".
`#{toolchain}objdump -d #{$PROGRAM_NAME}.o`.each_line {|l|
    next unless l =~ /(\h+)\:\s+(\h+)\s+(\S+)\s+([^;]*)/
    printf("\t0x%s, /* %8s: %-10s %-28s */\n", $2, $1, $3, $4.strip)
}

__END__
#endif
/*************************************************************************/
/*
 * Symbolic names for the registers used by the thunk code. BUF1 and the
 * two TMP registers double as general purpose scratch registers outside
 * of the buffer swapping loop.
 */
/* Pointer into the first buffer of a swap pair / base address scratch */
BUF1 .req r0
/* Pointer into the second buffer of a swap pair */
BUF2 .req r1
/* Scratch registers */
TMP1 .req r2
TMP2 .req r3
/* Read cursor walking the swap table */
SWAPTBL .req r4
/* Number of bytes left to add up during the checksum verification */
FULLSIZE .req r5
/* Number of bytes left to exchange in the current swap pair */
BUFSIZE .req r6
/* Running checksum accumulator */
CHECKSUM .req r7
/*
 * Entry point of the thunk: skip over the embedded stack area and the
 * 'swap_all_buffers' helper to reach the real start of the code.
 */
entry_point:
b setup_stack
/*
 * Eight words (32 bytes) of space for a small private stack. 'setup_stack'
 * stores the caller's SP in the last word (just below 'stack_end') and
 * points SP at it, so the following pushes grow down towards 'stack_begin'.
 * The 'nop' instructions only serve as placeholder words here.
 */
stack_begin:
nop
nop
nop
nop
nop
nop
nop
nop
stack_end:
/* NOTE(review): one extra word after 'stack_end', presumably padding - confirm */
nop
/*
 * A function, which walks the table and swaps all buffers.
 *
 * The table starts at 'swaptbl_start' and consists of three-word entries
 * (first buffer address, second buffer address, size in bytes). An entry
 * with a zero size terminates the table and makes the function return.
 * The contents of the two buffers of every entry are exchanged one 32-bit
 * word at a time, so each size has to be a non-zero multiple of 4.
 *
 * Clobbers BUF1, BUF2, TMP1, TMP2, SWAPTBL, BUFSIZE and the flags.
 */
swap_all_buffers:
adr SWAPTBL, swaptbl_start
swap_next_buffer:
/* Fetch the next table entry, advancing the cursor by one word per load */
ldr BUF1, [SWAPTBL], #4
ldr BUF2, [SWAPTBL], #4
ldr BUFSIZE, [SWAPTBL], #4
/* A zero size marks the end of the table */
cmp BUFSIZE, #0
bxeq lr
swap_next_word:
/* Read one word from each buffer, then store them crosswise */
ldr TMP1, [BUF1]
ldr TMP2, [BUF2]
/* The flags set here are consumed by the 'bne' below (stores keep them) */
subs BUFSIZE, BUFSIZE, #4
str TMP1, [BUF2], #4
str TMP2, [BUF1], #4
bne swap_next_word
b swap_next_buffer
/*
 * Main body of the thunk.
 *
 * 1. Switch to the private stack and save the caller's SP, CPSR and LR.
 * 2. Mask IRQ and FIQ.
 * 3. Bail out (status 'eGON.???') if the instruction or data cache is on.
 * 4. Swap the buffers to assemble the contiguous SPL image at address 0.
 * 5. Verify the eGON checksum (status 'eGON.BAD' on mismatch).
 * 6. Mark the header as 'eGON.FEL' and call the SPL at address 0.
 * 7. Swap the buffers back, restore CPSR and SP and return to the caller.
 *
 * The status is reported by overwriting the word at address 8, which is
 * the second half of the 8 byte signature stored at address 4.
 */
setup_stack: /* Save the original SP, LR and CPSR to stack */
	adr BUF1, stack_end
	/* Store the caller's SP in the topmost stack word (with writeback) */
	str sp, [BUF1, #-4]!
	mov sp, BUF1
	mrs TMP1, cpsr
	push {TMP1, lr}
	/* Disable IRQ and FIQ */
	orr TMP1, #0xc0
	msr cpsr_c, TMP1
	/* Check if the instructions or data cache is enabled */
	mrc p15, 0, TMP1, c1, c0, 0
	movw TMP2, #((1 << 12) | (1 << 2))
	tst TMP1, TMP2
	bne cache_is_unsupported
	bl swap_all_buffers
verify_checksum:
	/* Start from the constant 0x5f0a6c39 and add up all words of the image */
	movw CHECKSUM, #0x6c39
	movt CHECKSUM, #0x5f0a
	mov BUF1, #0
	/* The image length in bytes is the word at offset 16 */
	ldr FULLSIZE, [BUF1, #16]
check_next_word:
	ldr TMP1, [BUF1], #4
	subs FULLSIZE, FULLSIZE, #4
	/* A plain 'add' leaves the flags from 'subs' intact for the 'bne' */
	add CHECKSUM, CHECKSUM, TMP1
	bne check_next_word
	mov BUF1, #0
	/*
	 * The word at offset 12 holds the stored checksum. It has been added
	 * once as part of the image, so twice its value must bring the
	 * accumulator to zero if the image is intact.
	 */
	ldr TMP1, [BUF1, #12]
	subs CHECKSUM, CHECKSUM, TMP1, lsl #1
	bne checksum_is_bad
	/* Change 'eGON.BT0' -> 'eGON.FEL' */
	mov BUF1, #0
	movw TMP1, (('F' << 8) + '.')
	movt TMP1, (('L' << 8) + 'E')
	str TMP1, [BUF1, #8]
	/* Call the SPL code */
	dsb
	isb
	blx BUF1
	/* Return back to FEL */
	b return_to_fel
cache_is_unsupported:
	/* Bail out if cache is enabled and change 'eGON.BT0' -> 'eGON.???' */
	mov BUF1, #0
	movw TMP1, (('?' << 8) + '.')
	movt TMP1, (('?' << 8) + '?')
	str TMP1, [BUF1, #8]
	/*
	 * The buffers have not been swapped on this path, so they must not
	 * be "swapped back" either. Doing that would replace the live BROM
	 * data (including its stacks) with parts of the SPL image right
	 * before control is returned to the BROM.
	 */
	b return_to_fel_noswap
checksum_is_bad:
	/* The checksum test failed, so change 'eGON.BT0' -> 'eGON.BAD' */
	mov BUF1, #0
	movw TMP1, (('B' << 8) + '.')
	movt TMP1, (('D' << 8) + 'A')
	str TMP1, [BUF1, #8]
return_to_fel:
	/* Move the BROM data back to its original place */
	bl swap_all_buffers
return_to_fel_noswap:
	pop {TMP1, lr}
	msr cpsr_c, TMP1 /* Restore the original CPSR */
	/* SP now points at the word holding the caller's SP */
	ldr sp, [sp]
	bx lr
/*
 * The swap table is not assembled here. The host tool appends it right
 * after the thunk code before uploading both to the device.
 */
swaptbl_start:
0xea000015, /* 0: b 5c <setup_stack> */
0xe1a00000, /* 4: nop */
0xe1a00000, /* 8: nop */
0xe1a00000, /* c: nop */
0xe1a00000, /* 10: nop */
0xe1a00000, /* 14: nop */
0xe1a00000, /* 18: nop */
0xe1a00000, /* 1c: nop */
0xe1a00000, /* 20: nop */
0xe1a00000, /* 24: nop */
0xe28f40e4, /* 28: add r4, pc, #228 */
0xe4940004, /* 2c: ldr r0, [r4], #4 */
0xe4941004, /* 30: ldr r1, [r4], #4 */
0xe4946004, /* 34: ldr r6, [r4], #4 */
0xe3560000, /* 38: cmp r6, #0 */
0x012fff1e, /* 3c: bxeq lr */
0xe5902000, /* 40: ldr r2, [r0] */
0xe5913000, /* 44: ldr r3, [r1] */
0xe2566004, /* 48: subs r6, r6, #4 */
0xe4812004, /* 4c: str r2, [r1], #4 */
0xe4803004, /* 50: str r3, [r0], #4 */
0x1afffff9, /* 54: bne 40 <swap_next_word> */
0xeafffff3, /* 58: b 2c <swap_next_buffer> */
0xe24f0040, /* 5c: sub r0, pc, #64 */
0xe520d004, /* 60: str sp, [r0, #-4]! */
0xe1a0d000, /* 64: mov sp, r0 */
0xe10f2000, /* 68: mrs r2, CPSR */
0xe92d4004, /* 6c: push {r2, lr} */
0xe38220c0, /* 70: orr r2, r2, #192 */
0xe121f002, /* 74: msr CPSR_c, r2 */
0xee112f10, /* 78: mrc 15, 0, r2, cr1, cr0, {0} */
0xe3013004, /* 7c: movw r3, #4100 */
0xe1120003, /* 80: tst r2, r3 */
0x1a000014, /* 84: bne dc <cache_is_unsupported> */
0xebffffe6, /* 88: bl 28 <swap_all_buffers> */
0xe3067c39, /* 8c: movw r7, #27705 */
0xe3457f0a, /* 90: movt r7, #24330 */
0xe3a00000, /* 94: mov r0, #0 */
0xe5905010, /* 98: ldr r5, [r0, #16] */
0xe4902004, /* 9c: ldr r2, [r0], #4 */
0xe2555004, /* a0: subs r5, r5, #4 */
0xe0877002, /* a4: add r7, r7, r2 */
0x1afffffb, /* a8: bne 9c <check_next_word> */
0xe3a00000, /* ac: mov r0, #0 */
0xe590200c, /* b0: ldr r2, [r0, #12] */
0xe0577082, /* b4: subs r7, r7, r2, lsl #1 */
0x1a00000c, /* b8: bne f0 <checksum_is_bad> */
0xe3a00000, /* bc: mov r0, #0 */
0xe304262e, /* c0: movw r2, #17966 */
0xe3442c45, /* c4: movt r2, #19525 */
0xe5802008, /* c8: str r2, [r0, #8] */
0xf57ff04f, /* cc: dsb sy */
0xf57ff06f, /* d0: isb sy */
0xe12fff30, /* d4: blx r0 */
0xea000008, /* d8: b 100 <return_to_fel> */
0xe3a00000, /* dc: mov r0, #0 */
0xe3032f2e, /* e0: movw r2, #16174 */
0xe3432f3f, /* e4: movt r2, #16191 */
0xe5802008, /* e8: str r2, [r0, #8] */
0xea000004, /* ec: b 104 <return_to_fel_noswap> */
0xe3a00000, /* f0: mov r0, #0 */
0xe304222e, /* f4: movw r2, #16942 */
0xe3442441, /* f8: movt r2, #17473 */
0xe5802008, /* fc: str r2, [r0, #8] */
0xebffffc8, /* 100: bl 28 <swap_all_buffers> */
0xe8bd4004, /* 104: pop {r2, lr} */
0xe121f002, /* 108: msr CPSR_c, r2 */
0xe59dd000, /* 10c: ldr sp, [sp] */
0xe12fff1e, /* 110: bx lr */
......@@ -29,6 +29,7 @@
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include "endian_compat.h"
......@@ -294,6 +295,237 @@ void aw_fel_fill(libusb_device_handle *usb, uint32_t offset, size_t size, unsign
aw_fel_write(usb, buf, offset, size);
}
/*
 * One pair of equally sized SRAM buffers. Their contents are exchanged
 * right before the U-Boot SPL gets control and exchanged once more before
 * control goes back to the FEL code in the BROM.
 *
 * Arrays of these records are uploaded to the device verbatim, so the
 * order of the three 32-bit fields must not be changed.
 */
typedef struct {
	/* BROM buffer */
	uint32_t buf1;
	/* backup storage location */
	uint32_t buf2;
	/* buffer size (in bytes) */
	uint32_t size;
} sram_swap_buffers;
/*
 * Per-SoC description of the SRAM layout needed for running the SPL via
 * FEL: which buffers have to be exchanged and where the thunk code, which
 * performs the transition from the BROM FEL code to the U-Boot SPL and
 * back, may be placed.
 *
 * Note: a 'swap_buffers' table has to be sorted by the 'buf1' addresses.
 * 'buf1' is always the BROM data buffer and 'buf2' is the location
 * intended for its backup.
 */
typedef struct {
	/* ID of the SoC */
	uint32_t soc_id;
	/* Address of the thunk code */
	uint32_t thunk_addr;
	/* Maximal size of the thunk code */
	uint32_t thunk_size;
	/* Table of the buffers to exchange, ends with a zero 'size' entry */
	sram_swap_buffers *swap_buffers;
} soc_sram_info;
/*
 * On A10/A13/A20 the FEL code from the BROM uses two stacks: one for the
 * IRQ handler at 0x2000 and one for the regular code at 0x7000, both
 * growing down. They have to be moved out of the way temporarily to make
 * the whole 32 KiB of the A1/A2 SRAM sections available. The addresses
 * above 0x7000 look a bit suspicious as well, so the 0x7000-0x8000 area is
 * backed up too, to be on the safe side. The SRAM section A3 (0x8000)
 * serves as the backup storage on these SoCs.
 */
sram_swap_buffers a10_a13_a20_sram_swap_buffers[] = {
	/* The area right below 0x2000, where the IRQ handler stack lives */
	{
		.buf1 = 0x01800,
		.buf2 = 0x8000,
		.size = 0x800,
	},
	/* The regular stack below 0x7000 plus the 0x7000-0x8000 area */
	{
		.buf1 = 0x05C00,
		.buf2 = 0x8800,
		.size = 0x8000 - 0x5C00,
	},
	{ 0 } /* End of the table */
};
/*
 * The A31 closely resembles A10/A13/A20, but has no SRAM at 0x8000, so
 * the SRAM section at 0x44000 takes over the role of the backup storage.
 * This memory is normally shared with the OpenRISC core (should there be
 * an extra check, which ensures that this core is powered off and unable
 * to interfere?).
 */
sram_swap_buffers a31_sram_swap_buffers[] = {
	/* The area right below 0x2000 */
	{
		.buf1 = 0x01800,
		.buf2 = 0x44000,
		.size = 0x800,
	},
	/* The area from 0x5C00 up to 0x8000 */
	{
		.buf1 = 0x05C00,
		.buf2 = 0x44800,
		.size = 0x8000 - 0x5C00,
	},
	{ 0 } /* End of the table */
};
/*
 * SRAM layout records of the explicitly supported SoC variants. The table
 * ends with an all-zero record, recognizable by its NULL 'swap_buffers'.
 */
soc_sram_info soc_sram_info_table[] = {
	{
		/* Allwinner A10 */
		.soc_id       = 0x1623,
		.thunk_addr   = 0xAE00,
		.thunk_size   = 0x200,
		.swap_buffers = a10_a13_a20_sram_swap_buffers,
	},
	{
		/* Allwinner A13 */
		.soc_id       = 0x1625,
		.thunk_addr   = 0xAE00,
		.thunk_size   = 0x200,
		.swap_buffers = a10_a13_a20_sram_swap_buffers,
	},
	{
		/* Allwinner A20 */
		.soc_id       = 0x1651,
		.thunk_addr   = 0xAE00,
		.thunk_size   = 0x200,
		.swap_buffers = a10_a13_a20_sram_swap_buffers,
	},
	{
		/* Allwinner A31 */
		.soc_id       = 0x1633,
		.thunk_addr   = 0x46E00,
		.thunk_size   = 0x200,
		.swap_buffers = a31_sram_swap_buffers,
	},
	{ 0 } /* End of the table */
};
/*
 * Fallback record for a BROM, which is assumed to behave similarly to the
 * one in A10/A13/A20/A31, on a SoC without any extra SRAM sections beyond
 * 0x8000. Another assumption is that the IRQ handler stack never needs
 * more than 0x400 bytes.
 *
 * Whether the 0x7000-0x8000 area is really unused by the BROM is not
 * known. The users are free to hope so and re-purpose it for the SPL
 * stack.
 *
 * The ".text + .data" sections are limited to ~21 KiB in this setup.
 */
sram_swap_buffers generic_sram_swap_buffers[] = {
	/* Park the top 0x400 bytes of the IRQ handler stack at 0x5800 */
	{
		.buf1 = 0x01C00,
		.buf2 = 0x5800,
		.size = 0x400,
	},
	{ 0 } /* End of the table */
};
/*
 * SRAM layout used for SoC variants, which have no record of their own in
 * the 'soc_sram_info_table'. The 'soc_id' field is left at zero.
 */
soc_sram_info generic_sram_info = {
	.thunk_addr   = 0x5680,
	.thunk_size   = 0x180,
	.swap_buffers = generic_sram_swap_buffers,
};
/*
 * Query the SoC ID from the device and look up the matching SRAM layout
 * record.
 *
 * Returns a pointer to the record from 'soc_sram_info_table' with the same
 * SoC ID. If there is none, a warning is printed and a pointer to the
 * 'generic_sram_info' record is returned instead, so the result is never
 * NULL.
 */
soc_sram_info *aw_fel_get_sram_info(libusb_device_handle *usb)
{
	struct aw_fel_version version;
	soc_sram_info *entry;

	aw_fel_get_version(usb, &version);

	/* The end of the table is marked by a NULL 'swap_buffers' pointer */
	for (entry = soc_sram_info_table; entry->swap_buffers; entry++) {
		if (entry->soc_id == version.soc_id)
			return entry;
	}

	printf("Warning: no 'soc_sram_info' data for your SoC (id=%04X)\n",
	       version.soc_id);
	return &generic_sram_info;
}
/*
 * Machine code of the FEL-to-SPL thunk as an array of 32-bit words. The
 * initializer list comes from the "fel-to-spl-thunk.h" header. The table
 * of the buffers to swap is not part of this array, it gets appended at
 * runtime by aw_fel_write_and_execute_spl().
 */
static uint32_t fel_to_spl_thunk[] = {
#include "fel-to-spl-thunk.h"
};
/*
 * Upload a U-Boot SPL image with an eGON header to the SRAM of the device,
 * execute it with the help of the FEL-to-SPL thunk and check the status
 * reported back in the header signature.
 *
 * usb - handle of the device in FEL mode
 * buf - the SPL image; the buffer is only read and is not freed here
 * len - number of valid bytes in 'buf' (may exceed the SPL length from the
 *       eGON header, the excess is ignored)
 *
 * Any failure is reported on stderr and terminates the program via exit(1).
 * All the checks, which do not need the device to run the code, are done
 * before the first byte of the image is written to the device.
 */
void aw_fel_write_and_execute_spl(libusb_device_handle *usb,
				  uint8_t *buf, size_t len)
{
	soc_sram_info *sram_info = aw_fel_get_sram_info(usb);
	sram_swap_buffers *swap_buffers;
	char header_signature[9] = { 0 };
	size_t i, swap_table_size, thunk_size;
	uint32_t *thunk_buf;
	uint32_t spl_checksum, spl_len, spl_len_limit = 0x8000;
	uint32_t *buf32 = (uint32_t *)buf;
	uint32_t written = 0;

	if (!sram_info || !sram_info->swap_buffers) {
		fprintf(stderr, "SPL: Unsupported SoC type\n");
		exit(1);
	}

	if (len < 32 || memcmp(buf + 4, "eGON.BT0", 8) != 0) {
		fprintf(stderr, "SPL: eGON header is not found\n");
		exit(1);
	}

	/*
	 * The sum of all the words of a valid image equals the stored
	 * checksum (word 3) plus 0x5F0A6C39. The stored checksum itself is a
	 * part of the summed up data, hence the factor of 2.
	 */
	spl_checksum = 2 * le32toh(buf32[3]) - 0x5F0A6C39;
	spl_len = le32toh(buf32[4]);

	/*
	 * The length must cover at least the header itself (the checksum
	 * scheme relies on the header words being included), must fit into
	 * the provided data and must be a whole number of 32-bit words.
	 */
	if (spl_len < 32 || spl_len > len || (spl_len % 4) != 0) {
		fprintf(stderr, "SPL: bad length in the eGON header\n");
		exit(1);
	}

	len = spl_len;
	for (i = 0; i < len / 4; i++)
		spl_checksum -= le32toh(buf32[i]);

	if (spl_checksum != 0) {
		fprintf(stderr, "SPL: checksum check failed\n");
		exit(1);
	}

	/*
	 * Clarify the SPL size limitations, and bail out if they are not met.
	 * Neither a backup location nor the thunk may overlap with the SPL.
	 */
	swap_buffers = sram_info->swap_buffers;
	for (i = 0; swap_buffers[i].size; i++) {
		if (swap_buffers[i].buf2 < spl_len_limit)
			spl_len_limit = swap_buffers[i].buf2;
	}
	/* All the entries including the terminating one */
	swap_table_size = (i + 1) * sizeof(*swap_buffers);

	if (sram_info->thunk_addr < spl_len_limit)
		spl_len_limit = sram_info->thunk_addr;

	if (spl_len > spl_len_limit) {
		fprintf(stderr, "SPL: too large (need %d, have %d)\n",
			(int)spl_len, (int)spl_len_limit);
		exit(1);
	}

	/* The thunk code is followed by the swap table on the device */
	thunk_size = sizeof(fel_to_spl_thunk) + swap_table_size;

	if (thunk_size > sram_info->thunk_size) {
		fprintf(stderr, "SPL: bad thunk size (need %d, have %d)\n",
			(int)thunk_size, (int)sram_info->thunk_size);
		exit(1);
	}

	thunk_buf = malloc(thunk_size);
	if (!thunk_buf) {
		fprintf(stderr, "SPL: failed to allocate %d bytes for the thunk\n",
			(int)thunk_size);
		exit(1);
	}

	/*
	 * Write the SPL. The parts, which would overwrite the BROM buffers,
	 * are diverted to the respective backup locations. The thunk swaps
	 * them into place on the device later. This relies on the table
	 * being sorted by the 'buf1' addresses.
	 */
	for (i = 0; swap_buffers[i].size; i++) {
		if (len > 0 && written < swap_buffers[i].buf1) {
			/* The part in front of the BROM buffer goes in place */
			uint32_t tmp = swap_buffers[i].buf1 - written;
			if (tmp > len)
				tmp = len;
			aw_fel_write(usb, buf, written, tmp);
			written += tmp;
			buf += tmp;
			len -= tmp;
		}
		if (len > 0 && written == swap_buffers[i].buf1) {
			/* The part clashing with the BROM buffer goes to 'buf2' */
			uint32_t tmp = swap_buffers[i].size;
			if (tmp > len)
				tmp = len;
			aw_fel_write(usb, buf, swap_buffers[i].buf2, tmp);
			written += tmp;
			buf += tmp;
			len -= tmp;
		}
	}

	/* Write the remaining part of the SPL */
	if (len > 0)
		aw_fel_write(usb, buf, written, len);

	/* Put together the thunk code and the swap table for this SoC */
	memcpy(thunk_buf, fel_to_spl_thunk, sizeof(fel_to_spl_thunk));
	memcpy(thunk_buf + sizeof(fel_to_spl_thunk) / sizeof(uint32_t),
	       swap_buffers, swap_table_size);

	/* The device expects all the words in the little-endian format */
	for (i = 0; i < thunk_size / sizeof(uint32_t); i++)
		thunk_buf[i] = htole32(thunk_buf[i]);

	aw_fel_write(usb, thunk_buf, sram_info->thunk_addr, thunk_size);
	aw_fel_execute(usb, sram_info->thunk_addr);

	free(thunk_buf);

	/* TODO: Try to find and fix the bug, which needs this workaround */
	usleep(250000);

	/*
	 * Read back the result and check if everything was fine. The thunk
	 * changes the signature to 'eGON.FEL' after a successful run and to
	 * something else on failure. The ninth byte of the local buffer
	 * stays zero and terminates the string.
	 */
	aw_fel_read(usb, 4, header_signature, 8);
	if (strcmp(header_signature, "eGON.FEL") != 0) {
		fprintf(stderr, "SPL: failure code '%s'\n",
			header_signature);
		exit(1);
	}
}
static int aw_fel_get_endpoint(libusb_device_handle *usb)
{
struct libusb_device *dev = libusb_get_device(usb);
......@@ -352,6 +584,7 @@ int main(int argc, char **argv)
" ver[sion] Show BROM version\n"
" clear address length Clear memory\n"
" fill address length value Fill memory\n"
" spl file Load and execute U-Boot SPL\n"
, argv[0]
);
}
......@@ -417,6 +650,11 @@ int main(int argc, char **argv)
} else if (strcmp(argv[1], "fill") == 0 && argc > 3) {
aw_fel_fill(handle, strtoul(argv[2], NULL, 0), strtoul(argv[3], NULL, 0), (unsigned char)strtoul(argv[4], NULL, 0));
skip=4;
} else if (strcmp(argv[1], "spl") == 0 && argc > 2) {
size_t size;
uint8_t *buf = load_file(argv[2], &size);
aw_fel_write_and_execute_spl(handle, buf, size);
skip=2;
} else {
fprintf(stderr,"Invalid command %s\n", argv[1]);
exit(1);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment