Commit 1091f3ac authored by Siarhei Siamashka, committed by Andre Przywara
Browse files

fel: Support uploading & executing ARM AAPCS functions on the device



This patch adds a wrapper script, which can automatically compile
and wrap a small C function, taking care of all the necessary
function arguments marshalling.

The functions 'aw_fel_remotefunc_prepare/aw_fel_remotefunc_execute'
allow using such functions in the sunxi-fel tool to get this code
executed remotely on the device.
Signed-off-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
[Andre: adjust to match upstream changes]
Signed-off-by: Andre Przywara <osp@andrep.de>
parent d0f2fbb8
#!/usr/bin/env ruby
#
# (C) Copyright 2016 Siarhei Siamashka <siarhei.siamashka@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Print the usage help and exit with status 1 when fewer than two command
# line arguments (input C source, output header) were supplied.
if ARGV.size < 2
  # The program name is printed with 'print' rather than being spliced into a
  # printf format string, so a '%' in the script path cannot be misread as a
  # format directive.
  print "Usage: #{$PROGRAM_NAME} [c_source_input] [marshalled_header_output]\n\n"
  # Single-quoted heredoc: the text below is emitted verbatim (no escapes, no
  # interpolation), including the literal '\n' in the C printf example.
  print <<'USAGE'
This script uses an ARM toolchain to compile native ARM code, and then
automatically generates the necessary wrapper code for calling it from
the sunxi-fel tool. Executing such compiled pieces of code natively on
the device may be needed for the performance critical parts.

Example input file:

 unsigned sum(unsigned a, unsigned b)
 {
 return a + b;
 }

Using this example code inside of sunxi-fel:

 uint32_t a = 1, b = 2, c;
 aw_fel_remotefunc_prepare_sum(dev, a, b);
 aw_fel_remotefunc_execute(dev, &c);
 printf("%d + %d = %d\n", a, b, c);

If the returned result is not needed (a void function), then the second
argument to the 'aw_fel_remotefunc_execute' function can be NULL.

USAGE
  exit(1)
end
# Report whether an executable called +tool_name+ can be located by the
# shell's 'which' command. All output of 'which' is discarded; only its
# exit status matters. Returns true or false.
def tool_exists(tool_name)
  %x(which #{tool_name} > /dev/null 2>&1)
  $?.to_i.zero?
end
# Parse a stack usage report (as written by GCC's '-fstack-usage' option).
#
# Every line of the report must contain, after a colon, a function name
# followed by whitespace, a decimal byte count, whitespace and a qualifier
# word. Only the "static" qualifier is accepted.
#
# filename - path of the report file.
#
# Returns nil when the file does not exist; otherwise an array with one
# hash per report line, holding :function_name (String) and :stack_usage
# (Integer). Aborts the script with a diagnostic if a line cannot be parsed
# or if a function's stack usage is not "static".
def parse_stack_usage(filename)
  # File.exists? was deprecated for years and removed in Ruby 3.2;
  # File.exist? works on every Ruby version.
  return unless File.exist?(filename)

  File.read(filename).strip.split("\n").map do |line|
    entry = line.match(/\:([^\:\s]+)\s+(\d+)\s+(\S+)/)
    unless entry
      abort sprintf("Failed to parse stack usage information '%s'\n", line.strip)
    end

    name, bytes, qualifier = entry.captures
    if qualifier != "static"
      abort sprintf("Non-static stack usage for function '%s'\n", name)
    end

    { function_name: name, stack_usage: bytes.to_i }
  end
end
###############################################################################
# Main script body: compile ARGV[0] with an ARM cross compiler, verify that
# the object file holds exactly one self-contained function, and write a C
# wrapper (static function 'aw_fel_remotefunc_prepare_<name>') to ARGV[1].
###############################################################################

# Candidate cross compiler prefixes, probed in this order.
toolchains = [
  "arm-none-eabi-",
  "arm-linux-gnueabihf-",
  "arm-none-linux-gnueabi-",
  "armv7a-hardfloat-linux-gnueabi-",
]
toolchain = toolchains.find { |prefix| tool_exists("#{prefix}gcc") }
abort "Can't find any usable ARM crosscompiler.\n" unless toolchain

source_path = ARGV[0]
object_path = "#{source_path}.o"

# Run a program with the given argument vector (no shell is involved, so
# paths containing spaces or shell metacharacters are passed through intact)
# and return everything it wrote to stdout.
def capture_tool_output(*command)
  IO.popen(command, &:read)
end

# Compile the source file
compiled = system("#{toolchain}gcc", "-c", "-Os", "-marm", "-march=armv7-a",
                  "-mfloat-abi=soft", "-fstack-usage", "-fpic",
                  "-o", object_path, source_path)
# Propagate the compiler's real exit code. Process::Status#to_i is the raw
# wait status (exit code 1 reads as 256), which 'exit' would truncate to 0.
exit($?.exitstatus || 1) unless compiled

# Read the stack usage information
stack_usage = parse_stack_usage("#{source_path}.su")
unless stack_usage
  abort sprintf("Can't find the stack usage information file '%s.su'.\n", source_path)
end
if stack_usage.size != 1
  abort sprintf("Expected only one function in the source file, but got %s.\n",
                stack_usage.map { |a| "'" + a[:function_name] + "()'" }.join(", "))
end

# Only the disassembled code is uploaded, so the function must not depend
# on any data sections.
capture_tool_output("#{toolchain}size", "-A", object_path).each_line do |l|
  next unless l =~ /(\S+)\s+(\S+)/
  section = $1
  section_size = $2.to_i
  if [".data", ".bss", ".rodata"].include?(section) && section_size > 0
    abort "Can't have non-empty '.data', '.bss' or '.rodata' section."
  end
end

# Undefined symbols would need a linker, which is never run here.
capture_tool_output("#{toolchain}objdump", "-t", object_path).each_line do |l|
  if l =~ /\*UND\*/
    abort "External references are not allowed: '#{l.strip}'.\n"
  end
end

function_name = stack_usage[0][:function_name]

# Read the source file and strip multiline C comments
sourcefile = File.read(source_path).gsub(/\/\*.*?\*\//m, "")

# Try to find the function and its arguments
signature = sourcefile.match(/#{Regexp.escape(function_name)}\((.*?)\)/m)
unless signature
  abort sprintf("Can't find the function '%s()' in the source file.\n",
                function_name)
end

# Extract the function argument names (the last token of every parameter,
# without any leading '*'). An empty or '(void)' parameter list means that
# the function takes no arguments.
param_list = signature[1].strip
function_args =
  if param_list.empty? || param_list == "void"
    []
  else
    param_list.split(",").map { |a| a.strip[/([^\*\s]+)$/, 1] }
  end
if function_args.any?(&:nil?)
  abort sprintf("Can't parse the arguments of the function '%s()'.\n",
                function_name)
end

###############################################################################
# Generate output file
###############################################################################

# The block form guarantees that the output file is flushed and closed.
File.open(ARGV[1], "w") do |out|
  out.printf("/* Automatically generated, do not edit! */\n\n")
  out.printf("static void\n")
  funcdecl = "aw_fel_remotefunc_prepare_#{function_name}(feldev_handle *dev"
  if function_args.empty?
    out.printf("%s", funcdecl)
  else
    # Continuation lines are padded up to the column of the opening '('.
    indent = " " * funcdecl.index("(")
    out.printf("%s,\n", funcdecl)
    out.printf("%s", function_args.map { |a| indent + " uint32_t " + a }.join(",\n"))
  end
  out.printf(")\n{\n")

  # Emit every disassembled instruction as little-endian bytes, with the
  # disassembly text kept alongside as a C comment.
  out.printf("\tstatic uint8_t arm_code[] = {\n")
  capture_tool_output("#{toolchain}objdump", "-d", object_path).each_line do |l|
    next unless l =~ /(\h+)\:\s+(\h+)\s+(\S+)\s+([^;]*)/
    addr = $1
    opcode = $2
    p1 = $3
    p2 = $4.strip
    opcode = opcode.scan(/../).map { |a| "0x" + a }.reverse.join(", ")
    out.printf("\t\t%s, /* %4s: %-8s %-34s \x2a/\n", opcode, addr, p1, p2)
  end
  out.printf("\t};\n")

  # A zero-argument function still gets a one-element array, because an
  # empty initializer list is not valid ISO C; num_args is passed as 0.
  arg_values = function_args.empty? ? ["0"] : function_args
  out.printf("\tuint32_t args[] = {\n\t\t")
  out.printf("%s\n\t};\n", arg_values.join(",\n\t\t"))
  out.printf("\taw_fel_remotefunc_prepare(dev, %d, arm_code, sizeof(arm_code), %d, args);\n",
             stack_usage[0][:stack_usage], function_args.size)
  out.printf("}\n")
end
......@@ -243,6 +243,111 @@ void aw_fel_fill(feldev_handle *dev, uint32_t offset, size_t size, unsigned char
}
}
/*
 * Upload a function (implemented in native ARM code) to the device and
 * prepare for executing it. Use a subset of 32-bit ARM AAPCS calling
 * conventions: all arguments are integer 32-bit values, and an optional
 * return value is a 32-bit integer too. The function code needs to be
 * compiled in the ARM mode (Thumb2 is not supported), it also must be
 * a position independent leaf function (have no calls to anything else)
 * and have no references to any global variables.
 *
 * 'stack_size'    - the required stack size for the function (can be
 *                   calculated using the '-fstack-usage' GCC option)
 * 'arm_code'      - a pointer to the memory buffer with the function code
 * 'arm_code_size' - the size of the function code
 * 'num_args'      - the number of 32-bit function arguments
 * 'args'          - an array with the function argument values
 *
 * Returns false if the device has no SoC information or if the staging
 * buffer cannot be allocated (nothing is written to the device in either
 * case), and true once the image has been written to the scratch area.
 *
 * Image layout, starting at 'scratch_addr':
 *   entry_code | new_sp | num_args_on_stack | stack args | 4 register args |
 *   function code | stack space (rounded up to an 8 byte boundary)
 *
 * Note: once uploaded, the function can be executed multiple times with
 *       exactly the same arguments. If some internal state needs to be
 *       updated between function calls, then it's best to pass a pointer
 *       to some state structure located elsewhere in SRAM as one of the
 *       function arguments.
 */
bool aw_fel_remotefunc_prepare(feldev_handle *dev,
			       size_t stack_size,
			       void *arm_code,
			       size_t arm_code_size,
			       size_t num_args,
			       uint32_t *args)
{
	size_t idx, i;
	size_t tmp_buf_size;
	soc_info_t *soc_info = dev->soc_info;
	uint32_t *tmp_buf;
	/* The first four arguments travel in r0-r3, the rest on the stack */
	uint32_t new_sp, num_args_on_stack = (num_args <= 4 ? 0 : num_args - 4);
	uint32_t entry_code[] = {
		htole32(0xe58fe040), /*  0: str  lr, [pc, #64]        */
		htole32(0xe58fd040), /*  4: str  sp, [pc, #64]        */
		htole32(0xe59fd040), /*  8: ldr  sp, [pc, #64]        */
		htole32(0xe28fc040), /*  c: add  ip, pc, #64          */
		htole32(0xe1a0200d), /* 10: mov  r2, sp               */
		htole32(0xe49c0004), /* 14: ldr  r0, [ip], #4         */
		htole32(0xe3500000), /* 18: cmp  r0, #0               */
		htole32(0x0a000003), /* 1c: beq  30 <entry+0x30>      */
		htole32(0xe49c1004), /* 20: ldr  r1, [ip], #4         */
		htole32(0xe4821004), /* 24: str  r1, [r2], #4         */
		htole32(0xe2500001), /* 28: subs r0, r0, #1           */
		htole32(0x1afffffb), /* 2c: bne  20 <entry+0x20>      */
		htole32(0xe8bc000f), /* 30: ldm  ip!, {r0, r1, r2, r3} */
		htole32(0xe12fff3c), /* 34: blx  ip                   */
		htole32(0xe59fe008), /* 38: ldr  lr, [pc, #8]         */
		htole32(0xe59fd008), /* 3c: ldr  sp, [pc, #8]         */
		htole32(0xe58f0000), /* 40: str  r0, [pc]             */
		htole32(0xe12fff1e), /* 44: bx   lr                   */
		htole32(0x00000000), /* 48: .word 0x00000000          */
		htole32(0x00000000), /* 4c: .word 0x00000000          */
	};

	if (!soc_info)
		return false;

	/* Calculate the stack location */
	new_sp = soc_info->scratch_addr +
		 sizeof(entry_code) +
		 2 * 4 +			/* new_sp + num_args_on_stack words */
		 num_args_on_stack * 4 +
		 4 * 4 +			/* values for r0-r3 */
		 arm_code_size +
		 stack_size;
	new_sp = (new_sp + 7) & ~7;		/* round up to a multiple of 8 */

	tmp_buf_size = new_sp - soc_info->scratch_addr;
	tmp_buf = calloc(tmp_buf_size, 1);
	/* Bail out instead of dereferencing NULL when memory is exhausted */
	if (!tmp_buf)
		return false;

	memcpy(tmp_buf, entry_code, sizeof(entry_code));
	idx = sizeof(entry_code) / 4;
	tmp_buf[idx++] = htole32(new_sp);
	tmp_buf[idx++] = htole32(num_args_on_stack);
	/* Arguments beyond the fourth one, copied to the stack by entry_code */
	for (i = num_args - num_args_on_stack; i < num_args; i++)
		tmp_buf[idx++] = htole32(args[i]);
	/* Values loaded into r0-r3; unused registers are set to zero */
	for (i = 0; i < 4; i++)
		tmp_buf[idx++] = (i < num_args ? htole32(args[i]) : 0);
	/* The function code directly follows; entry_code jumps here via 'ip' */
	memcpy(tmp_buf + idx, arm_code, arm_code_size);

	aw_fel_write(dev, tmp_buf, soc_info->scratch_addr, tmp_buf_size);
	free(tmp_buf);
	return true;
}
/*
 * Run the function that was previously uploaded to the scratch area.
 *
 * When 'result' is not NULL, the 32-bit word at offset 0x48 of the scratch
 * area is read back, converted from little-endian to host byte order and
 * stored in '*result'.
 *
 * Returns false when the device has no SoC information, true otherwise.
 */
bool aw_fel_remotefunc_execute(feldev_handle *dev, uint32_t *result)
{
	uint32_t raw_word;
	soc_info_t *soc_info = dev->soc_info;

	if (!soc_info)
		return false;

	aw_fel_execute(dev, soc_info->scratch_addr);

	/* The caller is not interested in the return value */
	if (!result)
		return true;

	aw_fel_read(dev, soc_info->scratch_addr + 0x48, &raw_word, sizeof(uint32_t));
	*result = le32toh(raw_word);
	return true;
}
/*
 * 32-bit words whose values are supplied entirely by the included header
 * "thunks/fel-to-spl-thunk.h".
 * NOTE(review): presumably the machine code of the FEL-to-SPL thunk --
 * confirm against the header contents.
 */
static uint32_t fel_to_spl_thunk[] = {
#include "thunks/fel-to-spl-thunk.h"
};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment