Commit b51da821 authored by Achin Gupta's avatar Achin Gupta
Browse files

Remove coherent stack usage from the warm boot path

This patch uses stacks allocated in normal memory to enable the MMU early in the
warm boot path thus removing the dependency on stacks allocated in coherent
memory. Necessary cache and stack maintenance is performed when a cpu is being
powered down and up. This avoids any coherency issues that can arise from
reading speculatively fetched stale stack memory from another CPUs cache. These
changes affect the warm boot path in both BL3-1 and BL3-2.

The EL3 system registers responsible for preserving the MMU state are not saved
and restored any longer. Static values are used to program these system
registers when a cpu is powered on or resumed from suspend.

Change-Id: I8357e2eb5eb6c5f448492c5094b82b8927603784
parent afff8cbd
......@@ -43,23 +43,9 @@
.global el3_sysregs_context_save
func el3_sysregs_context_save
mrs x10, sctlr_el3
str x10, [x0, #CTX_SCTLR_EL3]
mrs x11, cptr_el3
stp x11, xzr, [x0, #CTX_CPTR_EL3]
mrs x13, cntfrq_el0
mrs x14, mair_el3
stp x13, x14, [x0, #CTX_CNTFRQ_EL0]
mrs x15, tcr_el3
mrs x16, ttbr0_el3
stp x15, x16, [x0, #CTX_TCR_EL3]
mrs x17, daif
and x17, x17, #(DAIF_ABT_BIT | DAIF_DBG_BIT)
stp x17, xzr, [x0, #CTX_DAIF_EL3]
mrs x10, cptr_el3
mrs x11, cntfrq_el0
stp x10, x11, [x0, #CTX_CPTR_EL3]
ret
......@@ -78,27 +64,9 @@ func el3_sysregs_context_save
.global el3_sysregs_context_restore
func el3_sysregs_context_restore
ldp x11, xzr, [x0, #CTX_CPTR_EL3]
msr cptr_el3, x11
ldp x13, x14, [x0, #CTX_CNTFRQ_EL0]
msr cntfrq_el0, x13
msr mair_el3, x14
ldp x15, x16, [x0, #CTX_TCR_EL3]
msr tcr_el3, x15
msr ttbr0_el3, x16
ldp x17, xzr, [x0, #CTX_DAIF_EL3]
mrs x11, daif
orr x17, x17, x11
msr daif, x17
/* Make sure all the above changes are observed */
isb
ldr x10, [x0, #CTX_SCTLR_EL3]
msr sctlr_el3, x10
ldp x13, x14, [x0, #CTX_CPTR_EL3]
msr cptr_el3, x13
msr cntfrq_el0, x14
isb
ret
......
......@@ -403,7 +403,7 @@ smc_handler64:
mrs x17, elr_el3
mrs x18, scr_el3
stp x16, x17, [x6, #CTX_EL3STATE_OFFSET + CTX_SPSR_EL3]
stp x18, xzr, [x6, #CTX_EL3STATE_OFFSET + CTX_SCR_EL3]
str x18, [x6, #CTX_EL3STATE_OFFSET + CTX_SCR_EL3]
/* Copy SCR_EL3.NS bit to the flag to indicate caller's security */
bfi x7, x18, #0, #1
......@@ -446,7 +446,7 @@ el3_exit: ; .type el3_exit, %function
* Restore SPSR_EL3, ELR_EL3 and SCR_EL3 prior to ERET
* -----------------------------------------------------
*/
ldp x18, xzr, [sp, #CTX_EL3STATE_OFFSET + CTX_SCR_EL3]
ldr x18, [sp, #CTX_EL3STATE_OFFSET + CTX_SCR_EL3]
ldp x16, x17, [sp, #CTX_EL3STATE_OFFSET + CTX_SPSR_EL3]
msr scr_el3, x18
msr spsr_el3, x16
......
......@@ -48,6 +48,7 @@ BL31_SOURCES += bl31/bl31_main.c \
services/std_svc/psci/psci_afflvl_suspend.c \
services/std_svc/psci/psci_common.c \
services/std_svc/psci/psci_entry.S \
services/std_svc/psci/psci_helpers.S \
services/std_svc/psci/psci_main.c \
services/std_svc/psci/psci_setup.c
......
......@@ -31,6 +31,7 @@
#include <arch.h>
#include <asm_macros.S>
#include <tsp.h>
#include <xlat_tables.h>
.globl tsp_entrypoint
......@@ -204,26 +205,48 @@ func tsp_cpu_on_entry
isb
/* --------------------------------------------
* Give ourselves a small coherent stack to
* ease the pain of initializing the MMU
* Give ourselves a stack whose memory will be
* marked as Normal-IS-WBWA when the MMU is
* enabled.
* --------------------------------------------
*/
mrs x0, mpidr_el1
bl platform_set_coherent_stack
bl platform_set_stack
/* ---------------------------------------------
* Initialise the MMU
* ---------------------------------------------
/* --------------------------------------------
* Enable the MMU with the DCache disabled. It
* is safe to use stacks allocated in normal
* memory as a result. All memory accesses are
* marked nGnRnE when the MMU is disabled. So
* all the stack writes will make it to memory.
* All memory accesses are marked Non-cacheable
* when the MMU is enabled but D$ is disabled.
* So used stack memory is guaranteed to be
* visible immediately after the MMU is enabled
* Enabling the DCache at the same time as the
* MMU can lead to speculatively fetched and
* possibly stale stack memory being read from
* other caches. This can lead to coherency
* issues.
* --------------------------------------------
*/
mov x0, #DISABLE_DCACHE
bl bl32_plat_enable_mmu
/* ---------------------------------------------
* Give ourselves a stack allocated in Normal
* -IS-WBWA memory
* Enable the Data cache now that the MMU has
* been enabled. The stack has been unwound. It
* will be written first before being read. This
* will invalidate any stale cache lines resi-
* -dent in other caches. We assume that
* interconnect coherency has been enabled for
* this cluster by EL3 firmware.
* ---------------------------------------------
*/
mrs x0, mpidr_el1
bl platform_set_stack
mrs x0, sctlr_el1
orr x0, x0, #SCTLR_C_BIT
msr sctlr_el1, x0
isb
/* ---------------------------------------------
* Enter C runtime to perform any remaining
......
......@@ -76,21 +76,13 @@
* 32-bits wide but are stored as 64-bit values for convenience
******************************************************************************/
#define CTX_EL3STATE_OFFSET (CTX_GPREGS_OFFSET + CTX_GPREGS_END)
#define CTX_VBAR_EL3 0x0 /* Currently unused */
#define CTX_SCR_EL3 0x0
#define CTX_RUNTIME_SP 0x8
#define CTX_SPSR_EL3 0x10
#define CTX_ELR_EL3 0x18
#define CTX_SCR_EL3 0x20
#define CTX_SCTLR_EL3 0x28
#define CTX_CPTR_EL3 0x30
/* Unused space to allow registers to be stored as pairs */
#define CTX_CNTFRQ_EL0 0x40
#define CTX_MAIR_EL3 0x48
#define CTX_TCR_EL3 0x50
#define CTX_TTBR0_EL3 0x58
#define CTX_DAIF_EL3 0x60
/* Unused space to honour alignment requirements */
#define CTX_EL3STATE_END 0x70
#define CTX_CPTR_EL3 0x20
#define CTX_CNTFRQ_EL0 0x28
#define CTX_EL3STATE_END 0x30
/*******************************************************************************
* Constants that allow assembler code to access members of and the
......
......@@ -44,7 +44,6 @@ static int psci_afflvl0_off(aff_map_node_t *cpu_node)
{
unsigned int plat_state;
int rc;
unsigned long sctlr;
assert(cpu_node->level == MPIDR_AFFLVL0);
......@@ -70,24 +69,8 @@ static int psci_afflvl0_off(aff_map_node_t *cpu_node)
/*
* Arch. management. Perform the necessary steps to flush all
* cpu caches.
*
* TODO: This power down sequence varies across cpus so it needs to be
* abstracted out on the basis of the MIDR like in cpu_reset_handler().
* Do the bare minimal for the time being. Fix this before porting to
* Cortex models.
*/
sctlr = read_sctlr_el3();
sctlr &= ~SCTLR_C_BIT;
write_sctlr_el3(sctlr);
isb(); /* ensure MMU disable takes immediate effect */
/*
* CAUTION: This flush to the level of unification makes an assumption
* about the cache hierarchy at affinity level 0 (cpu) in the platform.
* Ideally the platform should tell psci which levels to flush to exit
* coherency.
*/
dcsw_op_louis(DCCISW);
psci_do_pwrdown_cache_maintenance(MPIDR_AFFLVL0);
/*
* Plat. management: Perform platform specific actions to turn this
......
......@@ -359,9 +359,9 @@ static unsigned int psci_afflvl0_on_finish(aff_map_node_t *cpu_node)
}
/*
* Arch. management: Turn on mmu & restore architectural state
* Arch. management: Enable data cache and manage stack memory
*/
bl31_plat_enable_mmu(0);
psci_do_pwrup_cache_maintenance();
/*
* All the platform specific actions for turning this cpu
......
......@@ -126,8 +126,7 @@ static int psci_afflvl0_suspend(aff_map_node_t *cpu_node,
unsigned int power_state)
{
unsigned int plat_state;
unsigned long psci_entrypoint, sctlr;
el3_state_t *saved_el3_state;
unsigned long psci_entrypoint;
uint32_t ns_scr_el3 = read_scr_el3();
uint32_t ns_sctlr_el1 = read_sctlr_el1();
int rc;
......@@ -170,37 +169,14 @@ static int psci_afflvl0_suspend(aff_map_node_t *cpu_node,
*/
cm_el3_sysregs_context_save(NON_SECURE);
/*
* The EL3 state to PoC since it will be accessed after a
* reset with the caches turned off
*/
saved_el3_state = get_el3state_ctx(cm_get_context(NON_SECURE));
flush_dcache_range((uint64_t) saved_el3_state, sizeof(*saved_el3_state));
/* Set the secure world (EL3) re-entry point after BL1 */
psci_entrypoint = (unsigned long) psci_aff_suspend_finish_entry;
/*
* Arch. management. Perform the necessary steps to flush all
* cpu caches.
*
* TODO: This power down sequence varies across cpus so it needs to be
* abstracted out on the basis of the MIDR like in cpu_reset_handler().
* Do the bare minimal for the time being. Fix this before porting to
* Cortex models.
*/
sctlr = read_sctlr_el3();
sctlr &= ~SCTLR_C_BIT;
write_sctlr_el3(sctlr);
isb(); /* ensure MMU disable takes immediate effect */
/*
* CAUTION: This flush to the level of unification makes an assumption
* about the cache hierarchy at affinity level 0 (cpu) in the platform.
* Ideally the platform should tell psci which levels to flush to exit
* coherency.
*/
dcsw_op_louis(DCCISW);
psci_do_pwrdown_cache_maintenance(MPIDR_AFFLVL0);
/*
* Plat. management: Allow the platform to perform the
......@@ -467,9 +443,11 @@ static unsigned int psci_afflvl0_suspend_finish(aff_map_node_t *cpu_node)
/* Get the index for restoring the re-entry information */
/*
* Arch. management: Restore the stashed EL3 architectural
* context from the 'cpu_context' structure for this cpu.
* Arch. management: Enable the data cache, manage stack memory and
* restore the stashed EL3 architectural context from the 'cpu_context'
* structure for this cpu.
*/
psci_do_pwrup_cache_maintenance();
cm_el3_sysregs_context_restore(NON_SECURE);
/*
......@@ -575,4 +553,3 @@ const afflvl_power_on_finisher_t psci_afflvl_suspend_finishers[] = {
psci_afflvl1_suspend_finish,
psci_afflvl2_suspend_finish,
};
......@@ -31,6 +31,7 @@
#include <arch.h>
#include <asm_macros.S>
#include <psci.h>
#include <xlat_tables.h>
.globl psci_aff_on_finish_entry
.globl psci_aff_suspend_finish_entry
......@@ -78,8 +79,34 @@ psci_aff_common_finish_entry:
*/
msr spsel, #0
/* --------------------------------------------
* Give ourselves a stack whose memory will be
* marked as Normal-IS-WBWA when the MMU is
* enabled.
* --------------------------------------------
*/
mrs x0, mpidr_el1
bl platform_set_coherent_stack
bl platform_set_stack
/* --------------------------------------------
* Enable the MMU with the DCache disabled. It
* is safe to use stacks allocated in normal
* memory as a result. All memory accesses are
* marked nGnRnE when the MMU is disabled. So
* all the stack writes will make it to memory.
* All memory accesses are marked Non-cacheable
* when the MMU is enabled but D$ is disabled.
* So used stack memory is guaranteed to be
* visible immediately after the MMU is enabled
* Enabling the DCache at the same time as the
* MMU can lead to speculatively fetched and
* possibly stale stack memory being read from
* other caches. This can lead to coherency
* issues.
* --------------------------------------------
*/
mov x0, #DISABLE_DCACHE
bl bl31_plat_enable_mmu
/* ---------------------------------------------
* Call the finishers starting from affinity
......@@ -95,60 +122,10 @@ psci_aff_common_finish_entry:
mov x0, #MPIDR_AFFLVL0
bl psci_afflvl_power_on_finish
/* --------------------------------------------
* Give ourselves a stack allocated in Normal
* -IS-WBWA memory
* --------------------------------------------
*/
mrs x0, mpidr_el1
bl platform_set_stack
b el3_exit
_panic:
b _panic
/* -----------------------------------------------------
* The following two stubs give the calling cpu a
* coherent stack to allow flushing of caches without
* suffering from stack coherency issues
* -----------------------------------------------------
*/
func __psci_cpu_off
func_prologue
sub sp, sp, #0x10
stp x19, x20, [sp, #0]
mov x19, sp
mrs x0, mpidr_el1
bl platform_set_coherent_stack
bl psci_cpu_off
mov sp, x19
ldp x19, x20, [sp,#0]
add sp, sp, #0x10
func_epilogue
ret
func __psci_cpu_suspend
func_prologue
sub sp, sp, #0x20
stp x19, x20, [sp, #0]
stp x21, x22, [sp, #0x10]
mov x19, sp
mov x20, x0
mov x21, x1
mov x22, x2
mrs x0, mpidr_el1
bl platform_set_coherent_stack
mov x0, x20
mov x1, x21
mov x2, x22
bl psci_cpu_suspend
mov sp, x19
ldp x21, x22, [sp,#0x10]
ldp x19, x20, [sp,#0]
add sp, sp, #0x20
func_epilogue
ret
/* --------------------------------------------
* This function is called to indicate to the
* power controller that it is safe to power
......
/*
* Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of ARM nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <arch.h>
#include <asm_macros.S>
#include <platform_def.h>
.globl psci_do_pwrdown_cache_maintenance
.globl psci_do_pwrup_cache_maintenance
/* -----------------------------------------------------------------------
* void psci_do_pwrdown_cache_maintenance(uint32_t affinity level);
*
* This function performs cache maintenance before this cpu is powered
* off. The levels of cache affected are determined by the affinity level
* which is passed as the argument. Additionally, this function also
* ensures that stack memory is correctly flushed out to avoid coherency
* issues due to a change in its memory attributes after the data cache
* is disabled.
* -----------------------------------------------------------------------
*/
func psci_do_pwrdown_cache_maintenance
stp x29, x30, [sp,#-16]!
stp x19, x20, [sp,#-16]!
/* ---------------------------------------------
* Disable the Data Cache.
* ---------------------------------------------
*/
mrs x1, sctlr_el3
bic x1, x1, #SCTLR_C_BIT
msr sctlr_el3, x1
isb
/* ---------------------------------------------
* Determine to how many levels of cache will be
* subject to cache maintenance. Affinity level
* 0 implies that only the cpu is being powered
* down. Only the L1 data cache needs to be
* flushed to the PoU in this case. For a higher
* affinity level we are assuming that a flush
* of L1 data and L2 unified cache is enough.
* This information should be provided by the
* platform.
* ---------------------------------------------
*/
cmp x0, #MPIDR_AFFLVL0
mov x0, #DCCISW
b.ne flush_caches_to_poc
/* ---------------------------------------------
* Flush L1 cache to PoU.
* ---------------------------------------------
*/
bl dcsw_op_louis
b do_stack_maintenance
/* ---------------------------------------------
* Flush L1 and L2 caches to PoC.
* ---------------------------------------------
*/
flush_caches_to_poc:
bl dcsw_op_all
/* ---------------------------------------------
* TODO: Intra-cluster coherency should be
* turned off here once cpu-specific
* abstractions are in place.
* ---------------------------------------------
*/
/* ---------------------------------------------
* Do stack maintenance by flushing the used
* stack to the main memory and invalidating the
* remainder.
* ---------------------------------------------
*/
do_stack_maintenance:
mrs x0, mpidr_el1
bl platform_get_stack
/* ---------------------------------------------
* Calculate and store the size of the used
* stack memory in x1.
* ---------------------------------------------
*/
mov x19, x0
mov x1, sp
sub x1, x0, x1
mov x0, sp
bl flush_dcache_range
/* ---------------------------------------------
* Calculate and store the size of the unused
* stack memory in x1. Calculate and store the
* stack base address in x0.
* ---------------------------------------------
*/
sub x0, x19, #PLATFORM_STACK_SIZE
sub x1, sp, x0
bl inv_dcache_range
ldp x19, x20, [sp], #16
ldp x29, x30, [sp], #16
ret
/* -----------------------------------------------------------------------
* void psci_do_pwrup_cache_maintenance(void);
*
* This function performs cache maintenance after this cpu is powered up.
* Currently, this involves managing the used stack memory before turning
* on the data cache.
* -----------------------------------------------------------------------
*/
func psci_do_pwrup_cache_maintenance
stp x29, x30, [sp,#-16]!
/* ---------------------------------------------
* Ensure any inflight stack writes have made it
* to main memory.
* ---------------------------------------------
*/
dmb st
/* ---------------------------------------------
* Calculate and store the size of the used
* stack memory in x1. Calculate and store the
* stack base address in x0.
* ---------------------------------------------
*/
mrs x0, mpidr_el1
bl platform_get_stack
mov x1, sp
sub x1, x0, x1
mov x0, sp
bl inv_dcache_range
/* ---------------------------------------------
* Enable the data cache.
* ---------------------------------------------
*/
mrs x0, sctlr_el3
orr x0, x0, #SCTLR_C_BIT
msr sctlr_el3, x0
isb
ldp x29, x30, [sp], #16
ret
......@@ -230,10 +230,10 @@ uint64_t psci_smc_handler(uint32_t smc_fid,
SMC_RET1(handle, psci_version());
case PSCI_CPU_OFF:
SMC_RET1(handle, __psci_cpu_off());
SMC_RET1(handle, psci_cpu_off());
case PSCI_CPU_SUSPEND_AARCH32:
SMC_RET1(handle, __psci_cpu_suspend(x1, x2, x3));
SMC_RET1(handle, psci_cpu_suspend(x1, x2, x3));
case PSCI_CPU_ON_AARCH32:
SMC_RET1(handle, psci_cpu_on(x1, x2, x3));
......@@ -258,7 +258,7 @@ uint64_t psci_smc_handler(uint32_t smc_fid,
switch (smc_fid) {
case PSCI_CPU_SUSPEND_AARCH64:
SMC_RET1(handle, __psci_cpu_suspend(x1, x2, x3));
SMC_RET1(handle, psci_cpu_suspend(x1, x2, x3));
case PSCI_CPU_ON_AARCH64:
SMC_RET1(handle, psci_cpu_on(x1, x2, x3));
......
......@@ -128,5 +128,8 @@ int psci_afflvl_suspend(unsigned long,
int);
unsigned int psci_afflvl_suspend_finish(int, int);
/* Private exported functions from psci_helpers.S */
void psci_do_pwrdown_cache_maintenance(uint32_t affinity_level);
void psci_do_pwrup_cache_maintenance(void);
#endif /* __PSCI_PRIVATE_H__ */
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment