Commit 510a9de7 authored by davidcunado-arm's avatar davidcunado-arm Committed by GitHub
Browse files

Merge pull request #860 from jeenu-arm/hw-asstd-coh

Patches for platforms with hardware-assisted coherency
parents 28ee754d b0408e87
......@@ -306,6 +306,11 @@ ifeq (${ARCH},aarch32)
endif
endif
# On systems with hardware-assisted coherency there is no need for
# USE_COHERENT_MEM, so the two options are mutually exclusive: abort the build
# if both HW_ASSISTED_COHERENCY and USE_COHERENT_MEM are set to 1.
ifeq ($(HW_ASSISTED_COHERENCY)-$(USE_COHERENT_MEM),1-1)
$(error USE_COHERENT_MEM cannot be enabled with HW_ASSISTED_COHERENCY)
endif
################################################################################
# Process platform overrideable behaviour
......@@ -386,6 +391,7 @@ $(eval $(call assert_boolean,ENABLE_PSCI_STAT))
$(eval $(call assert_boolean,ENABLE_RUNTIME_INSTRUMENTATION))
$(eval $(call assert_boolean,ERROR_DEPRECATED))
$(eval $(call assert_boolean,GENERATE_COT))
$(eval $(call assert_boolean,HW_ASSISTED_COHERENCY))
$(eval $(call assert_boolean,LOAD_IMAGE_V2))
$(eval $(call assert_boolean,NS_TIMER_SWITCH))
$(eval $(call assert_boolean,PL011_GENERIC_UART))
......@@ -420,6 +426,7 @@ $(eval $(call add_define,ENABLE_PMF))
$(eval $(call add_define,ENABLE_PSCI_STAT))
$(eval $(call add_define,ENABLE_RUNTIME_INSTRUMENTATION))
$(eval $(call add_define,ERROR_DEPRECATED))
$(eval $(call add_define,HW_ASSISTED_COHERENCY))
$(eval $(call add_define,LOAD_IMAGE_V2))
$(eval $(call add_define,LOG_LEVEL))
$(eval $(call add_define,NS_TIMER_SWITCH))
......
......@@ -180,24 +180,29 @@ func bl31_warm_entrypoint
_init_c_runtime=0 \
_exception_vectors=runtime_exceptions
/* --------------------------------------------
* Enable the MMU with the DCache disabled. It
* is safe to use stacks allocated in normal
* memory as a result. All memory accesses are
* marked nGnRnE when the MMU is disabled. So
* all the stack writes will make it to memory.
* All memory accesses are marked Non-cacheable
* when the MMU is enabled but D$ is disabled.
* So used stack memory is guaranteed to be
* visible immediately after the MMU is enabled
* Enabling the DCache at the same time as the
* MMU can lead to speculatively fetched and
* possibly stale stack memory being read from
* other caches. This can lead to coherency
* issues.
* --------------------------------------------
/*
* We're about to enable MMU and participate in PSCI state coordination.
*
* The PSCI implementation invokes platform routines that enable CPUs to
* participate in coherency. On a system where CPUs are not
* cache-coherent out of reset, having caches enabled until such time
* might lead to coherency issues (resulting from stale data getting
* speculatively fetched, among others). Therefore we keep data caches
* disabled while enabling the MMU, thereby forcing data accesses to
* have non-cacheable, nGnRnE attributes (these will always be coherent
* with main memory).
*
* On systems with hardware-assisted coherency, where CPUs are expected
* to be cache-coherent out of reset without needing explicit software
* intervention, PSCI need not invoke platform routines to enter
* coherency (as CPUs already are); and there's no reason to have caches
* disabled either.
*/
#if HW_ASSISTED_COHERENCY
mov x0, #0
#else
mov x0, #DISABLE_DCACHE
#endif
bl bl31_plat_enable_mmu
bl psci_warmboot_entrypoint
......
......@@ -231,24 +231,27 @@ func sp_min_warm_entrypoint
_init_c_runtime=0 \
_exception_vectors=sp_min_vector_table
/* --------------------------------------------
* Enable the MMU with the DCache disabled. It
* is safe to use stacks allocated in normal
* memory as a result. All memory accesses are
* marked nGnRnE when the MMU is disabled. So
* all the stack writes will make it to memory.
* All memory accesses are marked Non-cacheable
* when the MMU is enabled but D$ is disabled.
* So used stack memory is guaranteed to be
* visible immediately after the MMU is enabled
* Enabling the DCache at the same time as the
* MMU can lead to speculatively fetched and
* possibly stale stack memory being read from
* other caches. This can lead to coherency
* issues.
* --------------------------------------------
/*
* We're about to enable MMU and participate in PSCI state coordination.
*
* The PSCI implementation invokes platform routines that enable CPUs to
* participate in coherency. On a system where CPUs are not
* cache-coherent out of reset, having caches enabled until such time
* might lead to coherency issues (resulting from stale data getting
* speculatively fetched, among others). Therefore we keep data caches
* disabled while enabling the MMU, thereby forcing data accesses to
* have non-cacheable, nGnRnE attributes (these will always be coherent
* with main memory).
*
* On systems where CPUs are cache-coherent out of reset, however, PSCI
* need not invoke platform routines to enter coherency (as CPUs already
* are), and there's no reason to have caches disabled either.
*/
#if HW_ASSISTED_COHERENCY
mov r0, #0
#else
mov r0, #DISABLE_DCACHE
#endif
bl bl32_plat_enable_mmu
bl sp_min_warm_boot
......
......@@ -176,7 +176,9 @@ interfaces are:
* The page tables must be setup and the MMU enabled
* The C runtime environment must be setup and stack initialized
* The Data cache must be enabled prior to invoking any of the PSCI library
interfaces except for `psci_warmboot_entrypoint()`.
interfaces except for `psci_warmboot_entrypoint()`. For
`psci_warmboot_entrypoint()`, however, data caches are expected to be enabled
if the build option `HW_ASSISTED_COHERENCY` is enabled.
Further requirements for each interface can be found in the interface
description.
......@@ -270,11 +272,11 @@ wakes up, it will start execution from the warm reset address.
Return : void
This function performs the warm boot initialization/restoration as mandated by
[PSCI spec]. For AArch32, on wakeup from power down the CPU resets to secure
SVC mode and the EL3 Runtime Software must perform the prerequisite
initializations mentioned at top of this section. This function must be called
with Data cache disabled but with MMU initialized and enabled. The major
actions performed by this function are:
[PSCI spec]. For AArch32, on wakeup from power down the CPU resets to secure SVC
mode and the EL3 Runtime Software must perform the prerequisite initializations
mentioned at top of this section. This function must be called with Data cache
disabled (unless build option `HW_ASSISTED_COHERENCY` is enabled) but with MMU
initialized and enabled. The major actions performed by this function are:
* Invalidates the stack and enables the data cache.
* Initializes architecture and PSCI state coordination.
......
......@@ -328,6 +328,15 @@ performed.
* `HANDLE_EA_EL3_FIRST`: When defined External Aborts and SError Interrupts
will be always trapped in EL3 i.e. in BL31 at runtime.
* `HW_ASSISTED_COHERENCY`: On most ARM systems to date, platform-specific
software operations are required for CPUs to enter and exit coherency.
However, there exist newer systems where CPUs' entry to and exit from
coherency is managed in hardware. Such systems require software to only
initiate the operations, and the rest is managed in hardware, minimizing
active software management. In such systems, this boolean option enables ARM
Trusted Firmware to carry out build and run-time optimizations during boot
and power management operations. This option defaults to 0.
* `LOAD_IMAGE_V2`: Boolean option to enable support for new version (v2) of
image loading, which provides more flexibility and scalability around what
images are loaded and executed during boot. Default is 0.
......
......@@ -79,7 +79,8 @@ __section("tzfw_coherent_mem")
#endif
;
DEFINE_BAKERY_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]);
/* Lock for PSCI state coordination */
DEFINE_PSCI_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]);
cpu_pd_node_t psci_cpu_pd_nodes[PLATFORM_CORE_COUNT];
......@@ -247,6 +248,50 @@ static plat_local_state_t *psci_get_req_local_pwr_states(unsigned int pwrlvl,
return &psci_req_local_pwr_states[pwrlvl - 1][cpu_idx];
}
/*
* psci_non_cpu_pd_nodes can be placed either in normal memory or coherent
* memory.
*
* With !USE_COHERENT_MEM, psci_non_cpu_pd_nodes is placed in normal memory,
* where it's accessed by both cached and non-cached participants. To serve the
* common minimum, perform a cache flush before read and after write so that
* non-cached participants operate on latest data in main memory.
*
* When USE_COHERENT_MEM is used, psci_non_cpu_pd_nodes is placed in coherent
* memory. With HW_ASSISTED_COHERENCY, all PSCI participants are cache-coherent.
* In both cases, no cache operations are required.
*/
/*
 * Retrieve the local state of the non-CPU power domain node at index
 * 'parent_idx', after any required cache maintenance operation.
 *
 * The node is flushed before the read only when psci_non_cpu_pd_nodes is
 * neither placed in coherent memory (USE_COHERENT_MEM) nor shared solely by
 * cache-coherent participants (HW_ASSISTED_COHERENCY). If either option is
 * enabled, no cache operation is required and the flush is compiled out.
 *
 * Returns the 'local_state' field of the node.
 */
static plat_local_state_t get_non_cpu_pd_node_local_state(
		unsigned int parent_idx)
{
	/*
	 * The guard must be "neither option set". Writing it as
	 * (!USE_COHERENT_MEM || !HW_ASSISTED_COHERENCY) would be true whenever
	 * at most one option is set, i.e. in every permitted build, and the
	 * flush would never be optimised away.
	 */
#if !(USE_COHERENT_MEM || HW_ASSISTED_COHERENCY)
	flush_dcache_range(
			(uintptr_t) &psci_non_cpu_pd_nodes[parent_idx],
			sizeof(psci_non_cpu_pd_nodes[parent_idx]));
#endif
	return psci_non_cpu_pd_nodes[parent_idx].local_state;
}
/*
 * Update the local state of the non-CPU power domain node at index
 * 'parent_idx' to 'state', then perform any required cache maintenance
 * operation.
 *
 * The node is flushed after the write only when psci_non_cpu_pd_nodes is
 * neither placed in coherent memory (USE_COHERENT_MEM) nor shared solely by
 * cache-coherent participants (HW_ASSISTED_COHERENCY). If either option is
 * enabled, no cache operation is required and the flush is compiled out.
 */
static void set_non_cpu_pd_node_local_state(unsigned int parent_idx,
		plat_local_state_t state)
{
	psci_non_cpu_pd_nodes[parent_idx].local_state = state;
	/*
	 * The guard must be "neither option set". Writing it as
	 * (!USE_COHERENT_MEM || !HW_ASSISTED_COHERENCY) would be true whenever
	 * at most one option is set, i.e. in every permitted build, and the
	 * flush would never be optimised away.
	 */
#if !(USE_COHERENT_MEM || HW_ASSISTED_COHERENCY)
	flush_dcache_range(
			(uintptr_t) &psci_non_cpu_pd_nodes[parent_idx],
			sizeof(psci_non_cpu_pd_nodes[parent_idx]));
#endif
}
/******************************************************************************
* Helper function to return the current local power state of each power domain
* from the current cpu power domain to its ancestor at the 'end_pwrlvl'. This
......@@ -264,18 +309,7 @@ void psci_get_target_local_pwr_states(unsigned int end_pwrlvl,
/* Copy the local power state from node to state_info */
for (lvl = PSCI_CPU_PWR_LVL + 1; lvl <= end_pwrlvl; lvl++) {
#if !USE_COHERENT_MEM
/*
* If using normal memory for psci_non_cpu_pd_nodes, we need
* to flush before reading the local power state as another
* cpu in the same power domain could have updated it and this
* code runs before caches are enabled.
*/
flush_dcache_range(
(uintptr_t) &psci_non_cpu_pd_nodes[parent_idx],
sizeof(psci_non_cpu_pd_nodes[parent_idx]));
#endif
pd_state[lvl] = psci_non_cpu_pd_nodes[parent_idx].local_state;
pd_state[lvl] = get_non_cpu_pd_node_local_state(parent_idx);
parent_idx = psci_non_cpu_pd_nodes[parent_idx].parent_node;
}
......@@ -299,21 +333,16 @@ static void psci_set_target_local_pwr_states(unsigned int end_pwrlvl,
psci_set_cpu_local_state(pd_state[PSCI_CPU_PWR_LVL]);
/*
* Need to flush as local_state will be accessed with Data Cache
* Need to flush as local_state might be accessed with Data Cache
* disabled during power on
*/
flush_cpu_data(psci_svc_cpu_data.local_state);
psci_flush_cpu_data(psci_svc_cpu_data.local_state);
parent_idx = psci_cpu_pd_nodes[plat_my_core_pos()].parent_node;
/* Copy the local_state from state_info */
for (lvl = 1; lvl <= end_pwrlvl; lvl++) {
psci_non_cpu_pd_nodes[parent_idx].local_state = pd_state[lvl];
#if !USE_COHERENT_MEM
flush_dcache_range(
(uintptr_t)&psci_non_cpu_pd_nodes[parent_idx],
sizeof(psci_non_cpu_pd_nodes[parent_idx]));
#endif
set_non_cpu_pd_node_local_state(parent_idx, pd_state[lvl]);
parent_idx = psci_non_cpu_pd_nodes[parent_idx].parent_node;
}
}
......@@ -347,13 +376,8 @@ void psci_set_pwr_domains_to_run(unsigned int end_pwrlvl)
/* Reset the local_state to RUN for the non cpu power domains. */
for (lvl = PSCI_CPU_PWR_LVL + 1; lvl <= end_pwrlvl; lvl++) {
psci_non_cpu_pd_nodes[parent_idx].local_state =
PSCI_LOCAL_STATE_RUN;
#if !USE_COHERENT_MEM
flush_dcache_range(
(uintptr_t) &psci_non_cpu_pd_nodes[parent_idx],
sizeof(psci_non_cpu_pd_nodes[parent_idx]));
#endif
set_non_cpu_pd_node_local_state(parent_idx,
PSCI_LOCAL_STATE_RUN);
psci_set_req_local_pwr_state(lvl,
cpu_idx,
PSCI_LOCAL_STATE_RUN);
......@@ -364,7 +388,7 @@ void psci_set_pwr_domains_to_run(unsigned int end_pwrlvl)
psci_set_aff_info_state(AFF_STATE_ON);
psci_set_cpu_local_state(PSCI_LOCAL_STATE_RUN);
flush_cpu_data(psci_svc_cpu_data);
psci_flush_cpu_data(psci_svc_cpu_data);
}
/******************************************************************************
......@@ -969,3 +993,33 @@ int psci_get_suspend_afflvl(void)
}
#endif
/*******************************************************************************
 * Start the power down sequence for the calling CPU by invoking the power down
 * operations registered for it, up to and including 'power_level'.
 ******************************************************************************/
void psci_do_pwrdown_sequence(unsigned int power_level)
{
#if !HW_ASSISTED_COHERENCY
	/*
	 * No hardware-assisted coherency: the CPU drivers turn off the data
	 * caches and MMU and then do the cache maintenance in software.
	 *
	 * prepare_cpu_pwr_dwn() is what initiates the power down sequence, but
	 * it is entered with data caches and MMU enabled and returns with both
	 * disabled. The stack contents must therefore be flushed out to memory
	 * before anything is popped from the stack again, which is why the
	 * call is made through the cache maintenance wrapper.
	 */
	psci_do_pwrdown_cache_maintenance(power_level);
#else
	/*
	 * Hardware-assisted coherency: the CPU drivers merely initiate the
	 * power down sequence and perform no software cache maintenance. Data
	 * caches and MMU stay enabled across this call.
	 */
	prepare_cpu_pwr_dwn(power_level);
#endif
}
......@@ -119,10 +119,9 @@ int psci_do_cpu_off(unsigned int end_pwrlvl)
#endif
/*
* Arch. management. Perform the necessary steps to flush all
* cpu caches.
* Arch. management. Initiate power down sequence.
*/
psci_do_pwrdown_cache_maintenance(psci_find_max_off_lvl(&state_info));
psci_do_pwrdown_sequence(psci_find_max_off_lvl(&state_info));
#if ENABLE_RUNTIME_INSTRUMENTATION
PMF_CAPTURE_TIMESTAMP(rt_instr_svc,
......@@ -154,17 +153,17 @@ exit:
*/
if (rc == PSCI_E_SUCCESS) {
/*
* Set the affinity info state to OFF. This writes directly to
* main memory as caches are disabled, so cache maintenance is
* Set the affinity info state to OFF. When caches are disabled,
* this writes directly to main memory, so cache maintenance is
* required to ensure that later cached reads of aff_info_state
* return AFF_STATE_OFF. A dsbish() ensures ordering of the
* update to the affinity info state prior to cache line
* invalidation.
*/
flush_cpu_data(psci_svc_cpu_data.aff_info_state);
psci_flush_cpu_data(psci_svc_cpu_data.aff_info_state);
psci_set_aff_info_state(AFF_STATE_OFF);
dsbish();
inv_cpu_data(psci_svc_cpu_data.aff_info_state);
psci_dsbish();
psci_inv_cpu_data(psci_svc_cpu_data.aff_info_state);
#if ENABLE_RUNTIME_INSTRUMENTATION
......
/*
* Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
* Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......@@ -165,10 +165,12 @@ void psci_cpu_on_finish(unsigned int cpu_idx,
*/
psci_plat_pm_ops->pwr_domain_on_finish(state_info);
#if !HW_ASSISTED_COHERENCY
/*
* Arch. management: Enable data cache and manage stack memory
*/
psci_do_pwrup_cache_maintenance();
#endif
/*
* All the platform specific actions for turning this cpu
......
......@@ -38,17 +38,60 @@
#include <psci.h>
#include <spinlock.h>
#if HW_ASSISTED_COHERENCY
/*
* The following helper macros abstract the interface to the Bakery
* Lock API.
* On systems with hardware-assisted coherency, make PSCI cache operations NOP,
* as PSCI participants are cache-coherent, and there's no need for explicit
* cache maintenance operations or barriers to coordinate their state.
*/
#define psci_lock_init(non_cpu_pd_node, idx) \
((non_cpu_pd_node)[(idx)].lock_index = (idx))
#define psci_flush_dcache_range(addr, size)
#define psci_flush_cpu_data(member)
#define psci_inv_cpu_data(member)
#define psci_dsbish()
/*
* On systems where participant CPUs are cache-coherent, we can use spinlocks
* instead of bakery locks.
*/
#define DEFINE_PSCI_LOCK(_name) spinlock_t _name
#define DECLARE_PSCI_LOCK(_name) extern DEFINE_PSCI_LOCK(_name)
#define psci_lock_get(non_cpu_pd_node) \
spin_lock(&psci_locks[(non_cpu_pd_node)->lock_index])
#define psci_lock_release(non_cpu_pd_node) \
spin_unlock(&psci_locks[(non_cpu_pd_node)->lock_index])
#else
/*
* If not all PSCI participants are cache-coherent, perform cache maintenance
* and issue barriers wherever required to coordinate state.
*/
#define psci_flush_dcache_range(addr, size) flush_dcache_range(addr, size)
#define psci_flush_cpu_data(member) flush_cpu_data(member)
#define psci_inv_cpu_data(member) inv_cpu_data(member)
#define psci_dsbish() dsbish()
/*
* Use bakery locks for state coordination as not all PSCI participants are
* cache coherent.
*/
#define DEFINE_PSCI_LOCK(_name) DEFINE_BAKERY_LOCK(_name)
#define DECLARE_PSCI_LOCK(_name) DECLARE_BAKERY_LOCK(_name)
#define psci_lock_get(non_cpu_pd_node) \
bakery_lock_get(&psci_locks[(non_cpu_pd_node)->lock_index])
#define psci_lock_release(non_cpu_pd_node) \
bakery_lock_release(&psci_locks[(non_cpu_pd_node)->lock_index])
#endif
#define psci_lock_init(non_cpu_pd_node, idx) \
((non_cpu_pd_node)[(idx)].lock_index = (idx))
/*
* The PSCI capabilities which are provided by the generic code but do not
* depend on the platform or spd capabilities.
......@@ -166,8 +209,8 @@ extern non_cpu_pd_node_t psci_non_cpu_pd_nodes[PSCI_NUM_NON_CPU_PWR_DOMAINS];
extern cpu_pd_node_t psci_cpu_pd_nodes[PLATFORM_CORE_COUNT];
extern unsigned int psci_caps;
/* One bakery lock is required for each non-cpu power domain */
DECLARE_BAKERY_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]);
/* One lock is required per non-CPU power domain node */
DECLARE_PSCI_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]);
/*******************************************************************************
* SPD's power management hooks registered with PSCI
......@@ -204,6 +247,14 @@ void psci_set_pwr_domains_to_run(unsigned int end_pwrlvl);
void psci_print_power_domain_map(void);
unsigned int psci_is_last_on_cpu(void);
int psci_spd_migrate_info(u_register_t *mpidr);
void psci_do_pwrdown_sequence(unsigned int power_level);
/*
* CPU power down is directly called only when HW_ASSISTED_COHERENCY is
* available. Otherwise, this needs post-call stack maintenance, which is
* handled in assembly.
*/
void prepare_cpu_pwr_dwn(unsigned int power_level);
/* Private exported functions from psci_on.c */
int psci_cpu_on_start(u_register_t target_cpu,
......
......@@ -86,7 +86,7 @@ static void psci_init_pwr_domain_node(unsigned int node_idx,
/* Set the power state to OFF state */
svc_cpu_data->local_state = PLAT_MAX_OFF_STATE;
flush_dcache_range((uintptr_t)svc_cpu_data,
psci_flush_dcache_range((uintptr_t)svc_cpu_data,
sizeof(*svc_cpu_data));
cm_set_context_by_index(node_idx,
......@@ -242,9 +242,9 @@ int psci_setup(const psci_lib_args_t *lib_args)
/*
* Flush `psci_plat_pm_ops` as it will be accessed by secondary CPUs
* during warm boot before data cache is enabled.
* during warm boot, possibly before data cache is enabled.
*/
flush_dcache_range((uintptr_t)&psci_plat_pm_ops,
psci_flush_dcache_range((uintptr_t)&psci_plat_pm_ops,
sizeof(psci_plat_pm_ops));
/* Initialize the psci capability */
......
......@@ -91,10 +91,10 @@ static void psci_suspend_to_pwrdown_start(unsigned int end_pwrlvl,
psci_set_suspend_pwrlvl(end_pwrlvl);
/*
* Flush the target power level as it will be accessed on power up with
* Flush the target power level as it might be accessed on power up with
* Data cache disabled.
*/
flush_cpu_data(psci_svc_cpu_data.target_pwrlvl);
psci_flush_cpu_data(psci_svc_cpu_data.target_pwrlvl);
/*
* Call the cpu suspend handler registered by the Secure Payload
......@@ -121,13 +121,11 @@ static void psci_suspend_to_pwrdown_start(unsigned int end_pwrlvl,
#endif
/*
* Arch. management. Perform the necessary steps to flush all
* cpu caches. Currently we assume that the power level correspond
* the cache level.
* Arch. management. Initiate power down sequence.
* TODO : Introduce a mechanism to query the cache level to flush
* and the cpu-ops power down to perform from the platform.
*/
psci_do_pwrdown_cache_maintenance(max_off_lvl);
psci_do_pwrdown_sequence(max_off_lvl);
#if ENABLE_RUNTIME_INSTRUMENTATION
PMF_CAPTURE_TIMESTAMP(rt_instr_svc,
......@@ -304,12 +302,10 @@ void psci_cpu_suspend_finish(unsigned int cpu_idx,
*/
psci_plat_pm_ops->pwr_domain_suspend_finish(state_info);
/*
* Arch. management: Enable the data cache, manage stack memory and
* restore the stashed EL3 architectural context from the 'cpu_context'
* structure for this cpu.
*/
#if !HW_ASSISTED_COHERENCY
/* Arch. management: Enable the data cache, stack memory maintenance. */
psci_do_pwrup_cache_maintenance();
#endif
/* Re-init the cntfrq_el0 register */
counter_freq = plat_get_syscnt_freq2();
......
......@@ -105,6 +105,10 @@ FWU_FIP_NAME := fwu_fip.bin
# For Chain of Trust
GENERATE_COT := 0
# Whether system coherency is managed in hardware, without explicit software
# operations.
HW_ASSISTED_COHERENCY := 0
# Flag to enable new version of image loading
LOAD_IMAGE_V2 := 0
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment