diff --git a/plat/nvidia/tegra/include/drivers/memctrl_v2.h b/plat/nvidia/tegra/include/drivers/memctrl_v2.h
index fe7f7a02133f801b76d71bb128230c3fc847311f..04c0e8d17d2b214742ac8692186158f7b43ab658 100644
--- a/plat/nvidia/tegra/include/drivers/memctrl_v2.h
+++ b/plat/nvidia/tegra/include/drivers/memctrl_v2.h
@@ -31,7 +31,6 @@
 #ifndef __MEMCTRLV2_H__
 #define __MEMCTRLV2_H__
 
-#include <mmio.h>
 #include <tegra_def.h>
 
 /*******************************************************************************
@@ -297,6 +296,8 @@
 #define MC_TXN_OVERRIDE_CGID_TAG_ADR		3
 #define MC_TXN_OVERRIDE_CGID_TAG_MASK		3
 
+#ifndef __ASSEMBLY__
+
 /*******************************************************************************
  * Structure to hold the transaction override settings to use to override
  * client inputs
@@ -347,6 +348,8 @@ typedef struct mc_streamid_security_cfg {
 			.override_enable = OVERRIDE_ ## access \
 		}
 
+#endif /* __ASSMEBLY__ */
+
 /*******************************************************************************
  * TZDRAM carveout configuration registers
  ******************************************************************************/
@@ -545,6 +548,10 @@ typedef struct mc_streamid_security_cfg {
 #define  MC_SMMU_CLIENT_CONFIG5_APEDMAW_UNORDERED	(0 << 0)
 #define  MC_SMMU_CLIENT_CONFIG5_APEDMAW_MASK	(1 << 0)
 
+#ifndef __ASSEMBLY__
+
+#include <mmio.h>
+
 static inline uint32_t tegra_mc_read_32(uint32_t off)
 {
 	return mmio_read_32(TEGRA_MC_BASE + off);
@@ -601,5 +608,6 @@ static inline void tegra_mc_streamid_write_32(uint32_t off, uint32_t val)
 			MC_TXN_OVERRIDE_CONFIG_AXID_OVERRIDE_CGID | \
 			MC_TXN_OVERRIDE_CONFIG_AXID_OVERRIDE_SO_DEV_CGID_SO_DEV_CLIENT); \
 	}
+#endif /* __ASSMEBLY__ */
 
 #endif /* __MEMCTRLV2_H__ */
diff --git a/plat/nvidia/tegra/include/drivers/smmu.h b/plat/nvidia/tegra/include/drivers/smmu.h
index bb08a559f2c3d6971459c6c7b8183ddefdf70019..0867c11ada5bedf05abd2a8297fa0d7560ada88b 100644
--- a/plat/nvidia/tegra/include/drivers/smmu.h
+++ b/plat/nvidia/tegra/include/drivers/smmu.h
@@ -627,6 +627,6 @@ static inline void tegra_smmu_write_32(uint32_t off, uint32_t val)
 }
 
 void tegra_smmu_init(void);
-void tegra_smmu_save_context(void);
+void tegra_smmu_save_context(uint64_t smmu_ctx_addr);
 
 #endif /*__SMMU_H */
diff --git a/plat/nvidia/tegra/include/tegra_private.h b/plat/nvidia/tegra/include/tegra_private.h
index 012bfd77bce81645768f01e91e611573b94507b2..39006f6faf24d6e954334490da9e8e05846b2e57 100644
--- a/plat/nvidia/tegra/include/tegra_private.h
+++ b/plat/nvidia/tegra/include/tegra_private.h
@@ -119,4 +119,7 @@ void plat_early_platform_setup(void);
 /* Declarations for tegra_delay_timer.c */
 void tegra_delay_timer_init(void);
 
+void tegra_secure_entrypoint(void);
+void tegra186_cpu_reset_handler(void);
+
 #endif /* __TEGRA_PRIVATE_H__ */
diff --git a/plat/nvidia/tegra/soc/t186/drivers/smmu/smmu.c b/plat/nvidia/tegra/soc/t186/drivers/smmu/smmu.c
index 2940f5837cd402755edea46fa56db77ba0827f80..7f4589595e1248625bb81403f8a189f069cbbe9a 100644
--- a/plat/nvidia/tegra/soc/t186/drivers/smmu/smmu.c
+++ b/plat/nvidia/tegra/soc/t186/drivers/smmu/smmu.c
@@ -31,7 +31,10 @@
 #include <assert.h>
 #include <debug.h>
 #include <memctrl_v2.h>
+#include <platform_def.h>
 #include <smmu.h>
+#include <string.h>
+#include <tegra_private.h>
 
 typedef struct smmu_regs {
 	uint32_t reg;
@@ -133,7 +136,7 @@ typedef struct smmu_regs {
 		.val = 0xFFFFFFFF, \
 	}
 
-static smmu_regs_t smmu_ctx_regs[] = {
+static __attribute__((aligned(16))) smmu_regs_t smmu_ctx_regs[] = {
 	_START_OF_TABLE_,
 	mc_make_sid_security_cfg(SCEW),
 	mc_make_sid_security_cfg(AFIR),
@@ -421,12 +424,15 @@ static smmu_regs_t smmu_ctx_regs[] = {
 };
 
 /*
- * Save SMMU settings before "System Suspend"
+ * Save SMMU settings before "System Suspend" to TZDRAM
  */
-void tegra_smmu_save_context(void)
+void tegra_smmu_save_context(uint64_t smmu_ctx_addr)
 {
 	uint32_t i;
 #if DEBUG
+	plat_params_from_bl2_t *params_from_bl2 = bl31_get_plat_params();
+	uint64_t tzdram_base = params_from_bl2->tzdram_base;
+	uint64_t tzdram_end = tzdram_base + params_from_bl2->tzdram_size;
 	uint32_t reg_id1, pgshift, cb_size;
 
 	/* sanity check SMMU settings c*/
@@ -438,6 +444,8 @@ void tegra_smmu_save_context(void)
 	assert(!((pgshift != PGSHIFT) || (cb_size != CB_SIZE)));
 #endif
 
+	assert((smmu_ctx_addr >= tzdram_base) && (smmu_ctx_addr <= tzdram_end));
+
 	/* index of _END_OF_TABLE_ */
 	smmu_ctx_regs[0].val = ARRAY_SIZE(smmu_ctx_regs) - 1;
 
@@ -445,11 +453,15 @@ void tegra_smmu_save_context(void)
 	for (i = 1; i < ARRAY_SIZE(smmu_ctx_regs) - 1; i++)
 		smmu_ctx_regs[i].val = mmio_read_32(smmu_ctx_regs[i].reg);
 
+	/* Save SMMU config settings */
+	memcpy16((void *)(uintptr_t)smmu_ctx_addr, (void *)smmu_ctx_regs,
+		 sizeof(smmu_ctx_regs));
+
 	/* save the SMMU table address */
 	mmio_write_32(TEGRA_SCRATCH_BASE + SECURE_SCRATCH_RSV11_LO,
-		(uint32_t)(unsigned long)smmu_ctx_regs);
+		(uint32_t)smmu_ctx_addr);
 	mmio_write_32(TEGRA_SCRATCH_BASE + SECURE_SCRATCH_RSV11_HI,
-		(uint32_t)(((unsigned long)smmu_ctx_regs) >> 32));
+		(uint32_t)(smmu_ctx_addr >> 32));
 }
 
 /*
diff --git a/plat/nvidia/tegra/soc/t186/plat_psci_handlers.c b/plat/nvidia/tegra/soc/t186/plat_psci_handlers.c
index 7e35cc6b29070dcf92c4c6cf7e43852fede59476..7c6868c7b07032fba173f0d0534e4e182bd1ebe0 100644
--- a/plat/nvidia/tegra/soc/t186/plat_psci_handlers.c
+++ b/plat/nvidia/tegra/soc/t186/plat_psci_handlers.c
@@ -39,10 +39,17 @@
 #include <mce.h>
 #include <psci.h>
 #include <smmu.h>
+#include <string.h>
 #include <t18x_ari.h>
 #include <tegra_private.h>
 
 extern void prepare_cpu_pwr_dwn(void);
+extern void tegra186_cpu_reset_handler(void);
+extern uint32_t __tegra186_cpu_reset_handler_data,
+		__tegra186_cpu_reset_handler_end;
+
+/* TZDRAM offset for saving SMMU context */
+#define TEGRA186_SMMU_CTX_OFFSET	16
 
 /* state id mask */
 #define TEGRA186_STATE_ID_MASK		0xF
@@ -50,7 +57,7 @@ extern void prepare_cpu_pwr_dwn(void);
 #define TEGRA186_WAKE_TIME_MASK		0xFFFFFF
 #define TEGRA186_WAKE_TIME_SHIFT	4
 /* context size to save during system suspend */
-#define TEGRA186_SE_CONTEXT_SIZE		3
+#define TEGRA186_SE_CONTEXT_SIZE	3
 
 static uint32_t se_regs[TEGRA186_SE_CONTEXT_SIZE];
 static unsigned int wake_time[PLATFORM_CORE_COUNT];
@@ -98,6 +105,8 @@ int tegra_soc_pwr_domain_suspend(const psci_power_state_t *target_state)
 	int impl = (read_midr() >> MIDR_IMPL_SHIFT) & MIDR_IMPL_MASK;
 	cpu_context_t *ctx = cm_get_context(NON_SECURE);
 	gp_regs_t *gp_regs = get_gpregs_ctx(ctx);
+	plat_params_from_bl2_t *params_from_bl2 = bl31_get_plat_params();
+	uint64_t smmu_ctx_base;
 	uint32_t val;
 
 	assert(ctx);
@@ -147,8 +156,12 @@ int tegra_soc_pwr_domain_suspend(const psci_power_state_t *target_state)
 		val = mmio_read_32(TEGRA_MISC_BASE + MISCREG_PFCFG);
 		mmio_write_32(TEGRA_SCRATCH_BASE + SECURE_SCRATCH_RSV6, val);
 
-		/* save SMMU context */
-		tegra_smmu_save_context();
+		/* save SMMU context to TZDRAM */
+		smmu_ctx_base = params_from_bl2->tzdram_base +
+			((uintptr_t)&__tegra186_cpu_reset_handler_data -
+			 (uintptr_t)tegra186_cpu_reset_handler) +
+			TEGRA186_SMMU_CTX_OFFSET;
+		tegra_smmu_save_context((uintptr_t)smmu_ctx_base);
 
 		/* Prepare for system suspend */
 		write_ctx_reg(gp_regs, CTX_GPREG_X4, 1);
@@ -157,7 +170,7 @@ int tegra_soc_pwr_domain_suspend(const psci_power_state_t *target_state)
 		(void)mce_command_handler(MCE_CMD_UPDATE_CSTATE_INFO,
 			TEGRA_ARI_CLUSTER_CC7, 0, TEGRA_ARI_SYSTEM_SC7);
 
-		/* Enter system suspend state */
+		/* Instruct the MCE to enter system suspend state */
 		(void)mce_command_handler(MCE_CMD_ENTER_CSTATE,
 			TEGRA_ARI_CORE_C7, MCE_CORE_SLEEP_TIME_INFINITE, 0);
 
@@ -169,6 +182,31 @@ int tegra_soc_pwr_domain_suspend(const psci_power_state_t *target_state)
 	return PSCI_E_SUCCESS;
 }
 
+int tegra_soc_pwr_domain_power_down_wfi(const psci_power_state_t *target_state)
+{
+	const plat_local_state_t *pwr_domain_state =
+		target_state->pwr_domain_state;
+	plat_params_from_bl2_t *params_from_bl2 = bl31_get_plat_params();
+	unsigned int stateid_afflvl2 = pwr_domain_state[PLAT_MAX_PWR_LVL] &
+		TEGRA186_STATE_ID_MASK;
+	uint32_t val;
+
+	if (stateid_afflvl2 == PSTATE_ID_SOC_POWERDN) {
+		/*
+		 * The TZRAM loses power when we enter system suspend. To
+		 * allow graceful exit from system suspend, we need to copy
+		 * BL3-1 over to TZDRAM.
+		 */
+		val = params_from_bl2->tzdram_base +
+			((uintptr_t)&__tegra186_cpu_reset_handler_end -
+			 (uintptr_t)tegra186_cpu_reset_handler);
+		memcpy16((void *)(uintptr_t)val, (void *)(uintptr_t)BL31_BASE,
+			 (uintptr_t)&__BL31_END__ - (uintptr_t)BL31_BASE);
+	}
+
+	return PSCI_E_SUCCESS;
+}
+
 int tegra_soc_pwr_domain_on(u_register_t mpidr)
 {
 	int target_cpu = mpidr & MPIDR_CPU_MASK;
@@ -191,6 +229,8 @@ int tegra_soc_pwr_domain_on(u_register_t mpidr)
 int tegra_soc_pwr_domain_on_finish(const psci_power_state_t *target_state)
 {
 	int state_id = target_state->pwr_domain_state[PLAT_MAX_PWR_LVL];
+	cpu_context_t *ctx = cm_get_context(NON_SECURE);
+	gp_regs_t *gp_regs = get_gpregs_ctx(ctx);
 
 	/*
 	 * Check if we are exiting from deep sleep and restore SE
@@ -206,6 +246,17 @@ int tegra_soc_pwr_domain_on_finish(const psci_power_state_t *target_state)
 
 		/* Init SMMU */
 		tegra_smmu_init();
+
+		/*
+		 * Reset power state info for the last core doing SC7 entry and exit,
+		 * we set deepest power state as CC7 and SC7 for SC7 entry which
+		 * may not be requested by non-secure SW which controls idle states.
+		 */
+		write_ctx_reg(gp_regs, CTX_GPREG_X4, 0);
+		write_ctx_reg(gp_regs, CTX_GPREG_X5, 0);
+		write_ctx_reg(gp_regs, CTX_GPREG_X6, 1);
+		(void)mce_command_handler(MCE_CMD_UPDATE_CSTATE_INFO,
+			TEGRA_ARI_CLUSTER_CC7, 0, TEGRA_ARI_SYSTEM_SC1);
 	}
 
 	return PSCI_E_SUCCESS;
diff --git a/plat/nvidia/tegra/soc/t186/plat_secondary.c b/plat/nvidia/tegra/soc/t186/plat_secondary.c
index df802891a9be45bfb371922364a5860a4a21fd59..406c1e08d4d12462818393493c44072559599cb5 100644
--- a/plat/nvidia/tegra/soc/t186/plat_secondary.c
+++ b/plat/nvidia/tegra/soc/t186/plat_secondary.c
@@ -28,10 +28,13 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <arch_helpers.h>
 #include <debug.h>
 #include <mce.h>
 #include <mmio.h>
+#include <string.h>
 #include <tegra_def.h>
+#include <tegra_private.h>
 
 #define MISCREG_CPU_RESET_VECTOR	0x2000
 #define MISCREG_AA64_RST_LOW		0x2004
@@ -42,7 +45,8 @@
 
 #define CPU_RESET_MODE_AA64		1
 
-extern void tegra_secure_entrypoint(void);
+extern uint64_t tegra_bl31_phys_base;
+extern uint64_t __tegra186_cpu_reset_handler_end;
 
 /*******************************************************************************
  * Setup secondary CPU vectors
@@ -50,12 +54,31 @@ extern void tegra_secure_entrypoint(void);
 void plat_secondary_setup(void)
 {
 	uint32_t addr_low, addr_high;
-	uint64_t reset_addr = (uint64_t)tegra_secure_entrypoint;
+	plat_params_from_bl2_t *params_from_bl2 = bl31_get_plat_params();
+	uint64_t cpu_reset_handler_base;
 
 	INFO("Setting up secondary CPU boot\n");
 
-	addr_low = (uint32_t)reset_addr | CPU_RESET_MODE_AA64;
-	addr_high = (uint32_t)((reset_addr >> 32) & 0x7ff);
+	if ((tegra_bl31_phys_base >= TEGRA_TZRAM_BASE) &&
+	    (tegra_bl31_phys_base <= (TEGRA_TZRAM_BASE + TEGRA_TZRAM_SIZE))) {
+
+		/*
+		 * The BL31 code resides in the TZSRAM which loses state
+		 * when we enter System Suspend. Copy the wakeup trampoline
+		 * code to TZDRAM to help us exit from System Suspend.
+		 */
+		cpu_reset_handler_base = params_from_bl2->tzdram_base;
+		memcpy16((void *)((uintptr_t)cpu_reset_handler_base),
+			 (void *)(uintptr_t)tegra186_cpu_reset_handler,
+			 (uintptr_t)&__tegra186_cpu_reset_handler_end -
+			 (uintptr_t)tegra186_cpu_reset_handler);
+
+	} else {
+		cpu_reset_handler_base = (uintptr_t)tegra_secure_entrypoint;
+	}
+
+	addr_low = (uint32_t)cpu_reset_handler_base | CPU_RESET_MODE_AA64;
+	addr_high = (uint32_t)((cpu_reset_handler_base >> 32) & 0x7ff);
 
 	/* write lower 32 bits first, then the upper 11 bits */
 	mmio_write_32(TEGRA_MISC_BASE + MISCREG_AA64_RST_LOW, addr_low);
diff --git a/plat/nvidia/tegra/soc/t186/plat_trampoline.S b/plat/nvidia/tegra/soc/t186/plat_trampoline.S
new file mode 100644
index 0000000000000000000000000000000000000000..5e0a9d7e5e01a6c7b35bc3f342b8a092050abebd
--- /dev/null
+++ b/plat/nvidia/tegra/soc/t186/plat_trampoline.S
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arch.h>
+#include <asm_macros.S>
+#include <memctrl_v2.h>
+#include <tegra_def.h>
+
+#define TEGRA186_SMMU_CTX_SIZE		0x420
+
+	.align 4
+	.globl	tegra186_cpu_reset_handler
+
+/* CPU reset handler routine */
+func tegra186_cpu_reset_handler
+	/*
+	 * The Memory Controller loses state during System Suspend. We
+	 * use this information to decide if the reset handler is running
+	 * after a System Suspend. Resume from system suspend requires
+	 * restoring the entire state from TZDRAM to TZRAM.
+	 */
+	mov	x1, #TEGRA_MC_BASE
+	ldr	w0, [x1, #MC_SECURITY_CFG3_0]
+	lsl	x0, x0, #32
+	ldr	w0, [x1, #MC_SECURITY_CFG0_0]
+	adr	x1, tegra186_cpu_reset_handler
+	cmp	x0, x1
+	beq	boot_cpu
+
+	/* resume from system suspend */
+	mov	x0, #BL31_BASE
+	adr	x1, __tegra186_cpu_reset_handler_end
+	adr	x2, __tegra186_cpu_reset_handler_data
+	ldr	x2, [x2, #8]
+
+	/* memcpy16 */
+m_loop16:
+	cmp	x2, #16
+	b.lt	m_loop1
+	ldp	x3, x4, [x1], #16
+	stp	x3, x4, [x0], #16
+	sub	x2, x2, #16
+	b	m_loop16
+	/* copy byte per byte */
+m_loop1:
+	cbz	x2, boot_cpu
+	ldrb	w3, [x1], #1
+	strb	w3, [x0], #1
+	subs	x2, x2, #1
+	b.ne	m_loop1
+
+boot_cpu:
+	adr	x0, __tegra186_cpu_reset_handler_data
+	ldr	x0, [x0]
+	br	x0
+endfunc tegra186_cpu_reset_handler
+
+	/*
+	 * Tegra186 reset data (offset 0x0 - 0x430)
+	 *
+	 * 0x000: secure world's entrypoint
+	 * 0x008: BL31 size (RO + RW)
+	 * 0x00C: SMMU context start
+	 * 0x42C: SMMU context end
+	 */
+
+	.align 4
+	.type	__tegra186_cpu_reset_handler_data, %object
+	.globl	__tegra186_cpu_reset_handler_data
+__tegra186_cpu_reset_handler_data:
+	.quad	tegra_secure_entrypoint
+	.quad	__BL31_END__ - BL31_BASE
+	.rept	TEGRA186_SMMU_CTX_SIZE
+	.quad	0
+	.endr
+	.size	__tegra186_cpu_reset_handler_data, \
+		. - __tegra186_cpu_reset_handler_data
+
+	.align 4
+	.globl	__tegra186_cpu_reset_handler_end
+__tegra186_cpu_reset_handler_end:
diff --git a/plat/nvidia/tegra/soc/t186/platform_t186.mk b/plat/nvidia/tegra/soc/t186/platform_t186.mk
index b8eaa7a6f87a3c18178392fc45c541b3e5c76419..0387a0a894851ef99c80108f3374d9ed90b29588 100644
--- a/plat/nvidia/tegra/soc/t186/platform_t186.mk
+++ b/plat/nvidia/tegra/soc/t186/platform_t186.mk
@@ -29,9 +29,6 @@
 #
 
 # platform configs
-ENABLE_NS_L2_CPUECTRL_RW_ACCESS		:= 1
-$(eval $(call add_define,ENABLE_NS_L2_CPUECTRL_RW_ACCESS))
-
 ENABLE_ROC_FOR_ORDERING_CLIENT_REQUESTS	:= 1
 $(eval $(call add_define,ENABLE_ROC_FOR_ORDERING_CLIENT_REQUESTS))
 
@@ -45,10 +42,10 @@ $(eval $(call add_define,PLATFORM_CLUSTER_COUNT))
 PLATFORM_MAX_CPUS_PER_CLUSTER		:= 4
 $(eval $(call add_define,PLATFORM_MAX_CPUS_PER_CLUSTER))
 
-MAX_XLAT_TABLES				:= 16
+MAX_XLAT_TABLES				:= 20
 $(eval $(call add_define,MAX_XLAT_TABLES))
 
-MAX_MMAP_REGIONS			:= 16
+MAX_MMAP_REGIONS			:= 20
 $(eval $(call add_define,MAX_MMAP_REGIONS))
 
 # platform files
@@ -65,4 +62,5 @@ BL31_SOURCES		+=	lib/cpus/aarch64/denver.S		\
 				${SOC_DIR}/plat_psci_handlers.c		\
 				${SOC_DIR}/plat_setup.c			\
 				${SOC_DIR}/plat_secondary.c		\
-				${SOC_DIR}/plat_sip_calls.c
+				${SOC_DIR}/plat_sip_calls.c		\
+				${SOC_DIR}/plat_trampoline.S