diff --git a/plat/nvidia/tegra/common/drivers/bpmp/bpmp.c b/plat/nvidia/tegra/common/drivers/bpmp/bpmp.c
new file mode 100644
index 0000000000000000000000000000000000000000..c78b91236855e6991f7d0c8fecd2488c5934791c
--- /dev/null
+++ b/plat/nvidia/tegra/common/drivers/bpmp/bpmp.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch_helpers.h>
+#include <assert.h>
+#include <bpmp.h>
+#include <common/debug.h>
+#include <delay_timer.h>
+#include <errno.h>
+#include <mmio.h>
+#include <platform.h>
+#include <stdbool.h>
+#include <string.h>
+#include <tegra_def.h>
+
+#define BPMP_TIMEOUT_10US	10	/* max polls per wait; each poll does udelay(1) */
+
+static uint32_t channel_base[NR_CHANNELS];	/* written by tegra_bpmp_init() */
+static uint32_t bpmp_init_state = BPMP_INIT_PENDING;	/* BPMP_INIT_* value */
+
+static uint32_t channel_field(unsigned int ch)
+{
+	return CH_MASK(ch) & mmio_read_32(TEGRA_RES_SEMA_BASE + STA_OFFSET); /* ch's 2 bits */
+}
+
+static bool master_free(unsigned int ch)
+{
+	return MA_FREE(ch) == channel_field(ch);	/* ch's field reads 0x2 */
+}
+
+static bool master_acked(unsigned int ch)
+{
+	return MA_ACKD(ch) == channel_field(ch);	/* ch's field reads 0x3 */
+}
+
+static void signal_slave(unsigned int ch)
+{
+	mmio_write_32(TEGRA_RES_SEMA_BASE + CLR_OFFSET, CH_MASK(ch)); /* both bits of ch */
+}
+
+static void free_master(unsigned int ch)
+{
+	/* write the bit where MA_ACKD and MA_FREE differ to the CLR register */
+	mmio_write_32(TEGRA_RES_SEMA_BASE + CLR_OFFSET, MA_FREE(ch) ^ MA_ACKD(ch));
+}
+
+/*
+ * Send MRQ_* request 'mrq' (ob_sz bytes of ob_data) on the calling core's
+ * channel and copy ib_sz bytes of the reply to ib_data. Should be called with
+ * local irqs disabled. Returns the BPMP reply code, -EINVAL or -ETIMEDOUT.
+ */
+int32_t tegra_bpmp_send_receive_atomic(int mrq, const void *ob_data, int ob_sz,
+		void *ib_data, int ib_sz)
+{
+	unsigned int ch = (unsigned int)plat_my_core_pos();
+	int32_t ret = -ETIMEDOUT, timeout = 0;
+	mb_data_t *p;
+
+	if ((bpmp_init_state != BPMP_INIT_COMPLETE) || (ch >= NR_CHANNELS) ||
+	    (ob_sz < 0) || (ob_sz > (int)MSG_DATA_MAX_SZ) ||
+	    (ib_sz < 0) || (ib_sz > (int)MSG_DATA_MAX_SZ)) {
+		/* driver not ready, or args exceed the channel area */
+		ret = -EINVAL;
+	} else {
+		p = (mb_data_t *)(uintptr_t)channel_base[ch];
+
+		/* loop until BPMP is free */
+		for (timeout = 0; timeout < BPMP_TIMEOUT_10US; timeout++) {
+			if (master_free(ch) == true) {
+				break;
+			}
+			udelay(1);
+		}
+
+		if (timeout != BPMP_TIMEOUT_10US) {
+			/* generate the command struct */
+			p->code = mrq;
+			p->flags = DO_ACK;
+			(void)memcpy((void *)p->data, ob_data, (size_t)ob_sz);
+
+			/* signal command ready to the BPMP */
+			signal_slave(ch);
+			mmio_write_32(TEGRA_PRI_ICTLR_BASE + CPU_IEP_FIR_SET,
+				      (1UL << INT_SHR_SEM_OUTBOX_FULL));
+
+			/* loop until the command is executed */
+			for (timeout = 0; timeout < BPMP_TIMEOUT_10US; timeout++) {
+				if (master_acked(ch) == true) {
+					break;
+				}
+				udelay(1);
+			}
+
+			if (timeout != BPMP_TIMEOUT_10US) {
+				/* fetch the response payload and error code */
+				(void)memcpy(ib_data, (const void *)p->data,
+					     (size_t)ib_sz);
+				ret = p->code;
+
+				/* free this channel */
+				free_master(ch);
+			}
+		}
+	}
+
+	if (timeout == BPMP_TIMEOUT_10US) {
+		ERROR("Timed out waiting for bpmp's response\n");
+	}
+
+	return ret;
+}
+
+/*
+ * Read the per-channel base addresses from the atomics block and mark the
+ * driver initialised. Returns 0, or -ENOTSUP if the BPMP is not alive.
+ */
+int tegra_bpmp_init(void)
+{
+	uint32_t val, base;
+	unsigned int ch;
+	int ret = 0;
+
+	if (bpmp_init_state != BPMP_INIT_COMPLETE) {
+		/* check if the bpmp processor is alive. */
+		val = mmio_read_32(TEGRA_RES_SEMA_BASE + STA_OFFSET);
+		if (val != SIGN_OF_LIFE) {
+			ERROR("BPMP processor not available\n");
+			ret = -ENOTSUP;
+		}
+	}
+
+	/* only query the channels once the BPMP has shown its sign of life */
+	if ((bpmp_init_state != BPMP_INIT_COMPLETE) && (ret == 0)) {
+		/* check if clock for the atomics block is enabled */
+		val = mmio_read_32(TEGRA_CAR_RESET_BASE + TEGRA_CLK_ENB_V);
+		if ((val & CAR_ENABLE_ATOMICS) == 0) {
+			ERROR("Clock to the atomics block is disabled\n");
+		}
+
+		/* check if the atomics block is out of reset */
+		val = mmio_read_32(TEGRA_CAR_RESET_BASE + TEGRA_RST_DEV_CLR_V);
+		if ((val & CAR_ENABLE_ATOMICS) == 0) {
+			ERROR("Reset to the atomics block is asserted\n");
+		}
+
+		/* channel area is setup by BPMP before signaling handshake */
+		base = TEGRA_ATOMICS_BASE + RESULT0_REG_OFFSET;
+		for (ch = 0; ch < NR_CHANNELS; ch++) {
+			/* request this channel's base address, then read it */
+			mmio_write_32(base, (ch << TRIGGER_ID_SHIFT) |
+				      ATOMIC_CMD_GET);
+			channel_base[ch] = mmio_read_32(base);
+
+			/* increment result register offset */
+			base += 4UL;
+		}
+
+		/* mark state as "initialized" */
+		bpmp_init_state = BPMP_INIT_COMPLETE;
+
+		/* the channel values have to be visible across all cpus */
+		flush_dcache_range((uint64_t)channel_base, sizeof(channel_base));
+		flush_dcache_range((uint64_t)&bpmp_init_state,
+				   sizeof(bpmp_init_state));
+
+		INFO("%s: done\n", __func__);
+	}
+
+	return ret;
+}
diff --git a/plat/nvidia/tegra/include/drivers/bpmp.h b/plat/nvidia/tegra/include/drivers/bpmp.h
new file mode 100644
index 0000000000000000000000000000000000000000..27f57df4bedf691eb917a755f5b62bbdf4063435
--- /dev/null
+++ b/plat/nvidia/tegra/include/drivers/bpmp.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef BPMP_H
+#define BPMP_H
+
+#include <stdint.h>
+
+/* macro to enable clock to the Atomics block */
+#define CAR_ENABLE_ATOMICS	(1UL << 16)
+
+/* command to get the channel base addresses from bpmp */
+#define ATOMIC_CMD_GET		4UL
+
+/* Hardware IRQ # used to signal bpmp of an incoming command */
+#define INT_SHR_SEM_OUTBOX_FULL	6UL
+
+/* macros to decode the bpmp's state: one 2-bit field per channel 'ch' */
+#define CH_MASK(ch)		(0x3UL << ((ch) * 2UL))
+#define MA_FREE(ch)		(0x2UL << ((ch) * 2UL))
+#define MA_ACKD(ch)		(0x3UL << ((ch) * 2UL))
+
+/* response from bpmp to indicate it has powered up */
+#define SIGN_OF_LIFE		0xAAAAAAAAUL
+
+/* flags to indicate bpmp driver's state */
+#define BPMP_INIT_COMPLETE	0xBEEFF00DUL
+#define BPMP_INIT_PENDING	0xDEADBEEFUL
+
+/* requests serviced by the bpmp */
+#define MRQ_PING		0
+#define MRQ_QUERY_TAG		1
+#define MRQ_DO_IDLE		2
+#define MRQ_TOLERATE_IDLE	3
+#define MRQ_MODULE_LOAD		4
+#define MRQ_MODULE_UNLOAD	5
+#define MRQ_SWITCH_CLUSTER	6
+#define MRQ_TRACE_MODIFY	7
+#define MRQ_WRITE_TRACE		8
+#define MRQ_THREADED_PING	9
+#define MRQ_CPUIDLE_USAGE	10
+#define MRQ_MODULE_MAIL		11
+#define MRQ_SCX_ENABLE		12
+#define MRQ_BPMPIDLE_USAGE	14
+#define MRQ_HEAP_USAGE		15
+#define MRQ_SCLK_SKIP_SET_RATE	16
+#define MRQ_ENABLE_SUSPEND	17
+#define MRQ_PASR_MASK		18
+#define MRQ_DEBUGFS		19
+#define MRQ_THERMAL		27
+
+/* Tegra PM states as known to BPMP */
+#define TEGRA_PM_CC1		9
+#define TEGRA_PM_CC4		12
+#define TEGRA_PM_CC6		14
+#define TEGRA_PM_CC7		15
+#define TEGRA_PM_SC1		17
+#define TEGRA_PM_SC2		18
+#define TEGRA_PM_SC3		19
+#define TEGRA_PM_SC4		20
+#define TEGRA_PM_SC7		23
+
+/* flag to indicate if entry into a CCx power state is allowed */
+#define BPMP_CCx_ALLOWED	0UL
+
+/* number of communication channels to interact with the bpmp */
+#define NR_CHANNELS		4U
+
+/* flag to ask bpmp to acknowledge command packet */
+#define NO_ACK			(0UL << 0UL)
+#define DO_ACK			(1UL << 0UL)
+
+/* size of the command/response data */
+#define MSG_DATA_MAX_SZ		120U
+
+/**
+ * command/response packet to/from the bpmp
+ *
+ * command
+ * -------
+ * code: MRQ_* command
+ * flags: DO_ACK or NO_ACK
+ * data (32-bit words, as sent with MRQ_DO_IDLE):
+ * 	[0] = cpu #
+ * 	[1] = cluster power state (TEGRA_PM_CCx)
+ * 	[2] = system power state (TEGRA_PM_SCx)
+ *
+ * response
+ * ---------
+ * code: error code
+ * flags: not used
+ * data (bytes):
+ * 	[0-3] = response value
+ */
+typedef struct mb_data {
+	int32_t code;
+	uint32_t flags;
+	uint8_t data[MSG_DATA_MAX_SZ];
+} mb_data_t;
+
+/**
+ * Initialise the interface with the bpmp; returns 0 or a negative errno value
+ */
+int tegra_bpmp_init(void);
+
+/**
+ * Send a MRQ_* command to the bpmp and poll for its reply (local irqs disabled)
+ */
+int32_t tegra_bpmp_send_receive_atomic(int mrq, const void *ob_data, int ob_sz,
+		void *ib_data, int ib_sz);
+
+#endif /* BPMP_H */
diff --git a/plat/nvidia/tegra/include/t210/tegra_def.h b/plat/nvidia/tegra/include/t210/tegra_def.h
index ec9b68efef1c86889d8c8a7a3e003311d2d6db43..5565c7288ab37741f6cd26c978d30b547a1e432f 100644
--- a/plat/nvidia/tegra/include/t210/tegra_def.h
+++ b/plat/nvidia/tegra/include/t210/tegra_def.h
@@ -32,6 +32,12 @@
 #define PLAT_MAX_RET_STATE		U(1)
 #define PLAT_MAX_OFF_STATE		(PSTATE_ID_SOC_POWERDN + U(1))
 
+/*******************************************************************************
+ * iRAM memory constants
+ ******************************************************************************/
+#define TEGRA_IRAMA_BASE		0x40000000
+#define TEGRA_IRAMB_BASE		0x40010000
+
 /*******************************************************************************
  * GIC memory map
  ******************************************************************************/
@@ -55,6 +61,20 @@
 					 ENABLE_WRAP_INCR_MASTER1_BIT | \
 					 ENABLE_WRAP_INCR_MASTER0_BIT)
 
+/*******************************************************************************
+ * Tegra Resource Semaphore constants
+ ******************************************************************************/
+#define TEGRA_RES_SEMA_BASE		0x60001000UL
+#define  STA_OFFSET			0UL
+#define  SET_OFFSET			4UL
+#define  CLR_OFFSET			8UL
+
+/*******************************************************************************
+ * Tegra Primary Interrupt Controller constants
+ ******************************************************************************/
+#define TEGRA_PRI_ICTLR_BASE		0x60004000UL
+#define  CPU_IEP_FIR_SET		0x18UL
+
 /*******************************************************************************
  * Tegra micro-seconds timer constants
  ******************************************************************************/
@@ -67,6 +87,8 @@
 #define TEGRA_CAR_RESET_BASE		U(0x60006000)
 #define TEGRA_GPU_RESET_REG_OFFSET	U(0x28C)
 #define  GPU_RESET_BIT			(U(1) << 24)
+#define TEGRA_RST_DEV_CLR_V		U(0x434)
+#define TEGRA_CLK_ENB_V			U(0x440)
 
 /*******************************************************************************
  * Tegra Flow Controller constants
@@ -108,6 +130,15 @@
  ******************************************************************************/
 #define TEGRA_PMC_BASE			U(0x7000E400)
 
+/*******************************************************************************
+ * Tegra Atomics constants
+ ******************************************************************************/
+#define TEGRA_ATOMICS_BASE		0x70016000UL
+#define  TRIGGER0_REG_OFFSET		0UL
+#define  TRIGGER_WIDTH_SHIFT		4UL
+#define  TRIGGER_ID_SHIFT		16UL
+#define  RESULT0_REG_OFFSET		0xC00UL
+
 /*******************************************************************************
  * Tegra Memory Controller constants
  ******************************************************************************/
diff --git a/plat/nvidia/tegra/soc/t210/plat_psci_handlers.c b/plat/nvidia/tegra/soc/t210/plat_psci_handlers.c
index bd4beaa1ecb4b06a17733e6046ee50e22d3feaae..b7f4b90ca13ee3eb9be999fc08e2c2d08958325e 100644
--- a/plat/nvidia/tegra/soc/t210/plat_psci_handlers.c
+++ b/plat/nvidia/tegra/soc/t210/plat_psci_handlers.c
@@ -5,9 +5,6 @@
  */
 
 #include <assert.h>
-
-#include <platform_def.h>
-
 #include <arch_helpers.h>
 #include <common/debug.h>
 #include <drivers/delay_timer.h>
@@ -15,12 +12,14 @@
 #include <lib/psci/psci.h>
 #include <plat/common/platform.h>
 
+#include <bpmp.h>
 #include <flowctrl.h>
 #include <pmc.h>
+#include <platform_def.h>
+#include <security_engine.h>
 #include <tegra_def.h>
 #include <tegra_private.h>
 #include <tegra_platform.h>
-#include <security_engine.h>
 
 /*
  * Register used to clear CPU reset signals. Each CPU has two reset
@@ -57,7 +56,7 @@ int32_t tegra_soc_validate_power_state(unsigned int power_state,
 		 * Cluster powerdown/idle request only for afflvl 1
 		 */
 		req_state->pwr_domain_state[MPIDR_AFFLVL1] = state_id;
-		req_state->pwr_domain_state[MPIDR_AFFLVL0] = state_id;
+		req_state->pwr_domain_state[MPIDR_AFFLVL0] = PSTATE_ID_CORE_POWERDN;
 
 		break;
 
@@ -89,9 +88,11 @@ plat_local_state_t tegra_soc_get_target_pwr_state(unsigned int lvl,
 					     const plat_local_state_t *states,
 					     unsigned int ncpu)
 {
-	plat_local_state_t target = *states;
+	plat_local_state_t target = PSCI_LOCAL_STATE_RUN;
 	int cpu = plat_my_core_pos();
 	int core_pos = read_mpidr() & MPIDR_CPU_MASK;
+	uint32_t bpmp_reply, data[3];
+	int ret;
 
 	/* get the power state at this level */
 	if (lvl == MPIDR_AFFLVL1)
@@ -99,19 +100,57 @@ plat_local_state_t tegra_soc_get_target_pwr_state(unsigned int lvl,
 	if (lvl == MPIDR_AFFLVL2)
 		target = *(states + cpu);
 
-	/* Cluster idle/power-down */
-	if ((lvl == MPIDR_AFFLVL1) && ((target == PSTATE_ID_CLUSTER_IDLE) ||
-	    (target == PSTATE_ID_CLUSTER_POWERDN))) {
-		return target;
-	}
+	if ((lvl == MPIDR_AFFLVL1) && (target == PSTATE_ID_CLUSTER_IDLE)) {
+
+		/* initialize the bpmp interface */
+		(void)tegra_bpmp_init();
+
+		/* Cluster idle */
+		data[0] = (uint32_t)cpu;
+		data[1] = TEGRA_PM_CC6;
+		data[2] = TEGRA_PM_SC1;
+		ret = tegra_bpmp_send_receive_atomic(MRQ_DO_IDLE,
+				(void *)&data, (int)sizeof(data),
+				(void *)&bpmp_reply, (int)sizeof(bpmp_reply));
+
+		/* check if cluster idle entry is allowed */
+		if ((ret != 0L) || (bpmp_reply != BPMP_CCx_ALLOWED)) {
+
+			/* Cluster idle not allowed */
+			target = PSCI_LOCAL_STATE_RUN;
+		}
 
-	/* System Suspend */
-	if (((lvl == MPIDR_AFFLVL2) || (lvl == MPIDR_AFFLVL1)) &&
-	    (target == PSTATE_ID_SOC_POWERDN))
-		return PSTATE_ID_SOC_POWERDN;
+	} else if ((lvl == MPIDR_AFFLVL1) && (target == PSTATE_ID_CLUSTER_POWERDN)) {
 
-	/* default state */
-	return PSCI_LOCAL_STATE_RUN;
+		/* initialize the bpmp interface */
+		(void)tegra_bpmp_init();
+
+		/* Cluster power-down */
+		data[0] = (uint32_t)cpu;
+		data[1] = TEGRA_PM_CC7;
+		data[2] = TEGRA_PM_SC1;
+		ret = tegra_bpmp_send_receive_atomic(MRQ_DO_IDLE,
+				(void *)&data, (int)sizeof(data),
+				(void *)&bpmp_reply, (int)sizeof(bpmp_reply));
+
+		/* check if cluster power down is allowed */
+		if ((ret != 0L) || (bpmp_reply != BPMP_CCx_ALLOWED)) {
+
+			/* Cluster power down not allowed */
+			target = PSCI_LOCAL_STATE_RUN;
+		}
+
+	} else if (((lvl == MPIDR_AFFLVL2) || (lvl == MPIDR_AFFLVL1)) &&
+	    (target == PSTATE_ID_SOC_POWERDN)) {
+
+		/* System Suspend */
+		target = PSTATE_ID_SOC_POWERDN;
+
+	} else {
+		; /* do nothing */
+	}
+
+	return target;
 }
 
 int tegra_soc_pwr_domain_suspend(const psci_power_state_t *target_state)
@@ -132,6 +171,7 @@ int tegra_soc_pwr_domain_suspend(const psci_power_state_t *target_state)
 			(stateid_afflvl1 == PSTATE_ID_SOC_POWERDN));
 
 		if (tegra_chipid_is_t210_b01()) {
+
 			/* Suspend se/se2 and pka1 */
 			if (tegra_se_suspend() != 0) {
 				ret = PSCI_E_INTERN_FAIL;
@@ -143,21 +183,21 @@ int tegra_soc_pwr_domain_suspend(const psci_power_state_t *target_state)
 			}
 		}
 
-		/* suspend the entire soc */
+		/* enter system suspend */
 		if (ret == PSCI_E_SUCCESS) {
 			tegra_fc_soc_powerdn(mpidr);
 		}
 
 	} else if (stateid_afflvl1 == PSTATE_ID_CLUSTER_IDLE) {
 
-		assert(stateid_afflvl0 == PSTATE_ID_CLUSTER_IDLE);
+		assert(stateid_afflvl0 == PSTATE_ID_CORE_POWERDN);
 
 		/* Prepare for cluster idle */
 		tegra_fc_cluster_idle(mpidr);
 
 	} else if (stateid_afflvl1 == PSTATE_ID_CLUSTER_POWERDN) {
 
-		assert(stateid_afflvl0 == PSTATE_ID_CLUSTER_POWERDN);
+		assert(stateid_afflvl0 == PSTATE_ID_CORE_POWERDN);
 
 		/* Prepare for cluster powerdn */
 		tegra_fc_cluster_powerdn(mpidr);
@@ -168,7 +208,8 @@ int tegra_soc_pwr_domain_suspend(const psci_power_state_t *target_state)
 		tegra_fc_cpu_powerdn(mpidr);
 
 	} else {
-		ERROR("%s: Unknown state id\n", __func__);
+		ERROR("%s: Unknown state id (%d, %d, %d)\n", __func__,
+			stateid_afflvl2, stateid_afflvl1, stateid_afflvl0);
 		ret = PSCI_E_NOT_SUPPORTED;
 	}
 
diff --git a/plat/nvidia/tegra/soc/t210/plat_setup.c b/plat/nvidia/tegra/soc/t210/plat_setup.c
index 0c2fe965296df6ba632eca5e69aa5f8a3375db7e..451da13e6f3abbfa40359e62f0cd90eef898b1c3 100644
--- a/plat/nvidia/tegra/soc/t210/plat_setup.c
+++ b/plat/nvidia/tegra/soc/t210/plat_setup.c
@@ -5,6 +5,7 @@
  */
 
 #include <arch_helpers.h>
+#include <bpmp.h>
 #include <common/bl_common.h>
 #include <drivers/console.h>
 #include <lib/xlat_tables/xlat_tables_v2.h>
@@ -24,6 +25,10 @@
  * Table of regions to map using the MMU.
  */
 static const mmap_region_t tegra_mmap[] = {
+	MAP_REGION_FLAT(TEGRA_IRAMA_BASE, 0x10000, /* 64KB */
+			MT_DEVICE | MT_RW | MT_SECURE),
+	MAP_REGION_FLAT(TEGRA_IRAMB_BASE, 0x10000, /* 64KB */
+			MT_DEVICE | MT_RW | MT_SECURE),
 	MAP_REGION_FLAT(MMIO_RANGE_0_ADDR, MMIO_RANGE_SIZE,
 			MT_DEVICE | MT_RW | MT_SECURE),
 	MAP_REGION_FLAT(MMIO_RANGE_1_ADDR, MMIO_RANGE_SIZE,
diff --git a/plat/nvidia/tegra/soc/t210/platform_t210.mk b/plat/nvidia/tegra/soc/t210/platform_t210.mk
index 3cc71b28ec176a1ca79f366e3b4c2f57658fa0be..4749d76647f77ffc42e987948fcbcd0ebfa72a7e 100644
--- a/plat/nvidia/tegra/soc/t210/platform_t210.mk
+++ b/plat/nvidia/tegra/soc/t210/platform_t210.mk
@@ -26,6 +26,7 @@ PLAT_INCLUDES		+=	-I${SOC_DIR}/drivers/se
 
 BL31_SOURCES		+=	lib/cpus/aarch64/cortex_a53.S			\
 				lib/cpus/aarch64/cortex_a57.S			\
+				${COMMON_DIR}/drivers/bpmp/bpmp.c	\
 				${COMMON_DIR}/drivers/flowctrl/flowctrl.c	\
 				${COMMON_DIR}/drivers/memctrl/memctrl_v1.c	\
 				${SOC_DIR}/plat_psci_handlers.c			\