diff --git a/Makefile b/Makefile
index c82c7a20582ab3e486aa58b09758b62d1c5358a2..6ad4b4d7b605ea1e07065af83a1555d449263099 100644
--- a/Makefile
+++ b/Makefile
@@ -63,6 +63,8 @@ ARM_GIC_ARCH		:=	2
 # Flag used to indicate if ASM_ASSERTION should be enabled for the build.
 # This defaults to being present in DEBUG builds only.
 ASM_ASSERTION		:=	${DEBUG}
+# Build option to choose whether Trusted firmware uses Coherent memory or not.
+USE_COHERENT_MEM	:=	1
 # Default FIP file name
 FIP_NAME		:= fip.bin
 
@@ -230,6 +232,10 @@ $(eval $(call add_define,ASM_ASSERTION))
 # Process LOG_LEVEL flag
 $(eval $(call add_define,LOG_LEVEL))
 
+# Process USE_COHERENT_MEM flag
+$(eval $(call assert_boolean,USE_COHERENT_MEM))
+$(eval $(call add_define,USE_COHERENT_MEM))
+
 ASFLAGS			+= 	-nostdinc -ffreestanding -Wa,--fatal-warnings	\
 				-Werror -Wmissing-include-dirs			\
 				-mgeneral-regs-only -D__ASSEMBLY__		\
diff --git a/bl31/bl31.mk b/bl31/bl31.mk
index f53a41ff7f49c4fec1a4e8fd47a29d737291824c..4c25a60a3ac4d419e4b01b9d8ef662bb698ab5af 100644
--- a/bl31/bl31.mk
+++ b/bl31/bl31.mk
@@ -40,7 +40,6 @@ BL31_SOURCES		+=	bl31/bl31_main.c				\
 				bl31/aarch64/runtime_exceptions.S		\
 				bl31/aarch64/crash_reporting.S			\
 				lib/cpus/aarch64/cpu_helpers.S			\
-				lib/locks/bakery/bakery_lock.c			\
 				lib/locks/exclusive/spinlock.S			\
 				services/std_svc/std_svc_setup.c		\
 				services/std_svc/psci/psci_afflvl_off.c		\
@@ -53,6 +52,12 @@ BL31_SOURCES		+=	bl31/bl31_main.c				\
 				services/std_svc/psci/psci_setup.c		\
 				services/std_svc/psci/psci_system_off.c
 
+ifeq (${USE_COHERENT_MEM}, 1)
+BL31_SOURCES		+=	lib/locks/bakery/bakery_lock_coherent.c
+else
+BL31_SOURCES		+=	lib/locks/bakery/bakery_lock_normal.c
+endif
+
 BL31_LINKERFILE		:=	bl31/bl31.ld.S
 
 # Flag used by the generic interrupt management framework to  determine if
diff --git a/include/bl31/cpu_data.h b/include/bl31/cpu_data.h
index c886e2b4ed69f5769cae5055cb0361baed4d2488..1926e292a9752b15547238f4c08ecd75555e5ee0 100644
--- a/include/bl31/cpu_data.h
+++ b/include/bl31/cpu_data.h
@@ -32,7 +32,7 @@
 #define __CPU_DATA_H__
 
 /* Offsets for the cpu_data structure */
-#define CPU_DATA_CRASH_BUF_OFFSET	0x20
+#define CPU_DATA_CRASH_BUF_OFFSET	0x18
 #if CRASH_REPORTING
 #define CPU_DATA_LOG2SIZE		7
 #else
@@ -45,10 +45,20 @@
 #ifndef __ASSEMBLY__
 
 #include <arch_helpers.h>
+#include <cassert.h>
 #include <platform_def.h>
 #include <psci.h>
 #include <stdint.h>
 
+/* Offsets for the cpu_data structure */
+#define CPU_DATA_PSCI_LOCK_OFFSET	__builtin_offsetof\
+		(cpu_data_t, psci_svc_cpu_data.pcpu_bakery_info)
+
+#if PLAT_PCPU_DATA_SIZE
+#define CPU_DATA_PLAT_PCPU_OFFSET	__builtin_offsetof\
+		(cpu_data_t, platform_cpu_data)
+#endif
+
 /*******************************************************************************
  * Function & variable prototypes
  ******************************************************************************/
@@ -69,9 +79,12 @@
 typedef struct cpu_data {
 	void *cpu_context[2];
 	uint64_t cpu_ops_ptr;
-	struct psci_cpu_data psci_svc_cpu_data;
 #if CRASH_REPORTING
 	uint64_t crash_buf[CPU_DATA_CRASH_BUF_SIZE >> 3];
+#endif
+	struct psci_cpu_data psci_svc_cpu_data;
+#if PLAT_PCPU_DATA_SIZE
+	uint8_t platform_cpu_data[PLAT_PCPU_DATA_SIZE];
 #endif
 } __aligned(CACHE_WRITEBACK_GRANULE) cpu_data_t;
 
diff --git a/include/bl31/services/psci.h b/include/bl31/services/psci.h
index 6c23f1bf878d100df92c304e31a1d0d3a0a0769a..dc6cc04c46e827fd540eb187fd9a9d9df274903b 100644
--- a/include/bl31/services/psci.h
+++ b/include/bl31/services/psci.h
@@ -31,6 +31,17 @@
 #ifndef __PSCI_H__
 #define __PSCI_H__
 
+#include <bakery_lock.h>
+#include <platform_def.h>	/* for PLATFORM_NUM_AFFS */
+
+/*******************************************************************************
+ * Number of affinity instances whose state this psci imp. can track
+ ******************************************************************************/
+#ifdef PLATFORM_NUM_AFFS
+#define PSCI_NUM_AFFS		PLATFORM_NUM_AFFS
+#else
+#define PSCI_NUM_AFFS		(2 * PLATFORM_CORE_COUNT)
+#endif
 
 /*******************************************************************************
  * Defines for runtime services func ids
@@ -140,6 +151,9 @@ typedef struct psci_cpu_data {
 	uint32_t power_state;
 	uint32_t max_phys_off_afflvl;	/* Highest affinity level in physically
 					   powered off state */
+#if !USE_COHERENT_MEM
+	bakery_info_t pcpu_bakery_info[PSCI_NUM_AFFS];
+#endif
 } psci_cpu_data_t;
 
 /*******************************************************************************
diff --git a/include/lib/bakery_lock.h b/include/lib/bakery_lock.h
index 95634cf5480dba6949f141e16cabe080f3351bc2..9736f850accdfd5c54bb8afa851a178dec524f32 100644
--- a/include/lib/bakery_lock.h
+++ b/include/lib/bakery_lock.h
@@ -35,6 +35,11 @@
 
 #define BAKERY_LOCK_MAX_CPUS		PLATFORM_CORE_COUNT
 
+#ifndef __ASSEMBLY__
+#include <stdint.h>
+
+#if USE_COHERENT_MEM
+
 typedef struct bakery_lock {
 	int owner;
 	volatile char entering[BAKERY_LOCK_MAX_CPUS];
@@ -48,4 +53,21 @@ void bakery_lock_get(bakery_lock_t *bakery);
 void bakery_lock_release(bakery_lock_t *bakery);
 int bakery_lock_try(bakery_lock_t *bakery);
 
+#else
+
+typedef struct bakery_info {
+	/*
+	 * lock_data packs two fields into one 16-bit word:
+	 * Bit[0]       : choosing. Set while the CPU is choosing its
+	 *                bakery number.
+	 * Bits[1 - 15] : number. The bakery number allocated; 0 if none.
+	 */
+	volatile uint16_t lock_data;
+} bakery_info_t;
+
+void bakery_lock_get(unsigned int id, unsigned int offset);
+void bakery_lock_release(unsigned int id, unsigned int offset);
+
+#endif /* USE_COHERENT_MEM */
+#endif /* __ASSEMBLY__ */
 #endif /* __BAKERY_LOCK_H__ */
diff --git a/lib/locks/bakery/bakery_lock.c b/lib/locks/bakery/bakery_lock_coherent.c
similarity index 98%
rename from lib/locks/bakery/bakery_lock.c
rename to lib/locks/bakery/bakery_lock_coherent.c
index 6bdc35a5eb5fa8682b79a5a205965968c0482514..5d538ce2c6afc301ed9d0ec6f92c2121f0ced92d 100644
--- a/lib/locks/bakery/bakery_lock.c
+++ b/lib/locks/bakery/bakery_lock_coherent.c
@@ -31,11 +31,13 @@
 #include <arch_helpers.h>
 #include <assert.h>
 #include <bakery_lock.h>
+#include <cpu_data.h>
 #include <platform.h>
 #include <string.h>
 
 /*
- * Functions in this file implement Bakery Algorithm for mutual exclusion.
+ * Functions in this file implement Bakery Algorithm for mutual exclusion with the
+ * bakery lock data structures in coherent memory.
  *
  * ARM architecture offers a family of exclusive access instructions to
  * efficiently implement mutual exclusion with hardware support. However, as
diff --git a/lib/locks/bakery/bakery_lock_normal.c b/lib/locks/bakery/bakery_lock_normal.c
new file mode 100644
index 0000000000000000000000000000000000000000..a325fd4feb2716ceff07c2ae9f2a39c948218b11
--- /dev/null
+++ b/lib/locks/bakery/bakery_lock_normal.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arch_helpers.h>
+#include <assert.h>
+#include <bakery_lock.h>
+#include <cpu_data.h>
+#include <platform.h>
+#include <string.h>
+
+/*
+ * Functions in this file implement Bakery Algorithm for mutual exclusion with the
+ * bakery lock data structures in cacheable and Normal memory.
+ *
+ * ARM architecture offers a family of exclusive access instructions to
+ * efficiently implement mutual exclusion with hardware support. However, as
+ * well as depending on external hardware, these instructions have defined
+ * behavior only on certain memory types (cacheable and Normal memory in
+ * particular; see ARMv8 Architecture Reference Manual section B2.10). Use cases
+ * in trusted firmware are such that mutual exclusion implementation cannot
+ * expect that accesses to the lock have the specific type required by the
+ * architecture for these primitives to function (for example, not all
+ * contenders may have address translation enabled).
+ *
+ * This implementation does not use mutual exclusion primitives. It expects
+ * memory regions where the locks reside to be cacheable and Normal.
+ *
+ * Note that the ARM architecture guarantees single-copy atomicity for aligned
+ * accesses regardless of status of address translation.
+ */
+
+/* Combine a ticket number and core position into a priority (lower wins) */
+#define PRIORITY(t, pos)	(((t) << 8) | (pos))
+
+#define CHOOSING_TICKET		0x1
+#define CHOOSING_DONE		0x0
+
+#define bakery_is_choosing(info)	((info) & 0x1)
+#define bakery_ticket_number(info)	(((info) >> 1) & 0x7FFF)
+#define make_bakery_data(choosing, number) \
+		((((choosing) & 0x1) | ((number) << 1)) & 0xFFFF)
+
+/* These macros assume the bakery_info array is located at the given offset */
+#define get_my_bakery_info(offset, id)		\
+	(((bakery_info_t *) (((uint8_t *)_cpu_data()) + (offset))) + (id))
+#define get_bakery_info_by_index(offset, id, ix)	\
+	(((bakery_info_t *) (((uint8_t *)_cpu_data_by_index(ix)) + (offset))) + (id))
+
+/* After a write: clean by VA if caches are on, else invalidate; then DSB */
+#define write_cache_op(addr, cached)	do {	\
+		((cached) ? dccvac((uint64_t)(addr)) :	\
+			dcivac((uint64_t)(addr)));	\
+		dsbish();	\
+	} while (0)
+
+/* Before a read: clean and invalidate by VA, only when caches are on */
+#define read_cache_op(addr, cached)	\
+	do { if (cached) dccivac((uint64_t)(addr)); } while (0)
+
+/*
+ * Allocate a bakery ticket for the calling cpu on lock 'id'.
+ *
+ * id        : index of the lock in the per-cpu bakery_info array
+ * offset    : byte offset of that array within the per-cpu data
+ * me        : core position of the calling cpu
+ * is_cached : non-zero when SCTLR_EL3.C is set on the calling cpu
+ *
+ * Returns the ticket taken: the highest ticket seen on other cpus, plus one.
+ */
+static unsigned int bakery_get_ticket(unsigned int id, unsigned int offset,
+				      unsigned int me, unsigned int is_cached)
+{
+	unsigned int my_ticket, their_ticket;
+	unsigned int they;
+	bakery_info_t *my_bakery_info, *their_bakery_info;
+
+	/* Locate this cpu's bakery information; it must not be NULL */
+	my_bakery_info = get_my_bakery_info(offset, id);
+	assert(my_bakery_info);
+
+	/*
+	 * Enter the doorway: set 'choosing' so that contenders wait until this
+	 * cpu has published its ticket.
+	 */
+	my_ticket = 0;
+	my_bakery_info->lock_data = make_bakery_data(CHOOSING_TICKET, my_ticket);
+
+	write_cache_op(my_bakery_info, is_cached);
+
+	/* Find the highest ticket number currently held by any other cpu */
+	for (they = 0; they < BAKERY_LOCK_MAX_CPUS; they++) {
+		if (me == they)
+			continue;
+
+		/* Refresh our view of the contender's info so it is not stale */
+		their_bakery_info = get_bakery_info_by_index(offset, id, they);
+		assert(their_bakery_info);
+
+		read_cache_op(their_bakery_info, is_cached);
+
+		their_ticket = bakery_ticket_number(their_bakery_info->lock_data);
+		if (their_ticket > my_ticket)
+			my_ticket = their_ticket;
+	}
+
+	/*
+	 * Take the next ticket. It must fit in the 15-bit number field: a
+	 * wrapped value would be published as 0, i.e. "not contending".
+	 */
+	++my_ticket;
+	assert(my_ticket <= 0x7FFF);
+
+	/* Publish the ticket and clear 'choosing' to release waiting cpus */
+	my_bakery_info->lock_data = make_bakery_data(CHOOSING_DONE, my_ticket);
+
+	write_cache_op(my_bakery_info, is_cached);
+
+	return my_ticket;
+}
+
+/*
+ * Acquire bakery lock 'id'.
+ *
+ * 'offset' is the byte offset, within each cpu's per-cpu data, of the
+ * bakery_info array that 'id' indexes.
+ */
+void bakery_lock_get(unsigned int id, unsigned int offset)
+{
+	unsigned int they, me, is_cached;
+	unsigned int my_ticket, my_prio, their_ticket;
+	bakery_info_t *their_bakery_info;
+	uint16_t their_bakery_data;
+
+	me = platform_get_core_pos(read_mpidr_el1());
+
+	is_cached = read_sctlr_el3() & SCTLR_C_BIT;
+
+	/* Get a ticket */
+	my_ticket = bakery_get_ticket(id, offset, me, is_cached);
+
+	/* Compare our priority with each contender's before taking the lock */
+	my_prio = PRIORITY(my_ticket, me);
+	for (they = 0; they < BAKERY_LOCK_MAX_CPUS; they++) {
+		if (me == they)
+			continue;
+
+		/* Re-read the contender's bakery info so it is not stale */
+		their_bakery_info = get_bakery_info_by_index(offset, id, they);
+		assert(their_bakery_info);
+		read_cache_op(their_bakery_info, is_cached);
+
+		their_bakery_data = their_bakery_info->lock_data;
+
+		/* Wait for the contender to get their ticket */
+		while (bakery_is_choosing(their_bakery_data)) {
+			read_cache_op(their_bakery_info, is_cached);
+			their_bakery_data = their_bakery_info->lock_data;
+		}
+
+		/*
+		 * If the other party is a contender, they'll have non-zero
+		 * (valid) ticket value. If they do, compare priorities
+		 */
+		their_ticket = bakery_ticket_number(their_bakery_data);
+		if (their_ticket && (PRIORITY(their_ticket, they) < my_prio)) {
+			/*
+			 * They have higher priority (lower value). Wait for
+			 * their ticket value to change (either release the lock
+			 * to have it dropped to 0; or drop and probably contend
+			 * again for the same lock to have an even higher value)
+			 */
+			do {
+				wfe();
+				read_cache_op(their_bakery_info, is_cached);
+			} while (their_ticket
+				== bakery_ticket_number(their_bakery_info->lock_data));
+		}
+	}
+}
+
+void bakery_lock_release(unsigned int id, unsigned int offset)
+{
+	unsigned int cache_enabled = read_sctlr_el3() & SCTLR_C_BIT;
+	bakery_info_t *info = get_my_bakery_info(offset, id);
+
+	/* Ticket 0 with 'choosing' clear marks this cpu as not contending */
+	info->lock_data = 0;
+	write_cache_op(info, cache_enabled);
+	sev();
+}
diff --git a/plat/fvp/drivers/pwrc/fvp_pwrc.c b/plat/fvp/drivers/pwrc/fvp_pwrc.c
index c32c322bf2fc92085ad98c601f4b42a473aa008b..0497c2b81c22a2dc753b8c0a00a711c809c7f3b5 100644
--- a/plat/fvp/drivers/pwrc/fvp_pwrc.c
+++ b/plat/fvp/drivers/pwrc/fvp_pwrc.c
@@ -31,13 +31,19 @@
 #include <bakery_lock.h>
 #include <mmio.h>
 #include "../../fvp_def.h"
+#include "../../fvp_private.h"
 #include "fvp_pwrc.h"
 
 /*
  * TODO: Someday there will be a generic power controller api. At the moment
  * each platform has its own pwrc so just exporting functions is fine.
  */
+#if USE_COHERENT_MEM
 static bakery_lock_t pwrc_lock __attribute__ ((section("tzfw_coherent_mem")));
+#define LOCK_ARG	&pwrc_lock
+#else
+#define LOCK_ARG	FVP_PWRC_BAKERY_ID
+#endif
 
 unsigned int fvp_pwrc_get_cpu_wkr(unsigned long mpidr)
 {
@@ -47,54 +53,55 @@ unsigned int fvp_pwrc_get_cpu_wkr(unsigned long mpidr)
 unsigned int fvp_pwrc_read_psysr(unsigned long mpidr)
 {
 	unsigned int rc;
-	bakery_lock_get(&pwrc_lock);
+	fvp_lock_get(LOCK_ARG);
 	mmio_write_32(PWRC_BASE + PSYSR_OFF, (unsigned int) mpidr);
 	rc = mmio_read_32(PWRC_BASE + PSYSR_OFF);
-	bakery_lock_release(&pwrc_lock);
+	fvp_lock_release(LOCK_ARG);
 	return rc;
 }
 
 void fvp_pwrc_write_pponr(unsigned long mpidr)
 {
-	bakery_lock_get(&pwrc_lock);
+	fvp_lock_get(LOCK_ARG);
 	mmio_write_32(PWRC_BASE + PPONR_OFF, (unsigned int) mpidr);
-	bakery_lock_release(&pwrc_lock);
+	fvp_lock_release(LOCK_ARG);
 }
 
 void fvp_pwrc_write_ppoffr(unsigned long mpidr)
 {
-	bakery_lock_get(&pwrc_lock);
+	fvp_lock_get(LOCK_ARG);
 	mmio_write_32(PWRC_BASE + PPOFFR_OFF, (unsigned int) mpidr);
-	bakery_lock_release(&pwrc_lock);
+	fvp_lock_release(LOCK_ARG);
 }
 
 void fvp_pwrc_set_wen(unsigned long mpidr)
 {
-	bakery_lock_get(&pwrc_lock);
+	fvp_lock_get(LOCK_ARG);
 	mmio_write_32(PWRC_BASE + PWKUPR_OFF,
 		      (unsigned int) (PWKUPR_WEN | mpidr));
-	bakery_lock_release(&pwrc_lock);
+	fvp_lock_release(LOCK_ARG);
 }
 
 void fvp_pwrc_clr_wen(unsigned long mpidr)
 {
-	bakery_lock_get(&pwrc_lock);
+	fvp_lock_get(LOCK_ARG);
 	mmio_write_32(PWRC_BASE + PWKUPR_OFF,
 		      (unsigned int) mpidr);
-	bakery_lock_release(&pwrc_lock);
+	fvp_lock_release(LOCK_ARG);
 }
 
 void fvp_pwrc_write_pcoffr(unsigned long mpidr)
 {
-	bakery_lock_get(&pwrc_lock);
+	fvp_lock_get(LOCK_ARG);
 	mmio_write_32(PWRC_BASE + PCOFFR_OFF, (unsigned int) mpidr);
-	bakery_lock_release(&pwrc_lock);
+	fvp_lock_release(LOCK_ARG);
 }
 
 /* Nothing else to do here apart from initializing the lock */
 int fvp_pwrc_setup(void)
 {
-	bakery_lock_init(&pwrc_lock);
+	fvp_lock_init(LOCK_ARG);
+
 	return 0;
 }
 
diff --git a/plat/fvp/fvp_private.h b/plat/fvp/fvp_private.h
index 2dcb327ff1ac87ce998a9474d2b15c03f414d5a8..6f1a637e770128e49514d4ac1f79606dca5aeb15 100644
--- a/plat/fvp/fvp_private.h
+++ b/plat/fvp/fvp_private.h
@@ -31,7 +31,9 @@
 #ifndef __FVP_PRIVATE_H__
 #define __FVP_PRIVATE_H__
 
+#include <bakery_lock.h>
 #include <bl_common.h>
+#include <cpu_data.h>
 #include <platform_def.h>
 
 
@@ -55,10 +57,60 @@ typedef struct bl2_to_bl31_params_mem {
 	entry_point_info_t bl31_ep_info;
 } bl2_to_bl31_params_mem_t;
 
+#if USE_COHERENT_MEM
+/*
+ * These are wrapper macros to the Coherent Memory Bakery Lock API.
+ */
+#define fvp_lock_init(_lock_arg)	bakery_lock_init(_lock_arg)
+#define fvp_lock_get(_lock_arg)		bakery_lock_get(_lock_arg)
+#define fvp_lock_release(_lock_arg)	bakery_lock_release(_lock_arg)
+
+#else
+
 /*******************************************************************************
- * Forward declarations
+ * Constants to specify how many bakery locks this platform implements. These
+ * are used if the platform chooses not to use coherent memory for bakery lock
+ * data structures.
  ******************************************************************************/
-struct meminfo;
+#define FVP_MAX_BAKERIES	1
+#define FVP_PWRC_BAKERY_ID	0
+
+/*******************************************************************************
+ * Definition of structure which holds platform specific per-cpu data. Currently
+ * it holds only the bakery lock information for each cpu. The constants that
+ * specify how many bakeries this platform implements and the bakery ids are
+ * defined above in this file.
+ ******************************************************************************/
+typedef struct fvp_cpu_data {
+	bakery_info_t pcpu_bakery_info[FVP_MAX_BAKERIES];
+} fvp_cpu_data_t;
+
+/* Macro to define the offset of bakery_info_t in fvp_cpu_data_t */
+#define FVP_CPU_DATA_LOCK_OFFSET	__builtin_offsetof\
+					    (fvp_cpu_data_t, pcpu_bakery_info)
+
+
+/*******************************************************************************
+ * Helper macros for bakery lock api when using the above fvp_cpu_data_t for
+ * bakery lock data structures. The bakery_info array is located by adding its
+ * offset in fvp_cpu_data_t to the offset of the platform data in cpu_data_t.
+ ******************************************************************************/
+#define fvp_lock_init(_lock_arg)	/* No init required */
+#define fvp_lock_get(_lock_arg)		bakery_lock_get(_lock_arg,  	    \
+						CPU_DATA_PLAT_PCPU_OFFSET + \
+						FVP_CPU_DATA_LOCK_OFFSET)
+#define fvp_lock_release(_lock_arg)	bakery_lock_release(_lock_arg,	    \
+						CPU_DATA_PLAT_PCPU_OFFSET + \
+						FVP_CPU_DATA_LOCK_OFFSET)
+
+/*
+ * Ensure that the size of the FVP specific per-cpu data structure and the size
+ * of the memory allocated in generic per-cpu data for the platform are the same.
+ */
+CASSERT(PLAT_PCPU_DATA_SIZE == sizeof(fvp_cpu_data_t),	\
+	fvp_pcpu_data_size_mismatch);
+
+#endif /* USE_COHERENT_MEM */
 
 /*******************************************************************************
  * Function and variable prototypes
@@ -75,6 +127,7 @@ void fvp_configure_mmu_el3(unsigned long total_base,
 			   unsigned long,
 			   unsigned long,
 			   unsigned long);
+
 int fvp_config_setup(void);
 
 void fvp_cci_init(void);
diff --git a/plat/fvp/include/platform_def.h b/plat/fvp/include/platform_def.h
index 5364a3da2f491f0593b6adf156cea333aeb830c9..e3c48e67193f4f21b9e67fadf6f449d94144e123 100644
--- a/plat/fvp/include/platform_def.h
+++ b/plat/fvp/include/platform_def.h
@@ -169,5 +169,12 @@
 #define CACHE_WRITEBACK_SHIFT   6
 #define CACHE_WRITEBACK_GRANULE (1 << CACHE_WRITEBACK_SHIFT)
 
+#if !USE_COHERENT_MEM
+/*******************************************************************************
+ * Size of the per-cpu data in bytes that should be reserved in the generic
+ * per-cpu data structure for the FVP port.
+ ******************************************************************************/
+#define PLAT_PCPU_DATA_SIZE	2
+#endif
 
 #endif /* __PLATFORM_DEF_H__ */
diff --git a/plat/juno/include/platform_def.h b/plat/juno/include/platform_def.h
index ee77b83237fdf2977393ce7cd576f559d55549ed..cd077021ebdb48157ac39f86b2a9a85fd8c73918 100644
--- a/plat/juno/include/platform_def.h
+++ b/plat/juno/include/platform_def.h
@@ -174,4 +174,12 @@
 #define CACHE_WRITEBACK_SHIFT   6
 #define CACHE_WRITEBACK_GRANULE (1 << CACHE_WRITEBACK_SHIFT)
 
+#if !USE_COHERENT_MEM
+/*******************************************************************************
+ * Size of the per-cpu data in bytes that should be reserved in the generic
+ * per-cpu data structure for the Juno port.
+ ******************************************************************************/
+#define PLAT_PCPU_DATA_SIZE	2
+#endif
+
 #endif /* __PLATFORM_DEF_H__ */
diff --git a/plat/juno/juno_private.h b/plat/juno/juno_private.h
index 14d7af4dad2eab290ecdb0bd428e470d959c8b32..b7ef4488a5edc20e798c163489ffe9578c3caae8 100644
--- a/plat/juno/juno_private.h
+++ b/plat/juno/juno_private.h
@@ -31,7 +31,9 @@
 #ifndef __JUNO_PRIVATE_H__
 #define __JUNO_PRIVATE_H__
 
+#include <bakery_lock.h>
 #include <bl_common.h>
+#include <cpu_data.h>
 #include <platform_def.h>
 #include <stdint.h>
 
@@ -59,6 +61,68 @@ typedef struct bl2_to_bl31_params_mem {
 	struct entry_point_info bl31_ep_info;
 } bl2_to_bl31_params_mem_t;
 
+#if IMAGE_BL31
+#if USE_COHERENT_MEM
+/*
+ * These are wrapper macros to the Coherent Memory Bakery Lock API.
+ */
+#define juno_lock_init(_lock_arg)		bakery_lock_init(_lock_arg)
+#define juno_lock_get(_lock_arg)		bakery_lock_get(_lock_arg)
+#define juno_lock_release(_lock_arg)		bakery_lock_release(_lock_arg)
+
+#else
+
+/*******************************************************************************
+ * Constants that specify how many bakeries this platform implements and bakery
+ * ids.
+ ******************************************************************************/
+#define JUNO_MAX_BAKERIES	1
+#define JUNO_MHU_BAKERY_ID	0
+
+/*******************************************************************************
+ * Definition of structure which holds platform specific per-cpu data. Currently
+ * it holds only the bakery lock information for each cpu. The constants that
+ * specify how many bakeries this platform implements and the bakery ids are
+ * defined above in this file.
+ ******************************************************************************/
+typedef struct juno_cpu_data {
+	bakery_info_t pcpu_bakery_info[JUNO_MAX_BAKERIES];
+} juno_cpu_data_t;
+
+/* Macro to define the offset of bakery_info_t in juno_cpu_data_t */
+#define JUNO_CPU_DATA_LOCK_OFFSET	__builtin_offsetof\
+					    (juno_cpu_data_t, pcpu_bakery_info)
+
+/*******************************************************************************
+ * Helper macros for bakery lock api when using the above juno_cpu_data_t for
+ * bakery lock data structures. The bakery_info array is located by adding its
+ * offset in juno_cpu_data_t to the offset of the platform data in cpu_data_t.
+ ******************************************************************************/
+#define juno_lock_init(_lock_arg)		/* No init required */
+#define juno_lock_get(_lock_arg)		bakery_lock_get(_lock_arg,	\
+						    CPU_DATA_PLAT_PCPU_OFFSET + \
+						    JUNO_CPU_DATA_LOCK_OFFSET)
+#define juno_lock_release(_lock_arg)		bakery_lock_release(_lock_arg,	\
+						    CPU_DATA_PLAT_PCPU_OFFSET + \
+						    JUNO_CPU_DATA_LOCK_OFFSET)
+
+/*
+ * Ensure that the size of the Juno specific per-cpu data structure and the size
+ * of the memory allocated in generic per-cpu data for the platform are the same.
+ */
+CASSERT(PLAT_PCPU_DATA_SIZE == sizeof(juno_cpu_data_t),	\
+	juno_pcpu_data_size_mismatch);
+#endif /* USE_COHERENT_MEM */
+#else
+/*
+ * Dummy wrapper macros for all other BL stages other than BL3-1
+ */
+#define juno_lock_init(_lock_arg)
+#define juno_lock_get(_lock_arg)
+#define juno_lock_release(_lock_arg)
+
+#endif /* IMAGE_BL31 */
+
 /*******************************************************************************
  * Function and variable prototypes
  ******************************************************************************/
diff --git a/plat/juno/mhu.c b/plat/juno/mhu.c
index b6541a8873e8cc8ca95aead4adbf50413a7c2cd6..c1c414c29a7298a71044395c6d83f94edf7e768f 100644
--- a/plat/juno/mhu.c
+++ b/plat/juno/mhu.c
@@ -32,6 +32,7 @@
 #include <bakery_lock.h>
 #include <mmio.h>
 #include "juno_def.h"
+#include "juno_private.h"
 #include "mhu.h"
 
 /* SCP MHU secure channel registers */
@@ -44,13 +45,20 @@
 #define CPU_INTR_S_SET		0x308
 #define CPU_INTR_S_CLEAR	0x310
 
-
+#if IMAGE_BL31
+#if USE_COHERENT_MEM
 static bakery_lock_t mhu_secure_lock __attribute__ ((section("tzfw_coherent_mem")));
-
+#define LOCK_ARG		&mhu_secure_lock
+#else
+#define LOCK_ARG		JUNO_MHU_BAKERY_ID
+#endif /* USE_COHERENT_MEM */
+#else
+#define LOCK_ARG	/* Locks required only for BL3-1 images */
+#endif /* IMAGE_BL31 */
 
 void mhu_secure_message_start(void)
 {
-	bakery_lock_get(&mhu_secure_lock);
+	juno_lock_get(LOCK_ARG);
 
 	/* Make sure any previous command has finished */
 	while (mmio_read_32(MHU_BASE + CPU_INTR_S_STAT) != 0)
@@ -80,12 +88,12 @@ void mhu_secure_message_end(void)
 	/* Clear any response we got by writing all ones to the CLEAR register */
 	mmio_write_32(MHU_BASE + SCP_INTR_S_CLEAR, 0xffffffffu);
 
-	bakery_lock_release(&mhu_secure_lock);
+	juno_lock_release(LOCK_ARG);
 }
 
 void mhu_secure_init(void)
 {
-	bakery_lock_init(&mhu_secure_lock);
+	juno_lock_init(LOCK_ARG);
 
 	/*
 	 * Clear the CPU's INTR register to make sure we don't see a stale
diff --git a/plat/juno/platform.mk b/plat/juno/platform.mk
index 6ca219d98204d6d779cd32c6e93442415ebec2e8..158e3ace354863b9f59090c0e947cde2d5bd7a86 100644
--- a/plat/juno/platform.mk
+++ b/plat/juno/platform.mk
@@ -66,7 +66,6 @@ BL1_SOURCES		+=	drivers/arm/cci400/cci400.c		\
 				plat/juno/aarch64/juno_common.c
 
 BL2_SOURCES		+=	drivers/arm/tzc400/tzc400.c		\
-				lib/locks/bakery/bakery_lock.c		\
 				plat/common/aarch64/platform_up_stack.S	\
 				plat/juno/bl2_plat_setup.c		\
 				plat/juno/mhu.c				\
diff --git a/services/std_svc/psci/psci_common.c b/services/std_svc/psci/psci_common.c
index 155f842e26fb6f41360061bd4e0230e608f1208d..c984e9ecf46bd5a438310860396cd91a201cae9e 100644
--- a/services/std_svc/psci/psci_common.c
+++ b/services/std_svc/psci/psci_common.c
@@ -246,7 +246,8 @@ void psci_acquire_afflvl_locks(int start_afflvl,
 	for (level = start_afflvl; level <= end_afflvl; level++) {
 		if (mpidr_nodes[level] == NULL)
 			continue;
-		bakery_lock_get(&mpidr_nodes[level]->lock);
+
+		psci_lock_get(mpidr_nodes[level]);
 	}
 }
 
@@ -264,7 +265,8 @@ void psci_release_afflvl_locks(int start_afflvl,
 	for (level = end_afflvl; level >= start_afflvl; level--) {
 		if (mpidr_nodes[level] == NULL)
 			continue;
-		bakery_lock_release(&mpidr_nodes[level]->lock);
+
+		psci_lock_release(mpidr_nodes[level]);
 	}
 }
 
diff --git a/services/std_svc/psci/psci_private.h b/services/std_svc/psci/psci_private.h
index 24a5604e7dcf4aff3e45e9f729fd6158f56a28d0..9a8ef73de4c12750156660247d09c179bce38586 100644
--- a/services/std_svc/psci/psci_private.h
+++ b/services/std_svc/psci/psci_private.h
@@ -33,14 +33,22 @@
 
 #include <arch.h>
 #include <bakery_lock.h>
-#include <platform_def.h>	/* for PLATFORM_NUM_AFFS */
 #include <psci.h>
 
-/* Number of affinity instances whose state this psci imp. can track */
-#ifdef PLATFORM_NUM_AFFS
-#define PSCI_NUM_AFFS		PLATFORM_NUM_AFFS
+/*
+ * The following helper macros abstract the interface to the Bakery
+ * Lock API.
+ */
+#if USE_COHERENT_MEM
+#define psci_lock_init(aff_map, idx)	bakery_lock_init(&(aff_map)[(idx)].lock)
+#define psci_lock_get(node)		bakery_lock_get(&((node)->lock))
+#define psci_lock_release(node)		bakery_lock_release(&((node)->lock))
 #else
-#define PSCI_NUM_AFFS		(2 * PLATFORM_CORE_COUNT)
+#define psci_lock_init(aff_map, idx)	((aff_map)[(idx)].aff_map_index = (idx))
+#define psci_lock_get(node)		bakery_lock_get((node)->aff_map_index,	  \
+						CPU_DATA_PSCI_LOCK_OFFSET)
+#define psci_lock_release(node)		bakery_lock_release((node)->aff_map_index,\
+						CPU_DATA_PSCI_LOCK_OFFSET)
 #endif
 
 /*******************************************************************************
@@ -49,10 +57,15 @@
  ******************************************************************************/
 typedef struct aff_map_node {
 	unsigned long mpidr;
-	unsigned short ref_count;
+	unsigned char ref_count;
 	unsigned char state;
 	unsigned char level;
+#if USE_COHERENT_MEM
 	bakery_lock_t lock;
+#else
+	/* For indexing the bakery_info array in per CPU data */
+	unsigned char aff_map_index;
+#endif
 } aff_map_node_t;
 
 typedef struct aff_limits_node {
diff --git a/services/std_svc/psci/psci_setup.c b/services/std_svc/psci/psci_setup.c
index e0bc8331393414f2849a6e34869ab0667852ba91..a750256e349787f441f85be474be2c3252f53e07 100644
--- a/services/std_svc/psci/psci_setup.c
+++ b/services/std_svc/psci/psci_setup.c
@@ -181,7 +181,7 @@ static void psci_init_aff_map_node(unsigned long mpidr,
 	uint32_t linear_id;
 	psci_aff_map[idx].mpidr = mpidr;
 	psci_aff_map[idx].level = level;
-	bakery_lock_init(&psci_aff_map[idx].lock);
+	psci_lock_init(psci_aff_map, idx);
 
 	/*
 	 * If an affinity instance is present then mark it as OFF to begin with.