diff --git a/bl2/aarch64/bl2_entrypoint.S b/bl2/aarch64/bl2_entrypoint.S
index 987d30e04ef4fa2ab44fa448423991acfdff2095..1d2622974de1535ad684188a438fe58ae2950efd 100644
--- a/bl2/aarch64/bl2_entrypoint.S
+++ b/bl2/aarch64/bl2_entrypoint.S
@@ -81,6 +81,20 @@ func bl2_entrypoint
 	cmp	x0, x20
 	b.ne	_panic
 
+	/* ---------------------------------------------
+	 * Invalidate the RW memory used by the BL2
+	 * image. This includes the data and NOBITS
+	 * sections. This is done to safeguard against
+	 * possible corruption of this memory by dirty
+	 * cache lines in a system cache as a result of
+	 * use by an earlier boot loader stage.
+	 * ---------------------------------------------
+	 */
+	adr	x0, __RW_START__
+	adr	x1, __RW_END__
+	sub	x1, x1, x0
+	bl	inv_dcache_range
+
 	/* ---------------------------------------------
 	 * Zero out NOBITS sections. There are 2 of them:
 	 *   - the .bss section;
diff --git a/bl2/bl2.ld.S b/bl2/bl2.ld.S
index 33588e69806857e7a4de163131d3b1142f980134..a660bda63c51516ecd9ad2c83d15aa57714ea76e 100644
--- a/bl2/bl2.ld.S
+++ b/bl2/bl2.ld.S
@@ -68,6 +68,12 @@ SECTIONS
         __RO_END__ = .;
     } >RAM
 
+    /*
+     * Define a linker symbol to mark the start of the RW memory area for this
+     * image.
+     */
+    __RW_START__ = .;
+
     .data . : {
         __DATA_START__ = .;
         *(.data*)
@@ -121,6 +127,11 @@ SECTIONS
     } >RAM
 #endif
 
+    /*
+     * Define a linker symbol to mark the end of the RW memory area for this
+     * image.
+     */
+    __RW_END__ = .;
     __BL2_END__ = .;
 
     __BSS_SIZE__ = SIZEOF(.bss);
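
Note: __RW_START__/__RW_END__ only name addresses. A minimal sketch of how such
linker-defined symbols are typically consumed from C is shown below; the extern
declarations and the two helpers are illustrative only and not part of this
patch (the entry-point assembly above references the symbols directly with adr):

    #include <stdint.h>

    /* Linker-defined symbols carry no storage of their own; only their
     * addresses are meaningful, so declare them as incomplete arrays. */
    extern char __RW_START__[];
    extern char __RW_END__[];

    /* Hypothetical helpers, not part of the patch. */
    static inline uint64_t bl2_rw_base(void)
    {
            return (uint64_t)__RW_START__;
    }

    static inline uint64_t bl2_rw_size(void)
    {
            return (uint64_t)(__RW_END__ - __RW_START__);
    }
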
diff --git a/bl31/aarch64/bl31_entrypoint.S b/bl31/aarch64/bl31_entrypoint.S
index 5ba0f9cc4b60588196bf97edc3f7a407c55e56ab..636b1d287c6f5f63cf6ba369470dd14056f8440d 100644
--- a/bl31/aarch64/bl31_entrypoint.S
+++ b/bl31/aarch64/bl31_entrypoint.S
@@ -113,5 +113,22 @@ func bl31_entrypoint
 	 */
 	bl	bl31_main
 
+	/* -------------------------------------------------------------
+	 * Clean the .data & .bss sections to main memory. This ensures
+	 * that any global data which was initialised by the primary CPU
+	 * is visible to secondary CPUs before they enable their data
+	 * caches and participate in coherency.
+	 * -------------------------------------------------------------
+	 */
+	adr	x0, __DATA_START__
+	adr	x1, __DATA_END__
+	sub	x1, x1, x0
+	bl	clean_dcache_range
+
+	adr	x0, __BSS_START__
+	adr	x1, __BSS_END__
+	sub	x1, x1, x0
+	bl	clean_dcache_range
+
 	b	el3_exit
 endfunc bl31_entrypoint
diff --git a/bl31/bl31.ld.S b/bl31/bl31.ld.S
index 0639d8170afb13a4577f548b8fc185f31a8305fe..725079116738cbe6f51b3934f4c60848963f8e3a 100644
--- a/bl31/bl31.ld.S
+++ b/bl31/bl31.ld.S
@@ -81,6 +81,12 @@ SECTIONS
     ASSERT(__CPU_OPS_END__ > __CPU_OPS_START__,
            "cpu_ops not defined for this platform.")
 
+    /*
+     * Define a linker symbol to mark the start of the RW memory area for this
+     * image.
+     */
+    __RW_START__ = .;
+
     .data . : {
         __DATA_START__ = .;
         *(.data*)
@@ -165,6 +171,11 @@ SECTIONS
     } >RAM
 #endif
 
+    /*
+     * Define a linker symbol to mark the end of the RW memory area for this
+     * image.
+     */
+    __RW_END__ = .;
     __BL31_END__ = .;
 
     __BSS_SIZE__ = SIZEOF(.bss);
diff --git a/bl31/bl31_main.c b/bl31/bl31_main.c
index a1a371058373a43e19021ee8e5f64fac43d6b394..a244a5cc968a9799f2f1c7087b91e69653e04734 100644
--- a/bl31/bl31_main.c
+++ b/bl31/bl31_main.c
@@ -87,9 +87,6 @@ void bl31_main(void)
 	INFO("BL3-1: Initializing runtime services\n");
 	runtime_svc_init();
 
-	/* Clean caches before re-entering normal world */
-	dcsw_op_all(DCCSW);
-
 	/*
 	 * All the cold boot actions on the primary cpu are done. We now need to
 	 * decide which is the next image (BL32 or BL33) and how to execute it.
diff --git a/bl32/tsp/aarch64/tsp_entrypoint.S b/bl32/tsp/aarch64/tsp_entrypoint.S
index 4e8da7454fd16dfdb4b531f3cd9a81eec41cfd36..9732ff2cdf80200e0868ef9fe71500c95818a374 100644
--- a/bl32/tsp/aarch64/tsp_entrypoint.S
+++ b/bl32/tsp/aarch64/tsp_entrypoint.S
@@ -98,6 +98,20 @@ func tsp_entrypoint
 	msr	sctlr_el1, x0
 	isb
 
+	/* ---------------------------------------------
+	 * Invalidate the RW memory used by the BL32
+	 * image. This includes the data and NOBITS
+	 * sections. This is done to safeguard against
+	 * possible corruption of this memory by dirty
+	 * cache lines in a system cache as a result of
+	 * use by an earlier boot loader stage.
+	 * ---------------------------------------------
+	 */
+	adr	x0, __RW_START__
+	adr	x1, __RW_END__
+	sub	x1, x1, x0
+	bl	inv_dcache_range
+
 	/* ---------------------------------------------
 	 * Zero out NOBITS sections. There are 2 of them:
 	 *   - the .bss section;
diff --git a/bl32/tsp/tsp.ld.S b/bl32/tsp/tsp.ld.S
index d411ad021219dfba6566abedfc2cc2874b38c928..41c4b4aa18df90c889408bc6ec1abab22b14748c 100644
--- a/bl32/tsp/tsp.ld.S
+++ b/bl32/tsp/tsp.ld.S
@@ -62,6 +62,12 @@ SECTIONS
         __RO_END__ = .;
     } >RAM
 
+    /*
+     * Define a linker symbol to mark the start of the RW memory area for this
+     * image.
+     */
+    __RW_START__ = .;
+
     .data . : {
         __DATA_START__ = .;
         *(.data*)
@@ -119,6 +125,11 @@ SECTIONS
     } >RAM
 #endif
 
+    /*
+     * Define a linker symbol to mark the end of the RW memory area for this
+     * image.
+     */
+    __RW_END__ = .;
     __BL32_END__ = .;
 
     __BSS_SIZE__ = SIZEOF(.bss);
diff --git a/include/common/el3_common_macros.S b/include/common/el3_common_macros.S
index 7946e728c5afbb88695ab234657773aeceb37a8f..87e172e84063682e9996441ddd9a8464b0eb5034 100644
--- a/include/common/el3_common_macros.S
+++ b/include/common/el3_common_macros.S
@@ -214,6 +214,21 @@
 	 * ---------------------------------------------------------------------
 	 */
 	.if \_init_c_runtime
+#if IMAGE_BL31
+		/* -------------------------------------------------------------
+		 * Invalidate the RW memory used by the BL31 image. This
+		 * includes the data and NOBITS sections. This is done to
+		 * safeguard against possible corruption of this memory by
+		 * dirty cache lines in a system cache as a result of use by
+		 * an earlier boot loader stage.
+		 * -------------------------------------------------------------
+		 */
+		adr	x0, __RW_START__
+		adr	x1, __RW_END__
+		sub	x1, x1, x0
+		bl	inv_dcache_range
+#endif /* IMAGE_BL31 */
+
 		ldr	x0, =__BSS_START__
 		ldr	x1, =__BSS_SIZE__
 		bl	zeromem16
diff --git a/include/lib/aarch64/arch_helpers.h b/include/lib/aarch64/arch_helpers.h
index b7ab3da69ab431f90b65327d8e3114fa97f76d51..d01ea315417d3ccba2193f24bd9be7c394df9220 100644
--- a/include/lib/aarch64/arch_helpers.h
+++ b/include/lib/aarch64/arch_helpers.h
@@ -145,6 +145,7 @@ DEFINE_SYSOP_TYPE_PARAM_FUNC(at, s12e0r)
 DEFINE_SYSOP_TYPE_PARAM_FUNC(at, s12e0w)
 
 void flush_dcache_range(uint64_t, uint64_t);
+void clean_dcache_range(uint64_t, uint64_t);
 void inv_dcache_range(uint64_t, uint64_t);
 void dcsw_op_louis(uint32_t);
 void dcsw_op_all(uint32_t);
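
Note: a usage sketch for the by-MVA helpers declared above. The mailbox buffer
and both functions below are hypothetical; only the clean_dcache_range() and
inv_dcache_range() prototypes come from this header. This is the typical pairing
when sharing data with an observer that is not (yet) cache coherent:

    #include <stdint.h>
    #include <arch_helpers.h>

    static uint8_t mailbox[64]; /* hypothetical buffer shared with another observer */

    void publish_mailbox(void)
    {
            /* Producer: write the data, then clean it to main memory so a
             * not-yet-coherent observer reads what was actually written. */
            clean_dcache_range((uint64_t)mailbox, sizeof(mailbox));
    }

    void consume_mailbox(void)
    {
            /* Consumer: discard any stale cached copy before reading data
             * written behind this CPU's back. */
            inv_dcache_range((uint64_t)mailbox, sizeof(mailbox));
    }
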
diff --git a/lib/aarch64/cache_helpers.S b/lib/aarch64/cache_helpers.S
index 0dbab1bdf561955cedb9672b9c0da8cdb875001c..476b906e068e02933377a88cfe77ca62aafdbf9d 100644
--- a/lib/aarch64/cache_helpers.S
+++ b/lib/aarch64/cache_helpers.S
@@ -32,6 +32,7 @@
 #include <asm_macros.S>
 
 	.globl	flush_dcache_range
+	.globl	clean_dcache_range
 	.globl	inv_dcache_range
 	.globl	dcsw_op_louis
 	.globl	dcsw_op_all
@@ -39,25 +40,39 @@
 	.globl	dcsw_op_level2
 	.globl	dcsw_op_level3
 
-	/* ------------------------------------------
-	 * Clean+Invalidate from base address till
-	 * size. 'x0' = addr, 'x1' = size
-	 * ------------------------------------------
-	 */
-func flush_dcache_range
+/*
+ * This macro implements the data cache maintenance operation `op` by MVA.
+ */
+.macro do_dcache_maintenance_by_mva op
 	dcache_line_size x2, x3
 	add	x1, x0, x1
 	sub	x3, x2, #1
 	bic	x0, x0, x3
-flush_loop:
-	dc	civac, x0
+loop_\op:
+	dc	\op, x0
 	add	x0, x0, x2
 	cmp	x0, x1
-	b.lo    flush_loop
+	b.lo    loop_\op
 	dsb	sy
 	ret
+.endm
+	/* ------------------------------------------
+	 * Clean+Invalidate from base address till
+	 * size. 'x0' = addr, 'x1' = size
+	 * ------------------------------------------
+	 */
+func flush_dcache_range
+	do_dcache_maintenance_by_mva civac
 endfunc flush_dcache_range
 
+	/* ------------------------------------------
+	 * Clean from base address till size.
+	 * 'x0' = addr, 'x1' = size
+	 * ------------------------------------------
+	 */
+func clean_dcache_range
+	do_dcache_maintenance_by_mva cvac
+endfunc clean_dcache_range
 
 	/* ------------------------------------------
 	 * Invalidate from base address till
@@ -65,17 +80,7 @@ endfunc flush_dcache_range
 	 * ------------------------------------------
 	 */
 func inv_dcache_range
-	dcache_line_size x2, x3
-	add	x1, x0, x1
-	sub	x3, x2, #1
-	bic	x0, x0, x3
-inv_loop:
-	dc	ivac, x0
-	add	x0, x0, x2
-	cmp	x0, x1
-	b.lo    inv_loop
-	dsb	sy
-	ret
+	do_dcache_maintenance_by_mva ivac
 endfunc inv_dcache_range
 
 
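Note: a C rendering of the loop that do_dcache_maintenance_by_mva generates, as a
sketch only. dc_op() stands in for the 'dc <op>, xN' instruction, line_size for the
cache line size computed by the dcache_line_size macro, and the trailing DSB is
shown as a comment; the real routine stays in assembly because, as the entry-point
changes above show, it is also called before the C runtime is set up.

    #include <stdint.h>

    /* Align the base down to a cache line, issue one 'dc <op>' per line
     * covering [addr, addr + size), then order the maintenance with a DSB. */
    static void dcache_op_by_mva(uint64_t addr, uint64_t size,
                                 void (*dc_op)(uint64_t mva), uint64_t line_size)
    {
            uint64_t end = addr + size;

            addr &= ~(line_size - 1ULL);
            while (addr < end) {
                    dc_op(addr);
                    addr += line_size;
            }
            /* asm volatile("dsb sy"); follows in the assembly version */
    }
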
diff --git a/lib/aarch64/misc_helpers.S b/lib/aarch64/misc_helpers.S
index 5f80b597deadc44e4efe3f6c87a246c7e2007231..e7c246ea2374383320fb459839fdf53553ad697f 100644
--- a/lib/aarch64/misc_helpers.S
+++ b/lib/aarch64/misc_helpers.S
@@ -141,9 +141,6 @@ endfunc memcpy16
 
 /* ---------------------------------------------------------------------------
  * Disable the MMU at EL3
- * This is implemented in assembler to ensure that the data cache is cleaned
- * and invalidated after the MMU is disabled without any intervening cacheable
- * data accesses
  * ---------------------------------------------------------------------------
  */
 
@@ -154,8 +151,8 @@ do_disable_mmu:
 	bic	x0, x0, x1
 	msr	sctlr_el3, x0
 	isb				// ensure MMU is off
-	mov	x0, #DCCISW		// DCache clean and invalidate
-	b	dcsw_op_all
+	dsb	sy
+	ret
 endfunc disable_mmu_el3
 
 
diff --git a/services/std_svc/psci/psci_on.c b/services/std_svc/psci/psci_on.c
index cf1a782a51fa2738315dac63ce2641f0c2cce595..c37adc2efab56b42a88f2768664bfa8c94f9d725 100644
--- a/services/std_svc/psci/psci_on.c
+++ b/services/std_svc/psci/psci_on.c
@@ -203,7 +203,4 @@ void psci_cpu_on_finish(unsigned int cpu_idx,
 	 * call to set this cpu on its way.
 	 */
 	cm_prepare_el3_exit(NON_SECURE);
-
-	/* Clean caches before re-entering normal world */
-	dcsw_op_louis(DCCSW);
 }
diff --git a/services/std_svc/psci/psci_setup.c b/services/std_svc/psci/psci_setup.c
index 7a80187398d07ba2afdbeda3dc5002856d7bc44d..cd1bb09236c59a9d841c33537eb3c81b746c12d1 100644
--- a/services/std_svc/psci/psci_setup.c
+++ b/services/std_svc/psci/psci_setup.c
@@ -221,18 +221,6 @@ int psci_setup(void)
 	psci_cpu_pd_nodes[plat_my_core_pos()].mpidr =
 		read_mpidr() & MPIDR_AFFINITY_MASK;
 
-#if !USE_COHERENT_MEM
-	/*
-	 * The psci_non_cpu_pd_nodes only needs flushing when it's not allocated in
-	 * coherent memory.
-	 */
-	flush_dcache_range((uintptr_t) &psci_non_cpu_pd_nodes,
-			   sizeof(psci_non_cpu_pd_nodes));
-#endif
-
-	flush_dcache_range((uintptr_t) &psci_cpu_pd_nodes,
-			   sizeof(psci_cpu_pd_nodes));
-
 	psci_init_req_local_pwr_states();
 
 	/*
diff --git a/services/std_svc/psci/psci_suspend.c b/services/std_svc/psci/psci_suspend.c
index 675ef9e26ad3b379ee50faf27ecb64a96345ec71..bd0c5dbcd751334fcc9287d3b91bd9de9947beae 100644
--- a/services/std_svc/psci/psci_suspend.c
+++ b/services/std_svc/psci/psci_suspend.c
@@ -261,7 +261,4 @@ void psci_cpu_suspend_finish(unsigned int cpu_idx,
 	 * call to set this cpu on its way.
 	 */
 	cm_prepare_el3_exit(NON_SECURE);
-
-	/* Clean caches before re-entering normal world */
-	dcsw_op_louis(DCCSW);
 }