arm64: optimize __asm_{flush,invalidate}_dcache_all

__asm_dcache_all can directly return to the caller of
__asm_{flush,invalidate}_dcache_all.

We do not have to waste the x16 register here.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: York Sun <york.sun@nxp.com>
diff --git a/arch/arm/cpu/armv8/cache.S b/arch/arm/cpu/armv8/cache.S
index a9f4fec..1c71a2f 100644
--- a/arch/arm/cpu/armv8/cache.S
+++ b/arch/arm/cpu/armv8/cache.S
@@ -104,19 +104,13 @@
 ENDPROC(__asm_dcache_all)
 
 ENTRY(__asm_flush_dcache_all)
-	mov	x16, lr
 	mov	x0, #0
-	bl	__asm_dcache_all
-	mov	lr, x16
-	ret
+	b	__asm_dcache_all
 ENDPROC(__asm_flush_dcache_all)
 
 ENTRY(__asm_invalidate_dcache_all)
-	mov	x16, lr
 	mov	x0, #0x1
-	bl	__asm_dcache_all
-	mov	lr, x16
-	ret
+	b	__asm_dcache_all
 ENDPROC(__asm_invalidate_dcache_all)
 
 /*