imx6: Added DEK blob generator command

Freescale's SEC block has a built-in Data Encryption
Key (DEK) Blob Protocol which provides a method for
protecting a DEK for non-secure memory storage.
The SEC block protects data in a data structure called
a Secret Key Blob, which provides both confidentiality
and integrity protection.
Every time the blob encapsulation is executed,
an AES-256 key is randomly generated to encrypt the DEK.
This key is encrypted with the OTP Secret key
from the SoC. The resulting blob consists of the encrypted
AES-256 key, the encrypted DEK, and a 16-byte MAC.

During decapsulation, the reverse process is performed
to get back the original DEK. A caveat of the blob
decapsulation process is that the DEK is decrypted
in secure memory and can only be read by the FSL SEC HW.
The DEK is used to decrypt data during encrypted boot.

Commands added
--------------
  dek_blob - encapsulating DEK as a cryptographic blob

Commands Syntax
---------------
  dek_blob src dst len

    Encapsulate and create blob of a len-bits DEK at
    address src and store the result at address dst.

Signed-off-by: Raul Cardenas <Ulises.Cardenas@freescale.com>
Signed-off-by: Nitin Garg <nitin.garg@freescale.com>

Signed-off-by: Ulises Cardenas <ulises.cardenas@freescale.com>

Signed-off-by: Ulises Cardenas-B45798 <Ulises.Cardenas@freescale.com>
diff --git a/drivers/crypto/fsl/Makefile b/drivers/crypto/fsl/Makefile
index c0cf642..4aa91e4 100644
--- a/drivers/crypto/fsl/Makefile
+++ b/drivers/crypto/fsl/Makefile
@@ -8,5 +8,8 @@
 
 obj-y += sec.o
 obj-$(CONFIG_FSL_CAAM) += jr.o fsl_hash.o jobdesc.o error.o
+# One rule per option: a combined obj-$(A)$(B) expands to obj-yy when both
+# options are enabled, and fsl_blob.o would then be left out of the build.
 obj-$(CONFIG_CMD_BLOB) += fsl_blob.o
+obj-$(CONFIG_CMD_DEKBLOB) += fsl_blob.o
 obj-$(CONFIG_RSA_FREESCALE_EXP) += fsl_rsa.o
diff --git a/drivers/crypto/fsl/desc.h b/drivers/crypto/fsl/desc.h
index 504f2b0..18e2ec8 100644
--- a/drivers/crypto/fsl/desc.h
+++ b/drivers/crypto/fsl/desc.h
@@ -12,11 +12,18 @@
 #ifndef DESC_H
 #define DESC_H
 
+#define KEY_BLOB_SIZE		32
+#define MAC_SIZE			16
+
 /* Max size of any CAAM descriptor in 32-bit words, inclusive of header */
 #define MAX_CAAM_DESCSIZE	64
 
+/* Size of DEK Blob descriptor in 32-bit words, inclusive of header */
+#define DEK_BLOB_DESCSIZE	9
+
 /* Block size of any entity covered/uncovered with a KEK/TKEK */
 #define KEK_BLOCKSIZE		16
+
 /*
  * Supported descriptor command types as they show up
  * inside a descriptor command word.
@@ -273,6 +280,13 @@
 #define LDLEN_SET_OFIFO_OFFSET_MASK	(3 << LDLEN_SET_OFIFO_OFFSET_SHIFT)
 
 /*
+ * AAD Definitions
+ */
+#define AES_KEY_SHIFT		8
+#define LD_CCM_MODE		0x66
+#define KEY_AES_SRC		(0x55 << AES_KEY_SHIFT)
+
+/*
  * FIFO_LOAD/FIFO_STORE/SEQ_FIFO_LOAD/SEQ_FIFO_STORE
  * Command Constructs
  */
@@ -418,6 +432,7 @@
 #define OP_PCLID_MASK		(0xff << 16)
 
 /* Assuming OP_TYPE = OP_TYPE_UNI_PROTOCOL */
+#define OP_PCLID_SECMEM		0x08
 #define OP_PCLID_BLOB		(0x0d << OP_PCLID_SHIFT)
 #define OP_PCLID_SECRETKEY	(0x11 << OP_PCLID_SHIFT)
 #define OP_PCLID_PUBLICKEYPAIR	(0x14 << OP_PCLID_SHIFT)
diff --git a/drivers/crypto/fsl/fsl_blob.c b/drivers/crypto/fsl/fsl_blob.c
index 9923bcb..8b25921 100644
--- a/drivers/crypto/fsl/fsl_blob.c
+++ b/drivers/crypto/fsl/fsl_blob.c
@@ -7,6 +7,8 @@
 
 #include <common.h>
 #include <malloc.h>
+#include <fsl_sec.h>
+#include <asm-generic/errno.h>
 #include "jobdesc.h"
 #include "desc.h"
 #include "jr.h"
@@ -59,3 +61,53 @@
 	free(desc);
 	return ret;
 }
+
+#ifdef CONFIG_CMD_DEKBLOB
+/* Encapsulate the len-byte DEK at src into a blob at dst and hex-dump it */
+int blob_dek(const u8 *src, u8 *dst, u8 len)
+{
+	int out_sz = WRP_HDR_SIZE + len + KEY_BLOB_SIZE + MAC_SIZE;
+	unsigned long dst_addr = (unsigned long)dst;
+	int ret, span, idx;
+	u32 *desc;
+
+	puts("\nEncapsulating provided DEK to form blob\n");
+	desc = memalign(ARCH_DMA_MINALIGN,
+			sizeof(uint32_t) * DEK_BLOB_DESCSIZE);
+	if (!desc) {
+		debug("Not enough memory for descriptor allocation\n");
+		return -ENOMEM;
+	}
+
+	ret = inline_cnstr_jobdesc_blob_dek(desc, src, dst, len);
+	if (!ret) {
+		span = roundup(sizeof(uint32_t) * DEK_BLOB_DESCSIZE,
+			       ARCH_DMA_MINALIGN);
+		flush_dcache_range((unsigned long)desc,
+				   (unsigned long)desc + span);
+		span = roundup(sizeof(uint8_t) * out_sz, ARCH_DMA_MINALIGN);
+		flush_dcache_range(dst_addr, dst_addr + span);
+		ret = run_descriptor_jr(desc);
+	} else {
+		debug("Error in Job Descriptor Construction:  %d\n", ret);
+	}
+
+	if (ret) {
+		debug("Error in Encapsulation %d\n", ret);
+		goto err;
+	}
+
+	/* Invalidate the cached view of dst before dumping the blob */
+	span = roundup(out_sz, ARCH_DMA_MINALIGN);
+	invalidate_dcache_range(dst_addr, dst_addr + span);
+
+	puts("DEK Blob\n");
+	for (idx = 0; idx < out_sz; idx++)
+		printf("%02X", dst[idx]);
+	printf("\n");
+
+err:
+	free(desc);
+	return ret;
+}
+#endif
diff --git a/drivers/crypto/fsl/jobdesc.c b/drivers/crypto/fsl/jobdesc.c
index cc0dced..5695bef 100644
--- a/drivers/crypto/fsl/jobdesc.c
+++ b/drivers/crypto/fsl/jobdesc.c
@@ -9,12 +9,157 @@
  */
 
 #include <common.h>
+#include <fsl_sec.h>
 #include "desc_constr.h"
 #include "jobdesc.h"
 #include "rsa_caam.h"
 
-#define KEY_BLOB_SIZE			32
-#define MAC_SIZE			16
+#ifdef CONFIG_MX6
+/*!
+ * Issue a secure memory command and poll until CMD_COMPLETE reads clear
+ *
+ * @param   sec_mem_cmd  Value written to the command register CAAM_SMCJR0
+ * @return  Last value read from the status register CAAM_SMCSJR0
+ */
+uint32_t secmem_set_cmd(uint32_t sec_mem_cmd)
+{
+	uint32_t temp_reg;
+
+	sec_out32(CAAM_SMCJR0, sec_mem_cmd);
+
+	do {
+		temp_reg = sec_in32(CAAM_SMCSJR0);
+	} while (temp_reg & CMD_COMPLETE);
+
+	return temp_reg;
+}
+
+/*!
+ * CAAM page allocation:
+ * Allocates a partition from secure memory, with the id
+ * equal to partition_num. A partition or page that is already
+ * owned is de-allocated first. The partition is given full
+ * access permissions, which are set before running a job
+ * descriptor. A memory page of secure RAM
+ * is allocated for the partition.
+ *
+ * @param   page_num  Number of the page to allocate.
+ * @param   partition_num  Number of the partition to allocate.
+ * @return  0 on success, ERROR_IN_PAGE_ALLOC otherwise
+ */
+int caam_page_alloc(uint8_t page_num, uint8_t partition_num)
+{
+	uint32_t temp_reg;
+
+	/*
+	 * De-Allocate partition_num if already allocated to ARM core
+	 */
+	if (sec_in32(CAAM_SMPO_0) & PARTITION_OWNER(partition_num)) {
+		temp_reg = secmem_set_cmd(PARTITION(partition_num) |
+						CMD_PART_DEALLOC);
+		if (temp_reg & SMCSJR_AERR) {
+			printf("Error: De-allocation status 0x%X\n", temp_reg);
+			return ERROR_IN_PAGE_ALLOC;
+		}
+	}
+
+	/* set the access rights to allow full access */
+	sec_out32(CAAM_SMAG1JR0(partition_num), 0xF);
+	sec_out32(CAAM_SMAG2JR0(partition_num), 0xF);
+	sec_out32(CAAM_SMAPJR0(partition_num), 0xFF);
+
+	/* Now need to allocate page_num of secure RAM to the partition. */
+	/* De-Allocate page_num by starting with a page inquiry command */
+	temp_reg = secmem_set_cmd(PAGE(page_num) | CMD_INQUIRY);
+
+	/* if the page is owned, de-allocate it */
+	if ((temp_reg & SMCSJR_PO) == PAGE_OWNED) {
+		temp_reg = secmem_set_cmd(PAGE(page_num) | CMD_PAGE_DEALLOC);
+		if (temp_reg & SMCSJR_AERR) {
+			printf("Error: De-allocation status 0x%X\n", temp_reg);
+			return ERROR_IN_PAGE_ALLOC;
+		}
+	}
+
+	/* Allocate page_num to partition_num */
+	temp_reg = secmem_set_cmd(PAGE(page_num) | PARTITION(partition_num)
+						| CMD_PAGE_ALLOC);
+	if (temp_reg & SMCSJR_AERR) {
+		printf("Error: Allocation status 0x%X\n", temp_reg);
+		return ERROR_IN_PAGE_ALLOC;
+	}
+	/* page inquiry command to ensure that the page was allocated */
+	temp_reg = secmem_set_cmd(PAGE(page_num) | CMD_INQUIRY);
+
+	/* if the page is not owned => problem */
+	if ((temp_reg & SMCSJR_PO) != PAGE_OWNED) {
+		printf("Allocation of page %d in partition %d failed 0x%X\n",
+		       page_num, partition_num, temp_reg);
+
+		return ERROR_IN_PAGE_ALLOC;
+	}
+
+	return 0;
+}
+
+int inline_cnstr_jobdesc_blob_dek(uint32_t *desc, const uint8_t *plain_txt,
+				       uint8_t *dek_blob, uint32_t in_sz)
+{
+	uint32_t ret = 0;
+	u32 aad_w1, aad_w2;
+	/* output blob will have a KEY_BLOB_SIZE byte key blob in beginning
+	 * and a MAC_SIZE byte MAC at end of data blob */
+	uint32_t out_sz = in_sz + KEY_BLOB_SIZE + MAC_SIZE;
+	/* 8-byte header; total length and in_sz are each stored in one byte */
+	uint8_t wrapped_key_hdr[8] = {HDR_TAG, 0x00, WRP_HDR_SIZE + out_sz,
+			     HDR_PAR, HAB_MOD, HAB_ALG, in_sz, HAB_FLG};
+
+	/* zero the header (8 bytes) and the out_sz bytes that follow it */
+	memset(dek_blob, 0, out_sz + 8);
+	/* Copy the header into the DEK blob buffer */
+	memcpy(dek_blob, wrapped_key_hdr, sizeof(wrapped_key_hdr));
+
+	/* allocate PAGE_1 of secure memory to PARTITION_1 */
+	ret = caam_page_alloc(PAGE_1, PARTITION_1);
+	if (ret)
+		return ret;
+
+	/* Write DEK to secure memory */
+	memcpy((uint32_t *)SEC_MEM_PAGE1, (uint32_t *)plain_txt, in_sz);
+	/* flush 0x1000 bytes from the aligned-down start of that page */
+	unsigned long start = (unsigned long)SEC_MEM_PAGE1 &
+				~(ARCH_DMA_MINALIGN - 1);
+	unsigned long end = ALIGN(start + 0x1000, ARCH_DMA_MINALIGN);
+	flush_dcache_range(start, end);
+
+	/* Now configure the access rights of the partition */
+	sec_out32(CAAM_SMAG1JR0(PARTITION_1), KS_G1); /* set group 1 */
+	sec_out32(CAAM_SMAG2JR0(PARTITION_1), 0);     /* clear group 2 */
+	sec_out32(CAAM_SMAPJR0(PARTITION_1), PERM);   /* set perm & locks */
+
+	/* construct aad for AES */
+	aad_w1 = (in_sz << OP_ALG_ALGSEL_SHIFT) | KEY_AES_SRC | LD_CCM_MODE;
+	aad_w2 = 0x0;
+
+	init_job_desc(desc, 0);
+
+	append_cmd(desc, CMD_LOAD | CLASS_2 | KEY_IMM | KEY_ENC |
+				(0x0c << LDST_OFFSET_SHIFT) | 0x08);
+
+	append_u32(desc, aad_w1);
+
+	append_u32(desc, aad_w2);
+
+	append_cmd_ptr(desc, (dma_addr_t)SEC_MEM_PAGE1, in_sz, CMD_SEQ_IN_PTR);
+	/* output starts at dek_blob + 8, right after the copied header */
+	append_cmd_ptr(desc, (dma_addr_t)dek_blob + 8, out_sz, CMD_SEQ_OUT_PTR);
+
+	append_operation(desc, OP_TYPE_ENCAP_PROTOCOL | OP_PCLID_BLOB |
+						OP_PCLID_SECMEM);
+
+	return ret;
+}
+#endif
 
 void inline_cnstr_jobdesc_hash(uint32_t *desc,
 			  const uint8_t *msg, uint32_t msgsz, uint8_t *digest,
diff --git a/drivers/crypto/fsl/jobdesc.h b/drivers/crypto/fsl/jobdesc.h
index 84b3edd..112404c 100644
--- a/drivers/crypto/fsl/jobdesc.h
+++ b/drivers/crypto/fsl/jobdesc.h
@@ -14,6 +14,20 @@
 
 #define KEY_IDNFR_SZ_BYTES		16
 
+#ifdef CONFIG_CMD_DEKBLOB
+/* inline_cnstr_jobdesc_blob_dek:
+ * Initializes and constructs the job descriptor for DEK encapsulation
+ * using the given parameters.
+ * @desc: reference to the job descriptor
+ * @plain_txt: reference to the DEK
+ * @enc_blob: reference where to store the blob
+ * @in_sz: size in bytes of the DEK
+ * @return: 0 on success, ERROR_IN_PAGE_ALLOC if secure memory setup fails
+ */
+int inline_cnstr_jobdesc_blob_dek(uint32_t *desc, const uint8_t *plain_txt,
+				uint8_t *enc_blob, uint32_t in_sz);
+#endif
+
 void inline_cnstr_jobdesc_hash(uint32_t *desc,
 			  const uint8_t *msg, uint32_t msgsz, uint8_t *digest,
 			  u32 alg_type, uint32_t alg_size, int sg_tbl);
diff --git a/drivers/crypto/fsl/jr.c b/drivers/crypto/fsl/jr.c
index f9d4938..f99d594 100644
--- a/drivers/crypto/fsl/jr.c
+++ b/drivers/crypto/fsl/jr.c
@@ -90,11 +90,13 @@
 	jr.liodn = DEFAULT_JR_LIODN;
 #endif
 	jr.size = JR_SIZE;
-	jr.input_ring = (dma_addr_t *)malloc(JR_SIZE * sizeof(dma_addr_t));
+	jr.input_ring = (dma_addr_t *)memalign(ARCH_DMA_MINALIGN,
+				JR_SIZE * sizeof(dma_addr_t));
 	if (!jr.input_ring)
 		return -1;
 	jr.output_ring =
-	    (struct op_ring *)malloc(JR_SIZE * sizeof(struct op_ring));
+	    (struct op_ring *)memalign(ARCH_DMA_MINALIGN,
+				JR_SIZE * sizeof(struct op_ring));
 	if (!jr.output_ring)
 		return -1;
 
@@ -163,13 +165,23 @@
 	    CIRC_SPACE(jr.head, jr.tail, jr.size) <= 0)
 		return -1;
 
-	jr.input_ring[head] = desc_phys_addr;
 	jr.info[head].desc_phys_addr = desc_phys_addr;
 	jr.info[head].desc_addr = (uint32_t)desc_addr;
 	jr.info[head].callback = (void *)callback;
 	jr.info[head].arg = arg;
 	jr.info[head].op_done = 0;
 
+	unsigned long start = (unsigned long)&jr.info[head] &
+					~(ARCH_DMA_MINALIGN - 1);
+	unsigned long end = ALIGN(start + sizeof(struct jr_info),
+					ARCH_DMA_MINALIGN);
+	flush_dcache_range(start, end);
+
+	jr.input_ring[head] = desc_phys_addr;
+	start = (unsigned long)&jr.input_ring[head] & ~(ARCH_DMA_MINALIGN - 1);
+	end = ALIGN(start + sizeof(dma_addr_t), ARCH_DMA_MINALIGN);
+	flush_dcache_range(start, end);
+
 	jr.head = (head + 1) & (jr.size - 1);
 
 	sec_out32(&regs->irja, 1);
@@ -187,6 +199,13 @@
 	void *arg = NULL;
 
 	while (sec_in32(&regs->orsf) && CIRC_CNT(jr.head, jr.tail, jr.size)) {
+		unsigned long start = (unsigned long)jr.output_ring &
+					~(ARCH_DMA_MINALIGN - 1);
+		unsigned long end = ALIGN(start +
+					  sizeof(struct op_ring)*JR_SIZE,
+					  ARCH_DMA_MINALIGN);
+		invalidate_dcache_range(start, end);
+
 		found = 0;
 
 		dma_addr_t op_desc = jr.output_ring[jr.tail].desc;
@@ -333,13 +352,17 @@
 
 	memset(&op, 0, sizeof(struct result));
 
-	desc = malloc(sizeof(int) * 6);
+	desc = memalign(ARCH_DMA_MINALIGN, sizeof(uint32_t) * 6);
 	if (!desc) {
 		printf("cannot allocate RNG init descriptor memory\n");
 		return -1;
 	}
 
 	inline_cnstr_jobdesc_rng_instantiation(desc);
+	int size = roundup(sizeof(uint32_t) * 6, ARCH_DMA_MINALIGN);
+	flush_dcache_range((unsigned long)desc,
+			   (unsigned long)desc + size);
+
 	ret = run_descriptor_jr(desc);
 
 	if (ret)