socfpga: arria10: Improve bitstream loading speed

Apply some optimizations to speed up bitstream loading
(both for full and split periph/core bitstreams):

 * Change the size of the first fs read, so that all the subsequent
   reads are aligned to a specific value (called MAX_FIRST_LOAD_SIZE).
   This value was chosen so that in subsequent reads the fat fs driver
   doesn't have to allocate a temporary buffer in get_contents
   (assuming 8KiB clusters).

 * Change the buffer size to a larger value when reading to ddr
   (but not too large, because large transfers cause a stack overflow
   in the dwmmc driver).

Signed-off-by: Paweł Anikiel <pan@semihalf.com>
Reviewed-by: Simon Glass <sjg@chromium.org>
diff --git a/drivers/fpga/socfpga_arria10.c b/drivers/fpga/socfpga_arria10.c
index 798e3a3..07bfe30 100644
--- a/drivers/fpga/socfpga_arria10.c
+++ b/drivers/fpga/socfpga_arria10.c
@@ -30,6 +30,14 @@
 #define FPGA_TIMEOUT_MSEC	1000  /* timeout in ms */
 #define FPGA_TIMEOUT_CNT	0x1000000
 #define DEFAULT_DDR_LOAD_ADDRESS	0x400
+#define DDR_BUFFER_SIZE		0x100000
+
+/* When reading bitstream from a filesystem, the size of the first read is
+ * changed so that the subsequent reads are aligned to this value. This value
+ * was chosen so that in subsequent reads the fat fs driver doesn't have to
+ * allocate a temporary buffer in get_contents (assuming 8KiB clusters).
+ */
+#define MAX_FIRST_LOAD_SIZE	0x2000
 
 DECLARE_GLOBAL_DATA_PTR;
 
@@ -526,7 +534,8 @@
 #ifdef CONFIG_FS_LOADER
 static int first_loading_rbf_to_buffer(struct udevice *dev,
 				struct fpga_loadfs_info *fpga_loadfs,
-				u32 *buffer, size_t *buffer_bsize)
+				u32 *buffer, size_t *buffer_bsize,
+				size_t *buffer_bsize_ori)
 {
 	u32 *buffer_p = (u32 *)*buffer;
 	u32 *loadable = buffer_p;
@@ -674,6 +683,7 @@
 		}
 
 		buffer_size = rbf_size;
+		*buffer_bsize_ori = DDR_BUFFER_SIZE;
 	}
 
 	debug("FPGA: External data: offset = 0x%x, size = 0x%x.\n",
@@ -686,11 +696,16 @@
 	 * chunk by chunk transfer is required due to smaller buffer size
 	 * compare to bitstream
 	 */
+
+	if (buffer_size > MAX_FIRST_LOAD_SIZE)
+		buffer_size = MAX_FIRST_LOAD_SIZE;
+
 	if (rbf_size <= buffer_size) {
 		/* Loading whole bitstream into buffer */
 		buffer_size = rbf_size;
 		fpga_loadfs->remaining = 0;
 	} else {
+		buffer_size -= rbf_offset % buffer_size;
 		fpga_loadfs->remaining -= buffer_size;
 	}
 
@@ -806,7 +821,8 @@
 	 * function below.
 	 */
 	ret = first_loading_rbf_to_buffer(dev, &fpga_loadfs, &buffer,
-					   &buffer_sizebytes);
+					   &buffer_sizebytes,
+					   &buffer_sizebytes_ori);
 	if (ret == 1) {
 		printf("FPGA: Skipping configuration ...\n");
 		return 0;