x86: Use fs for global data

Use the base address of the 'F' segment as a pointer to the global data
structure. By adding the linear address (i.e. the 'D' segment address) as
the first word of the global data structure, the address of the global data
relative to the 'D' segment can be found simply, for example, by:

	fs movl 0, %eax

This makes the gd 'pointer' writable prior to relocation (by reloading the
Global Desctriptor Table) which brings x86 into line with all other arches

NOTE: Writing to the gd 'pointer' is expensive (but we only do it
twice) but using it to access global data members (read and write) is
still fairly cheap

--
Changes for v2:
 - Rebased against changes made to patch #3
 - Removed extra indent
 - Tweaked commit message
diff --git a/arch/x86/cpu/cpu.c b/arch/x86/cpu/cpu.c
index 8102fb9..8c3b92c 100644
--- a/arch/x86/cpu/cpu.c
+++ b/arch/x86/cpu/cpu.c
@@ -90,6 +90,37 @@
 	asm volatile("lgdtl %0\n" : : "m" (gdt));
 }
 
+void init_gd(gd_t *id, u64 *gdt_addr)
+{
+	id->gd_addr = (ulong)id;
+	setup_gdt(id, gdt_addr);
+}
+
+void setup_gdt(gd_t *id, u64 *gdt_addr)
+{
+	/* CS: code, read/execute, 4 GB, base 0 */
+	gdt_addr[X86_GDT_ENTRY_32BIT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff);
+
+	/* DS: data, read/write, 4 GB, base 0 */
+	gdt_addr[X86_GDT_ENTRY_32BIT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff);
+
+	/* FS: data, read/write, 4 GB, base (Global Data Pointer) */
+	gdt_addr[X86_GDT_ENTRY_32BIT_FS] = GDT_ENTRY(0xc093, (ulong)id, 0xfffff);
+
+	/* 16-bit CS: code, read/execute, 64 kB, base 0 */
+	gdt_addr[X86_GDT_ENTRY_16BIT_CS] = GDT_ENTRY(0x109b, 0, 0x0ffff);
+
+	/* 16-bit DS: data, read/write, 64 kB, base 0 */
+	gdt_addr[X86_GDT_ENTRY_16BIT_DS] = GDT_ENTRY(0x1093, 0, 0x0ffff);
+
+	load_gdt(gdt_addr, X86_GDT_NUM_ENTRIES);
+	load_ds(X86_GDT_ENTRY_32BIT_DS);
+	load_es(X86_GDT_ENTRY_32BIT_DS);
+	load_gs(X86_GDT_ENTRY_32BIT_DS);
+	load_ss(X86_GDT_ENTRY_32BIT_DS);
+	load_fs(X86_GDT_ENTRY_32BIT_FS);
+}
+
 int x86_cpu_init_f(void)
 {
 	const u32 em_rst = ~X86_CR0_EM;
@@ -117,28 +148,6 @@
 	    "movl	%%eax, %%cr0\n"
 	    "wbinvd\n" : : "i" (nw_cd_rst) : "eax");
 
-	/*
-	 * There are machines which are known to not boot with the GDT
-	 * being 8-byte unaligned. Intel recommends 16 byte alignment
-	 */
-	static const u64 boot_gdt[] __aligned(16) = {
-		/* CS: code, read/execute, 4 GB, base 0 */
-		[X86_GDT_ENTRY_32BIT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff),
-		/* DS: data, read/write, 4 GB, base 0 */
-		[X86_GDT_ENTRY_32BIT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff),
-		/* 16-bit CS: code, read/execute, 64 kB, base 0 */
-		[X86_GDT_ENTRY_16BIT_CS] = GDT_ENTRY(0x109b, 0, 0x0ffff),
-		/* 16-bit DS: data, read/write, 64 kB, base 0 */
-		[X86_GDT_ENTRY_16BIT_DS] = GDT_ENTRY(0x1093, 0, 0x0ffff),
-	};
-
-	load_gdt(boot_gdt, X86_GDT_NUM_ENTRIES);
-	load_ds(X86_GDT_ENTRY_32BIT_DS);
-	load_es(X86_GDT_ENTRY_32BIT_DS);
-	load_fs(X86_GDT_ENTRY_32BIT_DS);
-	load_gs(X86_GDT_ENTRY_32BIT_DS);
-	load_ss(X86_GDT_ENTRY_32BIT_DS);
-
 	/* Initialize core interrupt and exception functionality of CPU */
 	cpu_init_interrupts();
 	return 0;