| 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * Copyright (C) 2020 Marvell International Ltd. |
| 4 | */ |
| 5 | |
| 6 | #include <command.h> |
| 7 | #include <dm.h> |
| 8 | #include <hang.h> |
| 9 | #include <i2c.h> |
| 10 | #include <ram.h> |
| 11 | #include <time.h> |
| 12 | |
| 13 | #include <linux/bitops.h> |
| 14 | #include <linux/io.h> |
| 15 | |
| 16 | #include <mach/octeon_ddr.h> |
| 17 | |
| 18 | /* Random number generator stuff */ |
| 19 | |
| 20 | #define CVMX_OCT_DID_RNG 8ULL |
| 21 | |
| 22 | static u64 cvmx_rng_get_random64(void) |
| 23 | { |
| 24 | return csr_rd(cvmx_build_io_address(CVMX_OCT_DID_RNG, 0)); |
| 25 | } |
| 26 | |
| 27 | static void cvmx_rng_enable(void) |
| 28 | { |
| 29 | u64 val; |
| 30 | |
| 31 | val = csr_rd(CVMX_RNM_CTL_STATUS); |
| 32 | val |= BIT(0) | BIT(1); |
| 33 | csr_wr(CVMX_RNM_CTL_STATUS, val); |
| 34 | } |
| 35 | |
| 36 | #define RLEVEL_PRINTALL_DEFAULT 1 |
| 37 | #define WLEVEL_PRINTALL_DEFAULT 1 |
| 38 | |
| 39 | /* |
| 40 | * Define how many HW WL samples to take for majority voting. |
| 41 | * MUST BE odd!! |
| 42 | * Assume there should only be 2 possible values that will show up, |
| 43 | * so treat ties as a problem!!! |
| 44 | * NOTE: Do not change this without checking the code!!! |
| 45 | */ |
| 46 | #define WLEVEL_LOOPS_DEFAULT 5 |
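/*
 * Example with hypothetical samples: taking WLEVEL_LOOPS_DEFAULT = 5
 * readings whose 2-bit coarse values are {2, 2, 0, 2, 2} gives vote
 * counts {1, 0, 4, 0}, so value 2 wins outright.  With an odd loop
 * count, two candidate values can never tie; a tie can only appear when
 * three or more distinct values show up, which the voting code treats
 * as a problem (see find_wl_majority() further below).
 */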
| 47 | |
| 48 | #define ENABLE_COMPUTED_VREF_ADJUSTMENT 1 |
| 49 | #define SW_WLEVEL_HW_DEFAULT 1 |
| 50 | #define DEFAULT_BEST_RANK_SCORE 9999999 |
| 51 | #define MAX_RANK_SCORE_LIMIT 99 |
| 52 | |
| 53 | /* |
| 54 |  * Define how many HW RL samples per rank to take; multiple samples will |
| 55 |  * allow looking for the best sample score. |
| 56 | */ |
| 57 | #define RLEVEL_SAMPLES_DEFAULT 3 |
| 58 | |
| 59 | #define ddr_seq_print(format, ...) do {} while (0) |
| 60 | |
| 61 | struct wlevel_bitcnt { |
| 62 | int bitcnt[4]; |
| 63 | }; |
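/* One vote counter per possible 2-bit coarse write-leveling value. */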
| 64 | |
| 65 | static void display_dac_dbi_settings(int lmc, int dac_or_dbi, |
| 66 | int ecc_ena, int *settings, char *title); |
| 67 | |
| 68 | static unsigned short load_dac_override(struct ddr_priv *priv, int if_num, |
| 69 | int dac_value, int byte); |
| 70 | |
| 71 | /* "mode" arg */ |
| 72 | #define DBTRAIN_TEST 0 |
| 73 | #define DBTRAIN_DBI 1 |
| 74 | #define DBTRAIN_LFSR 2 |
| 75 | |
| 76 | static int run_best_hw_patterns(struct ddr_priv *priv, int lmc, u64 phys_addr, |
| 77 | int mode, u64 *xor_data); |
| 78 | |
| 79 | #define LMC_DDR3_RESET_ASSERT 0 |
| 80 | #define LMC_DDR3_RESET_DEASSERT 1 |
| 81 | |
| 82 | static void cn7xxx_lmc_ddr3_reset(struct ddr_priv *priv, int if_num, int reset) |
| 83 | { |
| 84 | union cvmx_lmcx_reset_ctl reset_ctl; |
| 85 | |
| 86 | /* |
| 87 | * 4. Deassert DDRn_RESET_L pin by writing |
| 88 | * LMC(0..3)_RESET_CTL[DDR3RST] = 1 |
| 89 | * without modifying any other LMC(0..3)_RESET_CTL fields. |
| 90 | * 5. Read LMC(0..3)_RESET_CTL and wait for the result. |
| 91 | * 6. Wait a minimum of 500us. This guarantees the necessary T = 500us |
| 92 | * delay between DDRn_RESET_L deassertion and DDRn_DIMM*_CKE* |
| 93 | * assertion. |
| 94 | */ |
| 95 | debug("LMC%d %s DDR_RESET_L\n", if_num, |
| 96 | (reset == |
| 97 | LMC_DDR3_RESET_DEASSERT) ? "De-asserting" : "Asserting"); |
| 98 | |
| 99 | reset_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num)); |
| 100 | reset_ctl.cn78xx.ddr3rst = reset; |
| 101 | lmc_wr(priv, CVMX_LMCX_RESET_CTL(if_num), reset_ctl.u64); |
| 102 | |
| 103 | lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num)); |
| 104 | |
| 105 | udelay(500); |
| 106 | } |
| 107 | |
| 108 | static void perform_lmc_reset(struct ddr_priv *priv, int node, int if_num) |
| 109 | { |
| 110 | /* |
| 111 | * 5.9.6 LMC RESET Initialization |
| 112 | * |
| 113 | * The purpose of this step is to assert/deassert the RESET# pin at the |
| 114 | * DDR3/DDR4 parts. |
| 115 | * |
| 116 | * This LMC RESET step is done for all enabled LMCs. |
| 117 | * |
| 118 | * It may be appropriate to skip this step if the DDR3/DDR4 DRAM parts |
| 119 | * are in self refresh and are currently preserving their |
| 120 | * contents. (Software can determine this via |
| 121 | * LMC(0..3)_RESET_CTL[DDR3PSV] in some circumstances.) The remainder of |
| 122 | * this section assumes that the DRAM contents need not be preserved. |
| 123 | * |
| 124 | * The remainder of this section assumes that the CN78XX DDRn_RESET_L |
| 125 | * pin is attached to the RESET# pin of the attached DDR3/DDR4 parts, |
| 126 | * as will be appropriate in many systems. |
| 127 | * |
| 128 | * (In other systems, such as ones that can preserve DDR3/DDR4 part |
| 129 | * contents while CN78XX is powered down, it will not be appropriate to |
| 130 |  * directly attach the CN78XX DDRn_RESET_L pin to RESET# of the |
| 131 | * DDR3/DDR4 parts, and this section may not apply.) |
| 132 | * |
| 133 | * The remainder of this section describes the sequence for LMCn. |
| 134 | * |
| 135 | * Perform the following six substeps for LMC reset initialization: |
| 136 | * |
| 137 | * 1. If not done already, assert DDRn_RESET_L pin by writing |
| 138 |  * LMC(0..3)_RESET_CTL[DDR3RST] = 0 without modifying any other |
| 139 | * LMC(0..3)_RESET_CTL fields. |
| 140 | */ |
| 141 | |
| 142 | if (!ddr_memory_preserved(priv)) { |
| 143 | /* |
| 144 | * 2. Read LMC(0..3)_RESET_CTL and wait for the result. |
| 145 | */ |
| 146 | |
| 147 | lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num)); |
| 148 | |
| 149 | /* |
| 150 | * 3. Wait until RESET# assertion-time requirement from JEDEC |
| 151 | * DDR3/DDR4 specification is satisfied (200 us during a |
| 152 | * power-on ramp, 100ns when power is already stable). |
| 153 | */ |
| 154 | |
| 155 | udelay(200); |
| 156 | |
| 157 | /* |
| 158 | * 4. Deassert DDRn_RESET_L pin by writing |
| 159 | * LMC(0..3)_RESET_CTL[DDR3RST] = 1 |
| 160 | * without modifying any other LMC(0..3)_RESET_CTL fields. |
| 161 | * 5. Read LMC(0..3)_RESET_CTL and wait for the result. |
| 162 | * 6. Wait a minimum of 500us. This guarantees the necessary |
| 163 | * T = 500us delay between DDRn_RESET_L deassertion and |
| 164 | * DDRn_DIMM*_CKE* assertion. |
| 165 | */ |
| 166 | cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_DEASSERT); |
| 167 | |
| 168 | /* Toggle Reset Again */ |
| 169 | /* That is, assert, then de-assert, one more time */ |
| 170 | cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_ASSERT); |
| 171 | cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_DEASSERT); |
| 172 | } |
| 173 | } |
| 174 | |
| 175 | void oct3_ddr3_seq(struct ddr_priv *priv, int rank_mask, int if_num, |
| 176 | int sequence) |
| 177 | { |
| 178 | /* |
| 179 | * 3. Without changing any other fields in LMC(0)_CONFIG, write |
| 180 | * LMC(0)_CONFIG[RANKMASK] then write both |
| 181 | * LMC(0)_SEQ_CTL[SEQ_SEL,INIT_START] = 1 with a single CSR write |
| 182 | * operation. LMC(0)_CONFIG[RANKMASK] bits should be set to indicate |
| 183 | * the ranks that will participate in the sequence. |
| 184 | * |
| 185 | * The LMC(0)_SEQ_CTL[SEQ_SEL] value should select power-up/init or |
| 186 |  * self-refresh exit, depending on whether the DRAM parts are in |
| 187 | * self-refresh and whether their contents should be preserved. While |
| 188 | * LMC performs these sequences, it will not perform any other DDR3 |
| 189 | * transactions. When the sequence is complete, hardware sets the |
| 190 | * LMC(0)_CONFIG[INIT_STATUS] bits for the ranks that have been |
| 191 | * initialized. |
| 192 | * |
| 193 | * If power-up/init is selected immediately following a DRESET |
| 194 | * assertion, LMC executes the sequence described in the "Reset and |
| 195 | * Initialization Procedure" section of the JEDEC DDR3 |
| 196 | * specification. This includes activating CKE, writing all four DDR3 |
| 197 | * mode registers on all selected ranks, and issuing the required |
| 198 | * ZQCL |
| 199 | * command. The LMC(0)_CONFIG[RANKMASK] value should select all ranks |
| 200 | * with attached DRAM in this case. If LMC(0)_CONTROL[RDIMM_ENA] = 1, |
| 201 | * LMC writes the JEDEC standard SSTE32882 control words selected by |
| 202 | * LMC(0)_DIMM_CTL[DIMM*_WMASK] between DDR_CKE* signal assertion and |
| 203 | * the first DDR3 mode register write operation. |
| 204 | * LMC(0)_DIMM_CTL[DIMM*_WMASK] should be cleared to 0 if the |
| 205 | * corresponding DIMM is not present. |
| 206 | * |
| 207 | * If self-refresh exit is selected, LMC executes the required SRX |
| 208 | * command followed by a refresh and ZQ calibration. Section 4.5 |
| 209 | * describes behavior of a REF + ZQCS. LMC does not write the DDR3 |
| 210 | * mode registers as part of this sequence, and the mode register |
| 211 | * parameters must match at self-refresh entry and exit times. |
| 212 | * |
| 213 | * 4. Read LMC(0)_SEQ_CTL and wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] |
| 214 | * to be set. |
| 215 | * |
| 216 | * 5. Read LMC(0)_CONFIG[INIT_STATUS] and confirm that all ranks have |
| 217 | * been initialized. |
| 218 | */ |
| 219 | |
| 220 | union cvmx_lmcx_seq_ctl seq_ctl; |
| 221 | union cvmx_lmcx_config lmc_config; |
| 222 | int timeout; |
| 223 | |
| 224 | lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); |
| 225 | lmc_config.s.rankmask = rank_mask; |
| 226 | lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64); |
| 227 | |
| 228 | seq_ctl.u64 = 0; |
| 229 | |
| 230 | seq_ctl.s.init_start = 1; |
| 231 | seq_ctl.s.seq_sel = sequence; |
| 232 | |
| 233 | ddr_seq_print |
| 234 | ("Performing LMC sequence: rank_mask=0x%02x, sequence=0x%x, %s\n", |
| 235 | rank_mask, sequence, sequence_str[sequence]); |
| 236 | |
| 237 | if (seq_ctl.s.seq_sel == 3) |
| 238 | debug("LMC%d: Exiting Self-refresh Rank_mask:%x\n", if_num, |
| 239 | rank_mask); |
| 240 | |
| 241 | lmc_wr(priv, CVMX_LMCX_SEQ_CTL(if_num), seq_ctl.u64); |
| 242 | lmc_rd(priv, CVMX_LMCX_SEQ_CTL(if_num)); |
| 243 | |
| 244 | timeout = 100; |
| 245 | do { |
| 246 | udelay(100); /* Wait a while */ |
| 247 | seq_ctl.u64 = lmc_rd(priv, CVMX_LMCX_SEQ_CTL(if_num)); |
| 248 | if (--timeout == 0) { |
| 249 | printf("Sequence %d timed out\n", sequence); |
| 250 | break; |
| 251 | } |
| 252 | } while (seq_ctl.s.seq_complete != 1); |
| 253 | |
| 254 | ddr_seq_print(" LMC sequence=%x: Completed.\n", sequence); |
| 255 | } |
| 256 | |
| 257 | #define bdk_numa_get_address(n, p) ((p) | ((u64)n) << CVMX_NODE_MEM_SHIFT) |
| 258 | #define AREA_BASE_OFFSET BIT_ULL(26) |
| 259 | |
| 260 | static int test_dram_byte64(struct ddr_priv *priv, int lmc, u64 p, |
| 261 | u64 bitmask, u64 *xor_data) |
| 262 | { |
| 263 | u64 p1, p2, d1, d2; |
| 264 | u64 v, v1; |
| 265 | u64 p2offset = (1ULL << 26); // offset to area 2 |
| 266 | u64 datamask; |
| 267 | u64 xor; |
| 268 | u64 i, j, k; |
| 269 | u64 ii; |
| 270 | int errors = 0; |
| 271 | //u64 index; |
| 272 | u64 pattern1 = cvmx_rng_get_random64(); |
| 273 | u64 pattern2 = 0; |
| 274 | u64 bad_bits[2] = { 0, 0 }; |
| 275 | int kbitno = (octeon_is_cpuid(OCTEON_CN7XXX)) ? 20 : 18; |
| 276 | union cvmx_l2c_ctl l2c_ctl; |
| 277 | int burst; |
| 278 | int saved_dissblkdty; |
| 279 | int node = 0; |
| 280 | |
| 281 | // Force full cacheline write-backs to boost traffic |
| 282 | l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL_REL); |
| 283 | saved_dissblkdty = l2c_ctl.cn78xx.dissblkdty; |
| 284 | l2c_ctl.cn78xx.dissblkdty = 1; |
| 285 | l2c_wr(priv, CVMX_L2C_CTL_REL, l2c_ctl.u64); |
| 286 | |
| 287 | if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX)) |
| 288 | kbitno = 18; |
| 289 | |
| 290 | // Byte lanes may be clear in the mask to indicate no testing on that |
| 291 | // lane. |
| 292 | datamask = bitmask; |
| 293 | |
| 294 | /* |
| 295 | * Add offset to both test regions to not clobber boot stuff |
| 296 | * when running from L2 for NAND boot. |
| 297 | */ |
| 298 | p += AREA_BASE_OFFSET; // make sure base is out of the way of boot |
| 299 | |
| 300 | // final address must include LMC and node |
| 301 | p |= (lmc << 7); /* Map address into proper interface */ |
| 302 | p = bdk_numa_get_address(node, p); /* Map to node */ |
| 303 | p |= 1ull << 63; |
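// To summarize the composition above: the caller-supplied base is
// bumped by AREA_BASE_OFFSET (bit 26) to stay clear of boot data,
// bit 7 selects the LMC interface under test, the node number is
// placed at CVMX_NODE_MEM_SHIFT, and bit 63 is set to form the final
// address handed to the cvmx_read64/write64_uint64() accessors.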
| 304 | |
| 305 | #define II_INC BIT_ULL(22) |
| 306 | #define II_MAX BIT_ULL(22) |
| 307 | #define K_INC BIT_ULL(14) |
| 308 | #define K_MAX BIT_ULL(kbitno) |
| 309 | #define J_INC BIT_ULL(9) |
| 310 | #define J_MAX BIT_ULL(12) |
| 311 | #define I_INC BIT_ULL(3) |
| 312 | #define I_MAX BIT_ULL(7) |
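// The strides above assume 128-byte L2 cache lines: I_INC/I_MAX index
// the sixteen 64-bit slots within one line, while the J, K and II
// strides step across lines (K_MAX = BIT_ULL(kbitno) is the
// chip-dependent extent set up earlier).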
| 313 | |
| 314 | debug("N%d.LMC%d: %s: phys_addr=0x%llx/0x%llx (0x%llx)\n", |
| 315 | node, lmc, __func__, p, p + p2offset, 1ULL << kbitno); |
| 316 | |
| 317 | // loops are ordered so that only a single 64-bit slot is written to |
| 318 | // each cacheline at one time, then the cachelines are forced out; |
| 319 | // this should maximize read/write traffic |
| 320 | |
| 321 | // FIXME? extend the range of memory tested!! |
| 322 | for (ii = 0; ii < II_MAX; ii += II_INC) { |
| 323 | for (i = 0; i < I_MAX; i += I_INC) { |
| 324 | for (k = 0; k < K_MAX; k += K_INC) { |
| 325 | for (j = 0; j < J_MAX; j += J_INC) { |
| 326 | p1 = p + ii + k + j; |
| 327 | p2 = p1 + p2offset; |
| 328 | |
| 329 | v = pattern1 * (p1 + i); |
| 330 | // write the same thing to both areas |
| 331 | v1 = v; |
| 332 | |
| 333 | cvmx_write64_uint64(p1 + i, v); |
| 334 | cvmx_write64_uint64(p2 + i, v1); |
| 335 | |
| 336 | CVMX_CACHE_WBIL2(p1, 0); |
| 337 | CVMX_CACHE_WBIL2(p2, 0); |
| 338 | } |
| 339 | } |
| 340 | } |
| 341 | } |
| 342 | |
| 343 | CVMX_DCACHE_INVALIDATE; |
| 344 | |
| 345 | debug("N%d.LMC%d: dram_tuning_mem_xor: done INIT loop\n", node, lmc); |
| 346 | |
| 347 | /* Make a series of passes over the memory areas. */ |
| 348 | |
| 349 | for (burst = 0; burst < 1 /* was: dram_tune_use_bursts */ ; burst++) { |
| 350 | u64 this_pattern = cvmx_rng_get_random64(); |
| 351 | |
| 352 | pattern2 ^= this_pattern; |
| 353 | |
| 354 | /* |
| 355 | * XOR the data with a random value, applying the change to both |
| 356 | * memory areas. |
| 357 | */ |
| 358 | |
| 359 | // FIXME? extend the range of memory tested!! |
| 360 | for (ii = 0; ii < II_MAX; ii += II_INC) { |
| 361 | // FIXME: rearranged, did not make much difference? |
| 362 | for (i = 0; i < I_MAX; i += I_INC) { |
| 363 | for (k = 0; k < K_MAX; k += K_INC) { |
| 364 | for (j = 0; j < J_MAX; j += J_INC) { |
| 365 | p1 = p + ii + k + j; |
| 366 | p2 = p1 + p2offset; |
| 367 | |
| 368 | v = cvmx_read64_uint64(p1 + |
| 369 | i) ^ |
| 370 | this_pattern; |
| 371 | v1 = cvmx_read64_uint64(p2 + |
| 372 | i) ^ |
| 373 | this_pattern; |
| 374 | |
| 375 | cvmx_write64_uint64(p1 + i, v); |
| 376 | cvmx_write64_uint64(p2 + i, v1); |
| 377 | |
| 378 | CVMX_CACHE_WBIL2(p1, 0); |
| 379 | CVMX_CACHE_WBIL2(p2, 0); |
| 380 | } |
| 381 | } |
| 382 | } |
| 383 | } |
| 384 | |
| 385 | CVMX_DCACHE_INVALIDATE; |
| 386 | |
| 387 | debug("N%d.LMC%d: dram_tuning_mem_xor: done MODIFY loop\n", |
| 388 | node, lmc); |
| 389 | |
| 390 | /* |
| 391 | * Look for differences in the areas. If there is a mismatch, |
| 392 | * reset both memory locations with the same pattern. Failing |
| 393 | * to do so means that on all subsequent passes the pair of |
| 394 | * locations remain out of sync giving spurious errors. |
| 395 | */ |
| 396 | |
| 397 | // FIXME: Change the loop order so that an entire cache line |
| 398 | // is compared at one time. This is so that a read |
| 399 | // error that occurs *anywhere* on the cacheline will |
| 400 | // be caught, rather than comparing only 1 cacheline |
| 401 | // slot at a time, where an error on a different |
| 402 | // slot will be missed that time around |
| 403 | // Does the above make sense? |
| 404 | |
| 405 | // FIXME? extend the range of memory tested!! |
| 406 | for (ii = 0; ii < II_MAX; ii += II_INC) { |
| 407 | for (k = 0; k < K_MAX; k += K_INC) { |
| 408 | for (j = 0; j < J_MAX; j += J_INC) { |
| 409 | p1 = p + ii + k + j; |
| 410 | p2 = p1 + p2offset; |
| 411 | |
| 412 | // process entire cachelines in the |
| 413 | // innermost loop |
| 414 | for (i = 0; i < I_MAX; i += I_INC) { |
| 415 | int bybit = 1; |
| 416 | // start in byte lane 0 |
| 417 | u64 bymsk = 0xffULL; |
| 418 | |
| 419 | // FIXME: this should predict |
| 420 | // what we find...??? |
| 421 | v = ((p1 + i) * pattern1) ^ |
| 422 | pattern2; |
| 423 | d1 = cvmx_read64_uint64(p1 + i); |
| 424 | d2 = cvmx_read64_uint64(p2 + i); |
| 425 | |
| 426 | // union of error bits only in |
| 427 | // active byte lanes |
| 428 | xor = ((d1 ^ v) | (d2 ^ v)) & |
| 429 | datamask; |
| 430 | |
| 431 | if (!xor) |
| 432 | continue; |
| 433 | |
| 434 | // accumulate bad bits |
| 435 | bad_bits[0] |= xor; |
| 436 | |
| 437 | while (xor != 0) { |
| 438 | debug("ERROR(%03d): [0x%016llX] [0x%016llX] expected 0x%016llX d1 %016llX d2 %016llX\n", |
| 439 | burst, p1, p2, v, |
| 440 | d1, d2); |
| 441 | // error(s) in this lane |
| 442 | if (xor & bymsk) { |
| 443 | // set the byte |
| 444 | // error bit |
| 445 | errors |= bybit; |
| 446 | // clear byte |
| 447 | // lane in |
| 448 | // error bits |
| 449 | xor &= ~bymsk; |
| 450 | // clear the |
| 451 | // byte lane in |
| 452 | // the mask |
| 453 | datamask &= ~bymsk; |
| 454 | #if EXIT_WHEN_ALL_LANES_HAVE_ERRORS |
| 455 | // nothing |
| 456 | // left to do |
| 457 | if (datamask == 0) { |
| 458 | return errors; |
| 459 | } |
| 460 | #endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */ |
| 461 | } |
| 462 | // move mask into |
| 463 | // next byte lane |
| 464 | bymsk <<= 8; |
| 465 | // move bit into next |
| 466 | // byte position |
| 467 | bybit <<= 1; |
| 468 | } |
| 469 | } |
| 470 | CVMX_CACHE_WBIL2(p1, 0); |
| 471 | CVMX_CACHE_WBIL2(p2, 0); |
| 472 | } |
| 473 | } |
| 474 | } |
| 475 | |
| 476 | debug("N%d.LMC%d: dram_tuning_mem_xor: done TEST loop\n", |
| 477 | node, lmc); |
| 478 | } |
| 479 | |
| 480 | if (xor_data) { // send the bad bits back... |
| 481 | xor_data[0] = bad_bits[0]; |
| 482 | xor_data[1] = bad_bits[1]; // let it be zeroed |
| 483 | } |
| 484 | |
| 485 | // Restore original setting that could enable partial cacheline writes |
| 486 | l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL_REL); |
| 487 | l2c_ctl.cn78xx.dissblkdty = saved_dissblkdty; |
| 488 | l2c_wr(priv, CVMX_L2C_CTL_REL, l2c_ctl.u64); |
Aaron Williams | 61674a1 | 2020-09-02 08:29:07 +0200 | [diff] [blame] | 489 | |
| 490 | return errors; |
| 491 | } |
| 492 | |
| 493 | static void ddr4_mrw(struct ddr_priv *priv, int if_num, int rank, |
| 494 | int mr_wr_addr, int mr_wr_sel, int mr_wr_bg1) |
| 495 | { |
| 496 | union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl; |
| 497 | |
| 498 | lmc_mr_mpr_ctl.u64 = 0; |
| 499 | lmc_mr_mpr_ctl.cn78xx.mr_wr_addr = (mr_wr_addr == -1) ? 0 : mr_wr_addr; |
| 500 | lmc_mr_mpr_ctl.cn78xx.mr_wr_sel = mr_wr_sel; |
| 501 | lmc_mr_mpr_ctl.cn78xx.mr_wr_rank = rank; |
| 502 | lmc_mr_mpr_ctl.cn78xx.mr_wr_use_default_value = |
| 503 | (mr_wr_addr == -1) ? 1 : 0; |
| 504 | lmc_mr_mpr_ctl.cn78xx.mr_wr_bg1 = mr_wr_bg1; |
| 505 | lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64); |
| 506 | |
| 507 | /* Mode Register Write */ |
| 508 | oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8); |
| 509 | } |
| 510 | |
| 511 | #define INV_A0_17(x) ((x) ^ 0x22bf8) |
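/*
 * 0x22bf8 sets bits A3-A9, A11, A13 and A17, which matches the address
 * lines a DDR4 registering clock driver (RCD) inverts on its B-side
 * outputs.  XOR-ing a mode-register payload with this mask pre-inverts
 * those bits so that B-side devices receive the intended value; the
 * bank-group/bank bits are handled separately via mr_wr_bg1 and the
 * inverted mr_wr_sel in set_mpr_mode() below.
 */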
| 512 | |
| 513 | static void set_mpr_mode(struct ddr_priv *priv, int rank_mask, |
| 514 | int if_num, int dimm_count, int mpr, int bg1) |
| 515 | { |
| 516 | int rankx; |
| 517 | |
| 518 | debug("All Ranks: Set mpr mode = %x %c-side\n", |
| 519 | mpr, (bg1 == 0) ? 'A' : 'B'); |
| 520 | |
| 521 | for (rankx = 0; rankx < dimm_count * 4; rankx++) { |
| 522 | if (!(rank_mask & (1 << rankx))) |
| 523 | continue; |
| 524 | if (bg1 == 0) { |
| 525 | /* MR3 A-side */ |
| 526 | ddr4_mrw(priv, if_num, rankx, mpr << 2, 3, bg1); |
| 527 | } else { |
| 528 | /* MR3 B-side */ |
| 529 | ddr4_mrw(priv, if_num, rankx, INV_A0_17(mpr << 2), ~3, |
| 530 | bg1); |
| 531 | } |
| 532 | } |
| 533 | } |
| 534 | |
| 535 | static void do_ddr4_mpr_read(struct ddr_priv *priv, int if_num, |
| 536 | int rank, int page, int location) |
| 537 | { |
| 538 | union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl; |
| 539 | |
| 540 | lmc_mr_mpr_ctl.u64 = lmc_rd(priv, CVMX_LMCX_MR_MPR_CTL(if_num)); |
| 541 | lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = 0; |
| 542 | lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = page; /* Page */ |
| 543 | lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank; |
| 544 | lmc_mr_mpr_ctl.cn70xx.mpr_loc = location; |
| 545 | lmc_mr_mpr_ctl.cn70xx.mpr_wr = 0; /* Read=0, Write=1 */ |
| 546 | lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64); |
| 547 | |
| 548 | /* MPR register access sequence */ |
| 549 | oct3_ddr3_seq(priv, 1 << rank, if_num, 0x9); |
| 550 | |
| 551 | debug("LMC_MR_MPR_CTL : 0x%016llx\n", |
| 552 | lmc_mr_mpr_ctl.u64); |
| 553 | debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_addr: 0x%02x\n", |
| 554 | lmc_mr_mpr_ctl.cn70xx.mr_wr_addr); |
| 555 | debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_sel : 0x%02x\n", |
| 556 | lmc_mr_mpr_ctl.cn70xx.mr_wr_sel); |
| 557 | debug("lmc_mr_mpr_ctl.cn70xx.mpr_loc : 0x%02x\n", |
| 558 | lmc_mr_mpr_ctl.cn70xx.mpr_loc); |
| 559 | debug("lmc_mr_mpr_ctl.cn70xx.mpr_wr : 0x%02x\n", |
| 560 | lmc_mr_mpr_ctl.cn70xx.mpr_wr); |
| 561 | } |
| 562 | |
| 563 | static int set_rdimm_mode(struct ddr_priv *priv, int if_num, int enable) |
| 564 | { |
| 565 | union cvmx_lmcx_control lmc_control; |
| 566 | int save_rdimm_mode; |
| 567 | |
| 568 | lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); |
| 569 | save_rdimm_mode = lmc_control.s.rdimm_ena; |
| 570 | lmc_control.s.rdimm_ena = enable; |
| 571 | debug("Setting RDIMM_ENA = %x\n", enable); |
| 572 | lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), lmc_control.u64); |
| 573 | |
| 574 | return save_rdimm_mode; |
| 575 | } |
| 576 | |
| 577 | static void ddr4_mpr_read(struct ddr_priv *priv, int if_num, int rank, |
| 578 | int page, int location, u64 *mpr_data) |
| 579 | { |
| 580 | do_ddr4_mpr_read(priv, if_num, rank, page, location); |
| 581 | |
| 582 | mpr_data[0] = lmc_rd(priv, CVMX_LMCX_MPR_DATA0(if_num)); |
| 583 | } |
| 584 | |
| 585 | /* Display MPR values for Page */ |
| 586 | static void display_mpr_page(struct ddr_priv *priv, int rank_mask, |
| 587 | int if_num, int page) |
| 588 | { |
| 589 | int rankx, location; |
| 590 | u64 mpr_data[3]; |
| 591 | |
| 592 | for (rankx = 0; rankx < 4; rankx++) { |
| 593 | if (!(rank_mask & (1 << rankx))) |
| 594 | continue; |
| 595 | |
| 596 | debug("N0.LMC%d.R%d: MPR Page %d loc [0:3]: ", |
| 597 | if_num, rankx, page); |
| 598 | for (location = 0; location < 4; location++) { |
| 599 | ddr4_mpr_read(priv, if_num, rankx, page, location, |
| 600 | mpr_data); |
| 601 | debug("0x%02llx ", mpr_data[0] & 0xFF); |
| 602 | } |
| 603 | debug("\n"); |
| 604 | |
| 605 | } /* for (rankx = 0; rankx < 4; rankx++) */ |
| 606 | } |
| 607 | |
| 608 | static void ddr4_mpr_write(struct ddr_priv *priv, int if_num, int rank, |
| 609 | int page, int location, u8 mpr_data) |
| 610 | { |
| 611 | union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl; |
| 612 | |
| 613 | lmc_mr_mpr_ctl.u64 = 0; |
| 614 | lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mpr_data; |
| 615 | lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = page; /* Page */ |
| 616 | lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank; |
| 617 | lmc_mr_mpr_ctl.cn70xx.mpr_loc = location; |
| 618 | lmc_mr_mpr_ctl.cn70xx.mpr_wr = 1; /* Read=0, Write=1 */ |
| 619 | lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64); |
| 620 | |
| 621 | /* MPR register access sequence */ |
| 622 | oct3_ddr3_seq(priv, 1 << rank, if_num, 0x9); |
| 623 | |
| 624 | debug("LMC_MR_MPR_CTL : 0x%016llx\n", |
| 625 | lmc_mr_mpr_ctl.u64); |
| 626 | debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_addr: 0x%02x\n", |
| 627 | lmc_mr_mpr_ctl.cn70xx.mr_wr_addr); |
| 628 | debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_sel : 0x%02x\n", |
| 629 | lmc_mr_mpr_ctl.cn70xx.mr_wr_sel); |
| 630 | debug("lmc_mr_mpr_ctl.cn70xx.mpr_loc : 0x%02x\n", |
| 631 | lmc_mr_mpr_ctl.cn70xx.mpr_loc); |
| 632 | debug("lmc_mr_mpr_ctl.cn70xx.mpr_wr : 0x%02x\n", |
| 633 | lmc_mr_mpr_ctl.cn70xx.mpr_wr); |
| 634 | } |
| 635 | |
| 636 | static void set_vref(struct ddr_priv *priv, int if_num, int rank, |
| 637 | int range, int value) |
| 638 | { |
| 639 | union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl; |
| 640 | union cvmx_lmcx_modereg_params3 lmc_modereg_params3; |
| 641 | int mr_wr_addr = 0; |
| 642 | |
| 643 | lmc_mr_mpr_ctl.u64 = 0; |
| 644 | lmc_modereg_params3.u64 = lmc_rd(priv, |
| 645 | CVMX_LMCX_MODEREG_PARAMS3(if_num)); |
| 646 | |
| 647 | /* A12:A10 tCCD_L */ |
| 648 | mr_wr_addr |= lmc_modereg_params3.s.tccd_l << 10; |
| 649 | mr_wr_addr |= 1 << 7; /* A7 1 = Enable(Training Mode) */ |
| 650 | mr_wr_addr |= range << 6; /* A6 vrefDQ Training Range */ |
| 651 | mr_wr_addr |= value << 0; /* A5:A0 vrefDQ Training Value */ |
| 652 | |
| 653 | lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mr_wr_addr; |
| 654 | lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = 6; /* Write MR6 */ |
| 655 | lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank; |
| 656 | lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64); |
| 657 | |
| 658 | /* 0x8 = Mode Register Write */ |
| 659 | oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8); |
| 660 | |
| 661 | /* |
| 662 | * It is vendor specific whether vref_value is captured with A7=1. |
| 663 | * A subsequent MRS might be necessary. |
| 664 | */ |
| 665 | oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8); |
| 666 | |
| 667 | mr_wr_addr &= ~(1 << 7); /* A7 0 = Disable(Training Mode) */ |
| 668 | lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mr_wr_addr; |
| 669 | lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64); |
| 670 | } |
| 671 | |
| 672 | static void set_dram_output_inversion(struct ddr_priv *priv, int if_num, |
| 673 | int dimm_count, int rank_mask, |
| 674 | int inversion) |
| 675 | { |
| 676 | union cvmx_lmcx_ddr4_dimm_ctl lmc_ddr4_dimm_ctl; |
| 677 | union cvmx_lmcx_dimmx_params lmc_dimmx_params; |
| 678 | union cvmx_lmcx_dimm_ctl lmc_dimm_ctl; |
| 679 | int dimm_no; |
| 680 | |
| 681 | /* Don't touch extended register control words */ |
| 682 | lmc_ddr4_dimm_ctl.u64 = 0; |
| 683 | lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), lmc_ddr4_dimm_ctl.u64); |
| 684 | |
| 685 | debug("All DIMMs: Register Control Word RC0 : %x\n", |
| 686 | (inversion & 1)); |
| 687 | |
| 688 | for (dimm_no = 0; dimm_no < dimm_count; ++dimm_no) { |
| 689 | lmc_dimmx_params.u64 = |
| 690 | lmc_rd(priv, CVMX_LMCX_DIMMX_PARAMS(dimm_no, if_num)); |
| 691 | lmc_dimmx_params.s.rc0 = |
| 692 | (lmc_dimmx_params.s.rc0 & ~1) | (inversion & 1); |
| 693 | |
| 694 | lmc_wr(priv, |
| 695 | CVMX_LMCX_DIMMX_PARAMS(dimm_no, if_num), |
| 696 | lmc_dimmx_params.u64); |
| 697 | } |
| 698 | |
| 699 | /* LMC0_DIMM_CTL */ |
| 700 | lmc_dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num)); |
| 701 | lmc_dimm_ctl.s.dimm0_wmask = 0x1; |
| 702 | lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0x0001 : 0x0000; |
| 703 | |
| 704 | debug("LMC DIMM_CTL : 0x%016llx\n", |
| 705 | lmc_dimm_ctl.u64); |
| 706 | lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), lmc_dimm_ctl.u64); |
| 707 | |
| 708 | oct3_ddr3_seq(priv, rank_mask, if_num, 0x7); /* Init RCW */ |
| 709 | } |
| 710 | |
| 711 | static void write_mpr_page0_pattern(struct ddr_priv *priv, int rank_mask, |
| 712 | int if_num, int dimm_count, int pattern, |
| 713 | int location_mask) |
| 714 | { |
| 715 | int rankx; |
| 716 | int location; |
| 717 | |
| 718 | for (rankx = 0; rankx < dimm_count * 4; rankx++) { |
| 719 | if (!(rank_mask & (1 << rankx))) |
| 720 | continue; |
| 721 | for (location = 0; location < 4; ++location) { |
| 722 | if (!(location_mask & (1 << location))) |
| 723 | continue; |
| 724 | |
| 725 | ddr4_mpr_write(priv, if_num, rankx, |
| 726 | /* page */ 0, /* location */ location, |
| 727 | pattern); |
| 728 | } |
| 729 | } |
| 730 | } |
| 731 | |
| 732 | static void change_rdimm_mpr_pattern(struct ddr_priv *priv, int rank_mask, |
| 733 | int if_num, int dimm_count) |
| 734 | { |
| 735 | int save_ref_zqcs_int; |
| 736 | union cvmx_lmcx_config lmc_config; |
| 737 | |
| 738 | /* |
| 739 | * Okay, here is the latest sequence. This should work for all |
| 740 | * chips and passes (78,88,73,etc). This sequence should be run |
| 741 | * immediately after DRAM INIT. The basic idea is to write the |
| 742 | * same pattern into each of the 4 MPR locations in the DRAM, so |
| 743 | * that the same value is returned when doing MPR reads regardless |
| 744 | * of the inversion state. My advice is to put this into a |
| 745 | * function, change_rdimm_mpr_pattern or something like that, so |
| 746 | * that it can be called multiple times, as I think David wants a |
| 747 | * clock-like pattern for OFFSET training, but does not want a |
| 748 | * clock pattern for Bit-Deskew. You should then be able to call |
| 749 | * this at any point in the init sequence (after DRAM init) to |
| 750 | * change the pattern to a new value. |
| 751 | * Mike |
| 752 | * |
| 753 | * A correction: PHY doesn't need any pattern during offset |
| 754 | * training, but needs clock like pattern for internal vref and |
| 755 | * bit-dskew training. So for that reason, these steps below have |
| 756 | * to be conducted before those trainings to pre-condition |
| 757 | * the pattern. David |
| 758 | * |
| 759 | * Note: Step 3, 4, 8 and 9 have to be done through RDIMM |
| 760 | * sequence. If you issue MRW sequence to do RCW write (in o78 pass |
| 761 | * 1 at least), LMC will still do two commands because |
| 762 | * CONTROL[RDIMM_ENA] is still set high. We don't want it to have |
| 763 | * any unintentional mode register write so it's best to do what |
| 764 | * Mike is doing here. |
| 765 | * Andrew |
| 766 | */ |
| 767 | |
| 768 | /* 1) Disable refresh (REF_ZQCS_INT = 0) */ |
| 769 | |
| 770 | debug("1) Disable refresh (REF_ZQCS_INT = 0)\n"); |
| 771 | |
| 772 | lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); |
| 773 | save_ref_zqcs_int = lmc_config.cn78xx.ref_zqcs_int; |
| 774 | lmc_config.cn78xx.ref_zqcs_int = 0; |
| 775 | lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64); |
| 776 | |
| 777 | /* |
| 778 | * 2) Put all devices in MPR mode (Run MRW sequence (sequence=8) |
| 779 | * with MODEREG_PARAMS0[MPRLOC]=0, |
| 780 | * MODEREG_PARAMS0[MPR]=1, MR_MPR_CTL[MR_WR_SEL]=3, and |
| 781 | * MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1) |
| 782 | */ |
| 783 | |
| 784 | debug("2) Put all devices in MPR mode (Run MRW sequence (sequence=8)\n"); |
| 785 | |
| 786 | /* A-side */ |
| 787 | set_mpr_mode(priv, rank_mask, if_num, dimm_count, 1, 0); |
| 788 | /* B-side */ |
| 789 | set_mpr_mode(priv, rank_mask, if_num, dimm_count, 1, 1); |
| 790 | |
| 791 | /* |
| 792 | * a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and set |
| 793 | * the value you would like directly into |
| 794 | * MR_MPR_CTL[MR_WR_ADDR] |
| 795 | */ |
| 796 | |
| 797 | /* |
| 798 | * 3) Disable RCD Parity (if previously enabled) - parity does not |
| 799 | * work if inversion disabled |
| 800 | */ |
| 801 | |
| 802 | debug("3) Disable RCD Parity\n"); |
| 803 | |
| 804 | /* |
| 805 | * 4) Disable Inversion in the RCD. |
| 806 | * a. I did (3&4) via the RDIMM sequence (seq_sel=7), but it |
| 807 | * may be easier to use the MRW sequence (seq_sel=8). Just set |
| 808 | * MR_MPR_CTL[MR_WR_SEL]=7, MR_MPR_CTL[MR_WR_ADDR][3:0]=data, |
| 809 | * MR_MPR_CTL[MR_WR_ADDR][7:4]=RCD reg |
| 810 | */ |
| 811 | |
| 812 | debug("4) Disable Inversion in the RCD.\n"); |
| 813 | |
| 814 | set_dram_output_inversion(priv, if_num, dimm_count, rank_mask, 1); |
| 815 | |
| 816 | /* |
| 817 | * 5) Disable CONTROL[RDIMM_ENA] so that MR sequence goes out |
| 818 | * non-inverted. |
| 819 | */ |
| 820 | |
| 821 | debug("5) Disable CONTROL[RDIMM_ENA]\n"); |
| 822 | |
| 823 | set_rdimm_mode(priv, if_num, 0); |
| 824 | |
| 825 | /* |
| 826 | * 6) Write all 4 MPR registers with the desired pattern (have to |
| 827 | * do this for all enabled ranks) |
| 828 | * a. MR_MPR_CTL.MPR_WR=1, MR_MPR_CTL.MPR_LOC=0..3, |
| 829 | * MR_MPR_CTL.MR_WR_SEL=0, MR_MPR_CTL.MR_WR_ADDR[7:0]=pattern |
| 830 | */ |
| 831 | |
| 832 | debug("6) Write all 4 MPR page 0 Training Patterns\n"); |
| 833 | |
| 834 | write_mpr_page0_pattern(priv, rank_mask, if_num, dimm_count, 0x55, 0x8); |
| 835 | |
| 836 | /* 7) Re-enable RDIMM_ENA */ |
| 837 | |
| 838 | debug("7) Re-enable RDIMM_ENA\n"); |
| 839 | |
| 840 | set_rdimm_mode(priv, if_num, 1); |
| 841 | |
| 842 | /* 8) Re-enable RDIMM inversion */ |
| 843 | |
| 844 | debug("8) Re-enable RDIMM inversion\n"); |
| 845 | |
| 846 | set_dram_output_inversion(priv, if_num, dimm_count, rank_mask, 0); |
| 847 | |
| 848 | /* 9) Re-enable RDIMM parity (if desired) */ |
| 849 | |
| 850 | debug("9) Re-enable RDIMM parity (if desired)\n"); |
| 851 | |
| 852 | /* |
| 853 | * 10)Take B-side devices out of MPR mode (Run MRW sequence |
| 854 | * (sequence=8) with MODEREG_PARAMS0[MPRLOC]=0, |
| 855 | * MODEREG_PARAMS0[MPR]=0, MR_MPR_CTL[MR_WR_SEL]=3, and |
| 856 | * MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1) |
| 857 | */ |
| 858 | |
| 859 | debug("10)Take B-side devices out of MPR mode\n"); |
| 860 | |
| 861 | set_mpr_mode(priv, rank_mask, if_num, dimm_count, |
| 862 | /* mpr */ 0, /* bg1 */ 1); |
| 863 | |
| 864 | /* |
| 865 | * a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and |
| 866 | * set the value you would like directly into MR_MPR_CTL[MR_WR_ADDR] |
| 867 | */ |
| 868 | |
| 869 | /* 11)Re-enable refresh (REF_ZQCS_INT=previous value) */ |
| 870 | |
| 871 | debug("11)Re-enable refresh (REF_ZQCS_INT=previous value)\n"); |
| 872 | |
| 873 | lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); |
| 874 | lmc_config.cn78xx.ref_zqcs_int = save_ref_zqcs_int; |
| 875 | lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64); |
| 876 | } |
| 877 | |
| 878 | static int validate_hwl_seq(int *wl, int *seq) |
| 879 | { |
| 880 | // sequence index, step through the sequence array |
| 881 | int seqx; |
| 882 | int bitnum; |
| 883 | |
| 884 | seqx = 0; |
| 885 | |
| 886 | while (seq[seqx + 1] >= 0) { // stop on next seq entry == -1 |
| 887 | // but now, check current versus next |
| 888 | bitnum = (wl[seq[seqx]] << 2) | wl[seq[seqx + 1]]; |
| 889 | // magic validity number (see matrix above) |
| 890 | if (!((1 << bitnum) & 0xBDE7)) |
| 891 | return 1; |
| 892 | seqx++; |
| 893 | } |
| 894 | |
| 895 | return 0; |
| 896 | } |
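/*
 * The 0xBDE7 constant encodes the allowed (current, next) pairs of
 * 2-bit coarse settings: bit (current * 4 + next) is set exactly when
 * ((next - current) & 3) != 3.  In other words, walking the byte lanes
 * in the given sequence, the hardware write-leveling value may repeat
 * or step forward by one or two positions (mod 4), but falling back by
 * one position is reported as an error.
 */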
| 897 | |
| 898 | static int validate_hw_wl_settings(int if_num, |
| 899 | union cvmx_lmcx_wlevel_rankx |
| 900 | *lmc_wlevel_rank, int is_rdimm, int ecc_ena) |
| 901 | { |
| 902 | int wl[9], byte, errors; |
| 903 | |
| 904 | // arrange the sequences so |
| 905 | // index 0 has byte 0, etc, ECC in middle |
| 906 | int useq[] = { 0, 1, 2, 3, 8, 4, 5, 6, 7, -1 }; |
| 907 | // index 0 is ECC, then go down |
| 908 | int rseq1[] = { 8, 3, 2, 1, 0, -1 }; |
| 909 | // index 0 has byte 4, then go up |
| 910 | int rseq2[] = { 4, 5, 6, 7, -1 }; |
| 911 | // index 0 has byte 0, etc, no ECC |
| 912 | int useqno[] = { 0, 1, 2, 3, 4, 5, 6, 7, -1 }; |
| 913 | // index 0 is byte 3, then go down, no ECC |
| 914 | int rseq1no[] = { 3, 2, 1, 0, -1 }; |
| 915 | |
| 916 | // in the CSR, bytes 0-7 are always data, byte 8 is ECC |
| 917 | for (byte = 0; byte < (8 + ecc_ena); byte++) { |
| 918 | // preprocess :-) |
| 919 | wl[byte] = (get_wl_rank(lmc_wlevel_rank, byte) >> |
| 920 | 1) & 3; |
| 921 | } |
| 922 | |
| 923 | errors = 0; |
| 924 | if (is_rdimm) { // RDIMM order |
| 925 | errors = validate_hwl_seq(wl, (ecc_ena) ? rseq1 : rseq1no); |
| 926 | errors += validate_hwl_seq(wl, rseq2); |
| 927 | } else { // UDIMM order |
| 928 | errors = validate_hwl_seq(wl, (ecc_ena) ? useq : useqno); |
| 929 | } |
| 930 | |
| 931 | return errors; |
| 932 | } |
| 933 | |
| 934 | static unsigned int extr_wr(u64 u, int x) |
| 935 | { |
| 936 | return (unsigned int)(((u >> (x * 12 + 5)) & 0x3ULL) | |
| 937 | ((u >> (51 + x - 2)) & 0x4ULL)); |
| 938 | } |
| 939 | |
| 940 | static void insrt_wr(u64 *up, int x, int v) |
| 941 | { |
| 942 | u64 u = *up; |
| 943 | |
| 944 | u &= ~(((0x3ULL) << (x * 12 + 5)) | ((0x1ULL) << (51 + x))); |
| 945 | *up = (u | ((v & 0x3ULL) << (x * 12 + 5)) | |
| 946 | ((v & 0x4ULL) << (51 + x - 2))); |
| 947 | } |
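/*
 * extr_wr()/insrt_wr() pack and unpack a 3-bit per-rank value whose
 * bits are split across a 64-bit CSR image (the "wr" in the names
 * suggests the DDR4 write-recovery encoding): the two low bits live in
 * the per-rank 12-bit group at bit (x * 12 + 5) and the extension bit
 * lives at bit (51 + x).  For example, insrt_wr(&u, 1, 5) clears bits
 * 18:17 and 52, then sets bit 17 (from 5 & 3) and bit 52 (from 5 & 4),
 * leaving all other fields of *u untouched.
 */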
| 948 | |
| 949 | /* Read out Deskew Settings for DDR */ |
| 950 | |
| 951 | struct deskew_bytes { |
| 952 | u16 bits[8]; |
| 953 | }; |
| 954 | |
| 955 | struct deskew_data { |
| 956 | struct deskew_bytes bytes[9]; |
| 957 | }; |
| 958 | |
| 959 | struct dac_data { |
| 960 | int bytes[9]; |
| 961 | }; |
| 962 | |
| 963 | // T88 pass 1, skip 4=DAC |
| 964 | static const u8 dsk_bit_seq_p1[8] = { 0, 1, 2, 3, 5, 6, 7, 8 }; |
| 965 | // T88 Pass 2, skip 4=DAC and 5=DBI |
| 966 | static const u8 dsk_bit_seq_p2[8] = { 0, 1, 2, 3, 6, 7, 8, 9 }; |
| 967 | |
| 968 | static void get_deskew_settings(struct ddr_priv *priv, int if_num, |
| 969 | struct deskew_data *dskdat) |
| 970 | { |
| 971 | union cvmx_lmcx_phy_ctl phy_ctl; |
| 972 | union cvmx_lmcx_config lmc_config; |
| 973 | int bit_index; |
| 974 | int byte_lane, byte_limit; |
| 975 | // NOTE: these are for pass 2.x |
| 976 | int is_o78p2 = !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X); |
| 977 | const u8 *bit_seq = (is_o78p2) ? dsk_bit_seq_p2 : dsk_bit_seq_p1; |
| 978 | |
| 979 | lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); |
| 980 | byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena; |
| 981 | |
| 982 | memset(dskdat, 0, sizeof(*dskdat)); |
| 983 | |
| 984 | phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); |
| 985 | phy_ctl.s.dsk_dbg_clk_scaler = 3; |
| 986 | |
| 987 | for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { |
| 988 | phy_ctl.s.dsk_dbg_byte_sel = byte_lane; // set byte lane |
| 989 | |
| 990 | for (bit_index = 0; bit_index < 8; ++bit_index) { |
| 991 | // set bit number and start read sequence |
| 992 | phy_ctl.s.dsk_dbg_bit_sel = bit_seq[bit_index]; |
| 993 | phy_ctl.s.dsk_dbg_rd_start = 1; |
| 994 | lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); |
| 995 | |
| 996 | // poll for read sequence to complete |
| 997 | do { |
| 998 | phy_ctl.u64 = |
| 999 | lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); |
| 1000 | } while (phy_ctl.s.dsk_dbg_rd_complete != 1); |
| 1001 | |
| 1002 | // record the data |
| 1003 | dskdat->bytes[byte_lane].bits[bit_index] = |
| 1004 | phy_ctl.s.dsk_dbg_rd_data & 0x3ff; |
| 1005 | } |
| 1006 | } |
| 1007 | } |
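/*
 * Each entry captured above is the raw 10-bit dsk_dbg_rd_data value.
 * The callers in this file decode it as: bits [2:0] are status flags
 * (bit 0 set = locked, bits 2:1 = saturated high/low) and bits [9:3]
 * are the deskew tap setting itself, 0..127.
 */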
| 1008 | |
| 1009 | static void display_deskew_settings(struct ddr_priv *priv, int if_num, |
| 1010 | struct deskew_data *dskdat, |
| 1011 | int print_enable) |
| 1012 | { |
| 1013 | int byte_lane; |
| 1014 | int bit_num; |
| 1015 | u16 flags, deskew; |
| 1016 | union cvmx_lmcx_config lmc_config; |
| 1017 | int byte_limit; |
| 1018 | const char *fc = " ?-=+*#&"; |
| 1019 | |
| 1020 | lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); |
| 1021 | byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena; |
| 1022 | |
| 1023 | if (print_enable) { |
| 1024 | debug("N0.LMC%d: Deskew Data: Bit => :", |
| 1025 | if_num); |
| 1026 | for (bit_num = 7; bit_num >= 0; --bit_num) |
| 1027 | debug(" %3d ", bit_num); |
| 1028 | debug("\n"); |
| 1029 | } |
| 1030 | |
| 1031 | for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { |
| 1032 | if (print_enable) |
| 1033 | debug("N0.LMC%d: Bit Deskew Byte %d %s :", |
| 1034 | if_num, byte_lane, |
| 1035 | (print_enable >= 3) ? "FINAL" : " "); |
| 1036 | |
| 1037 | for (bit_num = 7; bit_num >= 0; --bit_num) { |
| 1038 | flags = dskdat->bytes[byte_lane].bits[bit_num] & 7; |
| 1039 | deskew = dskdat->bytes[byte_lane].bits[bit_num] >> 3; |
| 1040 | |
| 1041 | if (print_enable) |
| 1042 | debug(" %3d %c", deskew, fc[flags ^ 1]); |
| 1043 | |
| 1044 | } /* for (bit_num = 7; bit_num >= 0; --bit_num) */ |
| 1045 | |
| 1046 | if (print_enable) |
| 1047 | debug("\n"); |
| 1048 | } |
| 1049 | } |
| 1050 | |
| 1051 | static void override_deskew_settings(struct ddr_priv *priv, int if_num, |
| 1052 | struct deskew_data *dskdat) |
| 1053 | { |
| 1054 | union cvmx_lmcx_phy_ctl phy_ctl; |
| 1055 | union cvmx_lmcx_config lmc_config; |
| 1056 | |
| 1057 | int bit, byte_lane, byte_limit; |
| 1058 | u64 csr_data; |
| 1059 | |
| 1060 | lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); |
| 1061 | byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena; |
| 1062 | |
| 1063 | phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); |
| 1064 | |
| 1065 | phy_ctl.s.phy_reset = 0; |
| 1066 | phy_ctl.s.dsk_dbg_num_bits_sel = 1; |
| 1067 | phy_ctl.s.dsk_dbg_offset = 0; |
| 1068 | phy_ctl.s.dsk_dbg_clk_scaler = 3; |
| 1069 | |
| 1070 | phy_ctl.s.dsk_dbg_wr_mode = 1; |
| 1071 | phy_ctl.s.dsk_dbg_load_dis = 0; |
| 1072 | phy_ctl.s.dsk_dbg_overwrt_ena = 0; |
| 1073 | |
| 1074 | phy_ctl.s.phy_dsk_reset = 0; |
| 1075 | |
| 1076 | lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); |
| 1077 | lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); |
| 1078 | |
| 1079 | for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { |
| 1080 | csr_data = 0; |
| 1081 | // FIXME: can we ignore DBI? |
| 1082 | for (bit = 0; bit < 8; ++bit) { |
| 1083 | // fetch input and adjust |
| 1084 | u64 bits = (dskdat->bytes[byte_lane].bits[bit] >> 3) & |
| 1085 | 0x7F; |
| 1086 | |
| 1087 | /* |
| 1088 | * lmc_general_purpose0.data[6:0] // DQ0 |
| 1089 | * lmc_general_purpose0.data[13:7] // DQ1 |
| 1090 | * lmc_general_purpose0.data[20:14] // DQ2 |
| 1091 | * lmc_general_purpose0.data[27:21] // DQ3 |
| 1092 | * lmc_general_purpose0.data[34:28] // DQ4 |
| 1093 | * lmc_general_purpose0.data[41:35] // DQ5 |
| 1094 | * lmc_general_purpose0.data[48:42] // DQ6 |
| 1095 | * lmc_general_purpose0.data[55:49] // DQ7 |
| 1096 | * lmc_general_purpose0.data[62:56] // DBI |
| 1097 | */ |
| 1098 | csr_data |= (bits << (7 * bit)); |
| 1099 | |
| 1100 | } /* for (bit = 0; bit < 8; ++bit) */ |
| 1101 | |
| 1102 | // update GP0 with the bit data for this byte lane |
| 1103 | lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(if_num), csr_data); |
| 1104 | lmc_rd(priv, CVMX_LMCX_GENERAL_PURPOSE0(if_num)); |
| 1105 | |
| 1106 | // start the deskew load sequence |
| 1107 | phy_ctl.s.dsk_dbg_byte_sel = byte_lane; |
| 1108 | phy_ctl.s.dsk_dbg_rd_start = 1; |
| 1109 | lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); |
| 1110 | |
| 1111 | // poll for read sequence to complete |
| 1112 | do { |
| 1113 | udelay(100); |
| 1114 | phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); |
| 1115 | } while (phy_ctl.s.dsk_dbg_rd_complete != 1); |
| 1116 | } |
| 1117 | |
| 1118 | // tell phy to use the new settings |
| 1119 | phy_ctl.s.dsk_dbg_overwrt_ena = 1; |
| 1120 | phy_ctl.s.dsk_dbg_rd_start = 0; |
| 1121 | lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); |
| 1122 | |
| 1123 | phy_ctl.s.dsk_dbg_wr_mode = 0; |
| 1124 | lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); |
| 1125 | } |
| 1126 | |
| 1127 | static void process_by_rank_dac(struct ddr_priv *priv, int if_num, |
| 1128 | int rank_mask, struct dac_data *dacdat) |
| 1129 | { |
| 1130 | union cvmx_lmcx_config lmc_config; |
| 1131 | int rankx, byte_lane; |
| 1132 | int byte_limit; |
| 1133 | int rank_count; |
| 1134 | struct dac_data dacsum; |
| 1135 | int lane_probs; |
| 1136 | |
| 1137 | lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); |
| 1138 | byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena; |
| 1139 | |
| 1140 | memset((void *)&dacsum, 0, sizeof(dacsum)); |
| 1141 | rank_count = 0; |
| 1142 | lane_probs = 0; |
| 1143 | |
| 1144 | for (rankx = 0; rankx < 4; rankx++) { |
| 1145 | if (!(rank_mask & (1 << rankx))) |
| 1146 | continue; |
| 1147 | rank_count++; |
| 1148 | |
| 1149 | display_dac_dbi_settings(if_num, /*dac */ 1, |
| 1150 | lmc_config.s.ecc_ena, |
| 1151 | &dacdat[rankx].bytes[0], |
| 1152 | "By-Ranks VREF"); |
| 1153 | // sum |
| 1154 | for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { |
| 1155 | if (rank_count == 2) { |
| 1156 | int ranks_diff = |
| 1157 | abs((dacsum.bytes[byte_lane] - |
| 1158 | dacdat[rankx].bytes[byte_lane])); |
| 1159 | |
| 1160 | // FIXME: is 19 a good number? |
| 1161 | if (ranks_diff > 19) |
| 1162 | lane_probs |= (1 << byte_lane); |
| 1163 | } |
| 1164 | dacsum.bytes[byte_lane] += |
| 1165 | dacdat[rankx].bytes[byte_lane]; |
| 1166 | } |
| 1167 | } |
| 1168 | |
| 1169 | // average |
| 1170 | for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) |
| 1171 | dacsum.bytes[byte_lane] /= rank_count; // FIXME: nint? |
| 1172 | |
| 1173 | display_dac_dbi_settings(if_num, /*dac */ 1, lmc_config.s.ecc_ena, |
| 1174 | &dacsum.bytes[0], "All-Rank VREF"); |
| 1175 | |
| 1176 | if (lane_probs) { |
| 1177 | debug("N0.LMC%d: All-Rank VREF DAC Problem Bytelane(s): 0x%03x\n", |
| 1178 | if_num, lane_probs); |
| 1179 | } |
| 1180 | |
| 1181 | // finally, write the averaged DAC values |
| 1182 | for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { |
| 1183 | load_dac_override(priv, if_num, dacsum.bytes[byte_lane], |
| 1184 | byte_lane); |
| 1185 | } |
| 1186 | } |
| 1187 | |
| 1188 | static void process_by_rank_dsk(struct ddr_priv *priv, int if_num, |
| 1189 | int rank_mask, struct deskew_data *dskdat) |
| 1190 | { |
| 1191 | union cvmx_lmcx_config lmc_config; |
| 1192 | int rankx, lane, bit; |
| 1193 | int byte_limit; |
| 1194 | struct deskew_data dsksum, dskcnt; |
| 1195 | u16 deskew; |
| 1196 | |
| 1197 | lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); |
| 1198 | byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena; |
| 1199 | |
| 1200 | memset((void *)&dsksum, 0, sizeof(dsksum)); |
| 1201 | memset((void *)&dskcnt, 0, sizeof(dskcnt)); |
| 1202 | |
| 1203 | for (rankx = 0; rankx < 4; rankx++) { |
| 1204 | if (!(rank_mask & (1 << rankx))) |
| 1205 | continue; |
| 1206 | |
| 1207 | // sum ranks |
| 1208 | for (lane = 0; lane < byte_limit; lane++) { |
| 1209 | for (bit = 0; bit < 8; ++bit) { |
| 1210 | deskew = dskdat[rankx].bytes[lane].bits[bit]; |
| 1211 | // if flags indicate sat hi or lo, skip it |
| 1212 | if (deskew & 6) |
| 1213 | continue; |
| 1214 | |
| 1215 | // clear flags |
| 1216 | dsksum.bytes[lane].bits[bit] += |
| 1217 | deskew & ~7; |
| 1218 | // count entries |
| 1219 | dskcnt.bytes[lane].bits[bit] += 1; |
| 1220 | } |
| 1221 | } |
| 1222 | } |
| 1223 | |
| 1224 | // average ranks |
| 1225 | for (lane = 0; lane < byte_limit; lane++) { |
| 1226 | for (bit = 0; bit < 8; ++bit) { |
| 1227 | int div = dskcnt.bytes[lane].bits[bit]; |
| 1228 | |
| 1229 | if (div > 0) { |
| 1230 | dsksum.bytes[lane].bits[bit] /= div; |
| 1231 | // clear flags |
| 1232 | dsksum.bytes[lane].bits[bit] &= ~7; |
| 1233 | // set LOCK |
| 1234 | dsksum.bytes[lane].bits[bit] |= 1; |
| 1235 | } else { |
| 1236 | // FIXME? use reset value? |
| 1237 | dsksum.bytes[lane].bits[bit] = |
| 1238 | (64 << 3) | 1; |
| 1239 | } |
| 1240 | } |
| 1241 | } |
| 1242 | |
| 1243 | // TME for FINAL version |
| 1244 | display_deskew_settings(priv, if_num, &dsksum, /*VBL_TME */ 3); |
| 1245 | |
| 1246 | // finally, write the averaged DESKEW values |
| 1247 | override_deskew_settings(priv, if_num, &dsksum); |
| 1248 | } |
| 1249 | |
| 1250 | struct deskew_counts { |
| 1251 | int saturated; // number saturated |
| 1252 | int unlocked; // number unlocked |
| 1253 | int nibrng_errs; // nibble range errors |
| 1254 | int nibunl_errs; // nibble unlocked errors |
| 1255 | int bitval_errs; // bit value errors |
| 1256 | }; |
| 1257 | |
| 1258 | #define MIN_BITVAL 17 |
| 1259 | #define MAX_BITVAL 110 |
| 1260 | |
| 1261 | static void validate_deskew_training(struct ddr_priv *priv, int rank_mask, |
| 1262 | int if_num, struct deskew_counts *counts, |
| 1263 | int print_flags) |
| 1264 | { |
| 1265 | int byte_lane, bit_index, nib_num; |
| 1266 | int nibrng_errs, nibunl_errs, bitval_errs; |
| 1267 | union cvmx_lmcx_config lmc_config; |
| 1268 | s16 nib_min[2], nib_max[2], nib_unl[2]; |
| 1269 | int byte_limit; |
| 1270 | int print_enable = print_flags & 1; |
| 1271 | struct deskew_data dskdat; |
| 1272 | s16 flags, deskew; |
| 1273 | const char *fc = " ?-=+*#&"; |
| 1274 | int bit_last; |
| 1275 | |
| 1276 | lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); |
| 1277 | byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena; |
| 1278 | |
| 1279 | memset(counts, 0, sizeof(struct deskew_counts)); |
| 1280 | |
| 1281 | get_deskew_settings(priv, if_num, &dskdat); |
| 1282 | |
| 1283 | if (print_enable) { |
| 1284 | debug("N0.LMC%d: Deskew Settings: Bit => :", |
| 1285 | if_num); |
| 1286 | for (bit_index = 7; bit_index >= 0; --bit_index) |
| 1287 | debug(" %3d ", bit_index); |
| 1288 | debug("\n"); |
| 1289 | } |
| 1290 | |
| 1291 | for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) { |
| 1292 | if (print_enable) |
| 1293 | debug("N0.LMC%d: Bit Deskew Byte %d %s :", |
| 1294 | if_num, byte_lane, |
| 1295 | (print_flags & 2) ? "FINAL" : " "); |
| 1296 | |
| 1297 | nib_min[0] = 127; |
| 1298 | nib_min[1] = 127; |
| 1299 | nib_max[0] = 0; |
| 1300 | nib_max[1] = 0; |
| 1301 | nib_unl[0] = 0; |
| 1302 | nib_unl[1] = 0; |
| 1303 | |
| 1304 | if (lmc_config.s.mode32b == 1 && byte_lane == 4) { |
| 1305 | bit_last = 3; |
| 1306 | if (print_enable) |
| 1307 | debug(" "); |
| 1308 | } else { |
| 1309 | bit_last = 7; |
| 1310 | } |
| 1311 | |
| 1312 | for (bit_index = bit_last; bit_index >= 0; --bit_index) { |
| 1313 | nib_num = (bit_index > 3) ? 1 : 0; |
| 1314 | |
| 1315 | flags = dskdat.bytes[byte_lane].bits[bit_index] & 7; |
| 1316 | deskew = dskdat.bytes[byte_lane].bits[bit_index] >> 3; |
| 1317 | |
| 1318 | counts->saturated += !!(flags & 6); |
| 1319 | |
| 1320 | // Do range calc even when locked; it could happen |
| 1321 | // that a bit is still unlocked after final retry, |
| 1322 | // and we want to have an external retry if a RANGE |
| 1323 | // error is present at exit... |
| 1324 | nib_min[nib_num] = min(nib_min[nib_num], deskew); |
| 1325 | nib_max[nib_num] = max(nib_max[nib_num], deskew); |
| 1326 | |
| 1327 | if (!(flags & 1)) { // only when not locked |
| 1328 | counts->unlocked += 1; |
| 1329 | nib_unl[nib_num] += 1; |
| 1330 | } |
| 1331 | |
| 1332 | if (print_enable) |
| 1333 | debug(" %3d %c", deskew, fc[flags ^ 1]); |
| 1334 | } |
| 1335 | |
| 1336 | /* |
| 1337 | * Now look for nibble errors |
| 1338 | * |
| 1339 | * For bit 55, it looks like a bit deskew problem. When the |
| 1340 | * upper nibble of byte 6 needs to go to saturation, bit 7 |
| 1341 | * of byte 6 locks prematurely at 64. For DIMMs with raw |
| 1342 | * card A and B, can we reset the deskew training when we |
| 1343 | * encounter this case? The reset criteria should be looking |
| 1344 | * at one nibble at a time for raw card A and B; if the |
| 1345 | * bit-deskew setting within a nibble is different by > 33, |
| 1346 | * we'll issue a reset to the bit deskew training. |
| 1347 | * |
| 1348 | * LMC0 Bit Deskew Byte(6): 64 0 - 0 - 0 - 26 61 35 64 |
| 1349 | */ |
| 1350 | // upper nibble range, then lower nibble range |
| 1351 | nibrng_errs = ((nib_max[1] - nib_min[1]) > 33) ? 1 : 0; |
| 1352 | nibrng_errs |= ((nib_max[0] - nib_min[0]) > 33) ? 1 : 0; |
| 1353 | |
| 1354 | // check for nibble all unlocked |
| 1355 | nibunl_errs = ((nib_unl[0] == 4) || (nib_unl[1] == 4)) ? 1 : 0; |
| 1356 | |
| 1357 | // check for bit value errors, ie < 17 or > 110 |
| 1358 | // FIXME? assume max always > MIN_BITVAL and min < MAX_BITVAL |
| 1359 | bitval_errs = ((nib_max[1] > MAX_BITVAL) || |
| 1360 | (nib_max[0] > MAX_BITVAL)) ? 1 : 0; |
| 1361 | bitval_errs |= ((nib_min[1] < MIN_BITVAL) || |
| 1362 | (nib_min[0] < MIN_BITVAL)) ? 1 : 0; |
| 1363 | |
| 1364 | if ((nibrng_errs != 0 || nibunl_errs != 0 || |
| 1365 | bitval_errs != 0) && print_enable) { |
| 1366 | debug(" %c%c%c", |
| 1367 | (nibrng_errs) ? 'R' : ' ', |
| 1368 | (nibunl_errs) ? 'U' : ' ', |
| 1369 | (bitval_errs) ? 'V' : ' '); |
| 1370 | } |
| 1371 | |
| 1372 | if (print_enable) |
| 1373 | debug("\n"); |
| 1374 | |
| 1375 | counts->nibrng_errs |= (nibrng_errs << byte_lane); |
| 1376 | counts->nibunl_errs |= (nibunl_errs << byte_lane); |
| 1377 | counts->bitval_errs |= (bitval_errs << byte_lane); |
| 1378 | } |
| 1379 | } |
| 1380 | |
| 1381 | static unsigned short load_dac_override(struct ddr_priv *priv, int if_num, |
| 1382 | int dac_value, int byte) |
| 1383 | { |
| 1384 | union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3; |
| 1385 | // single bytelanes incr by 1; A is for ALL |
| 1386 | int bytex = (byte == 0x0A) ? byte : byte + 1; |
| 1387 | |
| 1388 | ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); |
| 1389 | |
| 1390 | SET_DDR_DLL_CTL3(byte_sel, bytex); |
| 1391 | SET_DDR_DLL_CTL3(offset, dac_value >> 1); |
| 1392 | |
| 1393 | ddr_dll_ctl3.cn73xx.bit_select = 0x9; /* No-op */ |
| 1394 | lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); |
| 1395 | |
| 1396 | ddr_dll_ctl3.cn73xx.bit_select = 0xC; /* vref bypass setting load */ |
| 1397 | lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); |
| 1398 | |
| 1399 | ddr_dll_ctl3.cn73xx.bit_select = 0xD; /* vref bypass on. */ |
| 1400 | lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); |
| 1401 | |
| 1402 | ddr_dll_ctl3.cn73xx.bit_select = 0x9; /* No-op */ |
| 1403 | lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); |
| 1404 | |
| 1405 | lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); // flush writes |
| 1406 | |
| 1407 | return (unsigned short)GET_DDR_DLL_CTL3(offset); |
| 1408 | } |
| 1409 | |
| 1410 | // arg dac_or_dbi is 1 for DAC, 0 for DBI |
| 1411 | // returns 9 entries (bytelanes 0 through 8) in settings[] |
| 1412 | // returns 0 if OK, -1 if a problem |
| 1413 | static int read_dac_dbi_settings(struct ddr_priv *priv, int if_num, |
| 1414 | int dac_or_dbi, int *settings) |
| 1415 | { |
| 1416 | union cvmx_lmcx_phy_ctl phy_ctl; |
| 1417 | int byte_lane, bit_num; |
| 1418 | int deskew; |
| 1419 | int dac_value; |
| 1420 | int new_deskew_layout = 0; |
| 1421 | |
| 1422 | new_deskew_layout = octeon_is_cpuid(OCTEON_CN73XX) || |
| 1423 | octeon_is_cpuid(OCTEON_CNF75XX); |
| 1424 | new_deskew_layout |= (octeon_is_cpuid(OCTEON_CN78XX) && |
| 1425 | !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)); |
| 1426 | |
| 1427 | phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); |
| 1428 | phy_ctl.s.dsk_dbg_clk_scaler = 3; |
| 1429 | lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); |
| 1430 | |
| 1431 | bit_num = (dac_or_dbi) ? 4 : 5; |
| 1432 | // DBI not available |
| 1433 | if (bit_num == 5 && !new_deskew_layout) |
| 1434 | return -1; |
| 1435 | |
| 1436 | // FIXME: always assume ECC is available |
| 1437 | for (byte_lane = 8; byte_lane >= 0; --byte_lane) { |
| 1438 | //set byte lane and bit to read |
| 1439 | phy_ctl.s.dsk_dbg_bit_sel = bit_num; |
| 1440 | phy_ctl.s.dsk_dbg_byte_sel = byte_lane; |
| 1441 | lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); |
| 1442 | |
| 1443 | //start read sequence |
| 1444 | phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); |
| 1445 | phy_ctl.s.dsk_dbg_rd_start = 1; |
| 1446 | lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); |
| 1447 | |
| 1448 | //poll for read sequence to complete |
| 1449 | do { |
| 1450 | phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); |
| 1451 | } while (phy_ctl.s.dsk_dbg_rd_complete != 1); |
| 1452 | |
| 1453 | // keep the flag bits where they are for DBI |
| 1454 | deskew = phy_ctl.s.dsk_dbg_rd_data; /* >> 3 */ |
| 1455 | dac_value = phy_ctl.s.dsk_dbg_rd_data & 0xff; |
| 1456 | |
| 1457 | settings[byte_lane] = (dac_or_dbi) ? dac_value : deskew; |
| 1458 | } |
| 1459 | |
| 1460 | return 0; |
| 1461 | } |
| 1462 | |
| 1463 | // print out the DBI settings array |
| 1464 | // arg dac_or_dbi is 1 for DAC, 0 for DBI |
| 1465 | static void display_dac_dbi_settings(int lmc, int dac_or_dbi, |
| 1466 | int ecc_ena, int *settings, char *title) |
| 1467 | { |
| 1468 | int byte; |
| 1469 | int flags; |
| 1470 | int deskew; |
| 1471 | const char *fc = " ?-=+*#&"; |
| 1472 | |
| 1473 | debug("N0.LMC%d: %s %s Settings %d:0 :", |
| 1474 | lmc, title, (dac_or_dbi) ? "DAC" : "DBI", 7 + ecc_ena); |
| 1475 | // FIXME: what about 32-bit mode? |
| 1476 | for (byte = (7 + ecc_ena); byte >= 0; --byte) { |
| 1477 | if (dac_or_dbi) { // DAC |
| 1478 | flags = 1; // say its locked to get blank |
| 1479 | deskew = settings[byte] & 0xff; |
| 1480 | } else { // DBI |
| 1481 | flags = settings[byte] & 7; |
| 1482 | deskew = (settings[byte] >> 3) & 0x7f; |
| 1483 | } |
| 1484 | debug(" %3d %c", deskew, fc[flags ^ 1]); |
| 1485 | } |
| 1486 | debug("\n"); |
| 1487 | } |
| 1488 | |
| 1489 | // Find a HWL majority |
| 1490 | static int find_wl_majority(struct wlevel_bitcnt *bc, int *mx, int *mc, |
| 1491 | int *xc, int *cc) |
| 1492 | { |
| 1493 | int ix, ic; |
| 1494 | |
| 1495 | *mx = -1; |
| 1496 | *mc = 0; |
| 1497 | *xc = 0; |
| 1498 | *cc = 0; |
| 1499 | |
| 1500 | for (ix = 0; ix < 4; ix++) { |
| 1501 | ic = bc->bitcnt[ix]; |
| 1502 | |
| 1503 | // make a bitmask of the ones with a count |
| 1504 | if (ic > 0) { |
| 1505 | *mc |= (1 << ix); |
| 1506 | *cc += 1; // count how many had non-zero counts |
| 1507 | } |
| 1508 | |
| 1509 | // find the majority |
| 1510 | if (ic > *xc) { // new max? |
| 1511 | *xc = ic; // yes |
| 1512 | *mx = ix; // set its index |
| 1513 | } |
| 1514 | } |
| 1515 | |
| 1516 | return (*mx << 1); |
| 1517 | } |
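| | |
| | /* |
| |  * Illustration with hypothetical counts: bc->bitcnt[] = { 0, 3, 2, 0 } |
| |  * yields *mx = 1 and *xc = 3 (the majority), *mc = 0x6 and *cc = 2 |
| |  * (two distinct values seen), and the function returns 1 << 1 = 2. |
| |  */ |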
| 1518 | |
| 1519 | // Evaluate the DAC settings array |
| 1520 | static int evaluate_dac_settings(int if_64b, int ecc_ena, int *settings) |
| 1521 | { |
| 1522 | int byte, lane, dac, comp; |
| 1523 | int last = (if_64b) ? 7 : 3; |
| 1524 | |
| 1525 | // FIXME: change the check...??? |
| 1526 | // this looks only for sets of DAC values whose max/min differ by a lot |
| 1527 | // let any EVEN go so long as it is within range... |
| 1528 | for (byte = (last + ecc_ena); byte >= 0; --byte) { |
| 1529 | dac = settings[byte] & 0xff; |
| 1530 | |
| 1531 | for (lane = (last + ecc_ena); lane >= 0; --lane) { |
| 1532 | comp = settings[lane] & 0xff; |
| 1533 | if (abs((dac - comp)) > 25) |
| 1534 | return 1; |
| 1535 | } |
| 1536 | } |
| 1537 | |
| 1538 | return 0; |
| 1539 | } |
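| | |
| | /* |
| |  * For instance (hypothetical values): a settings[] array containing |
| |  * both 62 and 90 makes evaluate_dac_settings() return 1, since their |
| |  * difference (28) exceeds the 25-step spread allowed above. |
| |  */ |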
| 1540 | |
| 1541 | static void perform_offset_training(struct ddr_priv *priv, int rank_mask, |
| 1542 | int if_num) |
| 1543 | { |
| 1544 | union cvmx_lmcx_phy_ctl lmc_phy_ctl; |
| 1545 | u64 orig_phy_ctl; |
| 1546 | const char *s; |
| 1547 | |
| 1548 | /* |
| 1549 | * 4.8.6 LMC Offset Training |
| 1550 | * |
| 1551 | * LMC requires input-receiver offset training. |
| 1552 | * |
| 1553 | * 1. Write LMC(0)_PHY_CTL[DAC_ON] = 1 |
| 1554 | */ |
| 1555 | lmc_phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); |
| 1556 | orig_phy_ctl = lmc_phy_ctl.u64; |
| 1557 | lmc_phy_ctl.s.dac_on = 1; |
| 1558 | |
| 1559 | // allow full CSR override |
| 1560 | s = lookup_env_ull(priv, "ddr_phy_ctl"); |
| 1561 | if (s) |
| 1562 | lmc_phy_ctl.u64 = strtoull(s, NULL, 0); |
| 1563 | |
| 1564 | // do not print or write if CSR does not change... |
| 1565 | if (lmc_phy_ctl.u64 != orig_phy_ctl) { |
| 1566 | debug("PHY_CTL : 0x%016llx\n", |
| 1567 | lmc_phy_ctl.u64); |
| 1568 | lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), lmc_phy_ctl.u64); |
| 1569 | } |
| 1570 | |
| 1571 | /* |
| 1572 | * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0B and |
| 1573 | * LMC(0)_SEQ_CTL[INIT_START] = 1. |
| 1574 | * |
| 1575 | * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1. |
| 1576 | */ |
| 1577 | /* Start Offset training sequence */ |
| 1578 | oct3_ddr3_seq(priv, rank_mask, if_num, 0x0B); |
| 1579 | } |
| 1580 | |
| 1581 | static void perform_internal_vref_training(struct ddr_priv *priv, |
| 1582 | int rank_mask, int if_num) |
| 1583 | { |
| 1584 | union cvmx_lmcx_ext_config ext_config; |
| 1585 | union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3; |
| 1586 | |
| 1587 | // First, make sure all byte-lanes are out of VREF bypass mode |
| 1588 | ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); |
| 1589 | |
| 1590 | ddr_dll_ctl3.cn78xx.byte_sel = 0x0A; /* all byte-lanes */ |
| 1591 | ddr_dll_ctl3.cn78xx.bit_select = 0x09; /* No-op */ |
| 1592 | lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); |
| 1593 | |
| 1594 | ddr_dll_ctl3.cn78xx.bit_select = 0x0E; /* vref bypass off. */ |
| 1595 | lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); |
| 1596 | |
| 1597 | ddr_dll_ctl3.cn78xx.bit_select = 0x09; /* No-op */ |
| 1598 | lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); |
| 1599 | |
| 1600 | /* |
| 1601 | * 4.8.7 LMC Internal vref Training |
| 1602 | * |
| 1603 | * LMC requires input-reference-voltage training. |
| 1604 | * |
| 1605 | * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 0. |
| 1606 | */ |
| 1607 | ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num)); |
| 1608 | ext_config.s.vrefint_seq_deskew = 0; |
| 1609 | |
| 1610 | ddr_seq_print("Performing LMC sequence: vrefint_seq_deskew = %d\n", |
| 1611 | ext_config.s.vrefint_seq_deskew); |
| 1612 | |
| 1613 | lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_config.u64); |
| 1614 | |
| 1615 | /* |
| 1616 | * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0a and |
| 1617 | * LMC(0)_SEQ_CTL[INIT_START] = 1. |
| 1618 | * |
| 1619 | * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1. |
| 1620 | */ |
| 1621 | /* Start LMC Internal vref Training */ |
| 1622 | oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A); |
| 1623 | } |
| 1624 | |
| 1625 | #define dbg_avg(format, ...) // debug(format, ##__VA_ARGS__) |
| 1626 | |
| 1627 | static int process_samples_average(s16 *bytes, int num_samples, |
| 1628 | int lmc, int lane_no) |
| 1629 | { |
| 1630 | int i, sadj, sum = 0, ret, asum, trunc; |
| 1631 | s16 smin = 32767, smax = -32768; |
| 1632 | int nmin, nmax; |
| 1633 | //int rng; |
| 1634 | |
| 1635 | dbg_avg("DBG_AVG%d.%d: ", lmc, lane_no); |
| 1636 | |
| 1637 | for (i = 0; i < num_samples; i++) { |
| 1638 | sum += bytes[i]; |
| 1639 | if (bytes[i] < smin) |
| 1640 | smin = bytes[i]; |
| 1641 | if (bytes[i] > smax) |
| 1642 | smax = bytes[i]; |
| 1643 | dbg_avg(" %3d", bytes[i]); |
| 1644 | } |
| 1645 | |
| 1646 | nmin = 0; |
| 1647 | nmax = 0; |
| 1648 | for (i = 0; i < num_samples; i++) { |
| 1649 | if (bytes[i] == smin) |
| 1650 | nmin += 1; |
| 1651 | if (bytes[i] == smax) |
| 1652 | nmax += 1; |
| 1653 | } |
| 1654 | dbg_avg(" (min=%3d/%d, max=%3d/%d, range=%2d, samples=%2d)", |
| 1655 | smin, nmin, smax, nmax, smax - smin, num_samples); |
| 1656 | |
| 1657 | asum = sum - smin - smax; |
| 1658 | |
| 1659 | sadj = divide_nint(asum * 10, (num_samples - 2)); |
| 1660 | |
| 1661 | trunc = asum / (num_samples - 2); |
| 1662 | |
| 1663 | dbg_avg(" [%3d.%d, %3d]", sadj / 10, sadj % 10, trunc); |
| 1664 | |
| 1665 | sadj = divide_nint(sadj, 10); |
| 1666 | if (trunc & 1) |
| 1667 | ret = trunc; |
| 1668 | else if (sadj & 1) |
| 1669 | ret = sadj; |
| 1670 | else |
| 1671 | ret = trunc + 1; |
| 1672 | |
| 1673 | dbg_avg(" -> %3d\n", ret); |
| 1674 | |
| 1675 | return ret; |
| 1676 | } |
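| | |
| | /* |
| |  * Worked example with hypothetical samples: bytes[] = |
| |  * { 62, 64, 63, 65, 63, 63, 70 } drops the min (62) and max (70), |
| |  * leaving asum = 318 over 5 samples, so trunc = 63 and sadj rounds |
| |  * to 64; since trunc is odd, 63 is returned.  In effect this is a |
| |  * trimmed average that prefers an odd setting. |
| |  */ |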
| 1677 | |
| 1678 | #define DEFAULT_SAT_RETRY_LIMIT 11 // 1 + 10 retries |
| 1679 | |
| 1680 | #define default_lock_retry_limit 20 // 20 retries |
| 1681 | #define deskew_validation_delay 10000 // 10 millisecs |
| 1682 | |
| 1683 | static int perform_deskew_training(struct ddr_priv *priv, int rank_mask, |
| 1684 | int if_num, int spd_rawcard_aorb) |
| 1685 | { |
| 1686 | int unsaturated, locked; |
| 1687 | int sat_retries, sat_retries_limit; |
| 1688 | int lock_retries, lock_retries_total, lock_retries_limit; |
| 1689 | int print_first; |
| 1690 | int print_them_all; |
| 1691 | struct deskew_counts dsk_counts; |
| 1692 | union cvmx_lmcx_phy_ctl phy_ctl; |
| 1693 | char *s; |
| 1694 | int has_no_sat = octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) || |
| 1695 | octeon_is_cpuid(OCTEON_CNF75XX); |
| 1696 | int disable_bitval_retries = 1; // default to disabled |
| 1697 | |
| 1698 | debug("N0.LMC%d: Performing Deskew Training.\n", if_num); |
| 1699 | |
| 1700 | sat_retries = 0; |
| 1701 | sat_retries_limit = (has_no_sat) ? 5 : DEFAULT_SAT_RETRY_LIMIT; |
| 1702 | |
| 1703 | lock_retries_total = 0; |
| 1704 | unsaturated = 0; |
| 1705 | print_first = 1; // print the first one |
| 1706 | // set to true for printing all normal deskew attempts |
| 1707 | print_them_all = 0; |
| 1708 | |
| 1709 | // provide override for bitval_errs causing internal VREF retries |
| 1710 | s = env_get("ddr_disable_bitval_retries"); |
| 1711 | if (s) |
| 1712 | disable_bitval_retries = !!simple_strtoul(s, NULL, 0); |
| 1713 | |
| 1714 | lock_retries_limit = default_lock_retry_limit; |
| 1715 | if ((octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) || |
| 1716 | (octeon_is_cpuid(OCTEON_CN73XX)) || |
| 1717 | (octeon_is_cpuid(OCTEON_CNF75XX))) |
| 1718 | lock_retries_limit *= 2; // give new chips twice as many |
| 1719 | |
| 1720 | do { /* while (sat_retries < sat_retry_limit) */ |
| 1721 | /* |
| 1722 | * 4.8.8 LMC Deskew Training |
| 1723 | * |
| 1724 | * LMC requires input-read-data deskew training. |
| 1725 | * |
| 1726 | * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 1. |
| 1727 | */ |
| 1728 | |
| 1729 | union cvmx_lmcx_ext_config ext_config; |
| 1730 | |
| 1731 | ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num)); |
| 1732 | ext_config.s.vrefint_seq_deskew = 1; |
| 1733 | |
| 1734 | ddr_seq_print |
| 1735 | ("Performing LMC sequence: vrefint_seq_deskew = %d\n", |
| 1736 | ext_config.s.vrefint_seq_deskew); |
| 1737 | |
| 1738 | lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_config.u64); |
| 1739 | |
| 1740 | /* |
| 1741 | * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0A and |
| 1742 | * LMC(0)_SEQ_CTL[INIT_START] = 1. |
| 1743 | * |
| 1744 | * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1. |
| 1745 | */ |
| 1746 | |
| 1747 | phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); |
| 1748 | phy_ctl.s.phy_dsk_reset = 1; /* RESET Deskew sequence */ |
| 1749 | lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); |
| 1750 | |
| 1751 | /* LMC Deskew Training */ |
| 1752 | oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A); |
| 1753 | |
| 1754 | lock_retries = 0; |
| 1755 | |
| 1756 | perform_deskew_training: |
| 1757 | |
| 1758 | phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); |
| 1759 | phy_ctl.s.phy_dsk_reset = 0; /* Normal Deskew sequence */ |
| 1760 | lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); |
| 1761 | |
| 1762 | /* LMC Deskew Training */ |
| 1763 | oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A); |
| 1764 | |
| 1765 | // Moved this from validate_deskew_training |
| 1766 | /* Allow deskew results to stabilize before evaluating them. */ |
| 1767 | udelay(deskew_validation_delay); |
| 1768 | |
| 1769 | // Now go look at lock and saturation status... |
| 1770 | validate_deskew_training(priv, rank_mask, if_num, &dsk_counts, |
| 1771 | print_first); |
| 1772 | // after printing the first and not doing them all, no more |
| 1773 | if (print_first && !print_them_all) |
| 1774 | print_first = 0; |
| 1775 | |
| 1776 | unsaturated = (dsk_counts.saturated == 0); |
| 1777 | locked = (dsk_counts.unlocked == 0); |
| 1778 | |
| 1779 | // only do locking retries if unsaturated or rawcard A or B, |
| 1780 | // otherwise full SAT retry |
| 1781 | if (unsaturated || (spd_rawcard_aorb && !has_no_sat)) { |
| 1782 | if (!locked) { // and not locked |
| 1783 | lock_retries++; |
| 1784 | lock_retries_total++; |
| 1785 | if (lock_retries <= lock_retries_limit) { |
| 1786 | goto perform_deskew_training; |
| 1787 | } else { |
| 1788 | debug("N0.LMC%d: LOCK RETRIES failed after %d retries\n", |
| 1789 | if_num, lock_retries_limit); |
| 1790 | } |
| 1791 | } else { |
| 1792 | // only print if we did try |
| 1793 | if (lock_retries_total > 0) |
| 1794 | debug("N0.LMC%d: LOCK RETRIES successful after %d retries\n", |
| 1795 | if_num, lock_retries); |
| 1796 | } |
| 1797 | } /* if (unsaturated || spd_rawcard_aorb) */ |
| 1798 | |
| 1799 | ++sat_retries; |
| 1800 | |
| 1801 | /* |
| 1802 | * At this point, check for a DDR4 RDIMM that will not |
| 1803 | * benefit from SAT retries; if so, exit |
| 1804 | */ |
| 1805 | if (spd_rawcard_aorb && !has_no_sat) { |
| 1806 | debug("N0.LMC%d: Deskew Training Loop: Exiting for RAWCARD == A or B.\n", |
| 1807 | if_num); |
| 1808 | break; // no sat or lock retries |
| 1809 | } |
| 1810 | |
| 1811 | } while (!unsaturated && (sat_retries < sat_retries_limit)); |
| 1812 | |
| 1813 | debug("N0.LMC%d: Deskew Training %s. %d sat-retries, %d lock-retries\n", |
| 1814 | if_num, (sat_retries >= DEFAULT_SAT_RETRY_LIMIT) ? |
| 1815 | "Timed Out" : "Completed", sat_retries - 1, lock_retries_total); |
| 1816 | |
| 1817 | // FIXME? add saturation to reasons for fault return - give it a |
| 1818 | // chance via Internal VREF |
| 1819 | // FIXME? add OPTIONAL bit value to reasons for fault return - |
| 1820 | // give it a chance via Internal VREF |
| 1821 | if (dsk_counts.nibrng_errs != 0 || dsk_counts.nibunl_errs != 0 || |
| 1822 | (dsk_counts.bitval_errs != 0 && !disable_bitval_retries) || |
| 1823 | !unsaturated) { |
| 1824 | debug("N0.LMC%d: Nibble or Saturation Error(s) found, returning FAULT\n", |
| 1825 | if_num); |
| 1826 | // FIXME: do we want this output always for errors? |
| 1827 | validate_deskew_training(priv, rank_mask, if_num, |
| 1828 | &dsk_counts, 1); |
| 1829 | return -1; // we did retry locally, they did not help |
| 1830 | } |
| 1831 | |
| 1832 | // NOTE: we (currently) always print one last training validation |
| 1833 | // before starting Read Leveling... |
| 1834 | |
| 1835 | return 0; |
| 1836 | } |
| 1837 | |
| 1838 | #define SCALING_FACTOR (1000) |
| 1839 | |
| 1840 | // NOTE: this gets called for 1-rank and 2-rank DIMMs in single-slot config |
| 1841 | static int compute_vref_1slot_2rank(int rtt_wr, int rtt_park, int dqx_ctl, |
| 1842 | int rank_count, int dram_connection) |
| 1843 | { |
| 1844 | u64 reff_s; |
| 1845 | u64 rser_s = (dram_connection) ? 0 : 15; |
| 1846 | u64 vdd = 1200; |
| 1847 | u64 vref; |
| 1848 | // 99 == HiZ |
| 1849 | u64 rtt_wr_s = (((rtt_wr == 0) || rtt_wr == 99) ? |
| 1850 | 1 * 1024 * 1024 : rtt_wr); |
| 1851 | u64 rtt_park_s = (((rtt_park == 0) || ((rank_count == 1) && |
| 1852 | (rtt_wr != 0))) ? |
| 1853 | 1 * 1024 * 1024 : rtt_park); |
| 1854 | u64 dqx_ctl_s = (dqx_ctl == 0 ? 1 * 1024 * 1024 : dqx_ctl); |
| 1855 | int vref_value; |
| 1856 | u64 rangepc = 6000; // range1 base |
| 1857 | u64 vrefpc; |
| 1858 | int vref_range = 0; |
| 1859 | |
| 1860 | reff_s = divide_nint((rtt_wr_s * rtt_park_s), (rtt_wr_s + rtt_park_s)); |
| 1861 | |
| 1862 | vref = (((rser_s + dqx_ctl_s) * SCALING_FACTOR) / |
| 1863 | (rser_s + dqx_ctl_s + reff_s)) + SCALING_FACTOR; |
| 1864 | |
| 1865 | vref = (vref * vdd) / 2 / SCALING_FACTOR; |
| 1866 | |
| 1867 | vrefpc = (vref * 100 * 100) / vdd; |
| 1868 | |
| 1869 | if (vrefpc < rangepc) { // < range1 base, use range2 |
| 1870 | vref_range = 1 << 6; // set bit A6 for range2 |
| 1871 | rangepc = 4500; // range2 base is 45% |
| 1872 | } |
| 1873 | |
| 1874 | vref_value = divide_nint(vrefpc - rangepc, 65); |
| 1875 | if (vref_value < 0) |
| 1876 | vref_value = vref_range; // set to base of range |
| 1877 | else |
| 1878 | vref_value |= vref_range; |
| 1879 | |
| 1880 | debug("rtt_wr: %d, rtt_park: %d, dqx_ctl: %d, rank_count: %d\n", |
| 1881 | rtt_wr, rtt_park, dqx_ctl, rank_count); |
| 1882 | debug("rtt_wr_s: %lld, rtt_park_s: %lld, dqx_ctl_s: %lld, vref_value: 0x%x, range: %d\n", |
| 1883 | rtt_wr_s, rtt_park_s, dqx_ctl_s, vref_value ^ vref_range, |
| 1884 | vref_range ? 2 : 1); |
| 1885 | |
| 1886 | return vref_value; |
| 1887 | } |
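| | |
| | /* |
| |  * Tracing the arithmetic above with hypothetical values: rtt_wr = 240, |
| |  * rtt_park = 120, dqx_ctl = 34, rank_count = 2 and dram_connection = 0 |
| |  * (so rser_s = 15) give reff_s = 80, vref ~= 827 mV, vrefpc ~= 68.9%, |
| |  * which stays in range 1 and encodes to a vref_value of about 14. |
| |  */ |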
| 1888 | |
| 1889 | // NOTE: this gets called for 1-rank and 2-rank DIMMs in two-slot configs |
| 1890 | static int compute_vref_2slot_2rank(int rtt_wr, int rtt_park_00, |
| 1891 | int rtt_park_01, |
| 1892 | int dqx_ctl, int rtt_nom, |
| 1893 | int dram_connection) |
| 1894 | { |
| 1895 | u64 rser = (dram_connection) ? 0 : 15; |
| 1896 | u64 vdd = 1200; |
| 1897 | u64 vl, vlp, vcm; |
| 1898 | u64 rd0, rd1, rpullup; |
| 1899 | // 99 == HiZ |
| 1900 | u64 rtt_wr_s = (((rtt_wr == 0) || rtt_wr == 99) ? |
| 1901 | 1 * 1024 * 1024 : rtt_wr); |
| 1902 | u64 rtt_park_00_s = (rtt_park_00 == 0 ? 1 * 1024 * 1024 : rtt_park_00); |
| 1903 | u64 rtt_park_01_s = (rtt_park_01 == 0 ? 1 * 1024 * 1024 : rtt_park_01); |
| 1904 | u64 dqx_ctl_s = (dqx_ctl == 0 ? 1 * 1024 * 1024 : dqx_ctl); |
| 1905 | u64 rtt_nom_s = (rtt_nom == 0 ? 1 * 1024 * 1024 : rtt_nom); |
| 1906 | int vref_value; |
| 1907 | u64 rangepc = 6000; // range1 base |
| 1908 | u64 vrefpc; |
| 1909 | int vref_range = 0; |
| 1910 | |
| 1911 | // rd0 = (RTT_NOM (parallel) RTT_WR) + RSER = |
| 1912 | // ((RTT_NOM * RTT_WR) / (RTT_NOM + RTT_WR)) + RSER |
| 1913 | rd0 = divide_nint((rtt_nom_s * rtt_wr_s), |
| 1914 | (rtt_nom_s + rtt_wr_s)) + rser; |
| 1915 | |
| 1916 | // rd1 = (RTT_PARK_00 (parallel) RTT_PARK_01) + RSER = |
| 1917 | // ((RTT_PARK_00 * RTT_PARK_01) / (RTT_PARK_00 + RTT_PARK_01)) + RSER |
| 1918 | rd1 = divide_nint((rtt_park_00_s * rtt_park_01_s), |
| 1919 | (rtt_park_00_s + rtt_park_01_s)) + rser; |
| 1920 | |
| 1921 | // rpullup = rd0 (parallel) rd1 = (rd0 * rd1) / (rd0 + rd1) |
| 1922 | rpullup = divide_nint((rd0 * rd1), (rd0 + rd1)); |
| 1923 | |
| 1924 | // vl = (DQX_CTL / (DQX_CTL + rpullup)) * 1.2 |
| 1925 | vl = divide_nint((dqx_ctl_s * vdd), (dqx_ctl_s + rpullup)); |
| 1926 | |
| 1927 | // vlp = ((RSER / rd0) * (1.2 - vl)) + vl |
| 1928 | vlp = divide_nint((rser * (vdd - vl)), rd0) + vl; |
| 1929 | |
| 1930 | // vcm = (vlp + 1.2) / 2 |
| 1931 | vcm = divide_nint((vlp + vdd), 2); |
| 1932 | |
| 1933 | // vrefpc = (vcm / 1.2) * 100 |
| 1934 | vrefpc = divide_nint((vcm * 100 * 100), vdd); |
| 1935 | |
| 1936 | if (vrefpc < rangepc) { // < range1 base, use range2 |
| 1937 | vref_range = 1 << 6; // set bit A6 for range2 |
| 1938 | rangepc = 4500; // range2 base is 45% |
| 1939 | } |
| 1940 | |
| 1941 | vref_value = divide_nint(vrefpc - rangepc, 65); |
| 1942 | if (vref_value < 0) |
| 1943 | vref_value = vref_range; // set to base of range |
| 1944 | else |
| 1945 | vref_value |= vref_range; |
| 1946 | |
| 1947 | debug("rtt_wr:%d, rtt_park_00:%d, rtt_park_01:%d, dqx_ctl:%d, rtt_nom:%d, vref_value:%d (0x%x)\n", |
| 1948 | rtt_wr, rtt_park_00, rtt_park_01, dqx_ctl, rtt_nom, vref_value, |
| 1949 | vref_value); |
| 1950 | |
| 1951 | return vref_value; |
| 1952 | } |
| 1953 | |
| 1954 | // NOTE: only call this for DIMMs with 1 or 2 ranks, not 4. |
| 1955 | static int compute_vref_val(struct ddr_priv *priv, int if_num, int rankx, |
| 1956 | int dimm_count, int rank_count, |
| 1957 | struct impedence_values *imp_values, |
| 1958 | int is_stacked_die, int dram_connection) |
| 1959 | { |
| 1960 | int computed_final_vref_value = 0; |
| 1961 | int enable_adjust = ENABLE_COMPUTED_VREF_ADJUSTMENT; |
| 1962 | const char *s; |
| 1963 | int rtt_wr, dqx_ctl, rtt_nom, index; |
| 1964 | union cvmx_lmcx_modereg_params1 lmc_modereg_params1; |
| 1965 | union cvmx_lmcx_modereg_params2 lmc_modereg_params2; |
| 1966 | union cvmx_lmcx_comp_ctl2 comp_ctl2; |
| 1967 | int rtt_park; |
| 1968 | int rtt_park_00; |
| 1969 | int rtt_park_01; |
| 1970 | |
| 1971 | debug("N0.LMC%d.R%d: %s(...dram_connection = %d)\n", |
| 1972 | if_num, rankx, __func__, dram_connection); |
| 1973 | |
| 1974 | // allow some overrides... |
| 1975 | s = env_get("ddr_adjust_computed_vref"); |
| 1976 | if (s) { |
| 1977 | enable_adjust = !!simple_strtoul(s, NULL, 0); |
| 1978 | if (!enable_adjust) { |
| 1979 | debug("N0.LMC%d.R%d: DISABLE adjustment of computed VREF\n", |
| 1980 | if_num, rankx); |
| 1981 | } |
| 1982 | } |
| 1983 | |
| 1984 | s = env_get("ddr_set_computed_vref"); |
| 1985 | if (s) { |
| 1986 | int new_vref = simple_strtoul(s, NULL, 0); |
| 1987 | |
| 1988 | debug("N0.LMC%d.R%d: OVERRIDE computed VREF to 0x%x (%d)\n", |
| 1989 | if_num, rankx, new_vref, new_vref); |
| 1990 | return new_vref; |
| 1991 | } |
| 1992 | |
| 1993 | /* |
| 1994 | * Calculate an alternative to the measured vref value |
| 1995 | * but only for configurations we know how to... |
| 1996 | */ |
| 1997 | // We have code for 2-rank DIMMs in both 1-slot or 2-slot configs, |
| 1998 | // and can use the 2-rank 1-slot code for 1-rank DIMMs in 1-slot |
| 1999 | // configs, and can use the 2-rank 2-slot code for 1-rank DIMMs |
| 2000 | // in 2-slot configs. |
| 2001 | |
| 2002 | lmc_modereg_params1.u64 = |
| 2003 | lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num)); |
| 2004 | lmc_modereg_params2.u64 = |
| 2005 | lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS2(if_num)); |
| 2006 | comp_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); |
| 2007 | dqx_ctl = imp_values->dqx_strength[comp_ctl2.s.dqx_ctl]; |
| 2008 | |
| 2009 | // WR always comes from the current rank |
| 2010 | index = (lmc_modereg_params1.u64 >> (rankx * 12 + 5)) & 0x03; |
| 2011 | if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) |
| 2012 | index |= lmc_modereg_params1.u64 >> (51 + rankx - 2) & 0x04; |
| 2013 | rtt_wr = imp_values->rtt_wr_ohms[index]; |
| 2014 | |
| 2015 | // separate calculations for 1 vs 2 DIMMs per LMC |
| 2016 | if (dimm_count == 1) { |
| 2017 | // PARK comes from this rank if 1-rank, otherwise other rank |
| 2018 | index = |
| 2019 | (lmc_modereg_params2.u64 >> |
| 2020 | ((rankx ^ (rank_count - 1)) * 10 + 0)) & 0x07; |
| 2021 | rtt_park = imp_values->rtt_nom_ohms[index]; |
| 2022 | computed_final_vref_value = |
| 2023 | compute_vref_1slot_2rank(rtt_wr, rtt_park, dqx_ctl, |
| 2024 | rank_count, dram_connection); |
| 2025 | } else { |
| 2026 | // get both PARK values from the other DIMM |
| 2027 | index = |
| 2028 | (lmc_modereg_params2.u64 >> ((rankx ^ 0x02) * 10 + 0)) & |
| 2029 | 0x07; |
| 2030 | rtt_park_00 = imp_values->rtt_nom_ohms[index]; |
| 2031 | index = |
| 2032 | (lmc_modereg_params2.u64 >> ((rankx ^ 0x03) * 10 + 0)) & |
| 2033 | 0x07; |
| 2034 | rtt_park_01 = imp_values->rtt_nom_ohms[index]; |
| 2035 | // NOM comes from this rank if 1-rank, otherwise other rank |
| 2036 | index = |
| 2037 | (lmc_modereg_params1.u64 >> |
| 2038 | ((rankx ^ (rank_count - 1)) * 12 + 9)) & 0x07; |
| 2039 | rtt_nom = imp_values->rtt_nom_ohms[index]; |
| 2040 | computed_final_vref_value = |
| 2041 | compute_vref_2slot_2rank(rtt_wr, rtt_park_00, rtt_park_01, |
| 2042 | dqx_ctl, rtt_nom, dram_connection); |
| 2043 | } |
| 2044 | |
| 2045 | if (enable_adjust) { |
| 2046 | union cvmx_lmcx_config lmc_config; |
| 2047 | union cvmx_lmcx_control lmc_control; |
| 2048 | |
| 2049 | lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); |
| 2050 | lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); |
| 2051 | |
| 2052 | /* |
Heinrich Schuchardt | 2e66ecb | 2022-01-16 23:11:58 +0100 | [diff] [blame] | 2053 | * New computed vref = existing computed vref - X |
Aaron Williams | 61674a1 | 2020-09-02 08:29:07 +0200 | [diff] [blame] | 2054 | * |
| 2055 | * The value of X depends on the configuration. |
| 2056 | * Both #122 and #139 are 2Rx4 RDIMMs, while #124 is a stacked |
| 2057 | * die 2Rx4, so the results fall into two cases: |
| 2058 | * |
| 2059 | * 1. Stacked Die: 2Rx4 |
| 2060 | * 1-slot: offset = 7, i.e. new computed vref = existing |
Heinrich Schuchardt | 2e66ecb | 2022-01-16 23:11:58 +0100 | [diff] [blame] | 2061 | * computed vref - 7 |
Aaron Williams | 61674a1 | 2020-09-02 08:29:07 +0200 | [diff] [blame] | 2062 | * 2-slot: offset = 6 |
| 2063 | * |
| 2064 | * 2. Regular: 2Rx4 |
| 2065 | * 1-slot: offset = 3 |
| 2066 | * 2-slot: offset = 2 |
| 2067 | */ |
| 2068 | // we know we never get called unless DDR4, so test just |
| 2069 | // the other conditions |
| 2070 | if (lmc_control.s.rdimm_ena == 1 && |
| 2071 | rank_count == 2 && lmc_config.s.mode_x4dev) { |
| 2072 | // it must first be RDIMM and 2-rank and x4 |
| 2073 | int adj; |
| 2074 | |
| 2075 | // now do according to stacked die or not... |
| 2076 | if (is_stacked_die) |
| 2077 | adj = (dimm_count == 1) ? -7 : -6; |
| 2078 | else |
| 2079 | adj = (dimm_count == 1) ? -3 : -2; |
| 2080 | |
| 2081 | // we must have adjusted it, so print it out if |
| 2082 | // verbosity is right |
| 2083 | debug("N0.LMC%d.R%d: adjusting computed vref from %2d (0x%02x) to %2d (0x%02x)\n", |
| 2084 | if_num, rankx, computed_final_vref_value, |
| 2085 | computed_final_vref_value, |
| 2086 | computed_final_vref_value + adj, |
| 2087 | computed_final_vref_value + adj); |
| 2088 | computed_final_vref_value += adj; |
| 2089 | } |
| 2090 | } |
| 2091 | |
| 2092 | return computed_final_vref_value; |
| 2093 | } |
| 2094 | |
| 2095 | static void unpack_rlevel_settings(int if_bytemask, int ecc_ena, |
| 2096 | struct rlevel_byte_data *rlevel_byte, |
| 2097 | union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank) |
| 2098 | { |
| 2099 | if ((if_bytemask & 0xff) == 0xff) { |
| 2100 | if (ecc_ena) { |
| 2101 | rlevel_byte[8].delay = lmc_rlevel_rank.s.byte7; |
| 2102 | rlevel_byte[7].delay = lmc_rlevel_rank.s.byte6; |
| 2103 | rlevel_byte[6].delay = lmc_rlevel_rank.s.byte5; |
| 2104 | rlevel_byte[5].delay = lmc_rlevel_rank.s.byte4; |
| 2105 | /* ECC */ |
| 2106 | rlevel_byte[4].delay = lmc_rlevel_rank.s.byte8; |
| 2107 | } else { |
| 2108 | rlevel_byte[7].delay = lmc_rlevel_rank.s.byte7; |
| 2109 | rlevel_byte[6].delay = lmc_rlevel_rank.s.byte6; |
| 2110 | rlevel_byte[5].delay = lmc_rlevel_rank.s.byte5; |
| 2111 | rlevel_byte[4].delay = lmc_rlevel_rank.s.byte4; |
| 2112 | } |
| 2113 | } else { |
| 2114 | rlevel_byte[8].delay = lmc_rlevel_rank.s.byte8; /* unused */ |
| 2115 | rlevel_byte[7].delay = lmc_rlevel_rank.s.byte7; /* unused */ |
| 2116 | rlevel_byte[6].delay = lmc_rlevel_rank.s.byte6; /* unused */ |
| 2117 | rlevel_byte[5].delay = lmc_rlevel_rank.s.byte5; /* unused */ |
| 2118 | rlevel_byte[4].delay = lmc_rlevel_rank.s.byte4; /* ECC */ |
| 2119 | } |
| 2120 | |
| 2121 | rlevel_byte[3].delay = lmc_rlevel_rank.s.byte3; |
| 2122 | rlevel_byte[2].delay = lmc_rlevel_rank.s.byte2; |
| 2123 | rlevel_byte[1].delay = lmc_rlevel_rank.s.byte1; |
| 2124 | rlevel_byte[0].delay = lmc_rlevel_rank.s.byte0; |
| 2125 | } |
| 2126 | |
| 2127 | static void pack_rlevel_settings(int if_bytemask, int ecc_ena, |
| 2128 | struct rlevel_byte_data *rlevel_byte, |
| 2129 | union cvmx_lmcx_rlevel_rankx |
| 2130 | *final_rlevel_rank) |
| 2131 | { |
| 2132 | union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank = *final_rlevel_rank; |
| 2133 | |
| 2134 | if ((if_bytemask & 0xff) == 0xff) { |
| 2135 | if (ecc_ena) { |
| 2136 | lmc_rlevel_rank.s.byte7 = rlevel_byte[8].delay; |
| 2137 | lmc_rlevel_rank.s.byte6 = rlevel_byte[7].delay; |
| 2138 | lmc_rlevel_rank.s.byte5 = rlevel_byte[6].delay; |
| 2139 | lmc_rlevel_rank.s.byte4 = rlevel_byte[5].delay; |
| 2140 | /* ECC */ |
| 2141 | lmc_rlevel_rank.s.byte8 = rlevel_byte[4].delay; |
| 2142 | } else { |
| 2143 | lmc_rlevel_rank.s.byte7 = rlevel_byte[7].delay; |
| 2144 | lmc_rlevel_rank.s.byte6 = rlevel_byte[6].delay; |
| 2145 | lmc_rlevel_rank.s.byte5 = rlevel_byte[5].delay; |
| 2146 | lmc_rlevel_rank.s.byte4 = rlevel_byte[4].delay; |
| 2147 | } |
| 2148 | } else { |
| 2149 | lmc_rlevel_rank.s.byte8 = rlevel_byte[8].delay; |
| 2150 | lmc_rlevel_rank.s.byte7 = rlevel_byte[7].delay; |
| 2151 | lmc_rlevel_rank.s.byte6 = rlevel_byte[6].delay; |
| 2152 | lmc_rlevel_rank.s.byte5 = rlevel_byte[5].delay; |
| 2153 | lmc_rlevel_rank.s.byte4 = rlevel_byte[4].delay; |
| 2154 | } |
| 2155 | |
| 2156 | lmc_rlevel_rank.s.byte3 = rlevel_byte[3].delay; |
| 2157 | lmc_rlevel_rank.s.byte2 = rlevel_byte[2].delay; |
| 2158 | lmc_rlevel_rank.s.byte1 = rlevel_byte[1].delay; |
| 2159 | lmc_rlevel_rank.s.byte0 = rlevel_byte[0].delay; |
| 2160 | |
| 2161 | *final_rlevel_rank = lmc_rlevel_rank; |
| 2162 | } |
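| | |
| | /* |
| |  * Note (derived from the two helpers above): with a full byte mask |
| |  * and ECC enabled, the unpacked rlevel_byte[] order is |
| |  *   [0..3] = hardware byte0..byte3 |
| |  *   [4]    = hardware byte8 (the ECC lane) |
| |  *   [5..8] = hardware byte4..byte7 |
| |  */ |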
| 2163 | |
| 2164 | /////////////////// These are the RLEVEL settings display routines |
| 2165 | |
| 2166 | // flags |
| 2167 | #define WITH_NOTHING 0 |
| 2168 | #define WITH_SCORE 1 |
| 2169 | #define WITH_AVERAGE 2 |
| 2170 | #define WITH_FINAL 4 |
| 2171 | #define WITH_COMPUTE 8 |
| 2172 | |
| 2173 | static void do_display_rl(int if_num, |
| 2174 | union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, |
| 2175 | int rank, int flags, int score) |
| 2176 | { |
| 2177 | char score_buf[16]; |
| 2178 | char *msg_buf; |
| 2179 | char hex_buf[20]; |
| 2180 | |
| 2181 | if (flags & WITH_SCORE) { |
| 2182 | snprintf(score_buf, sizeof(score_buf), "(%d)", score); |
| 2183 | } else { |
| 2184 | score_buf[0] = ' '; |
| 2185 | score_buf[1] = 0; |
| 2186 | } |
| 2187 | |
| 2188 | if (flags & WITH_AVERAGE) { |
| 2189 | msg_buf = " DELAY AVERAGES "; |
| 2190 | } else if (flags & WITH_FINAL) { |
| 2191 | msg_buf = " FINAL SETTINGS "; |
| 2192 | } else if (flags & WITH_COMPUTE) { |
| 2193 | msg_buf = " COMPUTED DELAYS "; |
| 2194 | } else { |
| 2195 | snprintf(hex_buf, sizeof(hex_buf), "0x%016llX", |
| 2196 | (unsigned long long)lmc_rlevel_rank.u64); |
| 2197 | msg_buf = hex_buf; |
| 2198 | } |
| 2199 | |
| 2200 | debug("N0.LMC%d.R%d: Rlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d %s\n", |
| 2201 | if_num, rank, lmc_rlevel_rank.s.status, msg_buf, |
| 2202 | lmc_rlevel_rank.s.byte8, lmc_rlevel_rank.s.byte7, |
| 2203 | lmc_rlevel_rank.s.byte6, lmc_rlevel_rank.s.byte5, |
| 2204 | lmc_rlevel_rank.s.byte4, lmc_rlevel_rank.s.byte3, |
| 2205 | lmc_rlevel_rank.s.byte2, lmc_rlevel_rank.s.byte1, |
| 2206 | lmc_rlevel_rank.s.byte0, score_buf); |
| 2207 | } |
| 2208 | |
| 2209 | static void display_rl(int if_num, |
| 2210 | union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, int rank) |
| 2211 | { |
| 2212 | do_display_rl(if_num, lmc_rlevel_rank, rank, 0, 0); |
| 2213 | } |
| 2214 | |
| 2215 | static void display_rl_with_score(int if_num, |
| 2216 | union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, |
| 2217 | int rank, int score) |
| 2218 | { |
| 2219 | do_display_rl(if_num, lmc_rlevel_rank, rank, 1, score); |
| 2220 | } |
| 2221 | |
| 2222 | static void display_rl_with_final(int if_num, |
| 2223 | union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, |
| 2224 | int rank) |
| 2225 | { |
| 2226 | do_display_rl(if_num, lmc_rlevel_rank, rank, 4, 0); |
| 2227 | } |
| 2228 | |
| 2229 | static void display_rl_with_computed(int if_num, |
| 2230 | union cvmx_lmcx_rlevel_rankx |
| 2231 | lmc_rlevel_rank, int rank, int score) |
| 2232 | { |
| 2233 | do_display_rl(if_num, lmc_rlevel_rank, rank, 9, score); |
| 2234 | } |
| 2235 | |
| 2236 | // flag values |
| 2237 | #define WITH_RODT_BLANK 0 |
| 2238 | #define WITH_RODT_SKIPPING 1 |
| 2239 | #define WITH_RODT_BESTROW 2 |
| 2240 | #define WITH_RODT_BESTSCORE 3 |
| 2241 | // control |
| 2242 | #define SKIP_SKIPPING 1 |
| 2243 | |
| 2244 | static const char *with_rodt_canned_msgs[4] = { |
| 2245 | " ", "SKIPPING ", "BEST ROW ", "BEST SCORE" |
| 2246 | }; |
| 2247 | |
| 2248 | static void display_rl_with_rodt(int if_num, |
| 2249 | union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, |
| 2250 | int rank, int score, |
| 2251 | int nom_ohms, int rodt_ohms, int flag) |
| 2252 | { |
| 2253 | const char *msg_buf; |
| 2254 | char set_buf[20]; |
| 2255 | |
| 2256 | #if SKIP_SKIPPING |
| 2257 | if (flag == WITH_RODT_SKIPPING) |
| 2258 | return; |
| 2259 | #endif |
| 2260 | |
| 2261 | msg_buf = with_rodt_canned_msgs[flag]; |
| 2262 | if (nom_ohms < 0) { |
| 2263 | snprintf(set_buf, sizeof(set_buf), " RODT %3d ", |
| 2264 | rodt_ohms); |
| 2265 | } else { |
| 2266 | snprintf(set_buf, sizeof(set_buf), "NOM %3d RODT %3d", nom_ohms, |
| 2267 | rodt_ohms); |
| 2268 | } |
| 2269 | |
| 2270 | debug("N0.LMC%d.R%d: Rlevel %s %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d (%d)\n", |
| 2271 | if_num, rank, set_buf, msg_buf, lmc_rlevel_rank.s.byte8, |
| 2272 | lmc_rlevel_rank.s.byte7, lmc_rlevel_rank.s.byte6, |
| 2273 | lmc_rlevel_rank.s.byte5, lmc_rlevel_rank.s.byte4, |
| 2274 | lmc_rlevel_rank.s.byte3, lmc_rlevel_rank.s.byte2, |
| 2275 | lmc_rlevel_rank.s.byte1, lmc_rlevel_rank.s.byte0, score); |
| 2276 | } |
| 2277 | |
| 2278 | static void do_display_wl(int if_num, |
| 2279 | union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank, |
| 2280 | int rank, int flags) |
| 2281 | { |
| 2282 | char *msg_buf; |
| 2283 | char hex_buf[20]; |
| 2284 | |
| 2285 | if (flags & WITH_FINAL) { |
| 2286 | msg_buf = " FINAL SETTINGS "; |
| 2287 | } else { |
| 2288 | snprintf(hex_buf, sizeof(hex_buf), "0x%016llX", |
| 2289 | (unsigned long long)lmc_wlevel_rank.u64); |
| 2290 | msg_buf = hex_buf; |
| 2291 | } |
| 2292 | |
| 2293 | debug("N0.LMC%d.R%d: Wlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n", |
| 2294 | if_num, rank, lmc_wlevel_rank.s.status, msg_buf, |
| 2295 | lmc_wlevel_rank.s.byte8, lmc_wlevel_rank.s.byte7, |
| 2296 | lmc_wlevel_rank.s.byte6, lmc_wlevel_rank.s.byte5, |
| 2297 | lmc_wlevel_rank.s.byte4, lmc_wlevel_rank.s.byte3, |
| 2298 | lmc_wlevel_rank.s.byte2, lmc_wlevel_rank.s.byte1, |
| 2299 | lmc_wlevel_rank.s.byte0); |
| 2300 | } |
| 2301 | |
| 2302 | static void display_wl(int if_num, |
| 2303 | union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank, int rank) |
| 2304 | { |
| 2305 | do_display_wl(if_num, lmc_wlevel_rank, rank, WITH_NOTHING); |
| 2306 | } |
| 2307 | |
| 2308 | static void display_wl_with_final(int if_num, |
| 2309 | union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank, |
| 2310 | int rank) |
| 2311 | { |
| 2312 | do_display_wl(if_num, lmc_wlevel_rank, rank, WITH_FINAL); |
| 2313 | } |
| 2314 | |
| 2315 | // pretty-print bitmask adjuster |
| 2316 | static u64 ppbm(u64 bm) |
| 2317 | { |
| 2318 | if (bm != 0ul) { |
| 2319 | while ((bm & 0x0fful) == 0ul) |
| 2320 | bm >>= 4; |
| 2321 | } |
| 2322 | |
| 2323 | return bm; |
| 2324 | } |
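| | |
| | /* |
| |  * Example: ppbm(0x3f000) shifts the mask right one nibble at a time |
| |  * while the low byte is zero and returns 0x3f0, so the printed |
| |  * bitmasks fit the %05llx format used below. |
| |  */ |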
| 2325 | |
| 2326 | // xlate PACKED index to UNPACKED index to use with rlevel_byte |
| 2327 | #define XPU(i, e) (((i) < 4) ? (i) : (((i) < 8) ? (i) + (e) : 4)) |
| 2328 | // xlate UNPACKED index to PACKED index to use with rlevel_bitmask |
| 2329 | #define XUP(i, e) (((i) < 4) ? (i) : (e) ? (((i) > 4) ? (i) - 1 : 8) : (i)) |
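| | // For example, with e (ecc) = 1 these map as follows: |
| | //   XPU: 0 1 2 3 4 5 6 7 8  ->  0 1 2 3 5 6 7 8 4 |
| | //   XUP: 0 1 2 3 4 5 6 7 8  ->  0 1 2 3 8 4 5 6 7 |
| | // With e = 0, XUP is the identity and XPU only remaps 8 -> 4. |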
| 2330 | |
| 2331 | // flag values |
| 2332 | #define WITH_WL_BITMASKS 0 |
| 2333 | #define WITH_RL_BITMASKS 1 |
| 2334 | #define WITH_RL_MASK_SCORES 2 |
| 2335 | #define WITH_RL_SEQ_SCORES 3 |
| 2336 | |
| 2337 | static void do_display_bm(int if_num, int rank, void *bm, |
| 2338 | int flags, int ecc) |
| 2339 | { |
| 2340 | if (flags == WITH_WL_BITMASKS) { |
| 2341 | // wlevel_bitmask array in PACKED index order, so just |
| 2342 | // print them |
| 2343 | int *bitmasks = (int *)bm; |
| 2344 | |
| 2345 | debug("N0.LMC%d.R%d: Wlevel Debug Bitmasks : %05x %05x %05x %05x %05x %05x %05x %05x %05x\n", |
| 2346 | if_num, rank, bitmasks[8], bitmasks[7], bitmasks[6], |
| 2347 | bitmasks[5], bitmasks[4], bitmasks[3], bitmasks[2], |
| 2348 | bitmasks[1], bitmasks[0] |
| 2349 | ); |
| 2350 | } else if (flags == WITH_RL_BITMASKS) { |
| 2351 | // rlevel_bitmask array in PACKED index order, so just |
| 2352 | // print them |
| 2353 | struct rlevel_bitmask *rlevel_bitmask = |
| 2354 | (struct rlevel_bitmask *)bm; |
| 2355 | |
| 2356 | debug("N0.LMC%d.R%d: Rlevel Debug Bitmasks 8:0 : %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx\n", |
| 2357 | if_num, rank, ppbm(rlevel_bitmask[8].bm), |
| 2358 | ppbm(rlevel_bitmask[7].bm), ppbm(rlevel_bitmask[6].bm), |
| 2359 | ppbm(rlevel_bitmask[5].bm), ppbm(rlevel_bitmask[4].bm), |
| 2360 | ppbm(rlevel_bitmask[3].bm), ppbm(rlevel_bitmask[2].bm), |
| 2361 | ppbm(rlevel_bitmask[1].bm), ppbm(rlevel_bitmask[0].bm) |
| 2362 | ); |
| 2363 | } else if (flags == WITH_RL_MASK_SCORES) { |
| 2364 | // rlevel_bitmask array in PACKED index order, so just |
| 2365 | // print them |
| 2366 | struct rlevel_bitmask *rlevel_bitmask = |
| 2367 | (struct rlevel_bitmask *)bm; |
| 2368 | |
| 2369 | debug("N0.LMC%d.R%d: Rlevel Debug Bitmask Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n", |
| 2370 | if_num, rank, rlevel_bitmask[8].errs, |
| 2371 | rlevel_bitmask[7].errs, rlevel_bitmask[6].errs, |
| 2372 | rlevel_bitmask[5].errs, rlevel_bitmask[4].errs, |
| 2373 | rlevel_bitmask[3].errs, rlevel_bitmask[2].errs, |
| 2374 | rlevel_bitmask[1].errs, rlevel_bitmask[0].errs); |
| 2375 | } else if (flags == WITH_RL_SEQ_SCORES) { |
| 2376 | // rlevel_byte array in UNPACKED index order, so xlate |
| 2377 | // and print them |
| 2378 | struct rlevel_byte_data *rlevel_byte = |
| 2379 | (struct rlevel_byte_data *)bm; |
| 2380 | |
| 2381 | debug("N0.LMC%d.R%d: Rlevel Debug Non-seq Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n", |
| 2382 | if_num, rank, rlevel_byte[XPU(8, ecc)].sqerrs, |
| 2383 | rlevel_byte[XPU(7, ecc)].sqerrs, |
| 2384 | rlevel_byte[XPU(6, ecc)].sqerrs, |
| 2385 | rlevel_byte[XPU(5, ecc)].sqerrs, |
| 2386 | rlevel_byte[XPU(4, ecc)].sqerrs, |
| 2387 | rlevel_byte[XPU(3, ecc)].sqerrs, |
| 2388 | rlevel_byte[XPU(2, ecc)].sqerrs, |
| 2389 | rlevel_byte[XPU(1, ecc)].sqerrs, |
| 2390 | rlevel_byte[XPU(0, ecc)].sqerrs); |
| 2391 | } |
| 2392 | } |
| 2393 | |
| 2394 | static void display_wl_bm(int if_num, int rank, int *bitmasks) |
| 2395 | { |
| 2396 | do_display_bm(if_num, rank, (void *)bitmasks, WITH_WL_BITMASKS, 0); |
| 2397 | } |
| 2398 | |
| 2399 | static void display_rl_bm(int if_num, int rank, |
| 2400 | struct rlevel_bitmask *bitmasks, int ecc_ena) |
| 2401 | { |
| 2402 | do_display_bm(if_num, rank, (void *)bitmasks, WITH_RL_BITMASKS, |
| 2403 | ecc_ena); |
| 2404 | } |
| 2405 | |
| 2406 | static void display_rl_bm_scores(int if_num, int rank, |
| 2407 | struct rlevel_bitmask *bitmasks, int ecc_ena) |
| 2408 | { |
| 2409 | do_display_bm(if_num, rank, (void *)bitmasks, WITH_RL_MASK_SCORES, |
| 2410 | ecc_ena); |
| 2411 | } |
| 2412 | |
| 2413 | static void display_rl_seq_scores(int if_num, int rank, |
| 2414 | struct rlevel_byte_data *bytes, int ecc_ena) |
| 2415 | { |
| 2416 | do_display_bm(if_num, rank, (void *)bytes, WITH_RL_SEQ_SCORES, ecc_ena); |
| 2417 | } |
| 2418 | |
| 2419 | #define RODT_OHMS_COUNT 8 |
| 2420 | #define RTT_NOM_OHMS_COUNT 8 |
| 2421 | #define RTT_NOM_TABLE_COUNT 8 |
| 2422 | #define RTT_WR_OHMS_COUNT 8 |
| 2423 | #define DIC_OHMS_COUNT 3 |
| 2424 | #define DRIVE_STRENGTH_COUNT 15 |
| 2425 | |
| 2426 | static unsigned char ddr4_rodt_ohms[RODT_OHMS_COUNT] = { |
| 2427 | 0, 40, 60, 80, 120, 240, 34, 48 }; |
| 2428 | static unsigned char ddr4_rtt_nom_ohms[RTT_NOM_OHMS_COUNT] = { |
| 2429 | 0, 60, 120, 40, 240, 48, 80, 34 }; |
| 2430 | static unsigned char ddr4_rtt_nom_table[RTT_NOM_TABLE_COUNT] = { |
| 2431 | 0, 4, 2, 6, 1, 5, 3, 7 }; |
| 2432 | // setting HiZ ohms to 99 for computed vref |
| 2433 | static unsigned char ddr4_rtt_wr_ohms[RTT_WR_OHMS_COUNT] = { |
| 2434 | 0, 120, 240, 99, 80 }; |
| 2435 | static unsigned char ddr4_dic_ohms[DIC_OHMS_COUNT] = { 34, 48 }; |
| 2436 | static short ddr4_drive_strength[DRIVE_STRENGTH_COUNT] = { |
| 2437 | 0, 0, 26, 30, 34, 40, 48, 68, 0, 0, 0, 0, 0, 0, 0 }; |
| 2438 | static short ddr4_dqx_strength[DRIVE_STRENGTH_COUNT] = { |
| 2439 | 0, 24, 27, 30, 34, 40, 48, 60, 0, 0, 0, 0, 0, 0, 0 }; |
| 2440 | struct impedence_values ddr4_impedence_val = { |
| 2441 | .rodt_ohms = ddr4_rodt_ohms, |
| 2442 | .rtt_nom_ohms = ddr4_rtt_nom_ohms, |
| 2443 | .rtt_nom_table = ddr4_rtt_nom_table, |
| 2444 | .rtt_wr_ohms = ddr4_rtt_wr_ohms, |
| 2445 | .dic_ohms = ddr4_dic_ohms, |
| 2446 | .drive_strength = ddr4_drive_strength, |
| 2447 | .dqx_strength = ddr4_dqx_strength, |
| 2448 | }; |
| 2449 | |
| 2450 | static unsigned char ddr3_rodt_ohms[RODT_OHMS_COUNT] = { |
| 2451 | 0, 20, 30, 40, 60, 120, 0, 0 }; |
| 2452 | static unsigned char ddr3_rtt_nom_ohms[RTT_NOM_OHMS_COUNT] = { |
| 2453 | 0, 60, 120, 40, 20, 30, 0, 0 }; |
| 2454 | static unsigned char ddr3_rtt_nom_table[RTT_NOM_TABLE_COUNT] = { |
| 2455 | 0, 2, 1, 3, 5, 4, 0, 0 }; |
| 2456 | static unsigned char ddr3_rtt_wr_ohms[RTT_WR_OHMS_COUNT] = { 0, 60, 120 }; |
| 2457 | static unsigned char ddr3_dic_ohms[DIC_OHMS_COUNT] = { 40, 34 }; |
| 2458 | static short ddr3_drive_strength[DRIVE_STRENGTH_COUNT] = { |
| 2459 | 0, 24, 27, 30, 34, 40, 48, 60, 0, 0, 0, 0, 0, 0, 0 }; |
| 2460 | static struct impedence_values ddr3_impedence_val = { |
| 2461 | .rodt_ohms = ddr3_rodt_ohms, |
| 2462 | .rtt_nom_ohms = ddr3_rtt_nom_ohms, |
| 2463 | .rtt_nom_table = ddr3_rtt_nom_table, |
| 2464 | .rtt_wr_ohms = ddr3_rtt_wr_ohms, |
| 2465 | .dic_ohms = ddr3_dic_ohms, |
| 2466 | .drive_strength = ddr3_drive_strength, |
| 2467 | .dqx_strength = ddr3_drive_strength, |
| 2468 | }; |
| 2469 | |
| 2470 | static u64 hertz_to_psecs(u64 hertz) |
| 2471 | { |
| 2472 | /* Clock in psecs */ |
| 2473 | return divide_nint((u64)1000 * 1000 * 1000 * 1000, hertz); |
| 2474 | } |
| 2475 | |
| 2476 | #define DIVIDEND_SCALE 1000 /* Scale to avoid rounding error. */ |
| 2477 | |
| 2478 | static u64 psecs_to_mts(u64 psecs) |
| 2479 | { |
| 2480 | return divide_nint(divide_nint((u64)(2 * 1000000 * DIVIDEND_SCALE), |
| 2481 | psecs), DIVIDEND_SCALE); |
| 2482 | } |
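| | |
| | /* |
| |  * Example: an 800 MHz DDR clock gives hertz_to_psecs(800000000) = |
| |  * 1250 ps, and psecs_to_mts(1250) converts that back to 1600 MT/s. |
| |  */ |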
| 2483 | |
| 2484 | #define WITHIN(v, b, m) (((v) >= ((b) - (m))) && ((v) <= ((b) + (m)))) |
| 2485 | |
| 2486 | static unsigned long pretty_psecs_to_mts(u64 psecs) |
| 2487 | { |
| 2488 | u64 ret = 0; // default to error |
| 2489 | |
| 2490 | if (WITHIN(psecs, 2500, 1)) |
| 2491 | ret = 800; |
| 2492 | else if (WITHIN(psecs, 1875, 1)) |
| 2493 | ret = 1066; |
| 2494 | else if (WITHIN(psecs, 1500, 1)) |
| 2495 | ret = 1333; |
| 2496 | else if (WITHIN(psecs, 1250, 1)) |
| 2497 | ret = 1600; |
| 2498 | else if (WITHIN(psecs, 1071, 1)) |
| 2499 | ret = 1866; |
| 2500 | else if (WITHIN(psecs, 937, 1)) |
| 2501 | ret = 2133; |
| 2502 | else if (WITHIN(psecs, 833, 1)) |
| 2503 | ret = 2400; |
| 2504 | else if (WITHIN(psecs, 750, 1)) |
| 2505 | ret = 2666; |
| 2506 | return ret; |
| 2507 | } |
| 2508 | |
| 2509 | static u64 mts_to_hertz(u64 mts) |
| 2510 | { |
| 2511 | return ((mts * 1000 * 1000) / 2); |
| 2512 | } |
| 2513 | |
| 2514 | static int compute_rc3x(int64_t tclk_psecs) |
| 2515 | { |
| 2516 | long speed; |
| 2517 | long tclk_psecs_min, tclk_psecs_max; |
| 2518 | long data_rate_mhz, data_rate_mhz_min, data_rate_mhz_max; |
| 2519 | int rc3x; |
| 2520 | |
| 2521 | #define ENCODING_BASE 1240 |
| 2522 | |
| 2523 | data_rate_mhz = psecs_to_mts(tclk_psecs); |
| 2524 | |
| 2525 | /* |
| 2526 | * 2400 MT/s is a special case. Using integer arithmetic it rounds |
| 2527 | * from 833 psecs to 2401 MT/s. Force it to 2400 to pick the |
| 2528 | * proper setting from the table. |
| 2529 | */ |
| 2530 | if (tclk_psecs == 833) |
| 2531 | data_rate_mhz = 2400; |
| 2532 | |
| 2533 | for (speed = ENCODING_BASE; speed < 3200; speed += 20) { |
| 2534 | int error = 0; |
| 2535 | |
| 2536 | /* Clock in psecs */ |
| 2537 | tclk_psecs_min = hertz_to_psecs(mts_to_hertz(speed + 00)); |
| 2538 | /* Clock in psecs */ |
| 2539 | tclk_psecs_max = hertz_to_psecs(mts_to_hertz(speed + 18)); |
| 2540 | |
| 2541 | data_rate_mhz_min = psecs_to_mts(tclk_psecs_min); |
| 2542 | data_rate_mhz_max = psecs_to_mts(tclk_psecs_max); |
| 2543 | |
| 2544 | /* Force alignment to a multiple of 20 to avoid rounding errors. */ |
| 2545 | data_rate_mhz_min = ((data_rate_mhz_min + 18) / 20) * 20; |
| 2546 | data_rate_mhz_max = ((data_rate_mhz_max + 18) / 20) * 20; |
| 2547 | |
| 2548 | error += (speed + 00 != data_rate_mhz_min); |
| 2549 | error += (speed + 20 != data_rate_mhz_max); |
| 2550 | |
| 2551 | rc3x = (speed - ENCODING_BASE) / 20; |
| 2552 | |
| 2553 | if (data_rate_mhz <= (speed + 20)) |
| 2554 | break; |
| 2555 | } |
| 2556 | |
| 2557 | return rc3x; |
| 2558 | } |
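| | |
| | /* |
| |  * Tracing the loop above for tclk_psecs = 833 (forced to 2400 MT/s): |
| |  * it exits at speed = 2380, so rc3x = (2380 - 1240) / 20 = 57. |
| |  * This only walks through the code as written; it is not a claim |
| |  * about the JEDEC RC3x encoding itself. |
| |  */ |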
| 2559 | |
| 2560 | /* |
| 2561 | * Static global variables are needed so that functions (loops) can be |
| 2562 | * restructured out of the main huge function. It's not elegant, but it is |
| 2563 | * the only way to break the original functions like init_octeon3_ddr3_interface() |
| 2564 | * into separate, smaller logical functions with fewer indentation levels. |
| 2565 | */ |
| 2566 | static int if_num __section(".data"); |
| 2567 | static u32 if_mask __section(".data"); |
| 2568 | static int ddr_hertz __section(".data"); |
| 2569 | |
| 2570 | static struct ddr_conf *ddr_conf __section(".data"); |
| 2571 | static const struct dimm_odt_config *odt_1rank_config __section(".data"); |
| 2572 | static const struct dimm_odt_config *odt_2rank_config __section(".data"); |
| 2573 | static const struct dimm_odt_config *odt_4rank_config __section(".data"); |
| 2574 | static struct dimm_config *dimm_config_table __section(".data"); |
| 2575 | static const struct dimm_odt_config *odt_config __section(".data"); |
| 2576 | static const struct ddr3_custom_config *c_cfg __section(".data"); |
| 2577 | |
| 2578 | static int odt_idx __section(".data"); |
| 2579 | |
| 2580 | static ulong tclk_psecs __section(".data"); |
| 2581 | static ulong eclk_psecs __section(".data"); |
| 2582 | |
| 2583 | static int row_bits __section(".data"); |
| 2584 | static int col_bits __section(".data"); |
| 2585 | static int num_banks __section(".data"); |
| 2586 | static int num_ranks __section(".data"); |
| 2587 | static int dram_width __section(".data"); |
| 2588 | static int dimm_count __section(".data"); |
| 2589 | /* Accumulate and report all the errors before giving up */ |
| 2590 | static int fatal_error __section(".data"); |
| 2591 | /* Flag that indicates safe DDR settings should be used */ |
| 2592 | static int safe_ddr_flag __section(".data"); |
| 2593 | /* Octeon II Default: 64bit interface width */ |
| 2594 | static int if_64b __section(".data"); |
| 2595 | static int if_bytemask __section(".data"); |
| 2596 | static u32 mem_size_mbytes __section(".data"); |
| 2597 | static unsigned int didx __section(".data"); |
| 2598 | static int bank_bits __section(".data"); |
| 2599 | static int bunk_enable __section(".data"); |
| 2600 | static int rank_mask __section(".data"); |
| 2601 | static int column_bits_start __section(".data"); |
| 2602 | static int row_lsb __section(".data"); |
| 2603 | static int pbank_lsb __section(".data"); |
| 2604 | static int use_ecc __section(".data"); |
| 2605 | static int mtb_psec __section(".data"); |
| 2606 | static short ftb_dividend __section(".data"); |
| 2607 | static short ftb_divisor __section(".data"); |
| 2608 | static int taamin __section(".data"); |
| 2609 | static int tckmin __section(".data"); |
| 2610 | static int cl __section(".data"); |
| 2611 | static int min_cas_latency __section(".data"); |
| 2612 | static int max_cas_latency __section(".data"); |
| 2613 | static int override_cas_latency __section(".data"); |
| 2614 | static int ddr_rtt_nom_auto __section(".data"); |
| 2615 | static int ddr_rodt_ctl_auto __section(".data"); |
| 2616 | |
| 2617 | static int spd_addr __section(".data"); |
| 2618 | static int spd_org __section(".data"); |
| 2619 | static int spd_banks __section(".data"); |
| 2620 | static int spd_rdimm __section(".data"); |
| 2621 | static int spd_dimm_type __section(".data"); |
| 2622 | static int spd_ecc __section(".data"); |
| 2623 | static u32 spd_cas_latency __section(".data"); |
| 2624 | static int spd_mtb_dividend __section(".data"); |
| 2625 | static int spd_mtb_divisor __section(".data"); |
| 2626 | static int spd_tck_min __section(".data"); |
| 2627 | static int spd_taa_min __section(".data"); |
| 2628 | static int spd_twr __section(".data"); |
| 2629 | static int spd_trcd __section(".data"); |
| 2630 | static int spd_trrd __section(".data"); |
| 2631 | static int spd_trp __section(".data"); |
| 2632 | static int spd_tras __section(".data"); |
| 2633 | static int spd_trc __section(".data"); |
| 2634 | static int spd_trfc __section(".data"); |
| 2635 | static int spd_twtr __section(".data"); |
| 2636 | static int spd_trtp __section(".data"); |
| 2637 | static int spd_tfaw __section(".data"); |
| 2638 | static int spd_addr_mirror __section(".data"); |
| 2639 | static int spd_package __section(".data"); |
| 2640 | static int spd_rawcard __section(".data"); |
| 2641 | static int spd_rawcard_aorb __section(".data"); |
| 2642 | static int spd_rdimm_registers __section(".data"); |
| 2643 | static int spd_thermal_sensor __section(".data"); |
| 2644 | |
| 2645 | static int is_stacked_die __section(".data"); |
| 2646 | static int is_3ds_dimm __section(".data"); |
| 2647 | // 3DS: logical ranks per package rank |
| 2648 | static int lranks_per_prank __section(".data"); |
| 2649 | // 3DS: logical ranks bits |
| 2650 | static int lranks_bits __section(".data"); |
| 2651 | // in Mbits; only used for 3DS |
| 2652 | static int die_capacity __section(".data"); |
| 2653 | |
| 2654 | static enum ddr_type ddr_type __section(".data"); |
| 2655 | |
| 2656 | static int twr __section(".data"); |
| 2657 | static int trcd __section(".data"); |
| 2658 | static int trrd __section(".data"); |
| 2659 | static int trp __section(".data"); |
| 2660 | static int tras __section(".data"); |
| 2661 | static int trc __section(".data"); |
| 2662 | static int trfc __section(".data"); |
| 2663 | static int twtr __section(".data"); |
| 2664 | static int trtp __section(".data"); |
| 2665 | static int tfaw __section(".data"); |
| 2666 | |
| 2667 | static int ddr4_tckavgmin __section(".data"); |
| 2668 | static int ddr4_tckavgmax __section(".data"); |
| 2669 | static int ddr4_trdcmin __section(".data"); |
| 2670 | static int ddr4_trpmin __section(".data"); |
| 2671 | static int ddr4_trasmin __section(".data"); |
| 2672 | static int ddr4_trcmin __section(".data"); |
| 2673 | static int ddr4_trfc1min __section(".data"); |
| 2674 | static int ddr4_trfc2min __section(".data"); |
| 2675 | static int ddr4_trfc4min __section(".data"); |
| 2676 | static int ddr4_tfawmin __section(".data"); |
| 2677 | static int ddr4_trrd_smin __section(".data"); |
| 2678 | static int ddr4_trrd_lmin __section(".data"); |
| 2679 | static int ddr4_tccd_lmin __section(".data"); |
| 2680 | |
| 2681 | static int wl_mask_err __section(".data"); |
| 2682 | static int wl_loops __section(".data"); |
| 2683 | static int default_rtt_nom[4] __section(".data"); |
| 2684 | static int dyn_rtt_nom_mask __section(".data"); |
| 2685 | static struct impedence_values *imp_val __section(".data"); |
| 2686 | static char default_rodt_ctl __section(".data"); |
| 2687 | // default to disabled (ie, try LMC restart, not chip reset) |
| 2688 | static int ddr_disable_chip_reset __section(".data"); |
| 2689 | static const char *dimm_type_name __section(".data"); |
| 2690 | static int match_wl_rtt_nom __section(".data"); |
| 2691 | |
| 2692 | struct hwl_alt_by_rank { |
| 2693 | u16 hwl_alt_mask; // mask of bytelanes with alternate |
| 2694 | u16 hwl_alt_delay[9]; // bytelane alternate avail if mask=1 |
| 2695 | }; |
| 2696 | |
| 2697 | static struct hwl_alt_by_rank hwl_alts[4] __section(".data"); |
| 2698 | |
| 2699 | #define DEFAULT_INTERNAL_VREF_TRAINING_LIMIT 3 // was: 5 |
| 2700 | static int internal_retries __section(".data"); |
| 2701 | |
| 2702 | static int deskew_training_errors __section(".data"); |
| 2703 | static struct deskew_counts deskew_training_results __section(".data"); |
| 2704 | static int disable_deskew_training __section(".data"); |
| 2705 | static int restart_if_dsk_incomplete __section(".data"); |
| 2706 | static int dac_eval_retries __section(".data"); |
| 2707 | static int dac_settings[9] __section(".data"); |
| 2708 | static int num_samples __section(".data"); |
| 2709 | static int sample __section(".data"); |
| 2710 | static int lane __section(".data"); |
| 2711 | static int last_lane __section(".data"); |
| 2712 | static int total_dac_eval_retries __section(".data"); |
| 2713 | static int dac_eval_exhausted __section(".data"); |
| 2714 | |
| 2715 | #define DEFAULT_DAC_SAMPLES 7 // originally was 5 |
| 2716 | #define DAC_RETRIES_LIMIT 2 |
| 2717 | |
| 2718 | struct bytelane_sample { |
| 2719 | s16 bytes[DEFAULT_DAC_SAMPLES]; |
| 2720 | }; |
| 2721 | |
| 2722 | static struct bytelane_sample lanes[9] __section(".data"); |
| 2723 | |
| 2724 | static char disable_sequential_delay_check __section(".data"); |
| 2725 | static int wl_print __section(".data"); |
| 2726 | |
| 2727 | static int enable_by_rank_init __section(".data"); |
| 2728 | static int saved_rank_mask __section(".data"); |
| 2729 | static int by_rank __section(".data"); |
| 2730 | static struct deskew_data rank_dsk[4] __section(".data"); |
| 2731 | static struct dac_data rank_dac[4] __section(".data"); |
| 2732 | |
| 2733 | // TODO: perhaps remove node completely at some point? |
| 2734 | static int node __section(".data"); |
| 2735 | static int base_cl __section(".data"); |
| 2736 | |
| 2737 | /* Parameters from DDR3 Specifications */ |
| 2738 | #define DDR3_TREFI 7800000 /* 7.8 us */ |
| 2739 | #define DDR3_ZQCS 80000ull /* 80 ns */ |
| 2740 | #define DDR3_ZQCS_INTERNAL 1280000000ull /* 128ms/100 */ |
| 2741 | #define DDR3_TCKE 5000 /* 5 ns */ |
| 2742 | #define DDR3_TMRD 4 /* 4 nCK */ |
| 2743 | #define DDR3_TDLLK 512 /* 512 nCK */ |
| 2744 | #define DDR3_TMPRR 1 /* 1 nCK */ |
| 2745 | #define DDR3_TWLMRD 40 /* 40 nCK */ |
| 2746 | #define DDR3_TWLDQSEN 25 /* 25 nCK */ |
| 2747 | |
| 2748 | /* Parameters from DDR4 Specifications */ |
| 2749 | #define DDR4_TMRD 8 /* 8 nCK */ |
| 2750 | #define DDR4_TDLLK 768 /* 768 nCK */ |
| 2751 | |
| 2752 | static void lmc_config(struct ddr_priv *priv) |
| 2753 | { |
| 2754 | union cvmx_lmcx_config cfg; |
| 2755 | char *s; |
| 2756 | |
| 2757 | cfg.u64 = 0; |
| 2758 | |
| 2759 | cfg.cn78xx.ecc_ena = use_ecc; |
| 2760 | cfg.cn78xx.row_lsb = encode_row_lsb_ddr3(row_lsb); |
| 2761 | cfg.cn78xx.pbank_lsb = encode_pbank_lsb_ddr3(pbank_lsb); |
| 2762 | |
| 2763 | cfg.cn78xx.idlepower = 0; /* Disabled */ |
| 2764 | |
| 2765 | s = lookup_env(priv, "ddr_idlepower"); |
| 2766 | if (s) |
| 2767 | cfg.cn78xx.idlepower = simple_strtoul(s, NULL, 0); |
| 2768 | |
| 2769 | cfg.cn78xx.forcewrite = 0; /* Disabled */ |
| 2770 | /* Include memory reference address in the ECC */ |
| 2771 | cfg.cn78xx.ecc_adr = 1; |
| 2772 | |
| 2773 | s = lookup_env(priv, "ddr_ecc_adr"); |
| 2774 | if (s) |
| 2775 | cfg.cn78xx.ecc_adr = simple_strtoul(s, NULL, 0); |
| 2776 | |
| 2777 | cfg.cn78xx.reset = 0; |
| 2778 | |
| 2779 | /* |
| 2780 | * Program LMC0_CONFIG[24:18], ref_zqcs_int(6:0) to |
| 2781 | * RND-DN(tREFI/clkPeriod/512) Program LMC0_CONFIG[36:25], |
| 2782 | * ref_zqcs_int(18:7) to |
| 2783 | * RND-DN(ZQCS_Interval/clkPeriod/(512*128)). Note that this |
| 2784 | * value should always be greater than 32, to account for |
| 2785 | * resistor calibration delays. |
| 2786 | */ |
| 2787 | |
| 2788 | cfg.cn78xx.ref_zqcs_int = ((DDR3_TREFI / tclk_psecs / 512) & 0x7f); |
| 2789 | cfg.cn78xx.ref_zqcs_int |= |
| 2790 | ((max(33ull, (DDR3_ZQCS_INTERNAL / (tclk_psecs / 100) / |
| 2791 | (512 * 128))) & 0xfff) << 7); |
| 2792 | |
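| | /* |
| |  * Illustration (assuming a hypothetical 1600 MT/s part, i.e. |
| |  * tclk_psecs = 1250): 7800000 / 1250 / 512 = 12 for the low field |
| |  * and 1280000000 / (1250 / 100) / (512 * 128) = 1627 for the |
| |  * upper field. |
| |  */ |
| | |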
| 2793 | cfg.cn78xx.early_dqx = 1; /* Default to enabled */ |
| 2794 | |
| 2795 | s = lookup_env(priv, "ddr_early_dqx"); |
| 2796 | if (!s) |
| 2797 | s = lookup_env(priv, "ddr%d_early_dqx", if_num); |
| 2798 | |
| 2799 | if (s) |
| 2800 | cfg.cn78xx.early_dqx = simple_strtoul(s, NULL, 0); |
| 2801 | |
| 2802 | cfg.cn78xx.sref_with_dll = 0; |
| 2803 | |
| 2804 | cfg.cn78xx.rank_ena = bunk_enable; |
| 2805 | cfg.cn78xx.rankmask = rank_mask; /* Set later */ |
| 2806 | cfg.cn78xx.mirrmask = (spd_addr_mirror << 1 | spd_addr_mirror << 3) & |
| 2807 | rank_mask; |
| 2808 | /* Set once and don't change it. */ |
| 2809 | cfg.cn78xx.init_status = rank_mask; |
| 2810 | cfg.cn78xx.early_unload_d0_r0 = 0; |
| 2811 | cfg.cn78xx.early_unload_d0_r1 = 0; |
| 2812 | cfg.cn78xx.early_unload_d1_r0 = 0; |
| 2813 | cfg.cn78xx.early_unload_d1_r1 = 0; |
| 2814 | cfg.cn78xx.scrz = 0; |
| 2815 | if (octeon_is_cpuid(OCTEON_CN70XX)) |
| 2816 | cfg.cn78xx.mode32b = 1; /* Read-only. Always 1. */ |
| 2817 | cfg.cn78xx.mode_x4dev = (dram_width == 4) ? 1 : 0; |
| 2818 | cfg.cn78xx.bg2_enable = ((ddr_type == DDR4_DRAM) && |
| 2819 | (dram_width == 16)) ? 0 : 1; |
| 2820 | |
| 2821 | s = lookup_env_ull(priv, "ddr_config"); |
| 2822 | if (s) |
| 2823 | cfg.u64 = simple_strtoull(s, NULL, 0); |
| 2824 | debug("LMC_CONFIG : 0x%016llx\n", |
| 2825 | cfg.u64); |
| 2826 | lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64); |
| 2827 | } |
| 2828 | |
| 2829 | static void lmc_control(struct ddr_priv *priv) |
| 2830 | { |
| 2831 | union cvmx_lmcx_control ctrl; |
| 2832 | char *s; |
| 2833 | |
| 2834 | ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); |
| 2835 | ctrl.s.rdimm_ena = spd_rdimm; |
| 2836 | ctrl.s.bwcnt = 0; /* Clear counter later */ |
| 2837 | if (spd_rdimm) |
| 2838 | ctrl.s.ddr2t = (safe_ddr_flag ? 1 : c_cfg->ddr2t_rdimm); |
| 2839 | else |
| 2840 | ctrl.s.ddr2t = (safe_ddr_flag ? 1 : c_cfg->ddr2t_udimm); |
| 2841 | ctrl.s.pocas = 0; |
| 2842 | ctrl.s.fprch2 = (safe_ddr_flag ? 2 : c_cfg->fprch2); |
| 2843 | ctrl.s.throttle_rd = safe_ddr_flag ? 1 : 0; |
| 2844 | ctrl.s.throttle_wr = safe_ddr_flag ? 1 : 0; |
| 2845 | ctrl.s.inorder_rd = safe_ddr_flag ? 1 : 0; |
| 2846 | ctrl.s.inorder_wr = safe_ddr_flag ? 1 : 0; |
| 2847 | ctrl.s.elev_prio_dis = safe_ddr_flag ? 1 : 0; |
| 2848 | /* discards writes to addresses that don't exist in the DRAM */ |
| 2849 | ctrl.s.nxm_write_en = 0; |
| 2850 | ctrl.s.max_write_batch = 8; |
| 2851 | ctrl.s.xor_bank = 1; |
| 2852 | ctrl.s.auto_dclkdis = 1; |
| 2853 | ctrl.s.int_zqcs_dis = 0; |
| 2854 | ctrl.s.ext_zqcs_dis = 0; |
| 2855 | ctrl.s.bprch = 1; |
| 2856 | ctrl.s.wodt_bprch = 1; |
| 2857 | ctrl.s.rodt_bprch = 1; |
| 2858 | |
| 2859 | s = lookup_env(priv, "ddr_xor_bank"); |
| 2860 | if (s) |
| 2861 | ctrl.s.xor_bank = simple_strtoul(s, NULL, 0); |
| 2862 | |
| 2863 | s = lookup_env(priv, "ddr_2t"); |
| 2864 | if (s) |
| 2865 | ctrl.s.ddr2t = simple_strtoul(s, NULL, 0); |
| 2866 | |
| 2867 | s = lookup_env(priv, "ddr_fprch2"); |
| 2868 | if (s) |
| 2869 | ctrl.s.fprch2 = simple_strtoul(s, NULL, 0); |
| 2870 | |
| 2871 | s = lookup_env(priv, "ddr_bprch"); |
| 2872 | if (s) |
| 2873 | ctrl.s.bprch = simple_strtoul(s, NULL, 0); |
| 2874 | |
| 2875 | s = lookup_env(priv, "ddr_wodt_bprch"); |
| 2876 | if (s) |
| 2877 | ctrl.s.wodt_bprch = simple_strtoul(s, NULL, 0); |
| 2878 | |
| 2879 | s = lookup_env(priv, "ddr_rodt_bprch"); |
| 2880 | if (s) |
| 2881 | ctrl.s.rodt_bprch = simple_strtoul(s, NULL, 0); |
| 2882 | |
| 2883 | s = lookup_env(priv, "ddr_int_zqcs_dis"); |
| 2884 | if (s) |
| 2885 | ctrl.s.int_zqcs_dis = simple_strtoul(s, NULL, 0); |
| 2886 | |
| 2887 | s = lookup_env(priv, "ddr_ext_zqcs_dis"); |
| 2888 | if (s) |
| 2889 | ctrl.s.ext_zqcs_dis = simple_strtoul(s, NULL, 0); |
| 2890 | |
| 2891 | s = lookup_env_ull(priv, "ddr_control"); |
| 2892 | if (s) |
| 2893 | ctrl.u64 = simple_strtoull(s, NULL, 0); |
| 2894 | |
| 2895 | debug("LMC_CONTROL : 0x%016llx\n", |
| 2896 | ctrl.u64); |
| 2897 | lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64); |
| 2898 | } |
| 2899 | |
| 2900 | static void lmc_timing_params0(struct ddr_priv *priv) |
| 2901 | { |
| 2902 | union cvmx_lmcx_timing_params0 tp0; |
| 2903 | unsigned int trp_value; |
| 2904 | char *s; |
| 2905 | |
| 2906 | tp0.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS0(if_num)); |
| 2907 | |
| 2908 | trp_value = divide_roundup(trp, tclk_psecs) - 1; |
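| | /* |
| | * Worked example (illustrative only): with tRP = 13.75 ns at |
| | * tCK ~= 833 ps (DDR4-2400), divide_roundup(13750, 833) = 17, so |
| | * trp_value = 16 is programmed (the CSR holds tRP in CK cycles |
| | * minus one). |
| | */ |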
| 2909 | debug("TIMING_PARAMS0[TRP]: NEW 0x%x, OLD 0x%x\n", trp_value, |
| 2910 | trp_value + |
| 2911 | (unsigned int)(divide_roundup(max(4ull * tclk_psecs, 7500ull), |
| 2912 | tclk_psecs)) - 4); |
| 2913 | s = lookup_env_ull(priv, "ddr_use_old_trp"); |
| 2914 | if (s) { |
| 2915 | if (!!simple_strtoull(s, NULL, 0)) { |
| 2916 | trp_value += |
| 2917 | divide_roundup(max(4ull * tclk_psecs, 7500ull), |
| 2918 | tclk_psecs) - 4; |
| 2919 | debug("TIMING_PARAMS0[trp]: USING OLD 0x%x\n", |
| 2920 | trp_value); |
| 2921 | } |
| 2922 | } |
| 2923 | |
| 2924 | tp0.cn78xx.txpr = |
| 2925 | divide_roundup(max(5ull * tclk_psecs, trfc + 10000ull), |
| 2926 | 16 * tclk_psecs); |
| 2927 | tp0.cn78xx.trp = trp_value & 0x1f; |
| 2928 | tp0.cn78xx.tcksre = |
| 2929 | divide_roundup(max(5ull * tclk_psecs, 10000ull), tclk_psecs) - 1; |
| 2930 | |
| 2931 | if (ddr_type == DDR4_DRAM) { |
| 2932 | int tzqinit = 4; // Default to 4, for all DDR4 speed bins |
| 2933 | |
| 2934 | s = lookup_env(priv, "ddr_tzqinit"); |
| 2935 | if (s) |
| 2936 | tzqinit = simple_strtoul(s, NULL, 0); |
| 2937 | |
| 2938 | tp0.cn78xx.tzqinit = tzqinit; |
| 2939 | /* 128 / 16 = 8, independent of tCK. */ |
| 2940 | tp0.cn78xx.tzqcs = divide_roundup(128 * tclk_psecs, |
| 2941 | (16 * tclk_psecs)); |
| 2942 | tp0.cn78xx.tcke = |
| 2943 | divide_roundup(max(3 * tclk_psecs, (ulong)DDR3_TCKE), |
| 2944 | tclk_psecs) - 1; |
| 2945 | tp0.cn78xx.tmrd = |
| 2946 | divide_roundup((DDR4_TMRD * tclk_psecs), tclk_psecs) - 1; |
| 2947 | tp0.cn78xx.tmod = 25; /* 25 is the max allowed */ |
| 2948 | tp0.cn78xx.tdllk = divide_roundup(DDR4_TDLLK, 256); |
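| | /* |
| | * Assuming DDR4_TDLLK carries the JEDEC minimum of 768 nCK |
| | * (DDR4-1600 through DDR4-2666), this field, in units of 256 CK |
| | * cycles, becomes 768 / 256 = 3. |
| | */ |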
| 2949 | } else { |
| 2950 | tp0.cn78xx.tzqinit = |
| 2951 | divide_roundup(max(512ull * tclk_psecs, 640000ull), |
| 2952 | (256 * tclk_psecs)); |
| 2953 | tp0.cn78xx.tzqcs = |
| 2954 | divide_roundup(max(64ull * tclk_psecs, DDR3_ZQCS), |
| 2955 | (16 * tclk_psecs)); |
| 2956 | tp0.cn78xx.tcke = divide_roundup(DDR3_TCKE, tclk_psecs) - 1; |
| 2957 | tp0.cn78xx.tmrd = |
| 2958 | divide_roundup((DDR3_TMRD * tclk_psecs), tclk_psecs) - 1; |
| 2959 | tp0.cn78xx.tmod = |
| 2960 | divide_roundup(max(12ull * tclk_psecs, 15000ull), |
| 2961 | tclk_psecs) - 1; |
| 2962 | tp0.cn78xx.tdllk = divide_roundup(DDR3_TDLLK, 256); |
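| | /* |
| | * Assuming DDR3_TDLLK is the JEDEC 512 nCK value, the same |
| | * 256-cycle units yield 512 / 256 = 2 here. |
| | */ |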
| 2963 | } |
| 2964 | |
| 2965 | s = lookup_env_ull(priv, "ddr_timing_params0"); |
| 2966 | if (s) |
| 2967 | tp0.u64 = simple_strtoull(s, NULL, 0); |
| 2968 | debug("TIMING_PARAMS0 : 0x%016llx\n", |
| 2969 | tp0.u64); |
| 2970 | lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS0(if_num), tp0.u64); |
| 2971 | } |
| 2972 | |
| 2973 | static void lmc_timing_params1(struct ddr_priv *priv) |
| 2974 | { |
| 2975 | union cvmx_lmcx_timing_params1 tp1; |
| 2976 | unsigned int txp, temp_trcd, trfc_dlr; |
| 2977 | char *s; |
| 2978 | |
| 2979 | tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num)); |
| 2980 | |
| 2981 | /* .cn70xx. */ |
| 2982 | tp1.s.tmprr = divide_roundup(DDR3_TMPRR * tclk_psecs, tclk_psecs) - 1; |
| 2983 | |
| 2984 | tp1.cn78xx.tras = divide_roundup(tras, tclk_psecs) - 1; |
| 2985 | |
| 2986 | temp_trcd = divide_roundup(trcd, tclk_psecs); |
| 2987 | if (temp_trcd > 15) { |
| 2988 | debug("TIMING_PARAMS1[trcd]: need extension bit for 0x%x\n", |
| 2989 | temp_trcd); |
| 2990 | } |
| 2991 | if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && temp_trcd > 15) { |
| 2992 | /* |
| 2993 | * Let .trcd=0 serve as a flag that the field has |
| 2994 | * overflowed. Must use Additive Latency mode as a |
| 2995 | * workaround. |
| 2996 | */ |
| 2997 | temp_trcd = 0; |
| 2998 | } |
| 2999 | tp1.cn78xx.trcd = (temp_trcd >> 0) & 0xf; |
| 3000 | tp1.cn78xx.trcd_ext = (temp_trcd >> 4) & 0x1; |
| 3001 | |
| 3002 | tp1.cn78xx.twtr = divide_roundup(twtr, tclk_psecs) - 1; |
| 3003 | tp1.cn78xx.trfc = divide_roundup(trfc, 8 * tclk_psecs); |
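| | /* |
| | * TRFC is programmed in units of 8 CK cycles. Illustrative example: |
| | * tRFC = 350 ns (8 Gbit DDR4) at tCK ~= 833 ps gives |
| | * divide_roundup(350000, 8 * 833) = 53. |
| | */ |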
| 3004 | |
| 3005 | if (ddr_type == DDR4_DRAM) { |
| 3006 | /* Workaround bug 24006. Use Trrd_l. */ |
| 3007 | tp1.cn78xx.trrd = |
| 3008 | divide_roundup(ddr4_trrd_lmin, tclk_psecs) - 2; |
| 3009 | } else { |
| 3010 | tp1.cn78xx.trrd = divide_roundup(trrd, tclk_psecs) - 2; |
| 3011 | } |
| 3012 | |
| 3013 | /* |
| 3014 | * tXP = max( 3nCK, 7.5 ns) DDR3-800 tCLK = 2500 psec |
| 3015 | * tXP = max( 3nCK, 7.5 ns) DDR3-1066 tCLK = 1875 psec |
| 3016 | * tXP = max( 3nCK, 6.0 ns) DDR3-1333 tCLK = 1500 psec |
| 3017 | * tXP = max( 3nCK, 6.0 ns) DDR3-1600 tCLK = 1250 psec |
| 3018 | * tXP = max( 3nCK, 6.0 ns) DDR3-1866 tCLK = 1071 psec |
| 3019 | * tXP = max( 3nCK, 6.0 ns) DDR3-2133 tCLK = 937 psec |
| 3020 | */ |
| 3021 | txp = (tclk_psecs < 1875) ? 6000 : 7500; |
| 3022 | txp = divide_roundup(max((unsigned int)(3 * tclk_psecs), txp), |
| 3023 | tclk_psecs) - 1; |
| 3024 | if (txp > 7) { |
| 3025 | debug("TIMING_PARAMS1[txp]: need extension bit for 0x%x\n", |
| 3026 | txp); |
| 3027 | } |
| 3028 | if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && txp > 7) |
| 3029 | txp = 7; // max it out |
| 3030 | tp1.cn78xx.txp = (txp >> 0) & 7; |
| 3031 | tp1.cn78xx.txp_ext = (txp >> 3) & 1; |
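| | /* |
| | * Worked example (illustrative only): at DDR3-1600 (tCK = 1250 ps), |
| | * tXP = max(3 * 1250, 6000) = 6000 ps, so |
| | * divide_roundup(6000, 1250) - 1 = 4; this fits in the 3-bit field |
| | * and txp_ext stays 0. |
| | */ |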
| 3032 | |
| 3033 | tp1.cn78xx.twlmrd = divide_roundup(DDR3_TWLMRD * tclk_psecs, |
| 3034 | 4 * tclk_psecs); |
| 3035 | tp1.cn78xx.twldqsen = divide_roundup(DDR3_TWLDQSEN * tclk_psecs, |
| 3036 | 4 * tclk_psecs); |
| 3037 | tp1.cn78xx.tfaw = divide_roundup(tfaw, 4 * tclk_psecs); |
| 3038 | tp1.cn78xx.txpdll = divide_roundup(max(10ull * tclk_psecs, 24000ull), |
| 3039 | tclk_psecs) - 1; |
| 3040 | |
| 3041 | if (ddr_type == DDR4_DRAM && is_3ds_dimm) { |
| 3042 | /* |
| 3043 | * 4 Gb: tRFC_DLR = 90 ns |
| 3044 | * 8 Gb: tRFC_DLR = 120 ns |
| 3045 | * 16 Gb: tRFC_DLR = 190 ns FIXME? |
| 3046 | */ |
| 3047 | if (die_capacity == 0x1000) // 4 Gbit |
| 3048 | trfc_dlr = 90; |
| 3049 | else if (die_capacity == 0x2000) // 8 Gbit |
| 3050 | trfc_dlr = 120; |
| 3051 | else if (die_capacity == 0x4000) // 16 Gbit |
| 3052 | trfc_dlr = 190; |
| 3053 | else |
| 3054 | trfc_dlr = 0; |
| 3055 | |
| 3056 | if (trfc_dlr == 0) { |
| 3057 | debug("N%d.LMC%d: ERROR: tRFC_DLR: die_capacity %u Mbit is illegal\n", |
| 3058 | node, if_num, die_capacity); |
| 3059 | } else { |
| 3060 | tp1.cn78xx.trfc_dlr = |
| 3061 | divide_roundup(trfc_dlr * 1000UL, 8 * tclk_psecs); |
| 3062 | debug("N%d.LMC%d: TIMING_PARAMS1[trfc_dlr] set to %u\n", |
| 3063 | node, if_num, tp1.cn78xx.trfc_dlr); |
| 3064 | } |
| 3065 | } |
| 3066 | |
| 3067 | s = lookup_env_ull(priv, "ddr_timing_params1"); |
| 3068 | if (s) |
| 3069 | tp1.u64 = simple_strtoull(s, NULL, 0); |
| 3070 | |
| 3071 | debug("TIMING_PARAMS1 : 0x%016llx\n", |
| 3072 | tp1.u64); |
| 3073 | lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64); |
| 3074 | } |
| 3075 | |
| 3076 | static void lmc_timing_params2(struct ddr_priv *priv) |
| 3077 | { |
| 3078 | if (ddr_type == DDR4_DRAM) { |
| 3079 | union cvmx_lmcx_timing_params1 tp1; |
| 3080 | union cvmx_lmcx_timing_params2 tp2; |
| 3081 | int temp_trrd_l; |
| 3082 | |
| 3083 | tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num)); |
| 3084 | tp2.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS2(if_num)); |
| 3085 | debug("TIMING_PARAMS2 : 0x%016llx\n", |
| 3086 | tp2.u64); |
| 3087 | |
| 3088 | temp_trrd_l = divide_roundup(ddr4_trrd_lmin, tclk_psecs) - 2; |
| 3089 | if (temp_trrd_l > 7) |
| 3090 | debug("TIMING_PARAMS2[trrd_l]: need extension bit for 0x%x\n", |
| 3091 | temp_trrd_l); |
| 3092 | if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && temp_trrd_l > 7) |
| 3093 | temp_trrd_l = 7; // max it out |
| 3094 | tp2.cn78xx.trrd_l = (temp_trrd_l >> 0) & 7; |
| 3095 | tp2.cn78xx.trrd_l_ext = (temp_trrd_l >> 3) & 1; |
| 3096 | |
| 3097 | // tWTR_L = max(4 nCK, 7.5 ns); correct for DDR4-1600 through DDR4-2400 |
| 3098 | tp2.s.twtr_l = divide_nint(max(4ull * tclk_psecs, 7500ull), |
| 3099 | tclk_psecs) - 1; |
| 3100 | tp2.s.t_rw_op_max = 7; |
| 3101 | tp2.s.trtp = divide_roundup(max(4ull * tclk_psecs, 7500ull), |
| 3102 | tclk_psecs) - 1; |
| 3103 | |
| 3104 | debug("TIMING_PARAMS2 : 0x%016llx\n", |
| 3105 | tp2.u64); |
| 3106 | lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS2(if_num), tp2.u64); |
| 3107 | |
| 3108 | /* |
| 3109 | * Workaround Errata 25823 - LMC: Possible DDR4 tWTR_L not met |
| 3110 | * for Write-to-Read operations to the same Bank Group |
| 3111 | */ |
| 3112 | if (tp1.cn78xx.twtr < (tp2.s.twtr_l - 4)) { |
| 3113 | tp1.cn78xx.twtr = tp2.s.twtr_l - 4; |
| 3114 | debug("ERRATA 25823: NEW: TWTR: %d, TWTR_L: %d\n", |
| 3115 | tp1.cn78xx.twtr, tp2.s.twtr_l); |
| 3116 | debug("TIMING_PARAMS1 : 0x%016llx\n", |
| 3117 | tp1.u64); |
| 3118 | lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64); |
| 3119 | } |
| 3120 | } |
| 3121 | } |
| 3122 | |
| 3123 | static void lmc_modereg_params0(struct ddr_priv *priv) |
| 3124 | { |
| 3125 | union cvmx_lmcx_modereg_params0 mp0; |
| 3126 | int param; |
| 3127 | char *s; |
| 3128 | |
| 3129 | mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num)); |
| 3130 | |
| 3131 | if (ddr_type == DDR4_DRAM) { |
| 3132 | mp0.s.cwl = 0; /* 1600 (1250ps) */ |
| 3133 | if (tclk_psecs < 1250) |
| 3134 | mp0.s.cwl = 1; /* 1866 (1072ps) */ |
| 3135 | if (tclk_psecs < 1072) |
| 3136 | mp0.s.cwl = 2; /* 2133 (938ps) */ |
| 3137 | if (tclk_psecs < 938) |
| 3138 | mp0.s.cwl = 3; /* 2400 (833ps) */ |
| 3139 | if (tclk_psecs < 833) |
| 3140 | mp0.s.cwl = 4; /* 2666 (750ps) */ |
| 3141 | if (tclk_psecs < 750) |
| 3142 | mp0.s.cwl = 5; /* 3200 (625ps) */ |
| 3143 | } else { |
| 3144 | /* |
| 3145 | ** CSR CWL CAS write Latency |
| 3146 | ** === === ================================= |
| 3147 | ** 0 5 ( tCK(avg) >= 2.5 ns) |
| 3148 | ** 1 6 (2.5 ns > tCK(avg) >= 1.875 ns) |
| 3149 | ** 2 7 (1.875 ns > tCK(avg) >= 1.5 ns) |
| 3150 | ** 3 8 (1.5 ns > tCK(avg) >= 1.25 ns) |
| 3151 | ** 4 9 (1.25 ns > tCK(avg) >= 1.07 ns) |
| 3152 | ** 5 10 (1.07 ns > tCK(avg) >= 0.935 ns) |
| 3153 | ** 6 11 (0.935 ns > tCK(avg) >= 0.833 ns) |
| 3154 | ** 7 12 (0.833 ns > tCK(avg) >= 0.75 ns) |
| 3155 | */ |
| 3156 | |
| 3157 | mp0.s.cwl = 0; |
| 3158 | if (tclk_psecs < 2500) |
| 3159 | mp0.s.cwl = 1; |
| 3160 | if (tclk_psecs < 1875) |
| 3161 | mp0.s.cwl = 2; |
| 3162 | if (tclk_psecs < 1500) |
| 3163 | mp0.s.cwl = 3; |
| 3164 | if (tclk_psecs < 1250) |
| 3165 | mp0.s.cwl = 4; |
| 3166 | if (tclk_psecs < 1070) |
| 3167 | mp0.s.cwl = 5; |
| 3168 | if (tclk_psecs < 935) |
| 3169 | mp0.s.cwl = 6; |
| 3170 | if (tclk_psecs < 833) |
| 3171 | mp0.s.cwl = 7; |
| 3172 | } |
| 3173 | |
| 3174 | s = lookup_env(priv, "ddr_cwl"); |
| 3175 | if (s) |
| 3176 | mp0.s.cwl = simple_strtoul(s, NULL, 0) - 5; |
| 3177 | |
| 3178 | if (ddr_type == DDR4_DRAM) { |
| 3179 | debug("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]", |
| 3180 | mp0.s.cwl + 9 |
| 3181 | + ((mp0.s.cwl > 3) ? (mp0.s.cwl - 3) : 0), mp0.s.cwl); |
| 3182 | } else { |
| 3183 | debug("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]", |
| 3184 | mp0.s.cwl + 5, mp0.s.cwl); |
| 3185 | } |
| 3186 | |
| 3187 | mp0.s.mprloc = 0; |
| 3188 | mp0.s.mpr = 0; |
| 3189 | mp0.s.dll = (ddr_type == DDR4_DRAM); /* 0 for DDR3 and 1 for DDR4 */ |
| 3190 | mp0.s.al = 0; |
| 3191 | mp0.s.wlev = 0; /* Read Only */ |
| 3192 | if (octeon_is_cpuid(OCTEON_CN70XX) || ddr_type == DDR4_DRAM) |
| 3193 | mp0.s.tdqs = 0; |
| 3194 | else |
| 3195 | mp0.s.tdqs = 1; |
| 3196 | mp0.s.qoff = 0; |
| 3197 | |
| 3198 | s = lookup_env(priv, "ddr_cl"); |
| 3199 | if (s) { |
| 3200 | cl = simple_strtoul(s, NULL, 0); |
| 3201 | debug("CAS Latency : %6d\n", |
| 3202 | cl); |
| 3203 | } |
| 3204 | |
| 3205 | if (ddr_type == DDR4_DRAM) { |
| 3206 | mp0.s.cl = 0x0; |
| 3207 | if (cl > 9) |
| 3208 | mp0.s.cl = 0x1; |
| 3209 | if (cl > 10) |
| 3210 | mp0.s.cl = 0x2; |
| 3211 | if (cl > 11) |
| 3212 | mp0.s.cl = 0x3; |
| 3213 | if (cl > 12) |
| 3214 | mp0.s.cl = 0x4; |
| 3215 | if (cl > 13) |
| 3216 | mp0.s.cl = 0x5; |
| 3217 | if (cl > 14) |
| 3218 | mp0.s.cl = 0x6; |
| 3219 | if (cl > 15) |
| 3220 | mp0.s.cl = 0x7; |
| 3221 | if (cl > 16) |
| 3222 | mp0.s.cl = 0x8; |
| 3223 | if (cl > 18) |
| 3224 | mp0.s.cl = 0x9; |
| 3225 | if (cl > 20) |
| 3226 | mp0.s.cl = 0xA; |
| 3227 | if (cl > 24) |
| 3228 | mp0.s.cl = 0xB; |
| 3229 | } else { |
| 3230 | mp0.s.cl = 0x2; |
| 3231 | if (cl > 5) |
| 3232 | mp0.s.cl = 0x4; |
| 3233 | if (cl > 6) |
| 3234 | mp0.s.cl = 0x6; |
| 3235 | if (cl > 7) |
| 3236 | mp0.s.cl = 0x8; |
| 3237 | if (cl > 8) |
| 3238 | mp0.s.cl = 0xA; |
| 3239 | if (cl > 9) |
| 3240 | mp0.s.cl = 0xC; |
| 3241 | if (cl > 10) |
| 3242 | mp0.s.cl = 0xE; |
| 3243 | if (cl > 11) |
| 3244 | mp0.s.cl = 0x1; |
| 3245 | if (cl > 12) |
| 3246 | mp0.s.cl = 0x3; |
| 3247 | if (cl > 13) |
| 3248 | mp0.s.cl = 0x5; |
| 3249 | if (cl > 14) |
| 3250 | mp0.s.cl = 0x7; |
| 3251 | if (cl > 15) |
| 3252 | mp0.s.cl = 0x9; |
| 3253 | } |
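| | /* |
| | * The DDR3 mapping above (else branch) follows the MR0 CAS-latency |
| | * encoding: CL 5-11 use the even codes 0x2-0xE ((CL - 4) << 1), |
| | * while CL 12-16 use the odd codes 0x1-0x9, the low bit |
| | * corresponding to the MR0 A2 extension bit. |
| | */ |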
| 3254 | |
| 3255 | mp0.s.rbt = 0; /* Read Only. */ |
| 3256 | mp0.s.tm = 0; |
| 3257 | mp0.s.dllr = 0; |
| 3258 | |
| 3259 | param = divide_roundup(twr, tclk_psecs); |
| 3260 | |
| 3261 | if (ddr_type == DDR4_DRAM) { /* DDR4 */ |
| 3262 | mp0.s.wrp = 1; |
| 3263 | if (param > 12) |
| 3264 | mp0.s.wrp = 2; |
| 3265 | if (param > 14) |
| 3266 | mp0.s.wrp = 3; |
| 3267 | if (param > 16) |
| 3268 | mp0.s.wrp = 4; |
| 3269 | if (param > 18) |
| 3270 | mp0.s.wrp = 5; |
| 3271 | if (param > 20) |
| 3272 | mp0.s.wrp = 6; |
| 3273 | if (param > 24) /* RESERVED in DDR4 spec */ |
| 3274 | mp0.s.wrp = 7; |
| 3275 | } else { /* DDR3 */ |
| 3276 | mp0.s.wrp = 1; |
| 3277 | if (param > 5) |
| 3278 | mp0.s.wrp = 2; |
| 3279 | if (param > 6) |
| 3280 | mp0.s.wrp = 3; |
| 3281 | if (param > 7) |
| 3282 | mp0.s.wrp = 4; |
| 3283 | if (param > 8) |
| 3284 | mp0.s.wrp = 5; |
| 3285 | if (param > 10) |
| 3286 | mp0.s.wrp = 6; |
| 3287 | if (param > 12) |
| 3288 | mp0.s.wrp = 7; |
| 3289 | } |
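| | /* |
| | * Illustrative example for the DDR4 branch above: tWR = 15 ns at |
| | * tCK ~= 833 ps gives param = divide_roundup(15000, 833) = 19, |
| | * which lands in the "param > 18" bucket, so WRP = 5. |
| | */ |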
| 3290 | |
| 3291 | mp0.s.ppd = 0; |
| 3292 | |
| 3293 | s = lookup_env(priv, "ddr_wrp"); |
| 3294 | if (s) |
| 3295 | mp0.s.wrp = simple_strtoul(s, NULL, 0); |
| 3296 | |
| 3297 | debug("%-45s : %d, [0x%x]\n", |
| 3298 | "Write recovery for auto precharge WRP, [CSR]", param, mp0.s.wrp); |
| 3299 | |
| 3300 | s = lookup_env_ull(priv, "ddr_modereg_params0"); |
| 3301 | if (s) |
| 3302 | mp0.u64 = simple_strtoull(s, NULL, 0); |
| 3303 | |
| 3304 | debug("MODEREG_PARAMS0 : 0x%016llx\n", |
| 3305 | mp0.u64); |
| 3306 | lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64); |
| 3307 | } |
| 3308 | |
| 3309 | static void lmc_modereg_params1(struct ddr_priv *priv) |
| 3310 | { |
| 3311 | union cvmx_lmcx_modereg_params1 mp1; |
| 3312 | char *s; |
| 3313 | int i; |
| 3314 | |
| 3315 | mp1.u64 = odt_config[odt_idx].modereg_params1.u64; |
| 3316 | |
| 3317 | /* |
| 3318 | * Special request: mismatched DIMM support. Slot 0: 2-Rank, |
| 3319 | * Slot 1: 1-Rank |
| 3320 | */ |
| 3321 | if (rank_mask == 0x7) { /* 2-Rank, 1-Rank */ |
| 3322 | mp1.s.rtt_nom_00 = 0; |
| 3323 | mp1.s.rtt_nom_01 = 3; /* rttnom_40ohm */ |
| 3324 | mp1.s.rtt_nom_10 = 3; /* rttnom_40ohm */ |
| 3325 | mp1.s.rtt_nom_11 = 0; |
| 3326 | dyn_rtt_nom_mask = 0x6; |
| 3327 | } |
| 3328 | |
| 3329 | s = lookup_env(priv, "ddr_rtt_nom_mask"); |
| 3330 | if (s) |
| 3331 | dyn_rtt_nom_mask = simple_strtoul(s, NULL, 0); |
| 3332 | |
| 3333 | /* |
| 3334 | * Save the original rtt_nom settings before sweeping through |
| 3335 | * settings. |
| 3336 | */ |
| 3337 | default_rtt_nom[0] = mp1.s.rtt_nom_00; |
| 3338 | default_rtt_nom[1] = mp1.s.rtt_nom_01; |
| 3339 | default_rtt_nom[2] = mp1.s.rtt_nom_10; |
| 3340 | default_rtt_nom[3] = mp1.s.rtt_nom_11; |
| 3341 | |
| 3342 | ddr_rtt_nom_auto = c_cfg->ddr_rtt_nom_auto; |
| 3343 | |
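| | /* |
| | * The raw-register accesses below assume each rank's settings form |
| | * a 12-bit group in MODEREG_PARAMS1, with DIC at bits [8:7] and |
| | * RTT_NOM at bits [11:9] of that group (hence the i * 12 + 7 and |
| | * i * 12 + 9 shifts). RTT_WR goes through the insrt_wr()/extr_wr() |
| | * helpers because newer chips add extension bits for it. |
| | */ |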
| 3344 | for (i = 0; i < 4; ++i) { |
| 3345 | u64 value; |
| 3346 | |
| 3347 | s = lookup_env(priv, "ddr_rtt_nom_%1d%1d", !!(i & 2), |
| 3348 | !!(i & 1)); |
| 3349 | if (!s) |
| 3350 | s = lookup_env(priv, "ddr%d_rtt_nom_%1d%1d", if_num, |
| 3351 | !!(i & 2), !!(i & 1)); |
| 3352 | if (s) { |
| 3353 | value = simple_strtoul(s, NULL, 0); |
| 3354 | mp1.u64 &= ~((u64)0x7 << (i * 12 + 9)); |
| 3355 | mp1.u64 |= ((value & 0x7) << (i * 12 + 9)); |
| 3356 | default_rtt_nom[i] = value; |
| 3357 | ddr_rtt_nom_auto = 0; |
| 3358 | } |
| 3359 | } |
| 3360 | |
| 3361 | s = lookup_env(priv, "ddr_rtt_nom"); |
| 3362 | if (!s) |
| 3363 | s = lookup_env(priv, "ddr%d_rtt_nom", if_num); |
| 3364 | if (s) { |
| 3365 | u64 value; |
| 3366 | |
| 3367 | value = simple_strtoul(s, NULL, 0); |
| 3368 | |
| 3369 | if (dyn_rtt_nom_mask & 1) { |
| 3370 | default_rtt_nom[0] = value; |
| 3371 | mp1.s.rtt_nom_00 = value; |
| 3372 | } |
| 3373 | if (dyn_rtt_nom_mask & 2) { |
| 3374 | default_rtt_nom[1] = value; |
| 3375 | mp1.s.rtt_nom_01 = value; |
| 3376 | } |
| 3377 | if (dyn_rtt_nom_mask & 4) { |
| 3378 | default_rtt_nom[2] = value; |
| 3379 | mp1.s.rtt_nom_10 = value; |
| 3380 | } |
| 3381 | if (dyn_rtt_nom_mask & 8) { |
| 3382 | default_rtt_nom[3] = value; |
| 3383 | mp1.s.rtt_nom_11 = value; |
| 3384 | } |
| 3385 | |
| 3386 | ddr_rtt_nom_auto = 0; |
| 3387 | } |
| 3388 | |
| 3389 | for (i = 0; i < 4; ++i) { |
| 3390 | u64 value; |
| 3391 | |
| 3392 | s = lookup_env(priv, "ddr_rtt_wr_%1d%1d", !!(i & 2), !!(i & 1)); |
| 3393 | if (!s) |
| 3394 | s = lookup_env(priv, "ddr%d_rtt_wr_%1d%1d", if_num, |
| 3395 | !!(i & 2), !!(i & 1)); |
| 3396 | if (s) { |
| 3397 | value = simple_strtoul(s, NULL, 0); |
| 3398 | insrt_wr(&mp1.u64, i, value); |
| 3399 | } |
| 3400 | } |
| 3401 | |
| 3402 | // Make sure 78XX pass 1 has valid RTT_WR settings, because |
| 3403 | // configuration files may be set up for later chips, and |
| 3404 | // 78XX pass 1 supports no RTT_WR extension bits |
| 3405 | if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) { |
| 3406 | for (i = 0; i < 4; ++i) { |
| 3407 | // if 80 or undefined |
| 3408 | if (extr_wr(mp1.u64, i) > 3) { |
| 3409 | // FIXME? always insert 120 |
| 3410 | insrt_wr(&mp1.u64, i, 1); |
| 3411 | debug("RTT_WR_%d%d set to 120 for CN78XX pass 1\n", |
| 3412 | !!(i & 2), i & 1); |
| 3413 | } |
| 3414 | } |
| 3415 | } |
| 3416 | |
| 3417 | s = lookup_env(priv, "ddr_dic"); |
| 3418 | if (s) { |
| 3419 | u64 value = simple_strtoul(s, NULL, 0); |
| 3420 | |
| 3421 | for (i = 0; i < 4; ++i) { |
| 3422 | mp1.u64 &= ~((u64)0x3 << (i * 12 + 7)); |
| 3423 | mp1.u64 |= ((value & 0x3) << (i * 12 + 7)); |
| 3424 | } |
| 3425 | } |
| 3426 | |
| 3427 | for (i = 0; i < 4; ++i) { |
| 3428 | u64 value; |
| 3429 | |
| 3430 | s = lookup_env(priv, "ddr_dic_%1d%1d", !!(i & 2), !!(i & 1)); |
| 3431 | if (s) { |
| 3432 | value = simple_strtoul(s, NULL, 0); |
| 3433 | mp1.u64 &= ~((u64)0x3 << (i * 12 + 7)); |
| 3434 | mp1.u64 |= ((value & 0x3) << (i * 12 + 7)); |
| 3435 | } |
| 3436 | } |
| 3437 | |
| 3438 | s = lookup_env_ull(priv, "ddr_modereg_params1"); |
| 3439 | if (s) |
| 3440 | mp1.u64 = simple_strtoull(s, NULL, 0); |
| 3441 | |
| 3442 | debug("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", |
| 3443 | imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11], |
| 3444 | imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10], |
| 3445 | imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01], |
| 3446 | imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00], |
| 3447 | mp1.s.rtt_nom_11, |
| 3448 | mp1.s.rtt_nom_10, mp1.s.rtt_nom_01, mp1.s.rtt_nom_00); |
| 3449 | |
| 3450 | debug("RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", |
| 3451 | imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 3)], |
| 3452 | imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 2)], |
| 3453 | imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 1)], |
| 3454 | imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 0)], |
| 3455 | extr_wr(mp1.u64, 3), |
| 3456 | extr_wr(mp1.u64, 2), extr_wr(mp1.u64, 1), extr_wr(mp1.u64, 0)); |
| 3457 | |
| 3458 | debug("DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", |
| 3459 | imp_val->dic_ohms[mp1.s.dic_11], |
| 3460 | imp_val->dic_ohms[mp1.s.dic_10], |
| 3461 | imp_val->dic_ohms[mp1.s.dic_01], |
| 3462 | imp_val->dic_ohms[mp1.s.dic_00], |
| 3463 | mp1.s.dic_11, mp1.s.dic_10, mp1.s.dic_01, mp1.s.dic_00); |
| 3464 | |
| 3465 | debug("MODEREG_PARAMS1 : 0x%016llx\n", |
| 3466 | mp1.u64); |
| 3467 | lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num), mp1.u64); |
| 3468 | } |
| 3469 | |
| 3470 | static void lmc_modereg_params2(struct ddr_priv *priv) |
| 3471 | { |
| 3472 | char *s; |
| 3473 | int i; |
| 3474 | |
| 3475 | if (ddr_type == DDR4_DRAM) { |
| 3476 | union cvmx_lmcx_modereg_params2 mp2; |
| 3477 | |
| 3478 | mp2.u64 = odt_config[odt_idx].modereg_params2.u64; |
| 3479 | |
| 3480 | s = lookup_env(priv, "ddr_rtt_park"); |
| 3481 | if (s) { |
| 3482 | u64 value = simple_strtoul(s, NULL, 0); |
| 3483 | |
| 3484 | for (i = 0; i < 4; ++i) { |
| 3485 | mp2.u64 &= ~((u64)0x7 << (i * 10 + 0)); |
| 3486 | mp2.u64 |= ((value & 0x7) << (i * 10 + 0)); |
| 3487 | } |
| 3488 | } |
| 3489 | |
| 3490 | for (i = 0; i < 4; ++i) { |
| 3491 | u64 value; |
| 3492 | |
| 3493 | s = lookup_env(priv, "ddr_rtt_park_%1d%1d", !!(i & 2), |
| 3494 | !!(i & 1)); |
| 3495 | if (s) { |
| 3496 | value = simple_strtoul(s, NULL, 0); |
| 3497 | mp2.u64 &= ~((u64)0x7 << (i * 10 + 0)); |
| 3498 | mp2.u64 |= ((value & 0x7) << (i * 10 + 0)); |
| 3499 | } |
| 3500 | } |
| 3501 | |
| 3502 | s = lookup_env_ull(priv, "ddr_modereg_params2"); |
| 3503 | if (s) |
| 3504 | mp2.u64 = simple_strtoull(s, NULL, 0); |
| 3505 | |
| 3506 | debug("RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", |
| 3507 | imp_val->rtt_nom_ohms[mp2.s.rtt_park_11], |
| 3508 | imp_val->rtt_nom_ohms[mp2.s.rtt_park_10], |
| 3509 | imp_val->rtt_nom_ohms[mp2.s.rtt_park_01], |
| 3510 | imp_val->rtt_nom_ohms[mp2.s.rtt_park_00], |
| 3511 | mp2.s.rtt_park_11, mp2.s.rtt_park_10, mp2.s.rtt_park_01, |
| 3512 | mp2.s.rtt_park_00); |
| 3513 | |
| 3514 | debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_RANGE", |
| 3515 | mp2.s.vref_range_11, |
| 3516 | mp2.s.vref_range_10, |
| 3517 | mp2.s.vref_range_01, mp2.s.vref_range_00); |
| 3518 | |
| 3519 | debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_VALUE", |
| 3520 | mp2.s.vref_value_11, |
| 3521 | mp2.s.vref_value_10, |
| 3522 | mp2.s.vref_value_01, mp2.s.vref_value_00); |
| 3523 | |
| 3524 | debug("MODEREG_PARAMS2 : 0x%016llx\n", |
| 3525 | mp2.u64); |
| 3526 | lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS2(if_num), mp2.u64); |
| 3527 | } |
| 3528 | } |
| 3529 | |
| 3530 | static void lmc_modereg_params3(struct ddr_priv *priv) |
| 3531 | { |
| 3532 | char *s; |
| 3533 | |
| 3534 | if (ddr_type == DDR4_DRAM) { |
| 3535 | union cvmx_lmcx_modereg_params3 mp3; |
| 3536 | |
| 3537 | mp3.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS3(if_num)); |
| 3538 | /* Disable as workaround to Errata 20547 */ |
| 3539 | mp3.s.rd_dbi = 0; |
| 3540 | mp3.s.tccd_l = max(divide_roundup(ddr4_tccd_lmin, tclk_psecs), |
| 3541 | 5ull) - 4; |
| 3542 | |
| 3543 | s = lookup_env(priv, "ddr_rd_preamble"); |
| 3544 | if (s) |
| 3545 | mp3.s.rd_preamble = !!simple_strtoul(s, NULL, 0); |
| 3546 | |
| 3547 | if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) { |
| 3548 | int delay = 0; |
| 3549 | |
| 3550 | if (lranks_per_prank == 4 && ddr_hertz >= 1000000000) |
| 3551 | delay = 1; |
| 3552 | |
| 3553 | mp3.s.xrank_add_tccd_l = delay; |
| 3554 | mp3.s.xrank_add_tccd_s = delay; |
| 3555 | } |
| 3556 | |
| 3557 | lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS3(if_num), mp3.u64); |
| 3558 | debug("MODEREG_PARAMS3 : 0x%016llx\n", |
| 3559 | mp3.u64); |
| 3560 | } |
| 3561 | } |
| 3562 | |
| 3563 | static void lmc_nxm(struct ddr_priv *priv) |
| 3564 | { |
| 3565 | union cvmx_lmcx_nxm lmc_nxm; |
| 3566 | int num_bits = row_lsb + row_bits + lranks_bits - 26; |
| 3567 | char *s; |
| 3568 | |
| 3569 | lmc_nxm.u64 = lmc_rd(priv, CVMX_LMCX_NXM(if_num)); |
| 3570 | |
| 3571 | /* .cn78xx. */ |
| 3572 | if (rank_mask & 0x1) |
| 3573 | lmc_nxm.cn78xx.mem_msb_d0_r0 = num_bits; |
| 3574 | if (rank_mask & 0x2) |
| 3575 | lmc_nxm.cn78xx.mem_msb_d0_r1 = num_bits; |
| 3576 | if (rank_mask & 0x4) |
| 3577 | lmc_nxm.cn78xx.mem_msb_d1_r0 = num_bits; |
| 3578 | if (rank_mask & 0x8) |
| 3579 | lmc_nxm.cn78xx.mem_msb_d1_r1 = num_bits; |
| 3580 | |
| 3581 | /* Set the mask for non-existent ranks. */ |
| 3582 | lmc_nxm.cn78xx.cs_mask = ~rank_mask & 0xff; |
| 3583 | |
| 3584 | s = lookup_env_ull(priv, "ddr_nxm"); |
| 3585 | if (s) |
| 3586 | lmc_nxm.u64 = simple_strtoull(s, NULL, 0); |
| 3587 | |
| 3588 | debug("LMC_NXM : 0x%016llx\n", |
| 3589 | lmc_nxm.u64); |
| 3590 | lmc_wr(priv, CVMX_LMCX_NXM(if_num), lmc_nxm.u64); |
| 3591 | } |
| 3592 | |
| 3593 | static void lmc_wodt_mask(struct ddr_priv *priv) |
| 3594 | { |
| 3595 | union cvmx_lmcx_wodt_mask wodt_mask; |
| 3596 | char *s; |
| 3597 | |
| 3598 | wodt_mask.u64 = odt_config[odt_idx].odt_mask; |
| 3599 | |
| 3600 | s = lookup_env_ull(priv, "ddr_wodt_mask"); |
| 3601 | if (s) |
| 3602 | wodt_mask.u64 = simple_strtoull(s, NULL, 0); |
| 3603 | |
| 3604 | debug("WODT_MASK : 0x%016llx\n", |
| 3605 | wodt_mask.u64); |
| 3606 | lmc_wr(priv, CVMX_LMCX_WODT_MASK(if_num), wodt_mask.u64); |
| 3607 | } |
| 3608 | |
| 3609 | static void lmc_rodt_mask(struct ddr_priv *priv) |
| 3610 | { |
| 3611 | union cvmx_lmcx_rodt_mask rodt_mask; |
| 3612 | int rankx; |
| 3613 | char *s; |
| 3614 | |
| 3615 | rodt_mask.u64 = odt_config[odt_idx].rodt_ctl; |
| 3616 | |
| 3617 | s = lookup_env_ull(priv, "ddr_rodt_mask"); |
| 3618 | if (s) |
| 3619 | rodt_mask.u64 = simple_strtoull(s, NULL, 0); |
| 3620 | |
| 3621 | debug("%-45s : 0x%016llx\n", "RODT_MASK", rodt_mask.u64); |
| 3622 | lmc_wr(priv, CVMX_LMCX_RODT_MASK(if_num), rodt_mask.u64); |
| 3623 | |
| 3624 | dyn_rtt_nom_mask = 0; |
| 3625 | for (rankx = 0; rankx < dimm_count * 4; rankx++) { |
| 3626 | if (!(rank_mask & (1 << rankx))) |
| 3627 | continue; |
| 3628 | dyn_rtt_nom_mask |= ((rodt_mask.u64 >> (8 * rankx)) & 0xff); |
| 3629 | } |
| 3630 | if (num_ranks == 4) { |
| 3631 | /* |
| 3632 | * Normally ODT1 is wired to rank 1. For quad-ranked DIMMs |
| 3633 | * ODT1 is wired to the third rank (rank 2). The mask, |
| 3634 | * dyn_rtt_nom_mask, is used to indicate for which ranks |
| 3635 | * to sweep RTT_NOM during read-leveling. Shift the bit |
| 3636 | * from the ODT1 position over to the "ODT2" position so |
| 3637 | * that the read-leveling analysis comes out right. |
| 3638 | */ |
| 3639 | int odt1_bit = dyn_rtt_nom_mask & 2; |
| 3640 | |
| 3641 | dyn_rtt_nom_mask &= ~2; |
| 3642 | dyn_rtt_nom_mask |= odt1_bit << 1; |
| 3643 | } |
| 3644 | debug("%-45s : 0x%02x\n", "DYN_RTT_NOM_MASK", dyn_rtt_nom_mask); |
| 3645 | } |
| 3646 | |
| 3647 | static void lmc_comp_ctl2(struct ddr_priv *priv) |
| 3648 | { |
| 3649 | union cvmx_lmcx_comp_ctl2 cc2; |
| 3650 | char *s; |
| 3651 | |
| 3652 | cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); |
| 3653 | |
| 3654 | cc2.cn78xx.dqx_ctl = odt_config[odt_idx].odt_ena; |
| 3655 | /* Default 4=34.3 ohm */ |
| 3656 | cc2.cn78xx.ck_ctl = (c_cfg->ck_ctl == 0) ? 4 : c_cfg->ck_ctl; |
| 3657 | /* Default 4=34.3 ohm */ |
| 3658 | cc2.cn78xx.cmd_ctl = (c_cfg->cmd_ctl == 0) ? 4 : c_cfg->cmd_ctl; |
| 3659 | /* Default 4=34.3 ohm */ |
| 3660 | cc2.cn78xx.control_ctl = (c_cfg->ctl_ctl == 0) ? 4 : c_cfg->ctl_ctl; |
| 3661 | |
| 3662 | ddr_rodt_ctl_auto = c_cfg->ddr_rodt_ctl_auto; |
| 3663 | s = lookup_env(priv, "ddr_rodt_ctl_auto"); |
| 3664 | if (s) |
| 3665 | ddr_rodt_ctl_auto = !!simple_strtoul(s, NULL, 0); |
| 3666 | |
| 3667 | default_rodt_ctl = odt_config[odt_idx].qs_dic; |
| 3668 | s = lookup_env(priv, "ddr_rodt_ctl"); |
| 3669 | if (!s) |
| 3670 | s = lookup_env(priv, "ddr%d_rodt_ctl", if_num); |
| 3671 | if (s) { |
| 3672 | default_rodt_ctl = simple_strtoul(s, NULL, 0); |
| 3673 | ddr_rodt_ctl_auto = 0; |
| 3674 | } |
| 3675 | |
| 3676 | cc2.cn70xx.rodt_ctl = default_rodt_ctl; |
| 3677 | |
| 3678 | // if DDR4, force CK_CTL to 26 ohms if it is currently 34 ohms, |
| 3679 | // and DCLK speed is 1 GHz or more... |
| 3680 | if (ddr_type == DDR4_DRAM && cc2.s.ck_ctl == ddr4_driver_34_ohm && |
| 3681 | ddr_hertz >= 1000000000) { |
| 3682 | // lowest for DDR4 is 26 ohms |
| 3683 | cc2.s.ck_ctl = ddr4_driver_26_ohm; |
| 3684 | debug("N%d.LMC%d: Forcing DDR4 COMP_CTL2[CK_CTL] to %d, %d ohms\n", |
| 3685 | node, if_num, cc2.s.ck_ctl, |
| 3686 | imp_val->drive_strength[cc2.s.ck_ctl]); |
| 3687 | } |
| 3688 | |
| 3689 | // if DDR4, 2DPC, UDIMM, force CONTROL_CTL and CMD_CTL to 26 ohms, |
| 3690 | // if DCLK speed is 1 GHz or more... |
| 3691 | if (ddr_type == DDR4_DRAM && dimm_count == 2 && |
| 3692 | (spd_dimm_type == 2 || spd_dimm_type == 6) && |
| 3693 | ddr_hertz >= 1000000000) { |
| 3694 | // lowest for DDR4 is 26 ohms |
| 3695 | cc2.cn78xx.control_ctl = ddr4_driver_26_ohm; |
| 3696 | // lowest for DDR4 is 26 ohms |
| 3697 | cc2.cn78xx.cmd_ctl = ddr4_driver_26_ohm; |
| 3698 | debug("N%d.LMC%d: Forcing DDR4 COMP_CTL2[CONTROL_CTL,CMD_CTL] to %d, %d ohms\n", |
| 3699 | node, if_num, ddr4_driver_26_ohm, |
| 3700 | imp_val->drive_strength[ddr4_driver_26_ohm]); |
| 3701 | } |
| 3702 | |
| 3703 | s = lookup_env(priv, "ddr_ck_ctl"); |
| 3704 | if (s) |
| 3705 | cc2.cn78xx.ck_ctl = simple_strtoul(s, NULL, 0); |
| 3706 | |
| 3707 | s = lookup_env(priv, "ddr_cmd_ctl"); |
| 3708 | if (s) |
| 3709 | cc2.cn78xx.cmd_ctl = simple_strtoul(s, NULL, 0); |
| 3710 | |
| 3711 | s = lookup_env(priv, "ddr_control_ctl"); |
| 3712 | if (s) |
| 3713 | cc2.cn70xx.control_ctl = simple_strtoul(s, NULL, 0); |
| 3714 | |
| 3715 | s = lookup_env(priv, "ddr_dqx_ctl"); |
| 3716 | if (s) |
| 3717 | cc2.cn78xx.dqx_ctl = simple_strtoul(s, NULL, 0); |
| 3718 | |
| 3719 | debug("%-45s : %d, %d ohms\n", "DQX_CTL ", cc2.cn78xx.dqx_ctl, |
| 3720 | imp_val->drive_strength[cc2.cn78xx.dqx_ctl]); |
| 3721 | debug("%-45s : %d, %d ohms\n", "CK_CTL ", cc2.cn78xx.ck_ctl, |
| 3722 | imp_val->drive_strength[cc2.cn78xx.ck_ctl]); |
| 3723 | debug("%-45s : %d, %d ohms\n", "CMD_CTL ", cc2.cn78xx.cmd_ctl, |
| 3724 | imp_val->drive_strength[cc2.cn78xx.cmd_ctl]); |
| 3725 | debug("%-45s : %d, %d ohms\n", "CONTROL_CTL ", |
| 3726 | cc2.cn78xx.control_ctl, |
| 3727 | imp_val->drive_strength[cc2.cn78xx.control_ctl]); |
| 3728 | debug("Read ODT_CTL : 0x%x (%d ohms)\n", |
| 3729 | cc2.cn78xx.rodt_ctl, imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]); |
| 3730 | |
| 3731 | debug("%-45s : 0x%016llx\n", "COMP_CTL2", cc2.u64); |
| 3732 | lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64); |
| 3733 | } |
| 3734 | |
| 3735 | static void lmc_phy_ctl(struct ddr_priv *priv) |
| 3736 | { |
| 3737 | union cvmx_lmcx_phy_ctl phy_ctl; |
| 3738 | |
| 3739 | phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); |
| 3740 | phy_ctl.s.ts_stagger = 0; |
| 3741 | // FIXME: are there others TBD? |
| 3742 | phy_ctl.s.dsk_dbg_overwrt_ena = 0; |
| 3743 | |
| 3744 | if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && lranks_per_prank > 1) { |
| 3745 | // C0 is TEN, C1 is A17 |
| 3746 | phy_ctl.s.c0_sel = 2; |
| 3747 | phy_ctl.s.c1_sel = 2; |
| 3748 | debug("N%d.LMC%d: 3DS: setting PHY_CTL[cx_csel] = %d\n", |
| 3749 | node, if_num, phy_ctl.s.c1_sel); |
| 3750 | } |
| 3751 | |
| 3752 | debug("PHY_CTL : 0x%016llx\n", |
| 3753 | phy_ctl.u64); |
| 3754 | lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); |
| 3755 | } |
| 3756 | |
| 3757 | static void lmc_ext_config(struct ddr_priv *priv) |
| 3758 | { |
| 3759 | union cvmx_lmcx_ext_config ext_cfg; |
| 3760 | char *s; |
| 3761 | |
| 3762 | ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num)); |
| 3763 | ext_cfg.s.vrefint_seq_deskew = 0; |
| 3764 | ext_cfg.s.read_ena_bprch = 1; |
| 3765 | ext_cfg.s.read_ena_fprch = 1; |
| 3766 | ext_cfg.s.drive_ena_fprch = 1; |
| 3767 | ext_cfg.s.drive_ena_bprch = 1; |
| 3768 | // make sure this is OFF for all current chips |
| 3769 | ext_cfg.s.invert_data = 0; |
| 3770 | |
| 3771 | s = lookup_env(priv, "ddr_read_fprch"); |
| 3772 | if (s) |
| 3773 | ext_cfg.s.read_ena_fprch = strtoul(s, NULL, 0); |
| 3774 | |
| 3775 | s = lookup_env(priv, "ddr_read_bprch"); |
| 3776 | if (s) |
| 3777 | ext_cfg.s.read_ena_bprch = strtoul(s, NULL, 0); |
| 3778 | |
| 3779 | s = lookup_env(priv, "ddr_drive_fprch"); |
| 3780 | if (s) |
| 3781 | ext_cfg.s.drive_ena_fprch = strtoul(s, NULL, 0); |
| 3782 | |
| 3783 | s = lookup_env(priv, "ddr_drive_bprch"); |
| 3784 | if (s) |
| 3785 | ext_cfg.s.drive_ena_bprch = strtoul(s, NULL, 0); |
| 3786 | |
| 3787 | if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && lranks_per_prank > 1) { |
| 3788 | ext_cfg.s.dimm0_cid = lranks_bits; |
| 3789 | ext_cfg.s.dimm1_cid = lranks_bits; |
| 3790 | debug("N%d.LMC%d: 3DS: setting EXT_CONFIG[dimmx_cid] = %d\n", |
| 3791 | node, if_num, ext_cfg.s.dimm0_cid); |
| 3792 | } |
| 3793 | |
| 3794 | lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_cfg.u64); |
| 3795 | debug("%-45s : 0x%016llx\n", "EXT_CONFIG", ext_cfg.u64); |
| 3796 | } |
| 3797 | |
| 3798 | static void lmc_ext_config2(struct ddr_priv *priv) |
| 3799 | { |
| 3800 | char *s; |
| 3801 | |
| 3802 | // NOTE: all chips have this register, but not necessarily the |
| 3803 | // fields we modify... |
| 3804 | if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && |
| 3805 | !octeon_is_cpuid(OCTEON_CN73XX)) { |
| 3806 | union cvmx_lmcx_ext_config2 ext_cfg2; |
| 3807 | int value = 1; // default to 1 |
| 3808 | |
| 3809 | ext_cfg2.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG2(if_num)); |
| 3810 | |
| 3811 | s = lookup_env(priv, "ddr_ext2_delay_unload"); |
| 3812 | if (s) |
| 3813 | value = !!simple_strtoul(s, NULL, 0); |
| 3814 | |
| 3815 | ext_cfg2.s.delay_unload_r0 = value; |
| 3816 | ext_cfg2.s.delay_unload_r1 = value; |
| 3817 | ext_cfg2.s.delay_unload_r2 = value; |
| 3818 | ext_cfg2.s.delay_unload_r3 = value; |
| 3819 | |
| 3820 | lmc_wr(priv, CVMX_LMCX_EXT_CONFIG2(if_num), ext_cfg2.u64); |
| 3821 | debug("%-45s : 0x%016llx\n", "EXT_CONFIG2", ext_cfg2.u64); |
| 3822 | } |
| 3823 | } |
| 3824 | |
| 3825 | static void lmc_dimm01_params_loop(struct ddr_priv *priv) |
| 3826 | { |
| 3827 | union cvmx_lmcx_dimmx_params dimm_p; |
| 3828 | int dimmx = didx; |
| 3829 | char *s; |
| 3830 | int rc; |
| 3831 | int i; |
| 3832 | |
| 3833 | dimm_p.u64 = lmc_rd(priv, CVMX_LMCX_DIMMX_PARAMS(dimmx, if_num)); |
| 3834 | |
| 3835 | if (ddr_type == DDR4_DRAM) { |
| 3836 | union cvmx_lmcx_dimmx_ddr4_params0 ddr4_p0; |
| 3837 | union cvmx_lmcx_dimmx_ddr4_params1 ddr4_p1; |
| 3838 | union cvmx_lmcx_ddr4_dimm_ctl ddr4_ctl; |
| 3839 | |
| 3840 | dimm_p.s.rc0 = 0; |
| 3841 | dimm_p.s.rc1 = 0; |
| 3842 | dimm_p.s.rc2 = 0; |
| 3843 | |
| 3844 | rc = read_spd(&dimm_config_table[didx], 0, |
| 3845 | DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL); |
| 3846 | dimm_p.s.rc3 = (rc >> 4) & 0xf; |
| 3847 | dimm_p.s.rc4 = ((rc >> 0) & 0x3) << 2; |
| 3848 | dimm_p.s.rc4 |= ((rc >> 2) & 0x3) << 0; |
| 3849 | |
| 3850 | rc = read_spd(&dimm_config_table[didx], 0, |
| 3851 | DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK); |
| 3852 | dimm_p.s.rc5 = ((rc >> 0) & 0x3) << 2; |
| 3853 | dimm_p.s.rc5 |= ((rc >> 2) & 0x3) << 0; |
| 3854 | |
| 3855 | dimm_p.s.rc6 = 0; |
| 3856 | dimm_p.s.rc7 = 0; |
| 3857 | dimm_p.s.rc8 = 0; |
| 3858 | dimm_p.s.rc9 = 0; |
| 3859 | |
| 3860 | /* |
| 3861 | * rc10 DDR4 RDIMM Operating Speed |
| 3862 | * === =================================================== |
| 3863 | * 0 tclk_psecs >= 1250 psec DDR4-1600 (1250 ps) |
| 3864 | * 1 1250 psec > tclk_psecs >= 1071 psec DDR4-1866 (1071 ps) |
| 3865 | * 2 1071 psec > tclk_psecs >= 938 psec DDR4-2133 ( 938 ps) |
| 3866 | * 3 938 psec > tclk_psecs >= 833 psec DDR4-2400 ( 833 ps) |
| 3867 | * 4 833 psec > tclk_psecs >= 750 psec DDR4-2666 ( 750 ps) |
| 3868 | * 5 750 psec > tclk_psecs >= 625 psec DDR4-3200 ( 625 ps) |
| 3869 | */ |
| 3870 | dimm_p.s.rc10 = 0; |
| 3871 | if (tclk_psecs < 1250) |
| 3872 | dimm_p.s.rc10 = 1; |
| 3873 | if (tclk_psecs < 1071) |
| 3874 | dimm_p.s.rc10 = 2; |
| 3875 | if (tclk_psecs < 938) |
| 3876 | dimm_p.s.rc10 = 3; |
| 3877 | if (tclk_psecs < 833) |
| 3878 | dimm_p.s.rc10 = 4; |
| 3879 | if (tclk_psecs < 750) |
| 3880 | dimm_p.s.rc10 = 5; |
| 3881 | |
| 3882 | dimm_p.s.rc11 = 0; |
| 3883 | dimm_p.s.rc12 = 0; |
| 3884 | /* RC13 DIMM type: 0 = LRDIMM, 4 (bit 2 set) = RDIMM */ |
| 3885 | dimm_p.s.rc13 = (spd_dimm_type == 4) ? 0 : 4; |
| 3886 | dimm_p.s.rc13 |= (ddr_type == DDR4_DRAM) ? |
| 3887 | (spd_addr_mirror << 3) : 0; |
| 3888 | dimm_p.s.rc14 = 0; |
| 3889 | dimm_p.s.rc15 = 0; /* 1 nCK latency adder */ |
| 3890 | |
| 3891 | ddr4_p0.u64 = 0; |
| 3892 | |
| 3893 | ddr4_p0.s.rc8x = 0; |
| 3894 | ddr4_p0.s.rc7x = 0; |
| 3895 | ddr4_p0.s.rc6x = 0; |
| 3896 | ddr4_p0.s.rc5x = 0; |
| 3897 | ddr4_p0.s.rc4x = 0; |
| 3898 | |
| 3899 | ddr4_p0.s.rc3x = compute_rc3x(tclk_psecs); |
| 3900 | |
| 3901 | ddr4_p0.s.rc2x = 0; |
| 3902 | ddr4_p0.s.rc1x = 0; |
| 3903 | |
| 3904 | ddr4_p1.u64 = 0; |
| 3905 | |
| 3906 | ddr4_p1.s.rcbx = 0; |
| 3907 | ddr4_p1.s.rcax = 0; |
| 3908 | ddr4_p1.s.rc9x = 0; |
| 3909 | |
| 3910 | ddr4_ctl.u64 = 0; |
| 3911 | ddr4_ctl.cn70xx.ddr4_dimm0_wmask = 0x004; |
| 3912 | ddr4_ctl.cn70xx.ddr4_dimm1_wmask = |
| 3913 | (dimm_count > 1) ? 0x004 : 0x0000; |
| 3914 | |
| 3915 | /* |
| 3916 | * Handle any overrides from envvars here... |
| 3917 | */ |
| 3918 | s = lookup_env(priv, "ddr_ddr4_params0"); |
| 3919 | if (s) |
| 3920 | ddr4_p0.u64 = simple_strtoul(s, NULL, 0); |
| 3921 | |
| 3922 | s = lookup_env(priv, "ddr_ddr4_params1"); |
| 3923 | if (s) |
| 3924 | ddr4_p1.u64 = simple_strtoul(s, NULL, 0); |
| 3925 | |
| 3926 | s = lookup_env(priv, "ddr_ddr4_dimm_ctl"); |
| 3927 | if (s) |
| 3928 | ddr4_ctl.u64 = simple_strtoul(s, NULL, 0); |
| 3929 | |
| 3930 | for (i = 0; i < 11; ++i) { |
| 3931 | u64 value; |
| 3932 | |
| 3933 | s = lookup_env(priv, "ddr_ddr4_rc%1xx", i + 1); |
| 3934 | if (s) { |
| 3935 | value = simple_strtoul(s, NULL, 0); |
| 3936 | if (i < 8) { |
| 3937 | ddr4_p0.u64 &= ~((u64)0xff << (i * 8)); |
| 3938 | ddr4_p0.u64 |= (value << (i * 8)); |
| 3939 | } else { |
| 3940 | ddr4_p1.u64 &= |
| 3941 | ~((u64)0xff << ((i - 8) * 8)); |
| 3942 | ddr4_p1.u64 |= (value << ((i - 8) * 8)); |
| 3943 | } |
| 3944 | } |
| 3945 | } |
| 3946 | |
| 3947 | /* |
| 3948 | * write the final CSR values |
| 3949 | */ |
| 3950 | lmc_wr(priv, CVMX_LMCX_DIMMX_DDR4_PARAMS0(dimmx, if_num), |
| 3951 | ddr4_p0.u64); |
| 3952 | |
| 3953 | lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), ddr4_ctl.u64); |
| 3954 | |
| 3955 | lmc_wr(priv, CVMX_LMCX_DIMMX_DDR4_PARAMS1(dimmx, if_num), |
| 3956 | ddr4_p1.u64); |
| 3957 | |
| 3958 | debug("DIMM%d Register Control Words RCBx:RC1x : %x %x %x %x %x %x %x %x %x %x %x\n", |
| 3959 | dimmx, ddr4_p1.s.rcbx, ddr4_p1.s.rcax, |
| 3960 | ddr4_p1.s.rc9x, ddr4_p0.s.rc8x, |
| 3961 | ddr4_p0.s.rc7x, ddr4_p0.s.rc6x, |
| 3962 | ddr4_p0.s.rc5x, ddr4_p0.s.rc4x, |
| 3963 | ddr4_p0.s.rc3x, ddr4_p0.s.rc2x, ddr4_p0.s.rc1x); |
| 3964 | |
| 3965 | } else { |
| 3966 | rc = read_spd(&dimm_config_table[didx], 0, 69); |
| 3967 | dimm_p.s.rc0 = (rc >> 0) & 0xf; |
| 3968 | dimm_p.s.rc1 = (rc >> 4) & 0xf; |
| 3969 | |
| 3970 | rc = read_spd(&dimm_config_table[didx], 0, 70); |
| 3971 | dimm_p.s.rc2 = (rc >> 0) & 0xf; |
| 3972 | dimm_p.s.rc3 = (rc >> 4) & 0xf; |
| 3973 | |
| 3974 | rc = read_spd(&dimm_config_table[didx], 0, 71); |
| 3975 | dimm_p.s.rc4 = (rc >> 0) & 0xf; |
| 3976 | dimm_p.s.rc5 = (rc >> 4) & 0xf; |
| 3977 | |
| 3978 | rc = read_spd(&dimm_config_table[didx], 0, 72); |
| 3979 | dimm_p.s.rc6 = (rc >> 0) & 0xf; |
| 3980 | dimm_p.s.rc7 = (rc >> 4) & 0xf; |
| 3981 | |
| 3982 | rc = read_spd(&dimm_config_table[didx], 0, 73); |
| 3983 | dimm_p.s.rc8 = (rc >> 0) & 0xf; |
| 3984 | dimm_p.s.rc9 = (rc >> 4) & 0xf; |
| 3985 | |
| 3986 | rc = read_spd(&dimm_config_table[didx], 0, 74); |
| 3987 | dimm_p.s.rc10 = (rc >> 0) & 0xf; |
| 3988 | dimm_p.s.rc11 = (rc >> 4) & 0xf; |
| 3989 | |
| 3990 | rc = read_spd(&dimm_config_table[didx], 0, 75); |
| 3991 | dimm_p.s.rc12 = (rc >> 0) & 0xf; |
| 3992 | dimm_p.s.rc13 = (rc >> 4) & 0xf; |
| 3993 | |
| 3994 | rc = read_spd(&dimm_config_table[didx], 0, 76); |
| 3995 | dimm_p.s.rc14 = (rc >> 0) & 0xf; |
| 3996 | dimm_p.s.rc15 = (rc >> 4) & 0xf; |
| 3997 | |
| 3998 | s = ddr_getenv_debug(priv, "ddr_clk_drive"); |
| 3999 | if (s) { |
| 4000 | if (strcmp(s, "light") == 0) |
| 4001 | dimm_p.s.rc5 = 0x0; /* Light Drive */ |
| 4002 | if (strcmp(s, "moderate") == 0) |
| 4003 | dimm_p.s.rc5 = 0x5; /* Moderate Drive */ |
| 4004 | if (strcmp(s, "strong") == 0) |
| 4005 | dimm_p.s.rc5 = 0xA; /* Strong Drive */ |
| 4006 | printf("Parameter found in environment. ddr_clk_drive = %s\n", |
| 4007 | s); |
| 4008 | } |
| 4009 | |
| 4010 | s = ddr_getenv_debug(priv, "ddr_cmd_drive"); |
| 4011 | if (s) { |
| 4012 | if (strcmp(s, "light") == 0) |
| 4013 | dimm_p.s.rc3 = 0x0; /* Light Drive */ |
| 4014 | if (strcmp(s, "moderate") == 0) |
| 4015 | dimm_p.s.rc3 = 0x5; /* Moderate Drive */ |
| 4016 | if (strcmp(s, "strong") == 0) |
| 4017 | dimm_p.s.rc3 = 0xA; /* Strong Drive */ |
| 4018 | printf("Parameter found in environment. ddr_cmd_drive = %s\n", |
| 4019 | s); |
| 4020 | } |
| 4021 | |
| 4022 | s = ddr_getenv_debug(priv, "ddr_ctl_drive"); |
| 4023 | if (s) { |
| 4024 | if (strcmp(s, "light") == 0) |
| 4025 | dimm_p.s.rc4 = 0x0; /* Light Drive */ |
| 4026 | if (strcmp(s, "moderate") == 0) |
| 4027 | dimm_p.s.rc4 = 0x5; /* Moderate Drive */ |
| 4028 | printf("Parameter found in environment. ddr_ctl_drive = %s\n", |
| 4029 | s); |
| 4030 | } |
| 4031 | |
| 4032 | /* |
| 4033 | * rc10 DDR3 RDIMM Operating Speed |
| 4034 | * == ===================================================== |
| 4035 | * 0 tclk_psecs >= 2500 psec DDR3/DDR3L-800 (default) |
| 4036 | * 1 2500 psec > tclk_psecs >= 1875 psec DDR3/DDR3L-1066 |
| 4037 | * 2 1875 psec > tclk_psecs >= 1500 psec DDR3/DDR3L-1333 |
| 4038 | * 3 1500 psec > tclk_psecs >= 1250 psec DDR3/DDR3L-1600 |
| 4039 | * 4 1250 psec > tclk_psecs >= 1071 psec DDR3-1866 |
| 4040 | */ |
| 4041 | dimm_p.s.rc10 = 0; |
| 4042 | if (tclk_psecs < 2500) |
| 4043 | dimm_p.s.rc10 = 1; |
| 4044 | if (tclk_psecs < 1875) |
| 4045 | dimm_p.s.rc10 = 2; |
| 4046 | if (tclk_psecs < 1500) |
| 4047 | dimm_p.s.rc10 = 3; |
| 4048 | if (tclk_psecs < 1250) |
| 4049 | dimm_p.s.rc10 = 4; |
| 4050 | } |
| 4051 | |
| 4052 | s = lookup_env(priv, "ddr_dimmx_params", i); |
| 4053 | if (s) |
| 4054 | dimm_p.u64 = simple_strtoul(s, NULL, 0); |
| 4055 | |
| 4056 | for (i = 0; i < 16; ++i) { |
| 4057 | u64 value; |
| 4058 | |
| 4059 | s = lookup_env(priv, "ddr_rc%d", i); |
| 4060 | if (s) { |
| 4061 | value = simple_strtoul(s, NULL, 0); |
| 4062 | dimm_p.u64 &= ~((u64)0xf << (i * 4)); |
| 4063 | dimm_p.u64 |= (value << (i * 4)); |
| 4064 | } |
| 4065 | } |
| 4066 | |
| 4067 | lmc_wr(priv, CVMX_LMCX_DIMMX_PARAMS(dimmx, if_num), dimm_p.u64); |
| 4068 | |
| 4069 | debug("DIMM%d Register Control Words RC15:RC0 : %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n", |
| 4070 | dimmx, dimm_p.s.rc15, dimm_p.s.rc14, dimm_p.s.rc13, |
| 4071 | dimm_p.s.rc12, dimm_p.s.rc11, dimm_p.s.rc10, |
| 4072 | dimm_p.s.rc9, dimm_p.s.rc8, dimm_p.s.rc7, |
| 4073 | dimm_p.s.rc6, dimm_p.s.rc5, dimm_p.s.rc4, |
| 4074 | dimm_p.s.rc3, dimm_p.s.rc2, dimm_p.s.rc1, dimm_p.s.rc0); |
| 4075 | |
| 4076 | // FIXME: recognize a DDR3 RDIMM with 4 ranks and 2 registers, |
| 4077 | // and treat it specially |
| 4078 | if (ddr_type == DDR3_DRAM && num_ranks == 4 && |
| 4079 | spd_rdimm_registers == 2 && dimmx == 0) { |
| 4080 | debug("DDR3: Copying DIMM0_PARAMS to DIMM1_PARAMS for pseudo-DIMM #1...\n"); |
| 4081 | lmc_wr(priv, CVMX_LMCX_DIMMX_PARAMS(1, if_num), dimm_p.u64); |
| 4082 | } |
| 4083 | } |
| 4084 | |
| 4085 | static void lmc_dimm01_params(struct ddr_priv *priv) |
| 4086 | { |
| 4087 | union cvmx_lmcx_dimm_ctl dimm_ctl; |
| 4088 | char *s; |
| 4089 | |
| 4090 | if (spd_rdimm) { |
| 4091 | for (didx = 0; didx < (unsigned int)dimm_count; ++didx) |
| 4092 | lmc_dimm01_params_loop(priv); |
| 4093 | |
| 4094 | if (ddr_type == DDR4_DRAM) { |
| 4095 | /* LMC0_DIMM_CTL */ |
| 4096 | dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num)); |
| 4097 | dimm_ctl.s.dimm0_wmask = 0xdf3f; |
| 4098 | dimm_ctl.s.dimm1_wmask = |
| 4099 | (dimm_count > 1) ? 0xdf3f : 0x0000; |
| 4100 | dimm_ctl.s.tcws = 0x4e0; |
| 4101 | dimm_ctl.s.parity = c_cfg->parity; |
| 4102 | |
| 4103 | s = lookup_env(priv, "ddr_dimm0_wmask"); |
| 4104 | if (s) { |
| 4105 | dimm_ctl.s.dimm0_wmask = |
| 4106 | simple_strtoul(s, NULL, 0); |
| 4107 | } |
| 4108 | |
| 4109 | s = lookup_env(priv, "ddr_dimm1_wmask"); |
| 4110 | if (s) { |
| 4111 | dimm_ctl.s.dimm1_wmask = |
| 4112 | simple_strtoul(s, NULL, 0); |
| 4113 | } |
| 4114 | |
| 4115 | s = lookup_env(priv, "ddr_dimm_ctl_parity"); |
| 4116 | if (s) |
| 4117 | dimm_ctl.s.parity = simple_strtoul(s, NULL, 0); |
| 4118 | |
| 4119 | s = lookup_env(priv, "ddr_dimm_ctl_tcws"); |
| 4120 | if (s) |
| 4121 | dimm_ctl.s.tcws = simple_strtoul(s, NULL, 0); |
| 4122 | |
| 4123 | debug("LMC DIMM_CTL : 0x%016llx\n", |
| 4124 | dimm_ctl.u64); |
| 4125 | lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64); |
| 4126 | |
| 4127 | /* Init RCW */ |
| 4128 | oct3_ddr3_seq(priv, rank_mask, if_num, 0x7); |
| 4129 | |
| 4130 | /* Write RC0D last */ |
| 4131 | dimm_ctl.s.dimm0_wmask = 0x2000; |
| 4132 | dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? |
| 4133 | 0x2000 : 0x0000; |
| 4134 | debug("LMC DIMM_CTL : 0x%016llx\n", |
| 4135 | dimm_ctl.u64); |
| 4136 | lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64); |
| 4137 | |
| 4138 | /* |
| 4139 | * Don't write any extended registers the second time |
| 4140 | */ |
| 4141 | lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), 0); |
| 4142 | |
| 4143 | /* Init RCW */ |
| 4144 | oct3_ddr3_seq(priv, rank_mask, if_num, 0x7); |
| 4145 | } else { |
| 4146 | /* LMC0_DIMM_CTL */ |
| 4147 | dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num)); |
| 4148 | dimm_ctl.s.dimm0_wmask = 0xffff; |
| 4149 | // FIXME: recognize a DDR3 RDIMM with 4 ranks and 2 |
| 4150 | // registers, and treat it specially |
| 4151 | if (num_ranks == 4 && spd_rdimm_registers == 2) { |
| 4152 | debug("DDR3: Activating DIMM_CTL[dimm1_mask] bits...\n"); |
| 4153 | dimm_ctl.s.dimm1_wmask = 0xffff; |
| 4154 | } else { |
| 4155 | dimm_ctl.s.dimm1_wmask = |
| 4156 | (dimm_count > 1) ? 0xffff : 0x0000; |
| 4157 | } |
| 4158 | dimm_ctl.s.tcws = 0x4e0; |
| 4159 | dimm_ctl.s.parity = c_cfg->parity; |
| 4160 | |
| 4161 | s = lookup_env(priv, "ddr_dimm0_wmask"); |
| 4162 | if (s) { |
| 4163 | dimm_ctl.s.dimm0_wmask = |
| 4164 | simple_strtoul(s, NULL, 0); |
| 4165 | } |
| 4166 | |
| 4167 | s = lookup_env(priv, "ddr_dimm1_wmask"); |
| 4168 | if (s) { |
| 4169 | dimm_ctl.s.dimm1_wmask = |
| 4170 | simple_strtoul(s, NULL, 0); |
| 4171 | } |
| 4172 | |
| 4173 | s = lookup_env(priv, "ddr_dimm_ctl_parity"); |
| 4174 | if (s) |
| 4175 | dimm_ctl.s.parity = simple_strtoul(s, NULL, 0); |
| 4176 | |
| 4177 | s = lookup_env(priv, "ddr_dimm_ctl_tcws"); |
| 4178 | if (s) |
| 4179 | dimm_ctl.s.tcws = simple_strtoul(s, NULL, 0); |
| 4180 | |
| 4181 | debug("LMC DIMM_CTL : 0x%016llx\n", |
| 4182 | dimm_ctl.u64); |
| 4183 | lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64); |
| 4184 | |
| 4185 | /* Init RCW */ |
| 4186 | oct3_ddr3_seq(priv, rank_mask, if_num, 0x7); |
| 4187 | } |
| 4188 | |
| 4189 | } else { |
| 4190 | /* Disable register control writes for unbuffered */ |
| 4191 | union cvmx_lmcx_dimm_ctl dimm_ctl; |
| 4192 | |
| 4193 | dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num)); |
| 4194 | dimm_ctl.s.dimm0_wmask = 0; |
| 4195 | dimm_ctl.s.dimm1_wmask = 0; |
| 4196 | lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64); |
| 4197 | } |
| 4198 | } |
| 4199 | |
| 4200 | static int lmc_rank_init(struct ddr_priv *priv) |
| 4201 | { |
| 4202 | char *s; |
| 4203 | |
| 4204 | if (enable_by_rank_init) { |
| 4205 | by_rank = 3; |
| 4206 | saved_rank_mask = rank_mask; |
| 4207 | } |
| 4208 | |
| 4209 | start_by_rank_init: |
| 4210 | |
| 4211 | if (enable_by_rank_init) { |
| 4212 | rank_mask = (1 << by_rank); |
| 4213 | if (!(rank_mask & saved_rank_mask)) |
| 4214 | goto end_by_rank_init; |
| 4215 | if (by_rank == 0) |
| 4216 | rank_mask = saved_rank_mask; |
| 4217 | |
| 4218 | debug("\n>>>>> BY_RANK: starting rank %d with mask 0x%02x\n\n", |
| 4219 | by_rank, rank_mask); |
| 4220 | } |
| 4221 | |
| 4222 | /* |
| 4223 | * Comments (steps 3 through 5) continue in oct3_ddr3_seq() |
| 4224 | */ |
| 4225 | union cvmx_lmcx_modereg_params0 mp0; |
| 4226 | |
| 4227 | if (ddr_memory_preserved(priv)) { |
| 4228 | /* |
| 4229 | * Contents are being preserved. Take DRAM out of self-refresh |
| 4230 | * first. Then init steps can proceed normally. |
| 4231 | */ |
| 4232 | /* self-refresh exit */ |
| 4233 | oct3_ddr3_seq(priv, rank_mask, if_num, 3); |
| 4234 | } |
| 4235 | |
| 4236 | mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num)); |
| 4237 | mp0.s.dllr = 1; /* Set during first init sequence */ |
| 4238 | lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64); |
| 4239 | |
| 4240 | ddr_init_seq(priv, rank_mask, if_num); |
| 4241 | |
| 4242 | mp0.s.dllr = 0; /* Clear for normal operation */ |
| 4243 | lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64); |
| 4244 | |
| 4245 | if (spd_rdimm && ddr_type == DDR4_DRAM && |
| 4246 | octeon_is_cpuid(OCTEON_CN7XXX)) { |
| 4247 | debug("Running init sequence 1\n"); |
| 4248 | change_rdimm_mpr_pattern(priv, rank_mask, if_num, dimm_count); |
| 4249 | } |
| 4250 | |
| 4251 | memset(lanes, 0, sizeof(lanes)); |
| 4252 | for (lane = 0; lane < last_lane; lane++) { |
| 4253 | // init all lanes to reset value |
| 4254 | dac_settings[lane] = 127; |
| 4255 | } |
| 4256 | |
| 4257 | // FIXME: disable internal VREF if deskew is disabled? |
| 4258 | if (disable_deskew_training) { |
| 4259 | debug("N%d.LMC%d: internal VREF Training disabled, leaving them in RESET.\n", |
| 4260 | node, if_num); |
| 4261 | num_samples = 0; |
| 4262 | } else if (ddr_type == DDR4_DRAM && |
| 4263 | !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) { |
| 4264 | num_samples = DEFAULT_DAC_SAMPLES; |
| 4265 | } else { |
| 4266 | // if DDR3 or no ability to write DAC values |
| 4267 | num_samples = 1; |
| 4268 | } |
| 4269 | |
| 4270 | perform_internal_vref_training: |
| 4271 | |
| 4272 | total_dac_eval_retries = 0; |
| 4273 | dac_eval_exhausted = 0; |
| 4274 | |
| 4275 | for (sample = 0; sample < num_samples; sample++) { |
| 4276 | dac_eval_retries = 0; |
| 4277 | |
| 4278 | // make offset and internal vref training repeatable |
| 4279 | do { |
| 4280 | /* |
| 4281 | * 6.9.8 LMC Offset Training |
| 4282 | * LMC requires input-receiver offset training. |
| 4283 | */ |
| 4284 | perform_offset_training(priv, rank_mask, if_num); |
| 4285 | |
| 4286 | /* |
| 4287 | * 6.9.9 LMC Internal vref Training |
| 4288 | * LMC requires input-reference-voltage training. |
| 4289 | */ |
| 4290 | perform_internal_vref_training(priv, rank_mask, if_num); |
| 4291 | |
| 4292 | // read and maybe display the DAC values for a sample |
| 4293 | read_dac_dbi_settings(priv, if_num, /*DAC*/ 1, |
| 4294 | dac_settings); |
| 4295 | if (num_samples == 1 || ddr_verbose(priv)) { |
| 4296 | display_dac_dbi_settings(if_num, /*DAC*/ 1, |
| 4297 | use_ecc, dac_settings, |
| 4298 | "Internal VREF"); |
| 4299 | } |
| 4300 | |
| 4301 | // for DDR4, evaluate the DAC settings and retry |
| 4302 | // if any issues |
| 4303 | if (ddr_type == DDR4_DRAM) { |
| 4304 | if (evaluate_dac_settings |
| 4305 | (if_64b, use_ecc, dac_settings)) { |
| 4306 | dac_eval_retries += 1; |
| 4307 | if (dac_eval_retries > |
| 4308 | DAC_RETRIES_LIMIT) { |
| 4309 | debug("N%d.LMC%d: DDR4 internal VREF DAC settings: retries exhausted; continuing...\n", |
| 4310 | node, if_num); |
| 4311 | dac_eval_exhausted += 1; |
| 4312 | } else { |
| 4313 | debug("N%d.LMC%d: DDR4 internal VREF DAC settings inconsistent; retrying....\n", |
| 4314 | node, if_num); |
| 4315 | total_dac_eval_retries += 1; |
| 4316 | // try another sample |
| 4317 | continue; |
| 4318 | } |
| 4319 | } |
| 4320 | |
| 4321 | // only when taking multiple samples; otherwise do nothing |
| 4322 | if (num_samples > 1) { |
| 4323 | // good sample or exhausted retries, |
| 4324 | // record it |
| 4325 | for (lane = 0; lane < last_lane; |
| 4326 | lane++) { |
| 4327 | lanes[lane].bytes[sample] = |
| 4328 | dac_settings[lane]; |
| 4329 | } |
| 4330 | } |
| 4331 | } |
| 4332 | // done if DDR3, or good sample, or exhausted retries |
| 4333 | break; |
| 4334 | } while (1); |
| 4335 | } |
| 4336 | |
| 4337 | if (ddr_type == DDR4_DRAM && dac_eval_exhausted > 0) { |
| 4338 | debug("N%d.LMC%d: DDR internal VREF DAC settings: total retries %d, exhausted %d\n", |
| 4339 | node, if_num, total_dac_eval_retries, dac_eval_exhausted); |
| 4340 | } |
| 4341 | |
| 4342 | if (num_samples > 1) { |
| 4343 | debug("N%d.LMC%d: DDR4 internal VREF DAC settings: processing multiple samples...\n", |
| 4344 | node, if_num); |
| 4345 | |
| 4346 | for (lane = 0; lane < last_lane; lane++) { |
| 4347 | dac_settings[lane] = |
| 4348 | process_samples_average(&lanes[lane].bytes[0], |
| 4349 | num_samples, if_num, lane); |
| 4350 | } |
| 4351 | display_dac_dbi_settings(if_num, /*DAC*/ 1, use_ecc, |
| 4352 | dac_settings, "Averaged VREF"); |
| 4353 | |
| 4354 | // finally, write the final DAC values |
| 4355 | for (lane = 0; lane < last_lane; lane++) { |
| 4356 | load_dac_override(priv, if_num, dac_settings[lane], |
| 4357 | lane); |
| 4358 | } |
| 4359 | } |
| 4360 | |
| 4361 | // allow override of any byte-lane internal VREF |
| 4362 | int overrode_vref_dac = 0; |
| 4363 | |
| 4364 | for (lane = 0; lane < last_lane; lane++) { |
| 4365 | s = lookup_env(priv, "ddr%d_vref_dac_byte%d", if_num, lane); |
| 4366 | if (s) { |
| 4367 | dac_settings[lane] = simple_strtoul(s, NULL, 0); |
| 4368 | overrode_vref_dac = 1; |
| 4369 | // finally, write the new DAC value |
| 4370 | load_dac_override(priv, if_num, dac_settings[lane], |
| 4371 | lane); |
| 4372 | } |
| 4373 | } |
| 4374 | if (overrode_vref_dac) { |
| 4375 | display_dac_dbi_settings(if_num, /*DAC*/ 1, use_ecc, |
| 4376 | dac_settings, "Override VREF"); |
| 4377 | } |
| 4378 | |
| 4379 | // as a second step, after internal VREF training, before starting |
| 4380 | // deskew training: |
| 4381 | // for DDR3 and OCTEON3 not O78 pass 1.x, override the DAC setting |
| 4382 | // to 127 |
| 4383 | if (ddr_type == DDR3_DRAM && !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && |
| 4384 | !disable_deskew_training) { |
| 4385 | load_dac_override(priv, if_num, 127, /* all */ 0x0A); |
| 4386 | debug("N%d.LMC%d: Overriding DDR3 internal VREF DAC settings to 127.\n", |
| 4387 | node, if_num); |
| 4388 | } |
| 4389 | |
| 4390 | /* |
| 4391 | * 4.8.8 LMC Deskew Training |
| 4392 | * |
| 4393 | * LMC requires input-read-data deskew training. |
| 4394 | */ |
| 4395 | if (!disable_deskew_training) { |
| 4396 | deskew_training_errors = |
| 4397 | perform_deskew_training(priv, rank_mask, if_num, |
| 4398 | spd_rawcard_aorb); |
| 4399 | |
| 4400 | // All the Deskew lock and saturation retries (may) have |
| 4401 | // been done, but we ended up with nibble errors; so, |
| 4402 | // as a last ditch effort, try the Internal vref |
| 4403 | // Training again... |
| 4404 | if (deskew_training_errors) { |
| 4405 | if (internal_retries < |
| 4406 | DEFAULT_INTERNAL_VREF_TRAINING_LIMIT) { |
| 4407 | internal_retries++; |
| 4408 | debug("N%d.LMC%d: Deskew training results still unsettled - retrying internal vref training (%d)\n", |
| 4409 | node, if_num, internal_retries); |
| 4410 | goto perform_internal_vref_training; |
| 4411 | } else { |
| 4412 | if (restart_if_dsk_incomplete) { |
| 4413 | debug("N%d.LMC%d: INFO: Deskew training incomplete - %d retries exhausted, Restarting LMC init...\n", |
| 4414 | node, if_num, internal_retries); |
| 4415 | return -EAGAIN; |
| 4416 | } |
| 4417 | debug("N%d.LMC%d: Deskew training incomplete - %d retries exhausted, but continuing...\n", |
| 4418 | node, if_num, internal_retries); |
| 4419 | } |
| 4420 | } /* if (deskew_training_errors) */ |
| 4421 | |
| 4422 | // FIXME: treat this as the final DSK print from now on, |
| 4423 | // and print if VBL_NORM or above; also, save the results |
| 4424 | // of the original training in case we want them later |
| 4425 | validate_deskew_training(priv, rank_mask, if_num, |
| 4426 | &deskew_training_results, 1); |
| 4427 | } else { /* if (! disable_deskew_training) */ |
| 4428 | debug("N%d.LMC%d: Deskew Training disabled, printing settings before HWL.\n", |
| 4429 | node, if_num); |
| 4430 | validate_deskew_training(priv, rank_mask, if_num, |
| 4431 | &deskew_training_results, 1); |
| 4432 | } /* if (! disable_deskew_training) */ |
| 4433 | |
| 4434 | if (enable_by_rank_init) { |
| 4435 | read_dac_dbi_settings(priv, if_num, /*dac */ 1, |
| 4436 | &rank_dac[by_rank].bytes[0]); |
| 4437 | get_deskew_settings(priv, if_num, &rank_dsk[by_rank]); |
| 4438 | debug("\n>>>>> BY_RANK: ending rank %d\n\n", by_rank); |
| 4439 | } |
| 4440 | |
| 4441 | end_by_rank_init: |
| 4442 | |
| 4443 | if (enable_by_rank_init) { |
| 4444 | //debug("\n>>>>> BY_RANK: ending rank %d\n\n", by_rank); |
| 4445 | |
| 4446 | by_rank--; |
| 4447 | if (by_rank >= 0) |
| 4448 | goto start_by_rank_init; |
| 4449 | |
| 4450 | rank_mask = saved_rank_mask; |
| 4451 | ddr_init_seq(priv, rank_mask, if_num); |
| 4452 | |
| 4453 | process_by_rank_dac(priv, if_num, rank_mask, rank_dac); |
| 4454 | process_by_rank_dsk(priv, if_num, rank_mask, rank_dsk); |
| 4455 | |
| 4456 | // FIXME: set this to prevent later checking!!! |
| 4457 | disable_deskew_training = 1; |
| 4458 | |
| 4459 | debug("\n>>>>> BY_RANK: FINISHED!!\n\n"); |
| 4460 | } |
| 4461 | |
| 4462 | return 0; |
| 4463 | } |
| 4464 | |
| 4465 | static void lmc_config_2(struct ddr_priv *priv) |
| 4466 | { |
| 4467 | union cvmx_lmcx_config lmc_config; |
| 4468 | int save_ref_zqcs_int; |
| 4469 | u64 temp_delay_usecs; |
| 4470 | |
| 4471 | lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); |
| 4472 | |
| 4473 | /* |
| 4474 | * Temporarily select the minimum ZQCS interval and wait |
| 4475 | * long enough for a few ZQCS calibrations to occur. This |
| 4476 | * should ensure that the calibration circuitry is |
| 4477 | * stabilized before read/write leveling occurs. |
| 4478 | */ |
| 4479 | if (octeon_is_cpuid(OCTEON_CN7XXX)) { |
| 4480 | save_ref_zqcs_int = lmc_config.cn78xx.ref_zqcs_int; |
| 4481 | /* set smallest interval */ |
| 4482 | lmc_config.cn78xx.ref_zqcs_int = 1 | (32 << 7); |
| 4483 | } else { |
| 4484 | save_ref_zqcs_int = lmc_config.cn63xx.ref_zqcs_int; |
| 4485 | /* set smallest interval */ |
| 4486 | lmc_config.cn63xx.ref_zqcs_int = 1 | (32 << 7); |
| 4487 | } |
| 4488 | lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64); |
| 4489 | lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); |
| 4490 | |
| 4491 | /* |
| 4492 | * Compute an appropriate delay based on the current ZQCS |
| 4493 | * interval. The delay should be long enough for the |
| 4494 | * current ZQCS delay counter to expire plus ten of the |
| 4495 | * minimum intervals to ensure that some calibrations |
| 4496 | * occur. |
| 4497 | */ |
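| | /* |
| | * A rough reading of the arithmetic below (an interpretation, not |
| | * from the original comments): it reduces to |
| | * usecs ~= old_zqcs_field * 512 * 128 * tclk_psecs / 10^6 |
| | * + 10 * 32 * 512 * 128 * tclk_psecs / 10^6 |
| | * i.e. one period of the previously programmed interval plus ten |
| | * minimum (32-unit) intervals, assuming the field counts in units |
| | * of 512 * 128 CK cycles. At tclk_psecs = 1250 (DDR3-1600), the |
| | * ten minimum intervals alone come to roughly 26 ms. |
| | */ |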
| 4498 | temp_delay_usecs = (((u64)save_ref_zqcs_int >> 7) * tclk_psecs * |
| 4499 | 100 * 512 * 128) / (10000 * 10000) + 10 * |
| 4500 | ((u64)32 * tclk_psecs * 100 * 512 * 128) / (10000 * 10000); |
| 4501 | |
| 4502 | debug("Waiting %lld usecs for ZQCS calibrations to start\n", |
| 4503 | temp_delay_usecs); |
| 4504 | udelay(temp_delay_usecs); |
| 4505 | |
| 4506 | if (octeon_is_cpuid(OCTEON_CN7XXX)) { |
| 4507 | /* Restore computed interval */ |
| 4508 | lmc_config.cn78xx.ref_zqcs_int = save_ref_zqcs_int; |
| 4509 | } else { |
| 4510 | /* Restore computed interval */ |
| 4511 | lmc_config.cn63xx.ref_zqcs_int = save_ref_zqcs_int; |
| 4512 | } |
| 4513 | |
| 4514 | lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64); |
| 4515 | lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); |
| 4516 | } |
| 4517 | |
| 4518 | static union cvmx_lmcx_wlevel_ctl wl_ctl __section(".data"); |
| 4519 | static union cvmx_lmcx_wlevel_rankx wl_rank __section(".data"); |
| 4520 | static union cvmx_lmcx_modereg_params1 mp1 __section(".data"); |
| 4521 | |
| 4522 | static int wl_mask[9] __section(".data"); |
| 4523 | static int byte_idx __section(".data"); |
| 4524 | static int ecc_ena __section(".data"); |
| 4525 | static int wl_roundup __section(".data"); |
| 4526 | static int save_mode32b __section(".data"); |
| 4527 | static int disable_hwl_validity __section(".data"); |
| 4528 | static int default_wl_rtt_nom __section(".data"); |
| 4529 | static int wl_pbm_pump __section(".data"); |
| 4530 | |
| 4531 | static void lmc_write_leveling_loop(struct ddr_priv *priv, int rankx) |
| 4532 | { |
| 4533 | int wloop = 0; |
| 4534 | // retries per sample for HW-related issues with bitmasks or values |
| 4535 | int wloop_retries = 0; |
| 4536 | int wloop_retries_total = 0; |
| 4537 | int wloop_retries_exhausted = 0; |
| 4538 | #define WLOOP_RETRIES_DEFAULT 5 |
| 4539 | int wl_val_err; |
| 4540 | int wl_mask_err_rank = 0; |
| 4541 | int wl_val_err_rank = 0; |
| 4542 | // array to collect counts of byte-lane values |
| 4543 | // assume only the low-order 3 bits, and even values, so really only 2-bit values |
| 4544 | struct wlevel_bitcnt wl_bytes[9], wl_bytes_extra[9]; |
| 4545 | int extra_bumps, extra_mask; |
| 4546 | int rank_nom = 0; |
| 4547 | |
| 4548 | if (!(rank_mask & (1 << rankx))) |
| 4549 | return; |
| 4550 | |
| 4551 | if (match_wl_rtt_nom) { |
| 4552 | if (rankx == 0) |
| 4553 | rank_nom = mp1.s.rtt_nom_00; |
| 4554 | if (rankx == 1) |
| 4555 | rank_nom = mp1.s.rtt_nom_01; |
| 4556 | if (rankx == 2) |
| 4557 | rank_nom = mp1.s.rtt_nom_10; |
| 4558 | if (rankx == 3) |
| 4559 | rank_nom = mp1.s.rtt_nom_11; |
| 4560 | |
| 4561 | debug("N%d.LMC%d.R%d: Setting WLEVEL_CTL[rtt_nom] to %d (%d)\n", |
| 4562 | node, if_num, rankx, rank_nom, |
| 4563 | imp_val->rtt_nom_ohms[rank_nom]); |
| 4564 | } |
| 4565 | |
| 4566 | memset(wl_bytes, 0, sizeof(wl_bytes)); |
| 4567 | memset(wl_bytes_extra, 0, sizeof(wl_bytes_extra)); |
| 4568 | |
| 4569 | // restructure the looping so we can keep trying until we get the |
| 4570 | // samples we want |
| 4571 | while (wloop < wl_loops) { |
| 4572 | wl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_CTL(if_num)); |
| 4573 | |
| 4574 | wl_ctl.cn78xx.rtt_nom = |
| 4575 | (default_wl_rtt_nom > 0) ? (default_wl_rtt_nom - 1) : 7; |
| 4576 | |
| 4577 | if (match_wl_rtt_nom) { |
| 4578 | wl_ctl.cn78xx.rtt_nom = |
| 4579 | (rank_nom > 0) ? (rank_nom - 1) : 7; |
| 4580 | } |
| 4581 | |
| 4582 | /* Clear write-level delays */ |
| 4583 | lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), 0); |
| 4584 | |
| 4585 | wl_mask_err = 0; /* Reset error counters */ |
| 4586 | wl_val_err = 0; |
| 4587 | |
| 4588 | for (byte_idx = 0; byte_idx < 9; ++byte_idx) |
| 4589 | wl_mask[byte_idx] = 0; /* Reset bitmasks */ |
| 4590 | |
| 4591 | // do all the byte-lanes at the same time |
| 4592 | wl_ctl.cn78xx.lanemask = 0x1ff; |
| 4593 | |
| 4594 | lmc_wr(priv, CVMX_LMCX_WLEVEL_CTL(if_num), wl_ctl.u64); |
| 4595 | |
| 4596 | /* |
| 4597 | * Read and write values back in order to update the |
| 4598 | * status field. This ensures that we read the updated |
| 4599 | * values after write-leveling has completed. |
| 4600 | */ |
| 4601 | lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), |
| 4602 | lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num))); |
| 4603 | |
| 4604 | /* write-leveling */ |
| 4605 | oct3_ddr3_seq(priv, 1 << rankx, if_num, 6); |
| 4606 | |
| 4607 | do { |
| 4608 | wl_rank.u64 = lmc_rd(priv, |
| 4609 | CVMX_LMCX_WLEVEL_RANKX(rankx, |
| 4610 | if_num)); |
| 4611 | } while (wl_rank.cn78xx.status != 3); |
| 4612 | |
| 4613 | wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, |
| 4614 | if_num)); |
| 4615 | |
| 4616 | for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) { |
| 4617 | wl_mask[byte_idx] = lmc_ddr3_wl_dbg_read(priv, |
| 4618 | if_num, |
| 4619 | byte_idx); |
| 4620 | if (wl_mask[byte_idx] == 0) |
| 4621 | ++wl_mask_err; |
| 4622 | } |
| 4623 | |
| 4624 | // check validity only if no bitmask errors |
| 4625 | if (wl_mask_err == 0) { |
| 4626 | if ((spd_dimm_type == 1 || spd_dimm_type == 2) && |
| 4627 | dram_width != 16 && if_64b && |
| 4628 | !disable_hwl_validity) { |
| 4629 | // bypass if [mini|SO]-[RU]DIMM or x16 or |
| 4630 | // 32-bit |
| 4631 | wl_val_err = |
| 4632 | validate_hw_wl_settings(if_num, |
| 4633 | &wl_rank, |
| 4634 | spd_rdimm, ecc_ena); |
| 4635 | wl_val_err_rank += (wl_val_err != 0); |
| 4636 | } |
| 4637 | } else { |
| 4638 | wl_mask_err_rank++; |
| 4639 | } |
| 4640 | |
| 4641 | // before we print, if we had bitmask or validity errors, |
| 4642 | // do a retry... |
| 4643 | if (wl_mask_err != 0 || wl_val_err != 0) { |
| 4644 | if (wloop_retries < WLOOP_RETRIES_DEFAULT) { |
| 4645 | wloop_retries++; |
| 4646 | wloop_retries_total++; |
| 4647 | // this printout is per-retry: only when VBL |
| 4648 | // is high enough (DEV?) |
| 4649 | // FIXME: do we want to show the bad bitmaps |
| 4650 | // or delays here also? |
| 4651 | debug("N%d.LMC%d.R%d: H/W Write-Leveling had %s errors - retrying...\n", |
| 4652 | node, if_num, rankx, |
| 4653 | (wl_mask_err) ? "Bitmask" : "Validity"); |
| 4654 | // this takes us back to the top without |
| 4655 | // counting a sample |
| 4656 | return; |
| 4657 | } |
| 4658 | |
| 4659 | // retries exhausted, do not print at normal VBL |
| 4660 | debug("N%d.LMC%d.R%d: H/W Write-Leveling issues: %s errors\n", |
| 4661 | node, if_num, rankx, |
| 4662 | (wl_mask_err) ? "Bitmask" : "Validity"); |
| 4663 | wloop_retries_exhausted++; |
| 4664 | } |
| 4665 | // no errors or exhausted retries, use this sample |
| 4666 | wloop_retries = 0; //reset for next sample |
| 4667 | |
| 4668 | // when only 1 sample or forced, print the bitmasks then |
| 4669 | // current HW WL |
| 4670 | if (wl_loops == 1 || wl_print) { |
| 4671 | if (wl_print > 1) |
| 4672 | display_wl_bm(if_num, rankx, wl_mask); |
| 4673 | display_wl(if_num, wl_rank, rankx); |
| 4674 | } |
| 4675 | |
| 4676 | if (wl_roundup) { /* Round up odd bitmask delays */ |
| 4677 | for (byte_idx = 0; byte_idx < (8 + ecc_ena); |
| 4678 | ++byte_idx) { |
| 4679 | if (!(if_bytemask & (1 << byte_idx))) |
| 4680 | return; |
| 4681 | upd_wl_rank(&wl_rank, byte_idx, |
| 4682 | roundup_ddr3_wlevel_bitmask |
| 4683 | (wl_mask[byte_idx])); |
| 4684 | } |
| 4685 | lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), |
| 4686 | wl_rank.u64); |
| 4687 | display_wl(if_num, wl_rank, rankx); |
| 4688 | } |
| 4689 | |
| 4690 | // OK, we have a decent sample, no bitmask or validity errors |
| 4691 | extra_bumps = 0; |
| 4692 | extra_mask = 0; |
| 4693 | for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) { |
| 4694 | int ix; |
| 4695 | |
| 4696 | if (!(if_bytemask & (1 << byte_idx))) |
| 4697 | return; |
| 4698 | |
| 4699 | // increment count of byte-lane value |
| 4700 | // only 4 values |
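| | // (HW WL results are even taps 0/2/4/6, so (val >> 1) & 3 |
| | // maps them onto vote bins 0..3) |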
| 4701 | ix = (get_wl_rank(&wl_rank, byte_idx) >> 1) & 3; |
| 4702 | wl_bytes[byte_idx].bitcnt[ix]++; |
| 4703 | wl_bytes_extra[byte_idx].bitcnt[ix]++; |
| 4704 | // if perfect... |
| 4705 | if (__builtin_popcount(wl_mask[byte_idx]) == 4) { |
| 4706 | wl_bytes_extra[byte_idx].bitcnt[ix] += |
| 4707 | wl_pbm_pump; |
| 4708 | extra_bumps++; |
| 4709 | extra_mask |= 1 << byte_idx; |
| 4710 | } |
| 4711 | } |
| 4712 | |
| 4713 | if (extra_bumps) { |
| 4714 | if (wl_print > 1) { |
| 4715 | debug("N%d.LMC%d.R%d: HWL sample had %d bumps (0x%02x).\n", |
| 4716 | node, if_num, rankx, extra_bumps, |
| 4717 | extra_mask); |
| 4718 | } |
| 4719 | } |
| 4720 | |
| 4721 | // if we get here, we have taken a decent sample |
| 4722 | wloop++; |
| 4723 | |
| 4724 | } /* while (wloop < wl_loops) */ |
| 4725 | |
| 4726 | // if we did sample more than once, try to pick a majority vote |
| 4727 | if (wl_loops > 1) { |
| 4728 | // look for the majority in each byte-lane |
| 4729 | for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) { |
| 4730 | int mx, mc, xc, cc; |
| 4731 | int ix, alts; |
| 4732 | int maj, xmaj, xmx, xmc, xxc, xcc; |
| 4733 | |
| 4734 | if (!(if_bytemask & (1 << byte_idx))) |
| 4735 | return; |
| 4736 | maj = find_wl_majority(&wl_bytes[byte_idx], &mx, |
| 4737 | &mc, &xc, &cc); |
| 4738 | xmaj = find_wl_majority(&wl_bytes_extra[byte_idx], |
| 4739 | &xmx, &xmc, &xxc, &xcc); |
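| | // (judging from how the outputs are used below: mx is the |
| | // winning bin, mc a bitmask of bins that received votes, xc |
| | // the winner's count, cc the number of distinct bins seen) |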
| 4740 | if (maj != xmaj) { |
| 4741 | if (wl_print) { |
| 4742 | debug("N%d.LMC%d.R%d: Byte %d: HWL maj %d(%d), USING xmaj %d(%d)\n", |
| 4743 | node, if_num, rankx, |
| 4744 | byte_idx, maj, xc, xmaj, xxc); |
| 4745 | } |
| 4746 | mx = xmx; |
| 4747 | mc = xmc; |
| 4748 | xc = xxc; |
| 4749 | cc = xcc; |
| 4750 | } |
| 4751 | |
| 4752 | // see if there was an alternate |
| 4753 | // take out the majority choice |
| 4754 | alts = (mc & ~(1 << mx)); |
| 4755 | if (alts != 0) { |
| 4756 | for (ix = 0; ix < 4; ix++) { |
| 4757 | // FIXME: could be done multiple times? |
| 4758 | // bad if so |
| 4759 | if (alts & (1 << ix)) { |
| 4760 | // set the mask |
| 4761 | hwl_alts[rankx].hwl_alt_mask |= |
| 4762 | (1 << byte_idx); |
| 4763 | // record the value |
| 4764 | hwl_alts[rankx].hwl_alt_delay[byte_idx] = |
| 4765 | ix << 1; |
| 4766 | if (wl_print > 1) { |
| 4767 | debug("N%d.LMC%d.R%d: SWL_TRY_HWL_ALT: Byte %d maj %d (%d) alt %d (%d).\n", |
| 4768 | node, |
| 4769 | if_num, |
| 4770 | rankx, |
| 4771 | byte_idx, |
| 4772 | mx << 1, |
| 4773 | xc, |
| 4774 | ix << 1, |
| 4775 | wl_bytes |
| 4776 | [byte_idx].bitcnt |
| 4777 | [ix]); |
| 4778 | } |
| 4779 | } |
| 4780 | } |
| 4781 | } |
| 4782 | |
| 4783 | if (cc > 2) { // unlikely, but... |
| 4784 | // assume: counts for 3 indices are all 1 |
| 4785 | // possibilities are: 0/2/4, 2/4/6, 0/4/6, 0/2/6 |
| 4786 | // and the desired?: 2 , 4 , 6, 0 |
| 4787 | // we choose the middle, assuming one of the |
| 4788 | // outliers is bad |
| 4789 | // NOTE: this is an ugly hack at the moment; |
| 4790 | // there must be a better way |
| 4791 | switch (mc) { |
| 4792 | case 0x7: |
| 4793 | mx = 1; |
| 4794 | break; // was 0/2/4, choose 2 |
| 4795 | case 0xb: |
| 4796 | mx = 0; |
| 4797 | break; // was 0/2/6, choose 0 |
| 4798 | case 0xd: |
| 4799 | mx = 3; |
| 4800 | break; // was 0/4/6, choose 6 |
| 4801 | case 0xe: |
| 4802 | mx = 2; |
| 4803 | break; // was 2/4/6, choose 4 |
| 4804 | default: |
| 4805 | case 0xf: |
| 4806 | mx = 1; |
| 4807 | break; // was 0/2/4/6, choose 2? |
| 4808 | } |
| 4809 | printf("N%d.LMC%d.R%d: HW WL MAJORITY: bad byte-lane %d (0x%x), using %d.\n", |
| 4810 | node, if_num, rankx, byte_idx, mc, |
| 4811 | mx << 1); |
| 4812 | } |
| 4813 | upd_wl_rank(&wl_rank, byte_idx, mx << 1); |
| 4814 | } |
| 4815 | |
| 4816 | lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), |
| 4817 | wl_rank.u64); |
| 4818 | display_wl_with_final(if_num, wl_rank, rankx); |
| 4819 | |
| 4820 | // FIXME: does this help make the output a little easier |
| 4821 | // to focus? |
| 4822 | if (wl_print > 0) |
| 4823 | debug("-----------\n"); |
| 4824 | |
| 4825 | } /* if (wl_loops > 1) */ |
| 4826 | |
| 4827 | // maybe print an error summary for the rank |
| 4828 | if (wl_mask_err_rank != 0 || wl_val_err_rank != 0) { |
| 4829 | debug("N%d.LMC%d.R%d: H/W Write-Leveling errors - %d bitmask, %d validity, %d retries, %d exhausted\n", |
| 4830 | node, if_num, rankx, wl_mask_err_rank, |
| 4831 | wl_val_err_rank, wloop_retries_total, |
| 4832 | wloop_retries_exhausted); |
| 4833 | } |
| 4834 | } |
| 4835 | |
| 4836 | static void lmc_write_leveling(struct ddr_priv *priv) |
| 4837 | { |
| 4838 | union cvmx_lmcx_config cfg; |
| 4839 | int rankx; |
| 4840 | char *s; |
| 4841 | |
| 4842 | /* |
| 4843 | * 4.8.9 LMC Write Leveling |
| 4844 | * |
| 4845 | * LMC supports an automatic write leveling like that described in the |
| 4846 | * JEDEC DDR3 specifications separately per byte-lane. |
| 4847 | * |
| 4848 | * All of DDR PLL, LMC CK, LMC DRESET, and early LMC initializations |
| 4849 | * must be completed prior to starting this LMC write-leveling sequence. |
| 4850 | * |
| 4851 | * There are many possible procedures that will write-level all the |
| 4852 | * attached DDR3 DRAM parts. One possibility is for software to simply |
| 4853 | * write the desired values into LMC(0)_WLEVEL_RANK(0..3). This section |
| 4854 | * describes one possible sequence that uses LMC's auto-write-leveling |
| 4855 | * capabilities. |
| 4856 | * |
| 4857 | * 1. If the DQS/DQ delays on the board may be more than the ADD/CMD |
| 4858 | * delays, then ensure that LMC(0)_CONFIG[EARLY_DQX] is set at this |
| 4859 | * point. |
| 4860 | * |
| 4861 | * Do the remaining steps 2-7 separately for each rank i with attached |
| 4862 | * DRAM. |
| 4863 | * |
| 4864 | * 2. Write LMC(0)_WLEVEL_RANKi = 0. |
| 4865 | * |
| 4866 | * 3. For x8 parts: |
| 4867 | * |
| 4868 | * Without changing any other fields in LMC(0)_WLEVEL_CTL, write |
| 4869 | * LMC(0)_WLEVEL_CTL[LANEMASK] to select all byte lanes with attached |
| 4870 | * DRAM. |
| 4871 | * |
| 4872 | * For x16 parts: |
| 4873 | * |
| 4874 | * Without changing any other fields in LMC(0)_WLEVEL_CTL, write |
| 4875 | * LMC(0)_WLEVEL_CTL[LANEMASK] to select all even byte lanes with |
| 4876 | * attached DRAM. |
| 4877 | * |
| 4878 | * 4. Without changing any other fields in LMC(0)_CONFIG, |
| 4879 | * |
| 4880 | * o write LMC(0)_SEQ_CTL[SEQ_SEL] to select write-leveling |
| 4881 | * |
| 4882 | * o write LMC(0)_CONFIG[RANKMASK] = (1 << i) |
| 4883 | * |
| 4884 | * o write LMC(0)_SEQ_CTL[INIT_START] = 1 |
| 4885 | * |
| 4886 | * LMC will initiate write-leveling at this point. Assuming |
| 4887 | * LMC(0)_WLEVEL_CTL [SSET] = 0, LMC first enables write-leveling on |
| 4888 | * the selected DRAM rank via a DDR3 MR1 write, then sequences |
| 4889 | * through and accumulates write-leveling results for eight |
| 4890 | * different delay settings twice, starting at a delay of zero in |
| 4891 | * this case since |
| 4892 | * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] = 0, increasing by 1/8 CK each |
| 4893 | * setting, covering a total distance of one CK, then disables the |
| 4894 | * write-leveling via another DDR3 MR1 write. |
| 4895 | * |
| 4896 | * After the sequence through 16 delay settings is complete: |
| 4897 | * |
| 4898 | * o LMC sets LMC(0)_WLEVEL_RANKi[STATUS] = 3 |
| 4899 | * |
| 4900 | * o LMC sets LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] (for all ranks selected |
| 4901 | * by LMC(0)_WLEVEL_CTL[LANEMASK]) to indicate the first write |
| 4902 | * leveling result of 1 that followed result of 0 during the |
| 4903 | * sequence, except that the LMC always writes |
| 4904 | * LMC(0)_WLEVEL_RANKi[BYTE*<0>]=0. |
| 4905 | * |
| 4906 | * o Software can read the eight write-leveling results from the |
| 4907 | * first pass through the delay settings by reading |
| 4908 | * LMC(0)_WLEVEL_DBG[BITMASK] (after writing |
| 4909 | * LMC(0)_WLEVEL_DBG[BYTE]). (LMC does not retain the write-leveling |
| 4910 | * results from the second pass through the eight delay |
| 4911 | * settings. They should often be identical to the |
| 4912 | * LMC(0)_WLEVEL_DBG[BITMASK] results, though.) |
| 4913 | * |
| 4914 | * 5. Wait until LMC(0)_WLEVEL_RANKi[STATUS] != 2. |
| 4915 | * |
| 4916 | * LMC will have updated LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] for all byte |
| 4917 | * lanes selected by LMC(0)_WLEVEL_CTL[LANEMASK] at this point. |
| 4918 | * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] will still be the value that |
| 4919 | * software wrote in substep 2 above, which is 0. |
| 4920 | * |
| 4921 | * 6. For x16 parts: |
| 4922 | * |
| 4923 | * Without changing any other fields in LMC(0)_WLEVEL_CTL, write |
| 4924 | * LMC(0)_WLEVEL_CTL[LANEMASK] to select all odd byte lanes with |
| 4925 | * attached DRAM. |
| 4926 | * |
| 4927 | * Repeat substeps 4 and 5 with this new LMC(0)_WLEVEL_CTL[LANEMASK] |
| 4928 | * setting. Skip to substep 7 if this has already been done. |
| 4929 | * |
| 4930 | * For x8 parts: |
| 4931 | * |
| 4932 | * Skip this substep. Go to substep 7. |
| 4933 | * |
| 4934 | * 7. Calculate LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings for all byte |
| 4935 | * lanes on all ranks with attached DRAM. |
| 4936 | * |
| 4937 | * At this point, all byte lanes on rank i with attached DRAM should |
| 4938 | * have been write-leveled, and LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] has |
| 4939 | * the result for each byte lane. |
| 4940 | * |
| 4941 | * But note that the DDR3 write-leveling sequence will only determine |
| 4942 | * the delay modulo the CK cycle time, and cannot determine how many |
| 4943 | * additional CK cycles of delay are present. Software must calculate |
| 4944 | * the number of CK cycles, or equivalently, the |
| 4945 | * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings. |
| 4946 | * |
| 4947 | * This BYTE*<4:3> calculation is system/board specific. |
| 4948 | * |
| 4949 | * Many techniques can be used to calculate write-leveling BYTE*<4:3> |
| 4950 | * values, including: |
| 4951 | * |
| 4952 | * o Known values for some byte lanes. |
| 4953 | * |
| 4954 | * o Relative values for some byte lanes relative to others. |
| 4955 | * |
| 4956 | * For example, suppose lane X is likely to require a larger |
| 4957 | * write-leveling delay than lane Y. A BYTEX<2:0> value that is much |
| 4958 | * smaller than the BYTEY<2:0> value may then indicate that the |
| 4959 | * required lane X delay wrapped into the next CK, so BYTEX<4:3> |
| 4960 | * should be set to BYTEY<4:3>+1. |
| 4961 | * |
| 4962 | * When ECC DRAM is not present (i.e. when DRAM is not attached to |
| 4963 | * the DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the |
| 4964 | * DDR_DQS_<4>_* and DDR_DQ<35:32> chip signals), write |
| 4965 | * LMC(0)_WLEVEL_RANK*[BYTE8] = LMC(0)_WLEVEL_RANK*[BYTE0], |
| 4966 | * using the final calculated BYTE0 value. |
| 4967 | * Write LMC(0)_WLEVEL_RANK*[BYTE4] = LMC(0)_WLEVEL_RANK*[BYTE0], |
| 4968 | * using the final calculated BYTE0 value. |
| 4969 | * |
| 4970 | * 8. Initialize LMC(0)_WLEVEL_RANK* values for all unused ranks. |
| 4971 | * |
| 4972 | * Let rank i be a rank with attached DRAM. |
| 4973 | * |
| 4974 | * For all ranks j that do not have attached DRAM, set |
| 4975 | * LMC(0)_WLEVEL_RANKj = LMC(0)_WLEVEL_RANKi. |
| 4976 | */ |
| 4977 | |
| 4978 | rankx = 0; |
| 4979 | wl_roundup = 0; |
| 4980 | disable_hwl_validity = 0; |
| 4981 | |
| 4982 | // wl_pbm_pump: weight for write-leveling PBMs... |
| 4983 | // 0 causes original behavior |
| 4984 | // 1 allows a minority of 2 pbms to outscore a majority of 3 non-pbms |
| 4985 | // 4 would allow a minority of 1 pbm to outscore a majority of 4 |
| 4986 | // non-pbms |
| 4987 | wl_pbm_pump = 4; // FIXME: is 4 too much? |
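| | // ("pbm" apparently refers to a sample with a Perfect BitMask, |
| | // i.e. exactly 4 bits set - see the popcount check in |
| | // lmc_write_leveling_loop()) |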
| 4988 | |
| 4989 | if (wl_loops) { |
| 4990 | debug("N%d.LMC%d: Performing Hardware Write-Leveling\n", node, |
| 4991 | if_num); |
| 4992 | } else { |
| 4993 | /* Force software write-leveling to run */ |
| 4994 | wl_mask_err = 1; |
| 4995 | debug("N%d.LMC%d: Forcing software Write-Leveling\n", node, |
| 4996 | if_num); |
| 4997 | } |
| 4998 | |
| 4999 | default_wl_rtt_nom = (ddr_type == DDR3_DRAM) ? |
| 5000 | rttnom_20ohm : ddr4_rttnom_40ohm; |
| 5001 | |
| 5002 | cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); |
| 5003 | ecc_ena = cfg.s.ecc_ena; |
| 5004 | save_mode32b = cfg.cn78xx.mode32b; |
| 5005 | cfg.cn78xx.mode32b = (!if_64b); |
| 5006 | lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64); |
| 5007 | debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b); |
| 5008 | |
| 5009 | s = lookup_env(priv, "ddr_wlevel_roundup"); |
| 5010 | if (s) |
| 5011 | wl_roundup = simple_strtoul(s, NULL, 0); |
| 5012 | |
| 5013 | s = lookup_env(priv, "ddr_wlevel_printall"); |
| 5014 | if (s) |
| 5015 | wl_print = strtoul(s, NULL, 0); |
| 5016 | |
| 5017 | s = lookup_env(priv, "ddr_wlevel_pbm_bump"); |
| 5018 | if (s) |
| 5019 | wl_pbm_pump = strtoul(s, NULL, 0); |
| 5020 | |
| 5021 | // default to disable when RL sequential delay check is disabled |
| 5022 | disable_hwl_validity = disable_sequential_delay_check; |
| 5023 | s = lookup_env(priv, "ddr_disable_hwl_validity"); |
| 5024 | if (s) |
| 5025 | disable_hwl_validity = !!strtoul(s, NULL, 0); |
| 5026 | |
| 5027 | s = lookup_env(priv, "ddr_wl_rtt_nom"); |
| 5028 | if (s) |
| 5029 | default_wl_rtt_nom = simple_strtoul(s, NULL, 0); |
| 5030 | |
| 5031 | s = lookup_env(priv, "ddr_match_wl_rtt_nom"); |
| 5032 | if (s) |
| 5033 | match_wl_rtt_nom = !!simple_strtoul(s, NULL, 0); |
| 5034 | |
| 5035 | if (match_wl_rtt_nom) |
| 5036 | mp1.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num)); |
| 5037 | |
| 5038 | // For DDR3, we do not touch WLEVEL_CTL fields OR_DIS or BITMASK |
| 5039 | // For DDR4, we touch WLEVEL_CTL fields OR_DIS or BITMASK here |
| 5040 | if (ddr_type == DDR4_DRAM) { |
| 5041 | int default_or_dis = 1; |
| 5042 | int default_bitmask = 0xff; |
| 5043 | |
| 5044 | // when x4, use only the lower nibble |
| 5045 | if (dram_width == 4) { |
| 5046 | default_bitmask = 0x0f; |
| 5047 | if (wl_print) { |
| 5048 | debug("N%d.LMC%d: WLEVEL_CTL: default bitmask is 0x%02x for DDR4 x4\n", |
| 5049 | node, if_num, default_bitmask); |
| 5050 | } |
| 5051 | } |
| 5052 | |
| 5053 | wl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_CTL(if_num)); |
| 5054 | wl_ctl.s.or_dis = default_or_dis; |
| 5055 | wl_ctl.s.bitmask = default_bitmask; |
| 5056 | |
| 5057 | // allow overrides |
| 5058 | s = lookup_env(priv, "ddr_wlevel_ctl_or_dis"); |
| 5059 | if (s) |
| 5060 | wl_ctl.s.or_dis = !!strtoul(s, NULL, 0); |
| 5061 | |
| 5062 | s = lookup_env(priv, "ddr_wlevel_ctl_bitmask"); |
| 5063 | if (s) |
| 5064 | wl_ctl.s.bitmask = simple_strtoul(s, NULL, 0); |
| 5065 | |
| 5066 | // print only if not defaults |
| 5067 | if (wl_ctl.s.or_dis != default_or_dis || |
| 5068 | wl_ctl.s.bitmask != default_bitmask) { |
| 5069 | debug("N%d.LMC%d: WLEVEL_CTL: or_dis=%d, bitmask=0x%02x\n", |
| 5070 | node, if_num, wl_ctl.s.or_dis, wl_ctl.s.bitmask); |
| 5071 | } |
| 5072 | |
| 5073 | // always write |
| 5074 | lmc_wr(priv, CVMX_LMCX_WLEVEL_CTL(if_num), wl_ctl.u64); |
| 5075 | } |
| 5076 | |
| 5077 | // Start the hardware write-leveling loop per rank |
| 5078 | for (rankx = 0; rankx < dimm_count * 4; rankx++) |
| 5079 | lmc_write_leveling_loop(priv, rankx); |
| 5080 | |
| 5081 | cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); |
| 5082 | cfg.cn78xx.mode32b = save_mode32b; |
| 5083 | lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64); |
| 5084 | debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b); |
| 5085 | |
| 5086 | // At the end of HW Write Leveling, check on some DESKEW things... |
| 5087 | if (!disable_deskew_training) { |
| 5088 | struct deskew_counts dsk_counts; |
| 5089 | int retry_count = 0; |
| 5090 | |
| 5091 | debug("N%d.LMC%d: Check Deskew Settings before Read-Leveling.\n", |
| 5092 | node, if_num); |
| 5093 | |
| 5094 | do { |
| 5095 | validate_deskew_training(priv, rank_mask, if_num, |
| 5096 | &dsk_counts, 1); |
| 5097 | |
| 5098 | // only rawcard A or B skips retraining when the only |
| 5099 | // problem is saturation; any rawcard is retrained if |
| 5100 | // there is a nibble error |
| 5101 | if ((!spd_rawcard_aorb && dsk_counts.saturated > 0) || |
| 5102 | (dsk_counts.nibrng_errs != 0 || |
| 5103 | dsk_counts.nibunl_errs != 0)) { |
| 5104 | retry_count++; |
| 5105 | debug("N%d.LMC%d: Deskew Status indicates saturation or nibble errors - retry %d Training.\n", |
| 5106 | node, if_num, retry_count); |
| 5107 | perform_deskew_training(priv, rank_mask, if_num, |
| 5108 | spd_rawcard_aorb); |
| 5109 | } else { |
| 5110 | break; |
| 5111 | } |
| 5112 | } while (retry_count < 5); |
| 5113 | } |
| 5114 | } |
| 5115 | |
| 5116 | static void lmc_workaround(struct ddr_priv *priv) |
| 5117 | { |
| 5118 | /* Workaround Trcd overflow by using Additive latency. */ |
| 5119 | if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) { |
| 5120 | union cvmx_lmcx_modereg_params0 mp0; |
| 5121 | union cvmx_lmcx_timing_params1 tp1; |
| 5122 | union cvmx_lmcx_control ctrl; |
| 5123 | int rankx; |
| 5124 | |
| 5125 | tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num)); |
| 5126 | mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num)); |
| 5127 | ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); |
| 5128 | |
| 5129 | if (tp1.cn78xx.trcd == 0) { |
| 5130 | debug("Workaround Trcd overflow by using Additive latency.\n"); |
| 5131 | /* Hard code this to 12 and enable additive latency */ |
| 5132 | tp1.cn78xx.trcd = 12; |
| 5133 | mp0.s.al = 2; /* CL-2 */ |
| 5134 | ctrl.s.pocas = 1; |
| 5135 | |
| 5136 | debug("MODEREG_PARAMS0 : 0x%016llx\n", |
| 5137 | mp0.u64); |
| 5138 | lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), |
| 5139 | mp0.u64); |
| 5140 | debug("TIMING_PARAMS1 : 0x%016llx\n", |
| 5141 | tp1.u64); |
| 5142 | lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64); |
| 5143 | |
| 5144 | debug("LMC_CONTROL : 0x%016llx\n", |
| 5145 | ctrl.u64); |
| 5146 | lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64); |
| 5147 | |
| 5148 | for (rankx = 0; rankx < dimm_count * 4; rankx++) { |
| 5149 | if (!(rank_mask & (1 << rankx))) |
| 5150 | continue; |
| 5151 | |
| 5152 | /* MR1 */ |
| 5153 | ddr4_mrw(priv, if_num, rankx, -1, 1, 0); |
| 5154 | } |
| 5155 | } |
| 5156 | } |
| 5157 | |
| 5158 | // this is here just for output, to allow check of the Deskew |
| 5159 | // settings one last time... |
| 5160 | if (!disable_deskew_training) { |
| 5161 | struct deskew_counts dsk_counts; |
| 5162 | |
| 5163 | debug("N%d.LMC%d: Check Deskew Settings before software Write-Leveling.\n", |
| 5164 | node, if_num); |
| 5165 | validate_deskew_training(priv, rank_mask, if_num, &dsk_counts, |
| 5166 | 3); |
| 5167 | } |
| 5168 | |
| 5169 | /* |
| 5170 | * Workaround Errata 26304 (T88@2.0, O75@1.x, O78@2.x) |
| 5171 | * |
| 5172 | * When the CSRs LMCX_DLL_CTL3[WR_DESKEW_ENA] = 1 AND |
| 5173 | * LMCX_PHY_CTL2[DQS[0..8]_DSK_ADJ] > 4, set |
| 5174 | * LMCX_EXT_CONFIG[DRIVE_ENA_BPRCH] = 1. |
| 5175 | */ |
| 5176 | if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) || |
| 5177 | octeon_is_cpuid(OCTEON_CNF75XX_PASS1_X)) { |
| 5178 | union cvmx_lmcx_dll_ctl3 dll_ctl3; |
| 5179 | union cvmx_lmcx_phy_ctl2 phy_ctl2; |
| 5180 | union cvmx_lmcx_ext_config ext_cfg; |
| 5181 | int increased_dsk_adj = 0; |
| 5182 | int byte; |
| 5183 | |
| 5184 | phy_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL2(if_num)); |
| 5185 | ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num)); |
| 5186 | dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); |
| 5187 | |
| 5188 | for (byte = 0; byte < 8; ++byte) { |
| 5189 | if (!(if_bytemask & (1 << byte))) |
| 5190 | continue; |
| 5191 | increased_dsk_adj |= |
| 5192 | (((phy_ctl2.u64 >> (byte * 3)) & 0x7) > 4); |
| 5193 | } |
| 5194 | |
| 5195 | if (dll_ctl3.s.wr_deskew_ena == 1 && increased_dsk_adj) { |
| 5196 | ext_cfg.s.drive_ena_bprch = 1; |
| 5197 | lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_cfg.u64); |
| 5198 | debug("LMC%d: Forcing DRIVE_ENA_BPRCH for Workaround Errata 26304.\n", |
| 5199 | if_num); |
| 5200 | } |
| 5201 | } |
| 5202 | } |
| 5203 | |
| 5204 | // Software Write-Leveling block |
| 5205 | |
| 5206 | #define VREF_RANGE1_LIMIT 0x33 // range1 is valid for 0x00 - 0x32 |
| 5207 | #define VREF_RANGE2_LIMIT 0x18 // range2 is valid for 0x00 - 0x17 |
| 5208 | // full window is valid for 0x00 to 0x4A |
| 5209 | // let 0x00 - 0x17 be range2, 0x18 - 0x4a be range 1 |
| 5210 | #define VREF_LIMIT (VREF_RANGE1_LIMIT + VREF_RANGE2_LIMIT) |
| 5211 | #define VREF_FINAL (VREF_LIMIT - 1) |
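| | /* |
| | * The vref loops below walk a single combined index 0..VREF_FINAL; |
| | * as set up in ddr4_vref_loop(), indices 0x00-0x17 select DDR4 |
| | * VrefDQ range 2 with value == index, and indices 0x18-0x4a select |
| | * range 1 with value == index - 0x18. |
| | */ |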
| 5212 | |
| 5213 | enum sw_wl_status { |
| 5214 | WL_ESTIMATED = 0, /* HW/SW wleveling failed. Result estimated */ |
| 5215 | WL_HARDWARE = 1, /* H/W wleveling succeeded */ |
| 5216 | WL_SOFTWARE = 2, /* S/W wleveling passed 2 contiguous settings */ |
| 5217 | WL_SOFTWARE1 = 3, /* S/W wleveling passed 1 marginal setting */ |
| 5218 | }; |
| 5219 | |
| 5220 | static u64 rank_addr __section(".data"); |
| 5221 | static int vref_val __section(".data"); |
| 5222 | static int final_vref_val __section(".data"); |
| 5223 | static int final_vref_range __section(".data"); |
| 5224 | static int start_vref_val __section(".data"); |
| 5225 | static int computed_final_vref_val __section(".data"); |
| 5226 | static char best_vref_val_count __section(".data"); |
| 5227 | static char vref_val_count __section(".data"); |
| 5228 | static char best_vref_val_start __section(".data"); |
| 5229 | static char vref_val_start __section(".data"); |
| 5230 | static int bytes_failed __section(".data"); |
| 5231 | static enum sw_wl_status byte_test_status[9] __section(".data"); |
| 5232 | static enum sw_wl_status sw_wl_rank_status __section(".data"); |
| 5233 | static int sw_wl_failed __section(".data"); |
| 5234 | static int sw_wl_hw __section(".data"); |
| 5235 | static int measured_vref_flag __section(".data"); |
| 5236 | |
| 5237 | static void ddr4_vref_loop(struct ddr_priv *priv, int rankx) |
| 5238 | { |
| 5239 | char *s; |
| 5240 | |
| 5241 | if (vref_val < VREF_FINAL) { |
| 5242 | int vrange, vvalue; |
| 5243 | |
| 5244 | if (vref_val < VREF_RANGE2_LIMIT) { |
| 5245 | vrange = 1; |
| 5246 | vvalue = vref_val; |
| 5247 | } else { |
| 5248 | vrange = 0; |
| 5249 | vvalue = vref_val - VREF_RANGE2_LIMIT; |
| 5250 | } |
| 5251 | |
| 5252 | set_vref(priv, if_num, rankx, vrange, vvalue); |
| 5253 | } else { /* if (vref_val < VREF_FINAL) */ |
| 5254 | /* Print the final vref value first. */ |
| 5255 | |
| 5256 | /* Always print the computed first if it's valid */ |
| 5257 | if (computed_final_vref_val >= 0) { |
| 5258 | debug("N%d.LMC%d.R%d: vref Computed Summary : %2d (0x%02x)\n", |
| 5259 | node, if_num, rankx, |
| 5260 | computed_final_vref_val, computed_final_vref_val); |
| 5261 | } |
| 5262 | |
| 5263 | if (!measured_vref_flag) { // setup to use the computed |
| 5264 | best_vref_val_count = 1; |
| 5265 | final_vref_val = computed_final_vref_val; |
| 5266 | } else { // setup to use the measured |
| 5267 | if (best_vref_val_count > 0) { |
| 5268 | best_vref_val_count = |
| 5269 | max(best_vref_val_count, (char)2); |
| 5270 | final_vref_val = best_vref_val_start + |
| 5271 | divide_nint(best_vref_val_count - 1, 2); |
| 5272 | |
| 5273 | if (final_vref_val < VREF_RANGE2_LIMIT) { |
| 5274 | final_vref_range = 1; |
| 5275 | } else { |
| 5276 | final_vref_range = 0; |
| 5277 | final_vref_val -= VREF_RANGE2_LIMIT; |
| 5278 | } |
| 5279 | |
| 5280 | int vvlo = best_vref_val_start; |
| 5281 | int vrlo; |
| 5282 | int vvhi = best_vref_val_start + |
| 5283 | best_vref_val_count - 1; |
| 5284 | int vrhi; |
| 5285 | |
| 5286 | if (vvlo < VREF_RANGE2_LIMIT) { |
| 5287 | vrlo = 2; |
| 5288 | } else { |
| 5289 | vrlo = 1; |
| 5290 | vvlo -= VREF_RANGE2_LIMIT; |
| 5291 | } |
| 5292 | |
| 5293 | if (vvhi < VREF_RANGE2_LIMIT) { |
| 5294 | vrhi = 2; |
| 5295 | } else { |
| 5296 | vrhi = 1; |
| 5297 | vvhi -= VREF_RANGE2_LIMIT; |
| 5298 | } |
| 5299 | debug("N%d.LMC%d.R%d: vref Training Summary : 0x%02x/%1d <----- 0x%02x/%1d -----> 0x%02x/%1d, range: %2d\n", |
| 5300 | node, if_num, rankx, vvlo, vrlo, |
| 5301 | final_vref_val, |
| 5302 | final_vref_range + 1, vvhi, vrhi, |
| 5303 | best_vref_val_count - 1); |
| 5304 | |
| 5305 | } else { |
| 5306 | /* |
| 5307 | * If nothing passed, use the default vref |
| 5308 | * value for this rank |
| 5309 | */ |
| 5310 | union cvmx_lmcx_modereg_params2 mp2; |
| 5311 | |
| 5312 | mp2.u64 = |
| 5313 | lmc_rd(priv, |
| 5314 | CVMX_LMCX_MODEREG_PARAMS2(if_num)); |
| 5315 | final_vref_val = (mp2.u64 >> |
| 5316 | (rankx * 10 + 3)) & 0x3f; |
| 5317 | final_vref_range = (mp2.u64 >> |
| 5318 | (rankx * 10 + 9)) & 0x01; |
| 5319 | |
| 5320 | debug("N%d.LMC%d.R%d: vref Using Default : %2d <----- %2d (0x%02x) -----> %2d, range%1d\n", |
| 5321 | node, if_num, rankx, final_vref_val, |
| 5322 | final_vref_val, final_vref_val, |
| 5323 | final_vref_val, final_vref_range + 1); |
| 5324 | } |
| 5325 | } |
| 5326 | |
| 5327 | // allow override |
| 5328 | s = lookup_env(priv, "ddr%d_vref_val_%1d%1d", |
| 5329 | if_num, !!(rankx & 2), !!(rankx & 1)); |
| 5330 | if (s) |
| 5331 | final_vref_val = strtoul(s, NULL, 0); |
| 5332 | |
| 5333 | set_vref(priv, if_num, rankx, final_vref_range, final_vref_val); |
| 5334 | } |
| 5335 | } |
| 5336 | |
| 5337 | #define WL_MIN_NO_ERRORS_COUNT 3 // FIXME? three passes without errors |
| 5338 | |
| 5339 | static int errors __section(".data"); |
| 5340 | static int byte_delay[9] __section(".data"); |
| 5341 | static u64 bytemask __section(".data"); |
| 5342 | static int bytes_todo __section(".data"); |
| 5343 | static int no_errors_count __section(".data"); |
| 5344 | static u64 bad_bits[2] __section(".data"); |
| 5345 | static u64 sum_dram_dclk __section(".data"); |
| 5346 | static u64 sum_dram_ops __section(".data"); |
| 5347 | static u64 start_dram_dclk __section(".data"); |
| 5348 | static u64 stop_dram_dclk __section(".data"); |
| 5349 | static u64 start_dram_ops __section(".data"); |
| 5350 | static u64 stop_dram_ops __section(".data"); |
| 5351 | |
| 5352 | static void lmc_sw_write_leveling_loop(struct ddr_priv *priv, int rankx) |
| 5353 | { |
| 5354 | int delay; |
| 5355 | int b; |
| 5356 | |
| 5357 | // write the current set of WL delays |
| 5358 | lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), wl_rank.u64); |
| 5359 | wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num)); |
| 5360 | |
| 5361 | // do the test |
| 5362 | if (sw_wl_hw) { |
| 5363 | errors = run_best_hw_patterns(priv, if_num, rank_addr, |
| 5364 | DBTRAIN_TEST, bad_bits); |
| 5365 | errors &= bytes_todo; // keep only the ones we are still doing |
| 5366 | } else { |
| 5367 | start_dram_dclk = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num)); |
| 5368 | start_dram_ops = lmc_rd(priv, CVMX_LMCX_OPS_CNT(if_num)); |
| 5369 | errors = test_dram_byte64(priv, if_num, rank_addr, bytemask, |
| 5370 | bad_bits); |
| 5371 | |
| 5372 | stop_dram_dclk = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num)); |
| 5373 | stop_dram_ops = lmc_rd(priv, CVMX_LMCX_OPS_CNT(if_num)); |
| 5374 | sum_dram_dclk += stop_dram_dclk - start_dram_dclk; |
| 5375 | sum_dram_ops += stop_dram_ops - start_dram_ops; |
| 5376 | } |
| 5377 | |
| 5378 | debug("WL pass1: test_dram_byte returned 0x%x\n", errors); |
| 5379 | |
| 5380 | // remember, errors will not be returned for byte-lanes that have |
| 5381 | // maxed out... |
| 5382 | if (errors == 0) { |
| 5383 | no_errors_count++; // bump |
| 5384 | // bypass check/update completely |
| 5385 | if (no_errors_count > 1) |
| 5386 | return; // to end of do-while |
| 5387 | } else { |
| 5388 | no_errors_count = 0; // reset |
| 5389 | } |
| 5390 | |
| 5391 | // check errors by byte |
| 5392 | for (b = 0; b < 9; ++b) { |
| 5393 | if (!(bytes_todo & (1 << b))) |
| 5394 | continue; |
| 5395 | |
| 5396 | delay = byte_delay[b]; |
| 5397 | // yes, an error in this byte lane |
| 5398 | if (errors & (1 << b)) { |
| 5399 | debug(" byte %d delay %2d Errors\n", b, delay); |
| 5400 | // since this byte had an error, we move to the next |
| 5401 | // delay value, unless done with it |
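| | // (the WLEVEL byte fields are 5 bits: <2:0> came from HW WL |
| | // within one CK, <4:3> count whole CKs - so this pass only |
| | // bumps the whole-CK part, +8 per step, up to 32) |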
| 5402 | delay += 8; // incr by 8 to do delay high-order bits |
| 5403 | if (delay < 32) { |
| 5404 | upd_wl_rank(&wl_rank, b, delay); |
| 5405 | debug(" byte %d delay %2d New\n", |
| 5406 | b, delay); |
| 5407 | byte_delay[b] = delay; |
| 5408 | } else { |
| 5409 | // reached max delay, maybe really done with |
| 5410 | // this byte |
| 5411 | // consider an alt only for computed VREF and |
| 5412 | if (!measured_vref_flag && |
| 5413 | (hwl_alts[rankx].hwl_alt_mask & (1 << b))) { |
| 5414 | // if an alt exists... |
| 5415 | // just orig low-3 bits |
| 5416 | int bad_delay = delay & 0x6; |
| 5417 | |
| 5418 | // yes, use it |
| 5419 | delay = hwl_alts[rankx].hwl_alt_delay[b]; |
| 5420 | // clear that flag |
| 5421 | hwl_alts[rankx].hwl_alt_mask &= |
| 5422 | ~(1 << b); |
| 5423 | upd_wl_rank(&wl_rank, b, delay); |
| 5424 | byte_delay[b] = delay; |
| 5425 | debug(" byte %d delay %2d ALTERNATE\n", |
| 5426 | b, delay); |
| 5427 | debug("N%d.LMC%d.R%d: SWL: Byte %d: %d FAIL, trying ALTERNATE %d\n", |
| 5428 | node, if_num, |
| 5429 | rankx, b, bad_delay, delay); |
| 5430 | |
| 5431 | } else { |
| 5432 | unsigned int bits_bad; |
| 5433 | |
| 5434 | if (b < 8) { |
| 5435 | // test no longer, remove from |
| 5436 | // byte mask |
| 5437 | bytemask &= |
| 5438 | ~(0xffULL << (8 * b)); |
| 5439 | bits_bad = (unsigned int) |
| 5440 | ((bad_bits[0] >> |
| 5441 | (8 * b)) & 0xffUL); |
| 5442 | } else { |
| 5443 | bits_bad = (unsigned int) |
| 5444 | (bad_bits[1] & 0xffUL); |
| 5445 | } |
| 5446 | |
| 5447 | // remove from bytes to do |
| 5448 | bytes_todo &= ~(1 << b); |
| 5449 | // make sure this is set for this case |
| 5450 | byte_test_status[b] = WL_ESTIMATED; |
| 5451 | debug(" byte %d delay %2d Exhausted\n", |
| 5452 | b, delay); |
| 5453 | if (!measured_vref_flag) { |
| 5454 | // this is too noisy when doing |
| 5455 | // measured VREF |
| 5456 | debug("N%d.LMC%d.R%d: SWL: Byte %d (0x%02x): delay %d EXHAUSTED\n", |
| 5457 | node, if_num, rankx, |
| 5458 | b, bits_bad, delay); |
| 5459 | } |
| 5460 | } |
| 5461 | } |
| 5462 | } else { |
| 5463 | // no error, stay with current delay, but keep testing |
| 5464 | // it... |
| 5465 | debug(" byte %d delay %2d Passed\n", b, delay); |
| 5466 | byte_test_status[b] = WL_HARDWARE; // change status |
| 5467 | } |
| 5468 | } /* for (b = 0; b < 9; ++b) */ |
| 5469 | } |
| 5470 | |
| 5471 | static void sw_write_lvl_use_ecc(struct ddr_priv *priv, int rankx) |
| 5472 | { |
| 5473 | int save_byte8 = wl_rank.s.byte8; |
| 5474 | |
| 5475 | byte_test_status[8] = WL_HARDWARE; /* H/W delay value */ |
| 5476 | |
| 5477 | if (save_byte8 != wl_rank.s.byte3 && |
| 5478 | save_byte8 != wl_rank.s.byte4) { |
| 5479 | int test_byte8 = save_byte8; |
| 5480 | int test_byte8_error; |
| 5481 | int byte8_error = 0x1f; |
| 5482 | int adder; |
| 5483 | int avg_bytes = divide_nint(wl_rank.s.byte3 + wl_rank.s.byte4, |
| 5484 | 2); |
| 5485 | |
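| | /* |
| | * The HW result for the ECC lane is only meaningful modulo one CK |
| | * (8 taps); try each whole-CK offset and keep the candidate that |
| | * lands closest to the byte3/byte4 average, on the assumption that |
| | * the ECC lane's true delay lies between those two neighbours. |
| | */ |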
| 5486 | for (adder = 0; adder <= 32; adder += 8) { |
| 5487 | test_byte8_error = abs((adder + save_byte8) - |
| 5488 | avg_bytes); |
| 5489 | if (test_byte8_error < byte8_error) { |
| 5490 | byte8_error = test_byte8_error; |
| 5491 | test_byte8 = save_byte8 + adder; |
| 5492 | } |
| 5493 | } |
| 5494 | |
| 5495 | // only do the check if we are not using measured VREF |
| 5496 | if (!measured_vref_flag) { |
| 5497 | /* Use only even settings, rounding down... */ |
| 5498 | test_byte8 &= ~1; |
| 5499 | |
| 5500 | // do validity check on the calculated ECC delay value |
| 5501 | // this depends on the DIMM type |
| 5502 | if (spd_rdimm) { // RDIMM |
| 5503 | // but not mini-RDIMM |
| 5504 | if (spd_dimm_type != 5) { |
| 5505 | // it can be > byte4, but should never |
| 5506 | // be > byte3 |
| 5507 | if (test_byte8 > wl_rank.s.byte3) { |
| 5508 | /* say it is still estimated */ |
| 5509 | byte_test_status[8] = |
| 5510 | WL_ESTIMATED; |
| 5511 | } |
| 5512 | } |
| 5513 | } else { // UDIMM |
| 5514 | if (test_byte8 < wl_rank.s.byte3 || |
| 5515 | test_byte8 > wl_rank.s.byte4) { |
| 5516 | // should never be outside the |
| 5517 | // byte 3-4 range |
| 5518 | /* say it is still estimated */ |
| 5519 | byte_test_status[8] = WL_ESTIMATED; |
| 5520 | } |
| 5521 | } |
| 5522 | /* |
| 5523 | * Report whenever the calculation appears bad. |
| 5524 | * This happens if some of the original values were off, |
| 5525 | * or unexpected geometry from DIMM type, or custom |
| 5526 | * circuitry (NIC225E, I am looking at you!). |
| 5527 | * We will trust the calculated value, and depend on |
| 5528 | * later testing to catch any instances when that |
| 5529 | * value is truly bad. |
| 5530 | */ |
| 5531 | // ESTIMATED means there may be an issue |
| 5532 | if (byte_test_status[8] == WL_ESTIMATED) { |
| 5533 | debug("N%d.LMC%d.R%d: SWL: (%cDIMM): calculated ECC delay unexpected (%d/%d/%d)\n", |
| 5534 | node, if_num, rankx, |
| 5535 | (spd_rdimm ? 'R' : 'U'), wl_rank.s.byte4, |
| 5536 | test_byte8, wl_rank.s.byte3); |
| 5537 | byte_test_status[8] = WL_HARDWARE; |
| 5538 | } |
| 5539 | } |
| 5540 | /* Use only even settings */ |
| 5541 | wl_rank.s.byte8 = test_byte8 & ~1; |
| 5542 | } |
| 5543 | |
| 5544 | if (wl_rank.s.byte8 != save_byte8) { |
| 5545 | /* Change the status if s/w adjusted the delay */ |
| 5546 | byte_test_status[8] = WL_SOFTWARE; /* Estimated delay */ |
| 5547 | } |
| 5548 | } |
| 5549 | |
| 5550 | static __maybe_unused void parallel_wl_block_delay(struct ddr_priv *priv, |
| 5551 | int rankx) |
| 5552 | { |
| 5553 | int errors; |
| 5554 | int byte_delay[8]; |
| 5555 | int byte_passed[8]; |
| 5556 | u64 bytemask; |
| 5557 | u64 bitmask; |
| 5558 | int wl_offset; |
| 5559 | int bytes_todo; |
| 5560 | int sw_wl_offset = 1; |
| 5561 | int delay; |
| 5562 | int b; |
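| | // sw_wl_offset = 1 demands two consecutive passing delay settings |
| | // (WL_SOFTWARE); the wl_offset = 0 fallback pass accepts a single |
| | // marginal setting (WL_SOFTWARE1) |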
| 5563 | |
| 5564 | for (b = 0; b < 8; ++b) |
| 5565 | byte_passed[b] = 0; |
| 5566 | |
| 5567 | bytes_todo = if_bytemask; |
| 5568 | |
| 5569 | for (wl_offset = sw_wl_offset; wl_offset >= 0; --wl_offset) { |
| 5570 | debug("Starting wl_offset for-loop: %d\n", wl_offset); |
| 5571 | |
| 5572 | bytemask = 0; |
| 5573 | |
| 5574 | for (b = 0; b < 8; ++b) { |
| 5575 | byte_delay[b] = 0; |
| 5576 | // this does not contain fully passed bytes |
| 5577 | if (!(bytes_todo & (1 << b))) |
| 5578 | continue; |
| 5579 | |
| 5580 | // reset across passes if not fully passed |
| 5581 | byte_passed[b] = 0; |
| 5582 | upd_wl_rank(&wl_rank, b, 0); // all delays start at 0 |
| 5583 | bitmask = ((!if_64b) && (b == 4)) ? 0x0f : 0xff; |
| 5584 | // set the bytes bits in the bytemask |
| 5585 | bytemask |= bitmask << (8 * b); |
| 5586 | } /* for (b = 0; b < 8; ++b) */ |
| 5587 | |
| 5588 | // start a pass if there is any byte lane to test |
| 5589 | while (bytemask != 0) { |
| 5590 | debug("Starting bytemask while-loop: 0x%llx\n", |
| 5591 | bytemask); |
| 5592 | |
| 5593 | // write this set of WL delays |
| 5594 | lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), |
| 5595 | wl_rank.u64); |
| 5596 | wl_rank.u64 = lmc_rd(priv, |
| 5597 | CVMX_LMCX_WLEVEL_RANKX(rankx, |
| 5598 | if_num)); |
| 5599 | |
| 5600 | // do the test |
| 5601 | if (sw_wl_hw) { |
| 5602 | errors = run_best_hw_patterns(priv, if_num, |
| 5603 | rank_addr, |
| 5604 | DBTRAIN_TEST, |
| 5605 | NULL) & 0xff; |
| 5606 | } else { |
| 5607 | errors = test_dram_byte64(priv, if_num, |
| 5608 | rank_addr, bytemask, |
| 5609 | NULL); |
| 5610 | } |
| 5611 | |
| 5612 | debug("test_dram_byte returned 0x%x\n", errors); |
| 5613 | |
| 5614 | // check errors by byte |
| 5615 | for (b = 0; b < 8; ++b) { |
| 5616 | if (!(bytes_todo & (1 << b))) |
| 5617 | continue; |
| 5618 | |
| 5619 | delay = byte_delay[b]; |
| 5620 | if (errors & (1 << b)) { // yes, an error |
| 5621 | debug(" byte %d delay %2d Errors\n", |
| 5622 | b, delay); |
| 5623 | byte_passed[b] = 0; |
| 5624 | } else { // no error |
| 5625 | byte_passed[b] += 1; |
| 5626 | // Look for consecutive working settings |
| 5627 | if (byte_passed[b] == (1 + wl_offset)) { |
| 5628 | debug(" byte %d delay %2d FULLY Passed\n", |
| 5629 | b, delay); |
| 5630 | if (wl_offset == 1) { |
| 5631 | byte_test_status[b] = |
| 5632 | WL_SOFTWARE; |
| 5633 | } else if (wl_offset == 0) { |
| 5634 | byte_test_status[b] = |
| 5635 | WL_SOFTWARE1; |
| 5636 | } |
| 5637 | |
| 5638 | // test no longer, remove |
| 5639 | // from byte mask this pass |
| 5640 | bytemask &= ~(0xffULL << |
| 5641 | (8 * b)); |
| 5642 | // remove completely from |
| 5643 | // concern |
| 5644 | bytes_todo &= ~(1 << b); |
| 5645 | // on to the next byte, bypass |
| 5646 | // delay updating!! |
| 5647 | continue; |
| 5648 | } else { |
| 5649 | debug(" byte %d delay %2d Passed\n", |
| 5650 | b, delay); |
| 5651 | } |
| 5652 | } |
| 5653 | |
| 5654 | // error or no, here we move to the next delay |
| 5655 | // value for this byte, unless all delays are done; |
| 5656 | // only a byte that has "fully passed" bypasses |
| 5657 | // this (via the continue above) |
| 5658 | delay += 2; |
| 5659 | if (delay < 32) { |
| 5660 | upd_wl_rank(&wl_rank, b, delay); |
| 5661 | debug(" byte %d delay %2d New\n", |
| 5662 | b, delay); |
| 5663 | byte_delay[b] = delay; |
| 5664 | } else { |
| 5665 | // reached max delay, done with this |
| 5666 | // byte |
| 5667 | debug(" byte %d delay %2d Exhausted\n", |
| 5668 | b, delay); |
| 5669 | // test no longer, remove from byte |
| 5670 | // mask this pass |
| 5671 | bytemask &= ~(0xffULL << (8 * b)); |
| 5672 | } |
| 5673 | } /* for (b = 0; b < 8; ++b) */ |
| 5674 | debug("End of for-loop: bytemask 0x%llx\n", bytemask); |
| 5675 | } /* while (bytemask != 0) */ |
| 5676 | } |
| 5677 | |
| 5678 | for (b = 0; b < 8; ++b) { |
| 5679 | // any bytes left in bytes_todo did not pass |
| 5680 | if (bytes_todo & (1 << b)) { |
| 5681 | union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank; |
| 5682 | |
| 5683 | /* |
| 5684 | * Last resort. Use Rlevel settings to estimate |
| 5685 | * Wlevel if software write-leveling fails |
| 5686 | */ |
| 5687 | debug("Using RLEVEL as WLEVEL estimate for byte %d\n", |
| 5688 | b); |
| 5689 | lmc_rlevel_rank.u64 = |
| 5690 | lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, |
| 5691 | if_num)); |
| 5692 | rlevel_to_wlevel(&lmc_rlevel_rank, &wl_rank, b); |
| 5693 | } |
| 5694 | } /* for (b = 0; b < 8; ++b) */ |
| 5695 | } |
| 5696 | |
| 5697 | static int lmc_sw_write_leveling(struct ddr_priv *priv) |
| 5698 | { |
| 5699 | /* Try to determine/optimize write-level delays experimentally. */ |
| 5700 | union cvmx_lmcx_wlevel_rankx wl_rank_hw_res; |
| 5701 | union cvmx_lmcx_config cfg; |
| 5702 | int rankx; |
| 5703 | int byte; |
| 5704 | char *s; |
| 5705 | int i; |
| 5706 | |
| 5707 | int active_rank; |
| 5708 | int sw_wl_enable = 1; /* FIX... Should be customizable. */ |
| 5709 | int interfaces; |
| 5710 | |
| 5711 | static const char * const wl_status_strings[] = { |
| 5712 | "(e)", |
| 5713 | " ", |
| 5714 | " ", |
| 5715 | "(1)" |
| 5716 | }; |
| 5717 | |
| 5718 | // FIXME: make HW-assist the default now? |
| 5719 | int sw_wl_hw_default = SW_WLEVEL_HW_DEFAULT; |
| 5720 | int dram_connection = c_cfg->dram_connection; |
| 5721 | |
| 5722 | s = lookup_env(priv, "ddr_sw_wlevel_hw"); |
| 5723 | if (s) |
| 5724 | sw_wl_hw_default = !!strtoul(s, NULL, 0); |
| 5725 | if (!if_64b) // must use SW algo if 32-bit mode |
| 5726 | sw_wl_hw_default = 0; |
| 5727 | |
| 5728 | // can never use hw-assist |
| 5729 | if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) |
| 5730 | sw_wl_hw_default = 0; |
| 5731 | |
| 5732 | s = lookup_env(priv, "ddr_software_wlevel"); |
| 5733 | if (s) |
| 5734 | sw_wl_enable = strtoul(s, NULL, 0); |
| 5735 | |
| 5736 | s = lookup_env(priv, "ddr%d_dram_connection", if_num); |
| 5737 | if (s) |
| 5738 | dram_connection = !!strtoul(s, NULL, 0); |
| 5739 | |
| 5740 | cvmx_rng_enable(); |
| 5741 | |
| 5742 | /* |
| 5743 | * Get the measured_vref setting from the config, check for an |
| 5744 | * override... |
| 5745 | */ |
| 5746 | /* NOTE: measured_vref=1 (ON) means force use of MEASURED vref... */ |
| 5747 | // NOTE: measured VREF can only be done for DDR4 |
| 5748 | if (ddr_type == DDR4_DRAM) { |
| 5749 | measured_vref_flag = c_cfg->measured_vref; |
| 5750 | s = lookup_env(priv, "ddr_measured_vref"); |
| 5751 | if (s) |
| 5752 | measured_vref_flag = !!strtoul(s, NULL, 0); |
| 5753 | } else { |
| 5754 | measured_vref_flag = 0; // OFF for DDR3 |
| 5755 | } |
| 5756 | |
| 5757 | /* |
| 5758 | * Ensure ECC is disabled for the DRAM tests when using the SW |
| 5759 | * algo; otherwise leave it untouched |
| 5760 | */ |
| 5761 | if (!sw_wl_hw_default) { |
| 5762 | cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); |
| 5763 | cfg.cn78xx.ecc_ena = 0; |
| 5764 | lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64); |
| 5765 | } |
| 5766 | |
| 5767 | /* |
| 5768 | * We need to track absolute rank number, as well as how many |
| 5769 | * active ranks we have. Two single-rank DIMMs show up as |
| 5770 | * ranks 0 and 2, but only 2 ranks are active. |
| 5771 | */ |
| 5772 | active_rank = 0; |
| 5773 | |
| 5774 | interfaces = __builtin_popcount(if_mask); |
| 5775 | |
| 5776 | for (rankx = 0; rankx < dimm_count * 4; rankx++) { |
| 5777 | final_vref_range = 0; |
| 5778 | start_vref_val = 0; |
| 5779 | computed_final_vref_val = -1; |
| 5780 | sw_wl_rank_status = WL_HARDWARE; |
| 5781 | sw_wl_failed = 0; |
| 5782 | sw_wl_hw = sw_wl_hw_default; |
| 5783 | |
| 5784 | if (!sw_wl_enable) |
| 5785 | break; |
| 5786 | |
| 5787 | if (!(rank_mask & (1 << rankx))) |
| 5788 | continue; |
| 5789 | |
| 5790 | debug("N%d.LMC%d.R%d: Performing Software Write-Leveling %s\n", |
| 5791 | node, if_num, rankx, |
| 5792 | (sw_wl_hw) ? "with H/W assist" : |
| 5793 | "with S/W algorithm"); |
| 5794 | |
| 5795 | if (ddr_type == DDR4_DRAM && num_ranks != 4) { |
| 5796 | // always compute when we can... |
| 5797 | computed_final_vref_val = |
| 5798 | compute_vref_val(priv, if_num, rankx, dimm_count, |
| 5799 | num_ranks, imp_val, |
| 5800 | is_stacked_die, dram_connection); |
| 5801 | |
| 5802 | // but only use it if allowed |
| 5803 | if (!measured_vref_flag) { |
| 5804 | // skip all the measured vref processing, |
| 5805 | // just the final setting |
| 5806 | start_vref_val = VREF_FINAL; |
| 5807 | } |
| 5808 | } |
| 5809 | |
| 5810 | /* Save off the h/w wl results */ |
| 5811 | wl_rank_hw_res.u64 = lmc_rd(priv, |
| 5812 | CVMX_LMCX_WLEVEL_RANKX(rankx, |
| 5813 | if_num)); |
| 5814 | |
| 5815 | vref_val_count = 0; |
| 5816 | vref_val_start = 0; |
| 5817 | best_vref_val_count = 0; |
| 5818 | best_vref_val_start = 0; |
| 5819 | |
| 5820 | /* Loop one extra time using the Final vref value. */ |
| 5821 | for (vref_val = start_vref_val; vref_val < VREF_LIMIT; |
| 5822 | ++vref_val) { |
| 5823 | if (ddr_type == DDR4_DRAM) |
| 5824 | ddr4_vref_loop(priv, rankx); |
| 5825 | |
| 5826 | /* Restore the saved value */ |
| 5827 | wl_rank.u64 = wl_rank_hw_res.u64; |
| 5828 | |
| 5829 | for (byte = 0; byte < 9; ++byte) |
| 5830 | byte_test_status[byte] = WL_ESTIMATED; |
| 5831 | |
| 5832 | if (wl_mask_err == 0) { |
| 5833 | /* |
| 5834 | * Determine address of DRAM to test for |
| 5835 | * pass 1 of software write leveling. |
| 5836 | */ |
| 5837 | rank_addr = active_rank * |
| 5838 | (1ull << (pbank_lsb - bunk_enable + |
| 5839 | (interfaces / 2))); |
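| | // (i.e. step by one rank's worth of address space, |
| | // 2^(pbank_lsb - bunk_enable), widened by interfaces/2 |
| | // bits when several LMCs interleave - an interpretation, |
| | // not from the original comments) |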
| 5840 | |
| 5841 | /* |
| 5842 | * Adjust address for boot bus hole in memory |
| 5843 | * map. |
| 5844 | */ |
| 5845 | if (rank_addr > 0x10000000) |
| 5846 | rank_addr += 0x10000000; |
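| | // (the 256 MB region at 0x10000000 is taken by the boot |
| | // bus, so DRAM beyond the first 256 MB sits 256 MB higher) |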
| 5847 | |
| 5848 | debug("N%d.LMC%d.R%d: Active Rank %d Address: 0x%llx\n", |
| 5849 | node, if_num, rankx, active_rank, |
| 5850 | rank_addr); |
| 5851 | |
| 5852 | // start parallel write-leveling block for |
| 5853 | // delay high-order bits |
| 5854 | errors = 0; |
| 5855 | no_errors_count = 0; |
| 5856 | sum_dram_dclk = 0; |
| 5857 | sum_dram_ops = 0; |
| 5858 | |
| 5859 | if (if_64b) { |
| 5860 | bytes_todo = (sw_wl_hw) ? |
| 5861 | if_bytemask : 0xFF; |
| 5862 | bytemask = ~0ULL; |
| 5863 | } else { |
| 5864 | // 32-bit, must be using SW algo, |
| 5865 | // only data bytes |
| 5866 | bytes_todo = 0x0f; |
| 5867 | bytemask = 0x00000000ffffffffULL; |
| 5868 | } |
| 5869 | |
| 5870 | for (byte = 0; byte < 9; ++byte) { |
| 5871 | if (!(bytes_todo & (1 << byte))) { |
| 5872 | byte_delay[byte] = 0; |
| 5873 | } else { |
| 5874 | byte_delay[byte] = |
| 5875 | get_wl_rank(&wl_rank, byte); |
| 5876 | } |
| 5877 | } /* for (byte = 0; byte < 9; ++byte) */ |
| 5878 | |
| 5879 | do { |
| 5880 | lmc_sw_write_leveling_loop(priv, rankx); |
| 5881 | } while (no_errors_count < |
| 5882 | WL_MIN_NO_ERRORS_COUNT); |
| 5883 | |
| 5884 | if (!sw_wl_hw) { |
| 5885 | u64 percent_x10; |
| 5886 | |
| 5887 | if (sum_dram_dclk == 0) |
| 5888 | sum_dram_dclk = 1; |
| 5889 | percent_x10 = sum_dram_ops * 1000 / |
| 5890 | sum_dram_dclk; |
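| | // bus utilization over the test window, in tenths of a |
| | // percent (ops per dclk cycle * 1000) |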
| 5891 | debug("N%d.LMC%d.R%d: ops %llu, cycles %llu, used %llu.%llu%%\n", |
| 5892 | node, if_num, rankx, sum_dram_ops, |
| 5893 | sum_dram_dclk, percent_x10 / 10, |
| 5894 | percent_x10 % 10); |
| 5895 | } |
| 5896 | if (errors) { |
| 5897 | debug("End WLEV_64 while loop: vref_val %d(0x%x), errors 0x%02x\n", |
| 5898 | vref_val, vref_val, errors); |
| 5899 | } |
| 5900 | // end parallel write-leveling block for |
| 5901 | // delay high-order bits |
| 5902 | |
| 5903 | // if we used HW-assist, we did the ECC byte |
| 5904 | // when appropriate |
| 5905 | if (sw_wl_hw) { |
| 5906 | if (wl_print) { |
| 5907 | debug("N%d.LMC%d.R%d: HW-assisted SWL - ECC estimate not needed.\n", |
| 5908 | node, if_num, rankx); |
| 5909 | } |
| 5910 | goto no_ecc_estimate; |
| 5911 | } |
| 5912 | |
| 5913 | if ((if_bytemask & 0xff) == 0xff) { |
| 5914 | if (use_ecc) { |
| 5915 | sw_write_lvl_use_ecc(priv, |
| 5916 | rankx); |
| 5917 | } else { |
| 5918 | /* H/W delay value */ |
| 5919 | byte_test_status[8] = |
| 5920 | WL_HARDWARE; |
| 5921 | /* ECC is not used */ |
| 5922 | wl_rank.s.byte8 = |
| 5923 | wl_rank.s.byte0; |
| 5924 | } |
| 5925 | } else { |
| 5926 | if (use_ecc) { |
| 5927 | /* Estimate the ECC byte dly */ |
| 5928 | // add hi-order to b4 |
| 5929 | wl_rank.s.byte4 |= |
| 5930 | (wl_rank.s.byte3 & |
| 5931 | 0x38); |
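| | // if byte4's intra-CK offset ended up below |
| | // byte3's, the estimate wrapped into the next |
| | // CK, so add one full CK (8) |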
| 5932 | if ((wl_rank.s.byte4 & 0x06) < |
| 5933 | (wl_rank.s.byte3 & 0x06)) { |
| 5934 | // must be next clock |
| 5935 | wl_rank.s.byte4 += 8; |
| 5936 | } |
| 5937 | } else { |
| 5938 | /* ECC is not used */ |
| 5939 | wl_rank.s.byte4 = |
| 5940 | wl_rank.s.byte0; |
| 5941 | } |
| 5942 | |
| 5943 | /* |
| 5944 | * Change the status if s/w adjusted |
| 5945 | * the delay |
| 5946 | */ |
| 5947 | /* Estimated delay */ |
| 5948 | byte_test_status[4] = WL_SOFTWARE; |
| 5949 | } /* if ((if_bytemask & 0xff) == 0xff) */ |
| 5950 | } /* if (wl_mask_err == 0) */ |
| 5951 | |
| 5952 | no_ecc_estimate: |
| 5953 | |
| 5954 | bytes_failed = 0; |
| 5955 | for (byte = 0; byte < 9; ++byte) { |
| 5956 | /* Don't accumulate errors for untested bytes */ |
| 5957 | if (!(if_bytemask & (1 << byte))) |
| 5958 | continue; |
| 5959 | bytes_failed += |
| 5960 | (byte_test_status[byte] == WL_ESTIMATED); |
| 5961 | } |
| 5962 | |
| 5963 | /* vref training loop is only used for DDR4 */ |
| 5964 | if (ddr_type != DDR4_DRAM) |
| 5965 | break; |
| 5966 | |
| 5967 | if (bytes_failed == 0) { |
| 5968 | if (vref_val_count == 0) |
| 5969 | vref_val_start = vref_val; |
| 5970 | |
| 5971 | ++vref_val_count; |
| 5972 | if (vref_val_count > best_vref_val_count) { |
| 5973 | best_vref_val_count = vref_val_count; |
| 5974 | best_vref_val_start = vref_val_start; |
| 5975 | debug("N%d.LMC%d.R%d: vref Training (%2d) : 0x%02x <----- ???? -----> 0x%02x\n", |
| 5976 | node, if_num, rankx, vref_val, |
| 5977 | best_vref_val_start, |
| 5978 | best_vref_val_start + |
| 5979 | best_vref_val_count - 1); |
| 5980 | } |
| 5981 | } else { |
| 5982 | vref_val_count = 0; |
| 5983 | debug("N%d.LMC%d.R%d: vref Training (%2d) : failed\n", |
| 5984 | node, if_num, rankx, vref_val); |
| 5985 | } |
| 5986 | } |
| 5987 | |
| 5988 | /* |
| 5989 | * Determine address of DRAM to test for software write |
| 5990 | * leveling. |
| 5991 | */ |
| 5992 | rank_addr = active_rank * (1ull << (pbank_lsb - bunk_enable + |
| 5993 | (interfaces / 2))); |
| 5994 | /* Adjust address for boot bus hole in memory map. */ |
| 5995 | if (rank_addr > 0x10000000) |
| 5996 | rank_addr += 0x10000000; |
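/*
 * Worked example (hypothetical configuration values): with
 * pbank_lsb = 33, bunk_enable = 1 and interfaces = 1, the shift is
 * 33 - 1 + 0 = 32, i.e. 4 GiB per rank; active_rank = 1 then gives
 * rank_addr = 0x1_0000_0000, which is above the 256 MiB boot bus
 * hole, so the adjustment above moves it to 0x1_1000_0000.
 */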
| 5997 | |
| 5998 | debug("Rank Address: 0x%llx\n", rank_addr); |
| 5999 | |
| 6000 | if (bytes_failed) { |
| 6001 | // FIXME? the big hammer, did not even try SW WL pass2, |
| 6002 | // assume only chip reset will help |
| 6003 | debug("N%d.LMC%d.R%d: S/W write-leveling pass 1 failed\n", |
| 6004 | node, if_num, rankx); |
| 6005 | sw_wl_failed = 1; |
| 6006 | } else { /* if (bytes_failed) */ |
| 6007 | // SW WL pass 1 was OK, write the settings |
| 6008 | lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), |
| 6009 | wl_rank.u64); |
| 6010 | wl_rank.u64 = lmc_rd(priv, |
| 6011 | CVMX_LMCX_WLEVEL_RANKX(rankx, |
| 6012 | if_num)); |
| 6013 | |
| 6014 | // do validity check on the delay values by running |
| 6015 | // the test 1 more time... |
| 6016 | // FIXME: we really need to check the ECC byte setting |
| 6017 | // here as well, so we need to enable ECC for this test! |
| 6018 | // if there are any errors, claim SW WL failure |
| 6019 | u64 datamask = (if_64b) ? 0xffffffffffffffffULL : |
| 6020 | 0x00000000ffffffffULL; |
| 6021 | int errors; |
| 6022 | |
| 6023 | // do the test |
| 6024 | if (sw_wl_hw) { |
| 6025 | errors = run_best_hw_patterns(priv, if_num, |
| 6026 | rank_addr, |
| 6027 | DBTRAIN_TEST, |
| 6028 | NULL) & 0xff; |
| 6029 | } else { |
| 6030 | errors = test_dram_byte64(priv, if_num, |
| 6031 | rank_addr, datamask, |
| 6032 | NULL); |
| 6033 | } |
| 6034 | |
| 6035 | if (errors) { |
| 6036 | debug("N%d.LMC%d.R%d: Wlevel Rank Final Test errors 0x%03x\n", |
| 6037 | node, if_num, rankx, errors); |
| 6038 | sw_wl_failed = 1; |
| 6039 | } |
| 6040 | } /* if (bytes_failed) */ |
| 6041 | |
| 6042 | // FIXME? dump the WL settings, so we get more of a clue |
| 6043 | // as to what happened where |
| 6044 | debug("N%d.LMC%d.R%d: Wlevel Rank %#4x, 0x%016llX : %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %s\n", |
| 6045 | node, if_num, rankx, wl_rank.s.status, wl_rank.u64, |
| 6046 | wl_rank.s.byte8, wl_status_strings[byte_test_status[8]], |
| 6047 | wl_rank.s.byte7, wl_status_strings[byte_test_status[7]], |
| 6048 | wl_rank.s.byte6, wl_status_strings[byte_test_status[6]], |
| 6049 | wl_rank.s.byte5, wl_status_strings[byte_test_status[5]], |
| 6050 | wl_rank.s.byte4, wl_status_strings[byte_test_status[4]], |
| 6051 | wl_rank.s.byte3, wl_status_strings[byte_test_status[3]], |
| 6052 | wl_rank.s.byte2, wl_status_strings[byte_test_status[2]], |
| 6053 | wl_rank.s.byte1, wl_status_strings[byte_test_status[1]], |
| 6054 | wl_rank.s.byte0, wl_status_strings[byte_test_status[0]], |
| 6055 | (sw_wl_rank_status == WL_HARDWARE) ? "" : "(s)"); |
| 6056 | |
| 6057 | // finally, check for fatal conditions: either chip reset |
| 6058 | // right here, or return error flag |
| 6059 | if ((ddr_type == DDR4_DRAM && best_vref_val_count == 0) || |
| 6060 | sw_wl_failed) { |
| 6061 | if (!ddr_disable_chip_reset) { // do chip RESET |
| 6062 | printf("N%d.LMC%d.R%d: INFO: Short memory test indicates a retry is needed. Resetting node...\n", |
| 6063 | node, if_num, rankx); |
| 6064 | mdelay(500); |
| 6065 | do_reset(NULL, 0, 0, NULL); |
| 6066 | } else { |
| 6067 | // return error flag so LMC init can be retried. |
| 6068 | debug("N%d.LMC%d.R%d: INFO: Short memory test indicates a retry is needed. Restarting LMC init...\n", |
| 6069 | node, if_num, rankx); |
| 6070 | return -EAGAIN; // error flag: LMC init can be retried |
| 6071 | } |
| 6072 | } |
| 6073 | active_rank++; |
| 6074 | } |
| 6075 | |
| 6076 | for (rankx = 0; rankx < dimm_count * 4; rankx++) { |
| 6077 | int parameter_set = 0; |
| 6078 | u64 value; |
| 6079 | |
| 6080 | if (!(rank_mask & (1 << rankx))) |
| 6081 | continue; |
| 6082 | |
| 6083 | wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, |
| 6084 | if_num)); |
| 6085 | |
| 6086 | for (i = 0; i < 9; ++i) { |
| 6087 | s = lookup_env(priv, "ddr%d_wlevel_rank%d_byte%d", |
| 6088 | if_num, rankx, i); |
| 6089 | if (s) { |
| 6090 | parameter_set |= 1; |
| 6091 | value = strtoul(s, NULL, 0); |
| 6092 | |
| 6093 | upd_wl_rank(&wl_rank, i, value); |
| 6094 | } |
| 6095 | } |
| 6096 | |
| 6097 | s = lookup_env_ull(priv, "ddr%d_wlevel_rank%d", if_num, rankx); |
| 6098 | if (s) { |
| 6099 | parameter_set |= 1; |
| 6100 | value = strtoull(s, NULL, 0); |
| 6101 | wl_rank.u64 = value; |
| 6102 | } |
| 6103 | |
| 6104 | if (parameter_set) { |
| 6105 | lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), |
| 6106 | wl_rank.u64); |
| 6107 | wl_rank.u64 = |
| 6108 | lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num)); |
| 6109 | display_wl(if_num, wl_rank, rankx); |
| 6110 | } |
| 6111 | // if there are unused entries to be filled |
| 6112 | if ((rank_mask & 0x0F) != 0x0F) { |
| 6113 | if (rankx < 3) { |
| 6114 | debug("N%d.LMC%d.R%d: checking for WLEVEL_RANK unused entries.\n", |
| 6115 | node, if_num, rankx); |
| 6116 | |
| 6117 | // if rank 0, write ranks 1 and 2 here if empty |
| 6118 | if (rankx == 0) { |
| 6119 | // check that rank 1 is empty |
| 6120 | if (!(rank_mask & (1 << 1))) { |
| 6121 | debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n", |
| 6122 | node, if_num, rankx, 1); |
| 6123 | lmc_wr(priv, |
| 6124 | CVMX_LMCX_WLEVEL_RANKX(1, |
| 6125 | if_num), |
| 6126 | wl_rank.u64); |
| 6127 | } |
| 6128 | |
| 6129 | // check that rank 2 is empty |
| 6130 | if (!(rank_mask & (1 << 2))) { |
| 6131 | debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n", |
| 6132 | node, if_num, rankx, 2); |
| 6133 | lmc_wr(priv, |
| 6134 | CVMX_LMCX_WLEVEL_RANKX(2, |
| 6135 | if_num), |
| 6136 | wl_rank.u64); |
| 6137 | } |
| 6138 | } |
| 6139 | |
| 6140 | // if rank 0, 1 or 2, write rank 3 here if empty |
| 6141 | // check that rank 3 is empty |
| 6142 | if (!(rank_mask & (1 << 3))) { |
| 6143 | debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n", |
| 6144 | node, if_num, rankx, 3); |
| 6145 | lmc_wr(priv, |
| 6146 | CVMX_LMCX_WLEVEL_RANKX(3, |
| 6147 | if_num), |
| 6148 | wl_rank.u64); |
| 6149 | } |
| 6150 | } |
| 6151 | } |
| 6152 | } |
| 6153 | |
| 6154 | /* Enable 32-bit mode if required. */ |
| 6155 | cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); |
| 6156 | cfg.cn78xx.mode32b = (!if_64b); |
| 6157 | debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b); |
| 6158 | |
| 6159 | /* Restore the ECC configuration */ |
| 6160 | if (!sw_wl_hw_default) |
| 6161 | cfg.cn78xx.ecc_ena = use_ecc; |
| 6162 | |
| 6163 | lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64); |
| 6164 | |
| 6165 | return 0; |
| 6166 | } |
| 6167 | |
| 6168 | static void lmc_dll(struct ddr_priv *priv) |
| 6169 | { |
| 6170 | union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3; |
| 6171 | int setting[9]; |
| 6172 | int i; |
| 6173 | |
| 6174 | ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); |
| 6175 | |
| 6176 | for (i = 0; i < 9; ++i) { |
| 6177 | SET_DDR_DLL_CTL3(dll90_byte_sel, ENCODE_DLL90_BYTE_SEL(i)); |
| 6178 | lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64); |
| 6179 | lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); |
| 6180 | ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); |
| 6181 | setting[i] = GET_DDR_DLL_CTL3(dll90_setting); |
| 6182 | debug("%d. LMC%d_DLL_CTL3[%d] = %016llx %d\n", i, if_num, |
| 6183 | GET_DDR_DLL_CTL3(dll90_byte_sel), ddr_dll_ctl3.u64, |
| 6184 | setting[i]); |
| 6185 | } |
| 6186 | |
| 6187 | debug("N%d.LMC%d: %-36s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n", |
| 6188 | node, if_num, "DLL90 Setting 8:0", |
| 6189 | setting[8], setting[7], setting[6], setting[5], setting[4], |
| 6190 | setting[3], setting[2], setting[1], setting[0]); |
| 6191 | |
| 6192 | process_custom_dll_offsets(priv, if_num, "ddr_dll_write_offset", |
| 6193 | c_cfg->dll_write_offset, |
| 6194 | "ddr%d_dll_write_offset_byte%d", 1); |
| 6195 | process_custom_dll_offsets(priv, if_num, "ddr_dll_read_offset", |
| 6196 | c_cfg->dll_read_offset, |
| 6197 | "ddr%d_dll_read_offset_byte%d", 2); |
| 6198 | } |
| 6199 | |
| 6200 | #define SLOT_CTL_INCR(csr, chip, field, incr) \ |
| 6201 | csr.chip.field = (csr.chip.field < (64 - incr)) ? \ |
| 6202 | (csr.chip.field + incr) : 63 |
| 6203 | |
| 6204 | #define INCR(csr, chip, field, incr) \ |
| 6205 | csr.chip.field = (csr.chip.field < (64 - incr)) ? \ |
| 6206 | (csr.chip.field + incr) : 63 |
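/*
 * Example of the saturating update (hypothetical field values): with
 * incr = 2, SLOT_CTL_INCR(slot_ctl1, cn78xx, w2r_xrank_init, 2) bumps
 * a field value of 61 to 63 and clamps 62 or 63 at 63, the maximum of
 * the 6-bit field. INCR expands to exactly the same saturating update.
 */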
| 6207 | |
| 6208 | static void lmc_workaround_2(struct ddr_priv *priv) |
| 6209 | { |
| 6210 | /* Workaround Errata 21063 */ |
| 6211 | if (octeon_is_cpuid(OCTEON_CN78XX) || |
| 6212 | octeon_is_cpuid(OCTEON_CN70XX_PASS1_X)) { |
| 6213 | union cvmx_lmcx_slot_ctl0 slot_ctl0; |
| 6214 | union cvmx_lmcx_slot_ctl1 slot_ctl1; |
| 6215 | union cvmx_lmcx_slot_ctl2 slot_ctl2; |
| 6216 | union cvmx_lmcx_ext_config ext_cfg; |
| 6217 | |
| 6218 | slot_ctl0.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL0(if_num)); |
| 6219 | slot_ctl1.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL1(if_num)); |
| 6220 | slot_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL2(if_num)); |
| 6221 | |
| 6222 | ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num)); |
| 6223 | |
| 6224 | /* When ext_cfg.s.read_ena_bprch is set add 1 */ |
| 6225 | if (ext_cfg.s.read_ena_bprch) { |
| 6226 | SLOT_CTL_INCR(slot_ctl0, cn78xx, r2w_init, 1); |
| 6227 | SLOT_CTL_INCR(slot_ctl0, cn78xx, r2w_l_init, 1); |
| 6228 | SLOT_CTL_INCR(slot_ctl1, cn78xx, r2w_xrank_init, 1); |
| 6229 | SLOT_CTL_INCR(slot_ctl2, cn78xx, r2w_xdimm_init, 1); |
| 6230 | } |
| 6231 | |
| 6232 | /* Always add 2 */ |
| 6233 | SLOT_CTL_INCR(slot_ctl1, cn78xx, w2r_xrank_init, 2); |
| 6234 | SLOT_CTL_INCR(slot_ctl2, cn78xx, w2r_xdimm_init, 2); |
| 6235 | |
| 6236 | lmc_wr(priv, CVMX_LMCX_SLOT_CTL0(if_num), slot_ctl0.u64); |
| 6237 | lmc_wr(priv, CVMX_LMCX_SLOT_CTL1(if_num), slot_ctl1.u64); |
| 6238 | lmc_wr(priv, CVMX_LMCX_SLOT_CTL2(if_num), slot_ctl2.u64); |
| 6239 | } |
| 6240 | |
| 6241 | /* Workaround Errata 21216 */ |
| 6242 | if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) || |
| 6243 | octeon_is_cpuid(OCTEON_CN70XX_PASS1_X)) { |
| 6244 | union cvmx_lmcx_slot_ctl1 slot_ctl1; |
| 6245 | union cvmx_lmcx_slot_ctl2 slot_ctl2; |
| 6246 | |
| 6247 | slot_ctl1.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL1(if_num)); |
| 6248 | slot_ctl1.cn78xx.w2w_xrank_init = |
| 6249 | max(10, (int)slot_ctl1.cn78xx.w2w_xrank_init); |
| 6250 | lmc_wr(priv, CVMX_LMCX_SLOT_CTL1(if_num), slot_ctl1.u64); |
| 6251 | |
| 6252 | slot_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL2(if_num)); |
| 6253 | slot_ctl2.cn78xx.w2w_xdimm_init = |
| 6254 | max(10, (int)slot_ctl2.cn78xx.w2w_xdimm_init); |
| 6255 | lmc_wr(priv, CVMX_LMCX_SLOT_CTL2(if_num), slot_ctl2.u64); |
| 6256 | } |
| 6257 | } |
| 6258 | |
| 6259 | static void lmc_final(struct ddr_priv *priv) |
| 6260 | { |
| 6261 | /* |
| 6262 | * 4.8.11 Final LMC Initialization |
| 6263 | * |
| 6264 | * Early LMC initialization, LMC write-leveling, and LMC read-leveling |
| 6265 | * must be completed prior to starting this final LMC initialization. |
| 6266 | * |
| 6267 | * LMC hardware updates the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, |
| 6268 | * LMC(0)_SLOT_CTL2 CSRs with minimum values based on the selected |
| 6269 | * readleveling and write-leveling settings. Software should not write |
| 6270 | * the final LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and LMC(0)_SLOT_CTL2 |
| 6271 | * values until after the final read-leveling and write-leveling |
| 6272 | * settings are written. |
| 6273 | * |
| 6274 | * Software must ensure the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and |
| 6275 | * LMC(0)_SLOT_CTL2 CSR values are appropriate for this step. These CSRs |
| 6276 | * select the minimum gaps between read operations and write operations |
| 6277 | * of various types. |
| 6278 | * |
| 6279 | * Software must not reduce the values in these CSR fields below the |
| 6280 | * values previously selected by the LMC hardware (during write-leveling |
| 6281 | * and read-leveling steps above). |
| 6282 | * |
| 6283 | * All sections in this chapter may be used to derive proper settings |
| 6284 | * for these registers. |
| 6285 | * |
| 6286 | * For minimal read latency, L2C_CTL[EF_ENA,EF_CNT] should be programmed |
| 6287 | * properly. This should be done prior to the first read. |
| 6288 | */ |
| 6289 | |
| 6290 | /* Clear any residual ECC errors */ |
| 6291 | int num_tads = 1; |
| 6292 | int tad; |
| 6293 | int num_mcis = 1; |
| 6294 | int mci; |
| 6295 | |
| 6296 | if (octeon_is_cpuid(OCTEON_CN78XX)) { |
| 6297 | num_tads = 8; |
| 6298 | num_mcis = 4; |
| 6299 | } else if (octeon_is_cpuid(OCTEON_CN70XX)) { |
| 6300 | num_tads = 1; |
| 6301 | num_mcis = 1; |
| 6302 | } else if (octeon_is_cpuid(OCTEON_CN73XX) || |
| 6303 | octeon_is_cpuid(OCTEON_CNF75XX)) { |
| 6304 | num_tads = 4; |
| 6305 | num_mcis = 3; |
| 6306 | } |
| 6307 | |
| 6308 | lmc_wr(priv, CVMX_LMCX_INT(if_num), -1ULL); |
| 6309 | lmc_rd(priv, CVMX_LMCX_INT(if_num)); |
| 6310 | |
| 6311 | for (tad = 0; tad < num_tads; tad++) { |
| 6312 | l2c_wr(priv, CVMX_L2C_TADX_INT_REL(tad), |
| 6313 | l2c_rd(priv, CVMX_L2C_TADX_INT_REL(tad))); |
| 6314 | debug("%-45s : (%d) 0x%08llx\n", "CVMX_L2C_TAD_INT", tad, |
| 6315 | l2c_rd(priv, CVMX_L2C_TADX_INT_REL(tad))); |
| 6316 | } |
| 6317 | |
| 6318 | for (mci = 0; mci < num_mcis; mci++) { |
| 6319 | l2c_wr(priv, CVMX_L2C_MCIX_INT_REL(mci), |
| 6320 | l2c_rd(priv, CVMX_L2C_MCIX_INT_REL(mci))); |
| 6321 | debug("%-45s : (%d) 0x%08llx\n", "L2C_MCI_INT", mci, |
| 6322 | l2c_rd(priv, CVMX_L2C_MCIX_INT_REL(mci))); |
| 6323 | } |
| 6324 | |
| 6325 | debug("%-45s : 0x%08llx\n", "LMC_INT", |
| 6326 | lmc_rd(priv, CVMX_LMCX_INT(if_num))); |
| 6327 | } |
| 6328 | |
| 6329 | static void lmc_scrambling(struct ddr_priv *priv) |
| 6330 | { |
| 6331 | // Make sure scrambling is disabled during init... |
| 6332 | union cvmx_lmcx_control ctrl; |
| 6333 | union cvmx_lmcx_scramble_cfg0 lmc_scramble_cfg0; |
| 6334 | union cvmx_lmcx_scramble_cfg1 lmc_scramble_cfg1; |
| 6335 | union cvmx_lmcx_scramble_cfg2 lmc_scramble_cfg2; |
| 6336 | union cvmx_lmcx_ns_ctl lmc_ns_ctl; |
| 6337 | int use_scramble = 0; // default OFF |
| 6338 | char *s; |
| 6339 | |
| 6340 | ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); |
| 6341 | lmc_scramble_cfg0.u64 = lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num)); |
| 6342 | lmc_scramble_cfg1.u64 = lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num)); |
| 6343 | lmc_scramble_cfg2.u64 = 0; // quiet compiler |
| 6344 | if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) { |
| 6345 | lmc_scramble_cfg2.u64 = |
| 6346 | lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num)); |
| 6347 | } |
| 6348 | lmc_ns_ctl.u64 = lmc_rd(priv, CVMX_LMCX_NS_CTL(if_num)); |
| 6349 | |
| 6350 | s = lookup_env_ull(priv, "ddr_use_scramble"); |
| 6351 | if (s) |
| 6352 | use_scramble = simple_strtoull(s, NULL, 0); |
| 6353 | |
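/*
 * Usage sketch (assuming ddr_use_scramble and ddr_scramble_cfg0/1/2
 * are read from the U-Boot environment via lookup_env_ull()): setting
 * ddr_use_scramble=1 before DDR init enables scrambling with
 * RNG-generated keys, while ddr_scramble_cfg0=<value> (and cfg1/cfg2)
 * forces specific keys and also sets scramble_ena below.
 */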
| 6354 | /* Generate random values if scrambling is needed */ |
| 6355 | if (use_scramble) { |
| 6356 | lmc_scramble_cfg0.u64 = cvmx_rng_get_random64(); |
| 6357 | lmc_scramble_cfg1.u64 = cvmx_rng_get_random64(); |
| 6358 | lmc_scramble_cfg2.u64 = cvmx_rng_get_random64(); |
| 6359 | lmc_ns_ctl.s.ns_scramble_dis = 0; |
| 6360 | lmc_ns_ctl.s.adr_offset = 0; |
| 6361 | ctrl.s.scramble_ena = 1; |
| 6362 | } |
| 6363 | |
| 6364 | s = lookup_env_ull(priv, "ddr_scramble_cfg0"); |
| 6365 | if (s) { |
| 6366 | lmc_scramble_cfg0.u64 = simple_strtoull(s, NULL, 0); |
| 6367 | ctrl.s.scramble_ena = 1; |
| 6368 | } |
| 6369 | debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG0", |
| 6370 | lmc_scramble_cfg0.u64); |
| 6371 | |
| 6372 | lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num), lmc_scramble_cfg0.u64); |
| 6373 | |
| 6374 | s = lookup_env_ull(priv, "ddr_scramble_cfg1"); |
| 6375 | if (s) { |
| 6376 | lmc_scramble_cfg1.u64 = simple_strtoull(s, NULL, 0); |
| 6377 | ctrl.s.scramble_ena = 1; |
| 6378 | } |
| 6379 | debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG1", |
| 6380 | lmc_scramble_cfg1.u64); |
| 6381 | lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num), lmc_scramble_cfg1.u64); |
| 6382 | |
| 6383 | if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) { |
| 6384 | s = lookup_env_ull(priv, "ddr_scramble_cfg2"); |
| 6385 | if (s) { |
| 6386 | lmc_scramble_cfg2.u64 = simple_strtoull(s, NULL, 0); |
| 6387 | ctrl.s.scramble_ena = 1; |
| 6388 | } |
| 6389 | debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG2", |
| 6390 | lmc_scramble_cfg2.u64); |
| 6391 | lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num), |
| 6392 | lmc_scramble_cfg2.u64); |
| 6393 | } |
| 6394 | |
| 6395 | s = lookup_env_ull(priv, "ddr_ns_ctl"); |
| 6396 | if (s) |
| 6397 | lmc_ns_ctl.u64 = simple_strtoull(s, NULL, 0); |
| 6398 | debug("%-45s : 0x%016llx\n", "LMC_NS_CTL", lmc_ns_ctl.u64); |
| 6399 | lmc_wr(priv, CVMX_LMCX_NS_CTL(if_num), lmc_ns_ctl.u64); |
| 6400 | |
| 6401 | lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64); |
| 6402 | } |
| 6403 | |
| 6404 | struct rl_score { |
| 6405 | u64 setting; |
| 6406 | int score; |
| 6407 | }; |
| 6408 | |
| 6409 | static union cvmx_lmcx_rlevel_rankx rl_rank __section(".data"); |
| 6410 | static union cvmx_lmcx_rlevel_ctl rl_ctl __section(".data"); |
| 6411 | static unsigned char rodt_ctl __section(".data"); |
| 6412 | |
| 6413 | static int rl_rodt_err __section(".data"); |
| 6414 | static unsigned char rtt_nom __section(".data"); |
| 6415 | static unsigned char rtt_idx __section(".data"); |
| 6416 | static char min_rtt_nom_idx __section(".data"); |
| 6417 | static char max_rtt_nom_idx __section(".data"); |
| 6418 | static char min_rodt_ctl __section(".data"); |
| 6419 | static char max_rodt_ctl __section(".data"); |
| 6420 | static int rl_dbg_loops __section(".data"); |
| 6421 | static unsigned char save_ddr2t __section(".data"); |
| 6422 | static int rl_samples __section(".data"); |
| 6423 | static char rl_compute __section(".data"); |
| 6424 | static char saved_ddr__ptune __section(".data"); |
| 6425 | static char saved_ddr__ntune __section(".data"); |
| 6426 | static char rl_comp_offs __section(".data"); |
| 6427 | static char saved_int_zqcs_dis __section(".data"); |
| 6428 | static int max_adj_rl_del_inc __section(".data"); |
| 6429 | static int print_nom_ohms __section(".data"); |
| 6430 | static int rl_print __section(".data"); |
| 6431 | |
| 6432 | #ifdef ENABLE_HARDCODED_RLEVEL |
| 6433 | static char part_number[21] __section(".data"); |
| 6434 | #endif /* ENABLE_HARDCODED_RLEVEL */ |
| 6435 | |
| 6436 | struct perfect_counts { |
| 6437 | u16 count[9][32]; // 8+ECC lanes by 32 delay values |
| 6438 | u32 mask[9]; // 8+ECC, bitmask of perfect delays |
| 6439 | }; |
| 6440 | |
| 6441 | static struct perfect_counts rank_perf[4] __section(".data"); |
| 6442 | static struct perfect_counts rodt_perfect_counts __section(".data"); |
| 6443 | static int pbm_lowsum_limit __section(".data"); |
| 6444 | // FIXME: PBM skip for RODT 240 and 34 |
| 6445 | static u32 pbm_rodt_skip __section(".data"); |
| 6446 | |
| 6447 | // control rank majority processing |
| 6448 | static int disable_rank_majority __section(".data"); |
| 6449 | |
| 6450 | // default to mask 11b ODDs for DDR4 (except 73xx), else DISABLE |
| 6451 | // for DDR3 |
| 6452 | static int enable_rldelay_bump __section(".data"); |
| 6453 | static int rldelay_bump_incr __section(".data"); |
| 6454 | static int disable_rlv_bump_this_byte __section(".data"); |
| 6455 | static u64 value_mask __section(".data"); |
| 6456 | |
| 6457 | static struct rlevel_byte_data rl_byte[9] __section(".data"); |
| 6458 | static int sample_loops __section(".data"); |
| 6459 | static int max_samples __section(".data"); |
| 6460 | static int rl_rank_errors __section(".data"); |
| 6461 | static int rl_mask_err __section(".data"); |
| 6462 | static int rl_nonseq_err __section(".data"); |
| 6463 | static struct rlevel_bitmask rl_mask[9] __section(".data"); |
| 6464 | static int rl_best_rank_score __section(".data"); |
| 6465 | |
| 6466 | static int rodt_row_skip_mask __section(".data"); |
| 6467 | |
| 6468 | static void rodt_loop(struct ddr_priv *priv, int rankx, struct rl_score |
| 6469 | rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4]) |
| 6470 | { |
| 6471 | union cvmx_lmcx_comp_ctl2 cc2; |
| 6472 | const int rl_separate_ab = 1; |
| 6473 | int i; |
| 6474 | |
| 6475 | rl_best_rank_score = DEFAULT_BEST_RANK_SCORE; |
| 6476 | rl_rodt_err = 0; |
| 6477 | cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); |
| 6478 | cc2.cn78xx.rodt_ctl = rodt_ctl; |
| 6479 | lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64); |
| 6480 | cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); |
| 6481 | udelay(1); /* Give it a little time to take effect */ |
| 6482 | if (rl_print > 1) { |
| 6483 | debug("Read ODT_CTL : 0x%x (%d ohms)\n", |
| 6484 | cc2.cn78xx.rodt_ctl, |
| 6485 | imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]); |
| 6486 | } |
| 6487 | |
| 6488 | memset(rl_byte, 0, sizeof(rl_byte)); |
| 6489 | memset(&rodt_perfect_counts, 0, sizeof(rodt_perfect_counts)); |
| 6490 | |
| 6491 | // when the iterated RODT is the target (default) RODT, take more samples... |
| 6492 | max_samples = rl_samples; |
| 6493 | if (rodt_ctl == default_rodt_ctl) |
| 6494 | max_samples += rl_samples + 1; |
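/*
 * For example, with rl_samples = 3 each non-target RODT setting is
 * sampled 3 times, while the default/target RODT gets 3 + 3 + 1 = 7
 * samples.
 */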
| 6495 | |
| 6496 | for (sample_loops = 0; sample_loops < max_samples; sample_loops++) { |
| 6497 | int redoing_nonseq_errs = 0; |
| 6498 | |
| 6499 | rl_mask_err = 0; |
| 6500 | |
| 6501 | if (!(rl_separate_ab && spd_rdimm && |
| 6502 | ddr_type == DDR4_DRAM)) { |
| 6503 | /* Clear read-level delays */ |
| 6504 | lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0); |
| 6505 | |
| 6506 | /* read-leveling */ |
| 6507 | oct3_ddr3_seq(priv, 1 << rankx, if_num, 1); |
| 6508 | |
| 6509 | do { |
| 6510 | rl_rank.u64 = |
| 6511 | lmc_rd(priv, |
| 6512 | CVMX_LMCX_RLEVEL_RANKX(rankx, |
| 6513 | if_num)); |
| 6514 | } while (rl_rank.cn78xx.status != 3); |
| 6515 | } |
| 6516 | |
| 6517 | rl_rank.u64 = |
| 6518 | lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num)); |
| 6519 | |
| 6520 | // start bitmask interpretation block |
| 6521 | |
| 6522 | memset(rl_mask, 0, sizeof(rl_mask)); |
| 6523 | |
| 6524 | if (rl_separate_ab && spd_rdimm && ddr_type == DDR4_DRAM) { |
| 6525 | union cvmx_lmcx_rlevel_rankx rl_rank_aside; |
| 6526 | union cvmx_lmcx_modereg_params0 mp0; |
| 6527 | |
| 6528 | /* A-side */ |
| 6529 | mp0.u64 = |
| 6530 | lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num)); |
| 6531 | mp0.s.mprloc = 0; /* MPR Page 0 Location 0 */ |
| 6532 | lmc_wr(priv, |
| 6533 | CVMX_LMCX_MODEREG_PARAMS0(if_num), |
| 6534 | mp0.u64); |
| 6535 | |
| 6536 | /* Clear read-level delays */ |
| 6537 | lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0); |
| 6538 | |
| 6539 | /* read-leveling */ |
| 6540 | oct3_ddr3_seq(priv, 1 << rankx, if_num, 1); |
| 6541 | |
| 6542 | do { |
| 6543 | rl_rank.u64 = |
| 6544 | lmc_rd(priv, |
| 6545 | CVMX_LMCX_RLEVEL_RANKX(rankx, |
| 6546 | if_num)); |
| 6547 | } while (rl_rank.cn78xx.status != 3); |
| 6548 | |
| 6549 | rl_rank.u64 = |
| 6550 | lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, |
| 6551 | if_num)); |
| 6552 | |
| 6553 | rl_rank_aside.u64 = rl_rank.u64; |
| 6554 | |
| 6555 | rl_mask[0].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 0); |
| 6556 | rl_mask[1].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 1); |
| 6557 | rl_mask[2].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 2); |
| 6558 | rl_mask[3].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 3); |
| 6559 | rl_mask[8].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 8); |
| 6560 | /* A-side complete */ |
| 6561 | |
| 6562 | /* B-side */ |
| 6563 | mp0.u64 = |
| 6564 | lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num)); |
| 6565 | mp0.s.mprloc = 3; /* MPR Page 0 Location 3 */ |
| 6566 | lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), |
| 6567 | mp0.u64); |
| 6568 | |
| 6569 | /* Clear read-level delays */ |
| 6570 | lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0); |
| 6571 | |
| 6572 | /* read-leveling */ |
| 6573 | oct3_ddr3_seq(priv, 1 << rankx, if_num, 1); |
| 6574 | |
| 6575 | do { |
| 6576 | rl_rank.u64 = |
| 6577 | lmc_rd(priv, |
| 6578 | CVMX_LMCX_RLEVEL_RANKX(rankx, |
| 6579 | if_num)); |
| 6580 | } while (rl_rank.cn78xx.status != 3); |
| 6581 | |
| 6582 | rl_rank.u64 = |
| 6583 | lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, |
| 6584 | if_num)); |
| 6585 | |
| 6586 | rl_mask[4].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 4); |
| 6587 | rl_mask[5].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 5); |
| 6588 | rl_mask[6].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 6); |
| 6589 | rl_mask[7].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 7); |
| 6590 | /* B-side complete */ |
| 6591 | |
| 6592 | upd_rl_rank(&rl_rank, 0, rl_rank_aside.s.byte0); |
| 6593 | upd_rl_rank(&rl_rank, 1, rl_rank_aside.s.byte1); |
| 6594 | upd_rl_rank(&rl_rank, 2, rl_rank_aside.s.byte2); |
| 6595 | upd_rl_rank(&rl_rank, 3, rl_rank_aside.s.byte3); |
| 6596 | /* ECC A-side */ |
| 6597 | upd_rl_rank(&rl_rank, 8, rl_rank_aside.s.byte8); |
| 6598 | |
| 6599 | mp0.u64 = |
| 6600 | lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num)); |
| 6601 | mp0.s.mprloc = 0; /* MPR Page 0 Location 0 */ |
| 6602 | lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), |
| 6603 | mp0.u64); |
| 6604 | } |
| 6605 | |
| 6606 | /* |
| 6607 | * Evaluate the quality of the read-leveling delays from the |
| 6608 | * bitmasks. Also save off a software computed read-leveling |
| 6609 | * mask that may be used later to qualify the delay results |
| 6610 | * from Octeon. |
| 6611 | */ |
| 6612 | for (i = 0; i < (8 + ecc_ena); ++i) { |
| 6613 | int bmerr; |
| 6614 | |
| 6615 | if (!(if_bytemask & (1 << i))) |
| 6616 | continue; |
| 6617 | if (!(rl_separate_ab && spd_rdimm && |
| 6618 | ddr_type == DDR4_DRAM)) { |
| 6619 | rl_mask[i].bm = |
| 6620 | lmc_ddr3_rl_dbg_read(priv, if_num, i); |
| 6621 | } |
| 6622 | bmerr = validate_ddr3_rlevel_bitmask(&rl_mask[i], |
| 6623 | ddr_type); |
| 6624 | rl_mask[i].errs = bmerr; |
| 6625 | rl_mask_err += bmerr; |
| 6626 | // count only the "perfect" bitmasks |
| 6627 | if (ddr_type == DDR4_DRAM && !bmerr) { |
| 6628 | int delay; |
| 6629 | // FIXME: for now, simple filtering: |
| 6630 | // do NOT count PBMs for RODTs in skip mask |
| 6631 | if ((1U << rodt_ctl) & pbm_rodt_skip) |
| 6632 | continue; |
| 6633 | // FIXME: could optimize this a bit? |
| 6634 | delay = get_rl_rank(&rl_rank, i); |
| 6635 | rank_perf[rankx].count[i][delay] += 1; |
| 6636 | rank_perf[rankx].mask[i] |= |
| 6637 | (1ULL << delay); |
| 6638 | rodt_perfect_counts.count[i][delay] += 1; |
| 6639 | rodt_perfect_counts.mask[i] |= (1ULL << delay); |
| 6640 | } |
| 6641 | } |
| 6642 | |
| 6643 | /* Set delays for unused bytes to match byte 0. */ |
| 6644 | for (i = 0; i < 9; ++i) { |
| 6645 | if (if_bytemask & (1 << i)) |
| 6646 | continue; |
| 6647 | upd_rl_rank(&rl_rank, i, rl_rank.s.byte0); |
| 6648 | } |
| 6649 | |
| 6650 | /* |
| 6651 | * Save a copy of the byte delays in physical |
| 6652 | * order for sequential evaluation. |
| 6653 | */ |
| 6654 | unpack_rlevel_settings(if_bytemask, ecc_ena, rl_byte, rl_rank); |
| 6655 | |
| 6656 | redo_nonseq_errs: |
| 6657 | |
| 6658 | rl_nonseq_err = 0; |
| 6659 | if (!disable_sequential_delay_check) { |
| 6660 | for (i = 0; i < 9; ++i) |
| 6661 | rl_byte[i].sqerrs = 0; |
| 6662 | |
| 6663 | if ((if_bytemask & 0xff) == 0xff) { |
| 6664 | /* |
| 6665 | * Evaluate delay sequence across the whole |
| 6666 | * range of bytes for standard dimms. |
| 6667 | */ |
| 6668 | /* 1=RDIMM, 5=Mini-RDIMM */ |
| 6669 | if (spd_dimm_type == 1 || spd_dimm_type == 5) { |
| 6670 | int reg_adj_del = abs(rl_byte[4].delay - |
| 6671 | rl_byte[5].delay); |
| 6672 | |
| 6673 | /* |
| 6674 | * Registered dimm topology routes |
| 6675 | * from the center. |
| 6676 | */ |
| 6677 | rl_nonseq_err += |
| 6678 | nonseq_del(rl_byte, 0, |
| 6679 | 3 + ecc_ena, |
| 6680 | max_adj_rl_del_inc); |
| 6681 | rl_nonseq_err += |
| 6682 | nonseq_del(rl_byte, 5, |
| 6683 | 7 + ecc_ena, |
| 6684 | max_adj_rl_del_inc); |
| 6685 | // byte 5 sqerrs never gets cleared |
| 6686 | // for RDIMMs |
| 6687 | rl_byte[5].sqerrs = 0; |
| 6688 | if (reg_adj_del > 1) { |
| 6689 | /* |
| 6690 | * Assess proximity of bytes on |
| 6691 | * opposite sides of register |
| 6692 | */ |
| 6693 | rl_nonseq_err += (reg_adj_del - |
| 6694 | 1) * |
| 6695 | RLEVEL_ADJACENT_DELAY_ERROR; |
| 6696 | // update byte 5 error |
| 6697 | rl_byte[5].sqerrs += |
| 6698 | (reg_adj_del - 1) * |
| 6699 | RLEVEL_ADJACENT_DELAY_ERROR; |
| 6700 | } |
| 6701 | } |
| 6702 | |
| 6703 | /* 2=UDIMM, 6=Mini-UDIMM */ |
| 6704 | if (spd_dimm_type == 2 || spd_dimm_type == 6) { |
| 6705 | /* |
| 6706 | * Unbuffered dimm topology routes |
| 6707 | * from end to end. |
| 6708 | */ |
| 6709 | rl_nonseq_err += nonseq_del(rl_byte, 0, |
| 6710 | 7 + ecc_ena, |
| 6711 | max_adj_rl_del_inc); |
| 6712 | } |
| 6713 | } else { |
| 6714 | rl_nonseq_err += nonseq_del(rl_byte, 0, |
| 6715 | 3 + ecc_ena, |
| 6716 | max_adj_rl_del_inc); |
| 6717 | } |
| 6718 | } /* if (! disable_sequential_delay_check) */ |
| 6719 | |
| 6720 | rl_rank_errors = rl_mask_err + rl_nonseq_err; |
| 6721 | |
| 6722 | // print original sample here only if we are not really |
| 6723 | // averaging or picking best |
| 6724 | // also do not print if we were redoing the NONSEQ score |
| 6725 | // for using COMPUTED |
| 6726 | if (!redoing_nonseq_errs && rl_samples < 2) { |
| 6727 | if (rl_print > 1) { |
| 6728 | display_rl_bm(if_num, rankx, rl_mask, ecc_ena); |
| 6729 | display_rl_bm_scores(if_num, rankx, rl_mask, |
| 6730 | ecc_ena); |
| 6731 | display_rl_seq_scores(if_num, rankx, rl_byte, |
| 6732 | ecc_ena); |
| 6733 | } |
| 6734 | display_rl_with_score(if_num, rl_rank, rankx, |
| 6735 | rl_rank_errors); |
| 6736 | } |
| 6737 | |
| 6738 | if (rl_compute) { |
| 6739 | if (!redoing_nonseq_errs) { |
| 6740 | /* Recompute the delays based on the bitmask */ |
| 6741 | for (i = 0; i < (8 + ecc_ena); ++i) { |
| 6742 | if (!(if_bytemask & (1 << i))) |
| 6743 | continue; |
| 6744 | |
| 6745 | upd_rl_rank(&rl_rank, i, |
| 6746 | compute_ddr3_rlevel_delay( |
| 6747 | rl_mask[i].mstart, |
| 6748 | rl_mask[i].width, |
| 6749 | rl_ctl)); |
| 6750 | } |
| 6751 | |
| 6752 | /* |
| 6753 | * Override the copy of byte delays with the |
| 6754 | * computed results. |
| 6755 | */ |
| 6756 | unpack_rlevel_settings(if_bytemask, ecc_ena, |
| 6757 | rl_byte, rl_rank); |
| 6758 | |
| 6759 | redoing_nonseq_errs = 1; |
| 6760 | goto redo_nonseq_errs; |
| 6761 | |
| 6762 | } else { |
| 6763 | /* |
| 6764 | * now print this if already printed the |
| 6765 | * original sample |
| 6766 | */ |
| 6767 | if (rl_samples < 2 || rl_print) { |
| 6768 | display_rl_with_computed(if_num, |
| 6769 | rl_rank, rankx, |
| 6770 | rl_rank_errors); |
| 6771 | } |
| 6772 | } |
| 6773 | } /* if (rl_compute) */ |
| 6774 | |
| 6775 | // end bitmask interpretation block |
| 6776 | |
| 6777 | // if it is a better (lower) score, then keep it |
| 6778 | if (rl_rank_errors < rl_best_rank_score) { |
| 6779 | rl_best_rank_score = rl_rank_errors; |
| 6780 | |
| 6781 | // save the new best delays and best errors |
| 6782 | for (i = 0; i < (8 + ecc_ena); ++i) { |
| 6783 | rl_byte[i].best = rl_byte[i].delay; |
| 6784 | rl_byte[i].bestsq = rl_byte[i].sqerrs; |
| 6785 | // save bitmasks and their scores as well |
| 6786 | // xlate UNPACKED index to PACKED index to |
| 6787 | // get from rl_mask |
| 6788 | rl_byte[i].bm = rl_mask[XUP(i, !!ecc_ena)].bm; |
| 6789 | rl_byte[i].bmerrs = |
| 6790 | rl_mask[XUP(i, !!ecc_ena)].errs; |
| 6791 | } |
| 6792 | } |
| 6793 | |
| 6794 | rl_rodt_err += rl_rank_errors; |
| 6795 | } |
| 6796 | |
| 6797 | /* We recorded the best score across the averaging loops */ |
| 6798 | rl_score[rtt_nom][rodt_ctl][rankx].score = rl_best_rank_score; |
| 6799 | |
| 6800 | /* |
| 6801 | * Restore the delays from the best fields that go with the best |
| 6802 | * score |
| 6803 | */ |
| 6804 | for (i = 0; i < 9; ++i) { |
| 6805 | rl_byte[i].delay = rl_byte[i].best; |
| 6806 | rl_byte[i].sqerrs = rl_byte[i].bestsq; |
| 6807 | } |
| 6808 | |
| 6809 | rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num)); |
| 6810 | |
| 6811 | pack_rlevel_settings(if_bytemask, ecc_ena, rl_byte, &rl_rank); |
| 6812 | |
| 6813 | if (rl_samples > 1) { |
| 6814 | // restore the "best" bitmasks and their scores for printing |
| 6815 | for (i = 0; i < 9; ++i) { |
| 6816 | if ((if_bytemask & (1 << i)) == 0) |
| 6817 | continue; |
| 6818 | // xlate PACKED index to UNPACKED index to get from |
| 6819 | // rl_byte |
| 6820 | rl_mask[i].bm = rl_byte[XPU(i, !!ecc_ena)].bm; |
| 6821 | rl_mask[i].errs = rl_byte[XPU(i, !!ecc_ena)].bmerrs; |
| 6822 | } |
| 6823 | |
| 6824 | // maybe print bitmasks/scores here |
| 6825 | if (rl_print > 1) { |
| 6826 | display_rl_bm(if_num, rankx, rl_mask, ecc_ena); |
| 6827 | display_rl_bm_scores(if_num, rankx, rl_mask, ecc_ena); |
| 6828 | display_rl_seq_scores(if_num, rankx, rl_byte, ecc_ena); |
| 6829 | |
| 6830 | display_rl_with_rodt(if_num, rl_rank, rankx, |
| 6831 | rl_score[rtt_nom][rodt_ctl][rankx].score, |
| 6832 | print_nom_ohms, |
| 6833 | imp_val->rodt_ohms[rodt_ctl], |
| 6834 | WITH_RODT_BESTSCORE); |
| 6835 | |
| 6836 | debug("-----------\n"); |
| 6837 | } |
| 6838 | } |
| 6839 | |
| 6840 | rl_score[rtt_nom][rodt_ctl][rankx].setting = rl_rank.u64; |
| 6841 | |
| 6842 | // print out the PBMs for the current RODT |
| 6843 | if (ddr_type == DDR4_DRAM && rl_print > 1) { // verbosity? |
| 6844 | // FIXME: change verbosity level after debug complete... |
| 6845 | |
| 6846 | for (i = 0; i < 9; i++) { |
| 6847 | u64 temp_mask; |
| 6848 | int num_values; |
| 6849 | |
| 6850 | // FIXME: PBM skip for RODTs in mask |
| 6851 | if ((1U << rodt_ctl) & pbm_rodt_skip) |
| 6852 | continue; |
| 6853 | |
| 6854 | temp_mask = rodt_perfect_counts.mask[i]; |
| 6855 | num_values = __builtin_popcountll(temp_mask); |
| 6856 | i = __builtin_ffsll(temp_mask) - 1; |
| 6857 | |
| 6858 | debug("N%d.LMC%d.R%d: PERFECT: RODT %3d: Byte %d: mask 0x%02llx (%d): ", |
| 6859 | node, if_num, rankx, |
| 6860 | imp_val->rodt_ohms[rodt_ctl], |
| 6861 | i, temp_mask >> i, num_values); |
| 6862 | |
| 6863 | while (temp_mask != 0) { |
| 6864 | i = __builtin_ffsll(temp_mask) - 1; |
| 6865 | debug("%2d(%2d) ", i, |
| 6866 | rodt_perfect_counts.count[i][i]); |
| 6867 | temp_mask &= ~(1UL << i); |
| 6868 | } /* while (temp_mask != 0) */ |
| 6869 | debug("\n"); |
| 6870 | } |
| 6871 | } |
| 6872 | } |
| 6873 | |
| 6874 | static void rank_major_loop(struct ddr_priv *priv, int rankx, struct rl_score |
| 6875 | rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4]) |
| 6876 | { |
| 6877 | /* Start with an arbitrarily high score */ |
| 6878 | int best_rank_score = DEFAULT_BEST_RANK_SCORE; |
| 6879 | int best_rank_rtt_nom = 0; |
| 6880 | int best_rank_ctl = 0; |
| 6881 | int best_rank_ohms = 0; |
| 6882 | int best_rankx = 0; |
| 6883 | int dimm_rank_mask; |
| 6884 | int max_rank_score; |
| 6885 | union cvmx_lmcx_rlevel_rankx saved_rl_rank; |
| 6886 | int next_ohms; |
| 6887 | int orankx; |
| 6888 | int next_score = 0; |
| 6889 | int best_byte, new_byte, temp_byte, orig_best_byte; |
| 6890 | int rank_best_bytes[9]; |
| 6891 | int byte_sh; |
| 6892 | int avg_byte; |
| 6893 | int avg_diff; |
| 6894 | int i; |
| 6895 | |
| 6896 | if (!(rank_mask & (1 << rankx))) |
| 6897 | return; |
| 6898 | |
| 6899 | // some of the rank-related loops below need to operate only on |
| 6900 | // the ranks of a single DIMM, |
| 6901 | // so create a mask for their use here |
| 6902 | if (num_ranks == 4) { |
| 6903 | dimm_rank_mask = rank_mask; // should be 1111 |
| 6904 | } else { |
| 6905 | dimm_rank_mask = rank_mask & 3; // should be 01 or 11 |
| 6906 | if (rankx >= 2) { |
| 6907 | // doing a rank on the second DIMM, should be |
| 6908 | // 0100 or 1100 |
| 6909 | dimm_rank_mask <<= 2; |
| 6910 | } |
| 6911 | } |
| 6912 | debug("DIMM rank mask: 0x%x, rank mask: 0x%x, rankx: %d\n", |
| 6913 | dimm_rank_mask, rank_mask, rankx); |
| 6914 | |
| 6915 | // this is the start of the BEST ROW SCORE LOOP |
| 6916 | |
| 6917 | for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) { |
| 6918 | rtt_nom = imp_val->rtt_nom_table[rtt_idx]; |
| 6919 | |
| 6920 | debug("N%d.LMC%d.R%d: starting RTT_NOM %d (%d)\n", |
| 6921 | node, if_num, rankx, rtt_nom, |
| 6922 | imp_val->rtt_nom_ohms[rtt_nom]); |
| 6923 | |
| 6924 | for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; |
| 6925 | --rodt_ctl) { |
| 6926 | next_ohms = imp_val->rodt_ohms[rodt_ctl]; |
| 6927 | |
| 6928 | // skip RODT rows in mask, but *NOT* rows with too |
| 6929 | // high a score; |
| 6930 | // we will not use the skipped ones for printing or |
| 6931 | // evaluating, but we need to allow all the |
| 6932 | // non-skipped ones to be candidates for "best" |
| 6933 | if (((1 << rodt_ctl) & rodt_row_skip_mask) != 0) { |
| 6934 | debug("N%d.LMC%d.R%d: SKIPPING rodt:%d (%d) with rank_score:%d\n", |
| 6935 | node, if_num, rankx, rodt_ctl, |
| 6936 | next_ohms, next_score); |
| 6937 | continue; |
| 6938 | } |
| 6939 | |
| 6940 | // this is ROFFIX-0528 |
| 6941 | for (orankx = 0; orankx < dimm_count * 4; orankx++) { |
| 6942 | // stay on the same DIMM |
| 6943 | if (!(dimm_rank_mask & (1 << orankx))) |
| 6944 | continue; |
| 6945 | |
| 6946 | next_score = rl_score[rtt_nom][rodt_ctl][orankx].score; |
| 6947 | |
| 6948 | // always skip a higher score |
| 6949 | if (next_score > best_rank_score) |
| 6950 | continue; |
| 6951 | |
| 6952 | // if scores are equal |
| 6953 | if (next_score == best_rank_score) { |
| 6954 | // always skip lower ohms |
| 6955 | if (next_ohms < best_rank_ohms) |
| 6956 | continue; |
| 6957 | |
| 6958 | // if same ohms |
| 6959 | if (next_ohms == best_rank_ohms) { |
| 6960 | // always skip the other rank(s) |
| 6961 | if (orankx != rankx) |
| 6962 | continue; |
| 6963 | } |
| 6964 | // else next_ohms is greater, |
| 6965 | // always choose it |
| 6966 | } |
| 6967 | // else next_score is less than current best, |
| 6968 | // so always choose it |
| 6969 | debug("N%d.LMC%d.R%d: new best score: rank %d, rodt %d(%3d), new best %d, previous best %d(%d)\n", |
| 6970 | node, if_num, rankx, orankx, rodt_ctl, next_ohms, next_score, |
| 6971 | best_rank_score, best_rank_ohms); |
| 6972 | best_rank_score = next_score; |
| 6973 | best_rank_rtt_nom = rtt_nom; |
| 6974 | //best_rank_nom_ohms = rtt_nom_ohms; |
| 6975 | best_rank_ctl = rodt_ctl; |
| 6976 | best_rank_ohms = next_ohms; |
| 6977 | best_rankx = orankx; |
| 6978 | rl_rank.u64 = |
| 6979 | rl_score[rtt_nom][rodt_ctl][orankx].setting; |
| 6980 | } |
| 6981 | } |
| 6982 | } |
| 6983 | |
| 6984 | // this is the end of the BEST ROW SCORE LOOP |
| 6985 | |
| 6986 | // DANGER, Will Robinson!! Abort now if we did not find a best |
| 6987 | // score at all... |
| 6988 | if (best_rank_score == DEFAULT_BEST_RANK_SCORE) { |
| 6989 | printf("N%d.LMC%d.R%d: WARNING: no best rank score found - resetting node...\n", |
| 6990 | node, if_num, rankx); |
| 6991 | mdelay(500); |
| 6992 | do_reset(NULL, 0, 0, NULL); |
| 6993 | } |
| 6994 | |
| 6995 | // FIXME: relative now, but still arbitrary... |
| 6996 | max_rank_score = best_rank_score; |
| 6997 | if (ddr_type == DDR4_DRAM) { |
| 6998 | // halve the range if 2 DIMMs unless they are single rank... |
| 6999 | max_rank_score += (MAX_RANK_SCORE_LIMIT / ((num_ranks > 1) ? |
| 7000 | dimm_count : 1)); |
| 7001 | } else { |
| 7002 | // Since DDR3 typically has a wider score range, |
| 7003 | // keep more of them always |
| 7004 | max_rank_score += MAX_RANK_SCORE_LIMIT; |
| 7005 | } |
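/*
 * Example: with the MAX_RANK_SCORE_LIMIT of 99, two dual-rank DDR4
 * DIMMs keep rows scoring up to best + 99/2 = best + 49, while DDR3
 * always allows up to best + 99.
 */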
| 7006 | |
| 7007 | if (!ecc_ena) { |
| 7008 | /* ECC is not used */ |
| 7009 | rl_rank.s.byte8 = rl_rank.s.byte0; |
| 7010 | } |
| 7011 | |
| 7012 | // at the end, write the best row settings to the current rank |
| 7013 | lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), rl_rank.u64); |
| 7014 | rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num)); |
| 7015 | |
| 7016 | saved_rl_rank.u64 = rl_rank.u64; |
| 7017 | |
| 7018 | // this is the start of the PRINT LOOP |
| 7019 | int pass; |
| 7020 | |
| 7021 | // for pass==0, print current rank, pass==1 print other rank(s) |
| 7022 | // this is done because we want to show each ranks RODT values |
| 7023 | // together, not interlaced |
| 7024 | // keep separates for ranks - pass=0 target rank, pass=1 other |
| 7025 | // rank on DIMM |
| 7026 | int mask_skipped[2] = {0, 0}; |
| 7027 | int score_skipped[2] = {0, 0}; |
| 7028 | int selected_rows[2] = {0, 0}; |
| 7029 | int zero_scores[2] = {0, 0}; |
| 7030 | for (pass = 0; pass < 2; pass++) { |
| 7031 | for (orankx = 0; orankx < dimm_count * 4; orankx++) { |
| 7032 | // stay on the same DIMM |
| 7033 | if (!(dimm_rank_mask & (1 << orankx))) |
| 7034 | continue; |
| 7035 | |
| 7036 | if ((pass == 0 && orankx != rankx) || |
| 7037 | (pass != 0 && orankx == rankx)) |
| 7038 | continue; |
| 7039 | |
| 7040 | for (rtt_idx = min_rtt_nom_idx; |
| 7041 | rtt_idx <= max_rtt_nom_idx; ++rtt_idx) { |
| 7042 | rtt_nom = imp_val->rtt_nom_table[rtt_idx]; |
| 7043 | if (dyn_rtt_nom_mask == 0) { |
| 7044 | print_nom_ohms = -1; |
| 7045 | } else { |
| 7046 | print_nom_ohms = |
| 7047 | imp_val->rtt_nom_ohms[rtt_nom]; |
| 7048 | } |
| 7049 | |
| 7050 | // cycle through all the RODT values... |
| 7051 | for (rodt_ctl = max_rodt_ctl; |
| 7052 | rodt_ctl >= min_rodt_ctl; --rodt_ctl) { |
| 7053 | union cvmx_lmcx_rlevel_rankx |
| 7054 | temp_rl_rank; |
| 7055 | int temp_score = |
| 7056 | rl_score[rtt_nom][rodt_ctl][orankx].score; |
| 7057 | int skip_row; |
| 7058 | |
| 7059 | temp_rl_rank.u64 = |
| 7060 | rl_score[rtt_nom][rodt_ctl][orankx].setting; |
| 7061 | |
| 7062 | // skip RODT rows in mask, or rows |
| 7063 | // with too high a score; |
| 7064 | // we will not use them for printing |
| 7065 | // or evaluating... |
| 7066 | if ((1 << rodt_ctl) & |
| 7067 | rodt_row_skip_mask) { |
| 7068 | skip_row = WITH_RODT_SKIPPING; |
| 7069 | ++mask_skipped[pass]; |
| 7070 | } else if (temp_score > |
| 7071 | max_rank_score) { |
| 7072 | skip_row = WITH_RODT_SKIPPING; |
| 7073 | ++score_skipped[pass]; |
| 7074 | } else { |
| 7075 | skip_row = WITH_RODT_BLANK; |
| 7076 | ++selected_rows[pass]; |
| 7077 | if (temp_score == 0) |
| 7078 | ++zero_scores[pass]; |
| 7079 | } |
| 7080 | |
| 7081 | // identify and print the BEST ROW |
| 7082 | // when it comes up |
| 7083 | if (skip_row == WITH_RODT_BLANK && |
| 7084 | best_rankx == orankx && |
| 7085 | best_rank_rtt_nom == rtt_nom && |
| 7086 | best_rank_ctl == rodt_ctl) |
| 7087 | skip_row = WITH_RODT_BESTROW; |
| 7088 | |
| 7089 | if (rl_print) { |
| 7090 | display_rl_with_rodt(if_num, |
| 7091 | temp_rl_rank, orankx, temp_score, |
| 7092 | print_nom_ohms, |
| 7093 | imp_val->rodt_ohms[rodt_ctl], |
| 7094 | skip_row); |
| 7095 | } |
| 7096 | } |
| 7097 | } |
| 7098 | } |
| 7099 | } |
| 7100 | debug("N%d.LMC%d.R%d: RLROWS: selected %d+%d, zero_scores %d+%d, mask_skipped %d+%d, score_skipped %d+%d\n", |
| 7101 | node, if_num, rankx, selected_rows[0], selected_rows[1], |
| 7102 | zero_scores[0], zero_scores[1], mask_skipped[0], mask_skipped[1], |
| 7103 | score_skipped[0], score_skipped[1]); |
| 7104 | // this is the end of the PRINT LOOP |
| 7105 | |
| 7106 | // now evaluate which bytes need adjusting |
| 7107 | // collect the new byte values; first init with current best for |
| 7108 | // neighbor use |
| 7109 | for (i = 0, byte_sh = 0; i < 8 + ecc_ena; i++, byte_sh += 6) { |
| 7110 | rank_best_bytes[i] = (int)(rl_rank.u64 >> byte_sh) & |
| 7111 | RLEVEL_BYTE_MSK; |
| 7112 | } |
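/*
 * Example (assuming RLEVEL_BYTE_MSK is the 6-bit field mask 0x3f):
 * byte_sh steps 0, 6, 12, ... so byte 2's delay is
 * (rl_rank.u64 >> 12) & 0x3f.
 */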
| 7113 | |
| 7114 | // this is the start of the BEST BYTE LOOP |
| 7115 | |
| 7116 | for (i = 0, byte_sh = 0; i < 8 + ecc_ena; i++, byte_sh += 6) { |
| 7117 | int sum = 0, count = 0; |
| 7118 | int count_less = 0, count_same = 0, count_more = 0; |
| 7119 | int count_byte; // save the value we counted around |
| 7120 | // for rank majority use |
| 7121 | int rank_less = 0, rank_same = 0, rank_more = 0; |
| 7122 | int neighbor; |
| 7123 | int neigh_byte; |
| 7124 | |
| 7125 | best_byte = rank_best_bytes[i]; |
| 7126 | orig_best_byte = rank_best_bytes[i]; |
| 7127 | |
| 7128 | // this is the start of the BEST BYTE AVERAGING LOOP |
| 7129 | |
| 7130 | // validate the initial "best" byte by looking at the |
| 7131 | // average of the unskipped byte-column entries |
| 7132 | // we want to do this before we go further, so we can |
| 7133 | // try to start with a better initial value |
| 7134 | // this is the so-called "BESTBUY" patch set |
| 7135 | |
| 7136 | for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; |
| 7137 | ++rtt_idx) { |
| 7138 | rtt_nom = imp_val->rtt_nom_table[rtt_idx]; |
| 7139 | |
| 7140 | for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; |
| 7141 | --rodt_ctl) { |
| 7142 | union cvmx_lmcx_rlevel_rankx temp_rl_rank; |
| 7143 | int temp_score; |
| 7144 | |
| 7145 | // average over all the ranks |
| 7146 | for (orankx = 0; orankx < dimm_count * 4; |
| 7147 | orankx++) { |
| 7148 | // stay on the same DIMM |
| 7149 | if (!(dimm_rank_mask & (1 << orankx))) |
| 7150 | continue; |
| 7151 | |
| 7152 | temp_score = |
| 7153 | rl_score[rtt_nom][rodt_ctl][orankx].score; |
| 7154 | // skip RODT rows in mask, or rows with |
| 7155 | // too high a score; |
| 7156 | // we will not use them for printing or |
| 7157 | // evaluating... |
| 7158 | |
| 7159 | if (!((1 << rodt_ctl) & |
| 7160 | rodt_row_skip_mask) && |
| 7161 | temp_score <= max_rank_score) { |
| 7162 | temp_rl_rank.u64 = |
| 7163 | rl_score[rtt_nom][rodt_ctl][orankx].setting; |
| 7164 | temp_byte = |
| 7165 | (int)(temp_rl_rank.u64 >> byte_sh) & |
| 7166 | RLEVEL_BYTE_MSK; |
| 7167 | sum += temp_byte; |
| 7168 | count++; |
| 7169 | } |
| 7170 | } |
| 7171 | } |
| 7172 | } |
| 7173 | |
| 7174 | // this is the end of the BEST BYTE AVERAGING LOOP |
| 7175 | |
| 7176 | // FIXME: validate count and sum?? |
| 7177 | avg_byte = (int)divide_nint(sum, count); |
| 7178 | avg_diff = best_byte - avg_byte; |
| 7179 | new_byte = best_byte; |
| 7180 | if (avg_diff != 0) { |
| 7181 | // bump best up/dn by 1, not necessarily all the |
| 7182 | // way to avg |
| 7183 | new_byte = best_byte + ((avg_diff > 0) ? -1 : 1); |
| 7184 | } |
| 7185 | |
| 7186 | if (rl_print) { |
| 7187 | debug("N%d.LMC%d.R%d: START: Byte %d: best %d is different by %d from average %d, using %d.\n", |
| 7188 | node, if_num, rankx, |
| 7189 | i, best_byte, avg_diff, avg_byte, new_byte); |
| 7190 | } |
| 7191 | best_byte = new_byte; |
| 7192 | count_byte = new_byte; // save the value we will count around |
| 7193 | |
| 7194 | // At this point best_byte is either: |
| 7195 | // 1. the original byte-column value from the best scoring |
| 7196 | // RODT row, OR |
| 7197 | // 2. that value bumped toward the average of all the |
| 7198 | // byte-column values |
| 7199 | // |
| 7200 | // best_byte will not change from here on... |
| 7201 | |
| 7202 | // this is the start of the BEST BYTE COUNTING LOOP |
| 7203 | |
| 7204 | // NOTE: we do this next loop separately from above, because |
| 7205 | // we count relative to "best_byte" |
| 7206 | // which may have been modified by the above averaging |
| 7207 | // operation... |
| 7208 | |
| 7209 | for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; |
| 7210 | ++rtt_idx) { |
| 7211 | rtt_nom = imp_val->rtt_nom_table[rtt_idx]; |
| 7212 | |
| 7213 | for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; |
| 7214 | --rodt_ctl) { |
| 7215 | union cvmx_lmcx_rlevel_rankx temp_rl_rank; |
| 7216 | int temp_score; |
| 7217 | |
| 7218 | for (orankx = 0; orankx < dimm_count * 4; |
| 7219 | orankx++) { // count over all the ranks |
| 7220 | // stay on the same DIMM |
| 7221 | if (!(dimm_rank_mask & (1 << orankx))) |
| 7222 | continue; |
| 7223 | |
| 7224 | temp_score = |
| 7225 | rl_score[rtt_nom][rodt_ctl][orankx].score; |
| 7226 | // skip RODT rows in mask, or rows |
| 7227 | // with too high a score; |
| 7228 | // we will not use them for printing |
| 7229 | // or evaluating... |
| 7230 | if (((1 << rodt_ctl) & |
| 7231 | rodt_row_skip_mask) || |
| 7232 | temp_score > max_rank_score) |
| 7233 | continue; |
| 7234 | |
| 7235 | temp_rl_rank.u64 = |
| 7236 | rl_score[rtt_nom][rodt_ctl][orankx].setting; |
| 7237 | temp_byte = (temp_rl_rank.u64 >> |
| 7238 | byte_sh) & RLEVEL_BYTE_MSK; |
| 7239 | |
| 7240 | if (temp_byte == 0) |
| 7241 | ; // do not count it if illegal |
| 7242 | else if (temp_byte == best_byte) |
| 7243 | count_same++; |
| 7244 | else if (temp_byte == best_byte - 1) |
| 7245 | count_less++; |
| 7246 | else if (temp_byte == best_byte + 1) |
| 7247 | count_more++; |
| 7248 | // else do not count anything more |
| 7249 | // than 1 away from the best |
| 7250 | |
| 7251 | // no rank counting if disabled |
| 7252 | if (disable_rank_majority) |
| 7253 | continue; |
| 7254 | |
| 7255 | // FIXME? count is relative to |
| 7256 | // best_byte; should it be rank-based? |
| 7257 | // rank counts only on main rank |
| 7258 | if (orankx != rankx) |
| 7259 | continue; |
| 7260 | else if (temp_byte == best_byte) |
| 7261 | rank_same++; |
| 7262 | else if (temp_byte == best_byte - 1) |
| 7263 | rank_less++; |
| 7264 | else if (temp_byte == best_byte + 1) |
| 7265 | rank_more++; |
| 7266 | } |
| 7267 | } |
| 7268 | } |
| 7269 | |
| 7270 | if (rl_print) { |
| 7271 | debug("N%d.LMC%d.R%d: COUNT: Byte %d: orig %d now %d, more %d same %d less %d (%d/%d/%d)\n", |
| 7272 | node, if_num, rankx, |
| 7273 | i, orig_best_byte, best_byte, |
| 7274 | count_more, count_same, count_less, |
| 7275 | rank_more, rank_same, rank_less); |
| 7276 | } |
| 7277 | |
| 7278 | // this is the end of the BEST BYTE COUNTING LOOP |
| 7279 | |
| 7280 | // choose the new byte value |
| 7281 | // we need to check that there is no gap greater than 2 |
| 7282 | // between adjacent bytes (adjacency depends on DIMM type) |
| 7283 | // use the neighbor value to help decide |
| 7284 | // initially, the rank_best_bytes[] will contain values from |
| 7285 | // the chosen lowest score rank |
| 7286 | new_byte = 0; |
| 7287 | |
| 7288 | // neighbor is index-1 unless we are index 0 or index 8 (ECC) |
| 7289 | neighbor = (i == 8) ? 3 : ((i == 0) ? 1 : i - 1); |
| 7290 | neigh_byte = rank_best_bytes[neighbor]; |
| 7291 | |
| 7292 | // can go up or down or stay the same, so look at a numeric |
| 7293 | // average to help |
| 7294 | new_byte = (int)divide_nint(((count_more * (best_byte + 1)) + |
| 7295 | (count_same * (best_byte + 0)) + |
| 7296 | (count_less * (best_byte - 1))), |
| 7297 | max(1, (count_more + count_same + |
| 7298 | count_less))); |
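/*
 * Worked example (hypothetical counts): with best_byte = 10,
 * count_more = 3, count_same = 5 and count_less = 1, the
 * nearest-integer average is (3*11 + 5*10 + 1*9) / 9 = 92 / 9, which
 * rounds to new_byte = 10.
 */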
| 7299 | |
| 7300 | // use neighbor to help choose with average |
| 7301 | if (i > 0 && (abs(neigh_byte - new_byte) > 2) && |
| 7302 | !disable_sequential_delay_check) { |
| 7303 | // but not for byte 0 |
| 7304 | int avg_pick = new_byte; |
| 7305 | |
| 7306 | if ((new_byte - best_byte) != 0) { |
| 7307 | // back to best, average did not get better |
| 7308 | new_byte = best_byte; |
| 7309 | } else { |
| 7310 | // avg was the same, still too far, now move |
| 7311 | // it towards the neighbor |
| 7312 | new_byte += (neigh_byte > new_byte) ? 1 : -1; |
| 7313 | } |
| 7314 | |
| 7315 | if (rl_print) { |
| 7316 | debug("N%d.LMC%d.R%d: AVERAGE: Byte %d: neighbor %d too different %d from average %d, picking %d.\n", |
| 7317 | node, if_num, rankx, |
| 7318 | i, neighbor, neigh_byte, avg_pick, |
| 7319 | new_byte); |
| 7320 | } |
| 7321 | } else { |
| 7322 | // NOTE: |
| 7323 | // For now, we let the neighbor processing above trump |
| 7324 | // the new simple majority processing here. |
| 7325 | // This is mostly because we have seen no smoking gun |
| 7326 | // for a neighbor bad choice (yet?). |
| 7327 | // Also note that we will ALWAYS be using byte 0 |
| 7328 | // majority, because of the if clause above. |
| 7329 | |
| 7330 | // majority is dependent on the counts, which are |
| 7331 | // relative to best_byte, so start there |
| 7332 | int maj_byte = best_byte; |
| 7333 | int rank_maj; |
| 7334 | int rank_sum; |
| 7335 | |
| 7336 | if (count_more > count_same && |
| 7337 | count_more > count_less) { |
| 7338 | maj_byte++; |
| 7339 | } else if (count_less > count_same && |
| 7340 | count_less > count_more) { |
| 7341 | maj_byte--; |
| 7342 | } |
| 7343 | |
| 7344 | if (maj_byte != new_byte) { |
| 7345 | // print only when majority choice is |
| 7346 | // different from average |
| 7347 | if (rl_print) { |
| 7348 | debug("N%d.LMC%d.R%d: MAJORTY: Byte %d: picking majority of %d over average %d.\n", |
| 7349 | node, if_num, rankx, i, maj_byte, |
| 7350 | new_byte); |
| 7351 | } |
| 7352 | new_byte = maj_byte; |
| 7353 | } else { |
| 7354 | if (rl_print) { |
| 7355 | debug("N%d.LMC%d.R%d: AVERAGE: Byte %d: picking average of %d.\n", |
| 7356 | node, if_num, rankx, i, new_byte); |
| 7357 | } |
| 7358 | } |
| 7359 | |
| 7360 | if (!disable_rank_majority) { |
| 7361 | // rank majority is dependent on the rank |
| 7362 | // counts, which are relative to best_byte, |
| 7363 | // so start there, and adjust according to the |
| 7364 | // rank counts majority |
| 7365 | rank_maj = best_byte; |
| 7366 | if (rank_more > rank_same && |
| 7367 | rank_more > rank_less) { |
| 7368 | rank_maj++; |
| 7369 | } else if (rank_less > rank_same && |
| 7370 | rank_less > rank_more) { |
| 7371 | rank_maj--; |
| 7372 | } |
| 7373 | rank_sum = rank_more + rank_same + rank_less; |
| 7374 | |
| 7375 | // now, let rank majority possibly rule over |
| 7376 | // the current new_byte however we got it |
| 7377 | if (rank_maj != new_byte) { // only if different |
| 7378 | // Here is where we decide whether to |
| 7379 | // completely apply RANK_MAJORITY or not |
| 7380 | // ignore if less than |
| 7381 | if (rank_maj < new_byte) { |
| 7382 | if (rl_print) { |
| 7383 | debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: LESS: NOT using %d over %d.\n", |
| 7384 | node, if_num, |
| 7385 | rankx, i, |
| 7386 | rank_maj, |
| 7387 | new_byte); |
| 7388 | } |
| 7389 | } else { |
| 7390 | // For the moment, we do it |
| 7391 | // ONLY when running 2-slot |
| 7392 | // configs |
| 7393 | // OR when rank_sum is big |
| 7394 | // enough |
| 7395 | if (dimm_count > 1 || |
| 7396 | rank_sum > 2) { |
| 7397 | // print only when rank |
| 7398 | // majority choice is |
| 7399 | // selected |
| 7400 | if (rl_print) { |
| 7401 | debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: picking %d over %d.\n", |
| 7402 | node, |
| 7403 | if_num, |
| 7404 | rankx, |
| 7405 | i, |
| 7406 | rank_maj, |
| 7407 | new_byte); |
| 7408 | } |
| 7409 | new_byte = rank_maj; |
| 7410 | } else { |
| 7411 | // FIXME: print some |
| 7412 | // info when we could |
| 7413 | // have chosen RANKMAJ |
| 7414 | // but did not |
| 7415 | if (rl_print) { |
| 7416 | debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: NOT using %d over %d (best=%d,sum=%d).\n", |
| 7417 | node, |
| 7418 | if_num, |
| 7419 | rankx, |
| 7420 | i, |
| 7421 | rank_maj, |
| 7422 | new_byte, |
| 7423 | best_byte, |
| 7424 | rank_sum); |
| 7425 | } |
| 7426 | } |
| 7427 | } |
| 7428 | } |
| 7429 | } /* if (!disable_rank_majority) */ |
| 7430 | } |
| 7431 | // one last check: |
| 7432 | // if new_byte is still count_byte, BUT there was no count |
| 7433 | // for that value, DO SOMETHING!!! |
| 7434 | // FIXME: go back to original best byte from the best row |
| 7435 | if (new_byte == count_byte && count_same == 0) { |
| 7436 | new_byte = orig_best_byte; |
| 7437 | if (rl_print) { |
| 7438 | debug("N%d.LMC%d.R%d: FAILSAF: Byte %d: going back to original %d.\n", |
| 7439 | node, if_num, rankx, i, new_byte); |
| 7440 | } |
| 7441 | } |
| 7442 | // Look at counts for "perfect" bitmasks (PBMs) if we had |
| 7443 | // any for this byte-lane. |
| 7444 | // Remember, we only counted for DDR4, so zero means none |
| 7445 | // or DDR3, and we bypass this... |
| 7446 | value_mask = rank_perf[rankx].mask[i]; |
| 7447 | disable_rlv_bump_this_byte = 0; |
| 7448 | |
| 7449 | if (value_mask != 0 && rl_ctl.cn78xx.offset == 1) { |
| 7450 | int ii, delay_count, delay_max = 0, del_val = 0; /* ii: delay candidate; i stays the byte lane */
| 7451 | int num_values = __builtin_popcountll(value_mask); |
| 7452 | int sum_counts = 0; |
| 7453 | u64 temp_mask = value_mask; |
| 7454 | |
| 7455 | disable_rlv_bump_this_byte = 1; |
| 7456 | ii = __builtin_ffsll(temp_mask) - 1;
| 7457 | if (rl_print)
| 7458 | debug("N%d.LMC%d.R%d: PERFECT: Byte %d: OFF1: mask 0x%02llx (%d): ",
| 7459 | node, if_num, rankx, i, value_mask >> ii,
| 7460 | num_values); |
| 7461 | |
| 7462 | while (temp_mask != 0) {
| 7463 | ii = __builtin_ffsll(temp_mask) - 1;
| 7464 | delay_count = rank_perf[rankx].count[i][ii];
| 7465 | sum_counts += delay_count;
| 7466 | if (rl_print)
| 7467 | debug("%2d(%2d) ", ii, delay_count);
| 7468 | if (delay_count >= delay_max) {
| 7469 | delay_max = delay_count;
| 7470 | del_val = ii;
| 7471 | }
| 7472 | temp_mask &= ~(1UL << ii);
| 7473 | } /* while (temp_mask != 0) */
| 7474 | |
| 7475 | // if sum_counts is small, just use NEW_BYTE |
| 7476 | if (sum_counts < pbm_lowsum_limit) { |
| 7477 | if (rl_print) |
| 7478 | debug(": LOWSUM (%2d), choose ORIG ", |
| 7479 | sum_counts); |
| 7480 | del_val = new_byte; |
| 7481 | delay_max = rank_perf[rankx].count[i][del_val]; |
| 7482 | } |
| 7483 | |
| 7484 | // finish printing here... |
| 7485 | if (rl_print) { |
| 7486 | debug(": USING %2d (%2d) D%d\n", del_val, |
| 7487 | delay_max, disable_rlv_bump_this_byte); |
| 7488 | } |
| 7489 | |
| 7490 | new_byte = del_val; // override with best PBM choice |
| 7491 | |
| 7492 | } else if ((value_mask != 0) && (rl_ctl.cn78xx.offset == 2)) { |
| 7494 | int ii, delay_count, del_val; /* ii: delay candidate, as above */
| 7495 | int num_values = __builtin_popcountll(value_mask); |
| 7496 | int sum_counts = 0; |
| 7497 | u64 temp_mask = value_mask; |
| 7498 | |
| 7499 | ii = __builtin_ffsll(temp_mask) - 1;
| 7500 | if (rl_print)
| 7501 | debug("N%d.LMC%d.R%d: PERFECT: Byte %d: mask 0x%02llx (%d): ",
| 7502 | node, if_num, rankx, i, value_mask >> ii,
| 7503 | num_values); |
| 7504 | while (temp_mask != 0) {
| 7505 | ii = __builtin_ffsll(temp_mask) - 1;
| 7506 | delay_count = rank_perf[rankx].count[i][ii];
| 7507 | sum_counts += delay_count;
| 7508 | if (rl_print)
| 7509 | debug("%2d(%2d) ", ii, delay_count);
| 7510 | temp_mask &= ~(1UL << ii);
| 7511 | } /* while (temp_mask != 0) */
| 7512 | |
| 7513 | del_val = __builtin_ffsll(value_mask) - 1; |
| 7514 | delay_count = |
| 7515 | rank_perf[rankx].count[i][del_val]; |
| 7516 | |
| 7517 | // extract up to 5 mask bits at del_val (overkill, normally only 1-4 bits are set)
| 7518 | ii = (value_mask >> del_val) & 0x1F;
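| | // Illustrative decode of the patterns tested below (binary, LSB =
| | // del_val): 00001b is a single perfect delay (bump allowed);
| | // 00011b is two adjacent delays (take the second, no bump);
| | // 00101b has a hole at del_val+1 (take the hole or skip past it,
| | // depending on the counts); 00111b is three adjacent delays (take
| | // the middle one, sometimes the top).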
| 7519 | |
| 7520 | // if sum_counts is small, treat as special and use |
| 7521 | // NEW_BYTE |
| 7522 | if (sum_counts < pbm_lowsum_limit) { |
| 7523 | if (rl_print) |
| 7524 | debug(": LOWSUM (%2d), choose ORIG", |
| 7525 | sum_counts); |
| 7526 | ii = 99; // SPECIAL case...
| 7527 | } |
| 7528 | |
| 7529 | switch (ii) {
| 7530 | case 0x01 /* 00001b */: |
| 7531 | // allow BUMP |
| 7532 | break; |
| 7533 | |
| 7534 | case 0x13 /* 10011b */: |
| 7535 | case 0x0B /* 01011b */: |
| 7536 | case 0x03 /* 00011b */: |
| 7537 | del_val += 1; // take the second |
| 7538 | disable_rlv_bump_this_byte = 1; // allow no BUMP |
| 7539 | break; |
| 7540 | |
| 7541 | case 0x0D /* 01101b */: |
| 7542 | case 0x05 /* 00101b */: |
| 7543 | // decide using the count at the lowest delay and the total of all counts
| 7544 | if (delay_count >= 5 || sum_counts <= 5) |
| 7545 | del_val += 1; // take the hole |
| 7546 | else |
| 7547 | del_val += 2; // take the next set |
| 7548 | disable_rlv_bump_this_byte = 1; // allow no BUMP |
| 7549 | break; |
| 7550 | |
| 7551 | case 0x0F /* 01111b */: |
| 7552 | case 0x17 /* 10111b */: |
| 7553 | case 0x07 /* 00111b */: |
| 7554 | del_val += 1; // take the second |
| 7555 | if (delay_count < 5) { // lowest count is small |
| 7556 | int second = |
| 7557 | rank_perf[rankx].count[i][del_val]; |
| 7558 | int third = |
| 7559 | rank_perf[rankx].count[i][del_val + 1]; |
| 7560 | // test if middle is more than 1 OR |
| 7561 | // top is more than 1; |
| 7562 | // this means if they are BOTH 1, |
| 7563 | // then we keep the second... |
| 7564 | if (second > 1 || third > 1) { |
| 7565 | // if middle is small OR top |
| 7566 | // is large |
| 7567 | if (second < 5 || |
| 7568 | third > 1) { |
| 7569 | // take the top |
| 7570 | del_val += 1; |
| 7571 | if (rl_print) |
| 7572 | debug(": TOP7 "); |
| 7573 | } |
| 7574 | } |
| 7575 | } |
| 7576 | disable_rlv_bump_this_byte = 1; // allow no BUMP |
| 7577 | break; |
| 7578 | |
| 7579 | default: // all others... |
| 7580 | if (rl_print) |
| 7581 | debug(": ABNORMAL, choose ORIG"); |
| 7582 | /* fall through */
| 7583 | case 99: // special |
| 7584 | // FIXME: choose original choice? |
| 7585 | del_val = new_byte; |
| 7586 | disable_rlv_bump_this_byte = 1; // allow no BUMP |
| 7587 | break; |
| 7588 | } |
| 7589 | delay_count = |
| 7590 | rank_perf[rankx].count[i][del_val]; |
| 7591 | |
| 7592 | // finish printing here... |
| 7593 | if (rl_print) |
| 7594 | debug(": USING %2d (%2d) D%d\n", del_val, |
| 7595 | delay_count, disable_rlv_bump_this_byte); |
| 7596 | new_byte = del_val; // override with best PBM choice |
| 7597 | } else { |
| 7598 | if (ddr_type == DDR4_DRAM) { // only report when DDR4 |
| 7599 | // FIXME: remove or increase VBL for this |
| 7600 | // output... |
| 7601 | if (rl_print) |
| 7602 | debug("N%d.LMC%d.R%d: PERFECT: Byte %d: ZERO PBMs, USING %d\n", |
| 7603 | node, if_num, rankx, i, |
| 7604 | new_byte); |
| 7605 | // prevent ODD bump, rely on original |
| 7606 | disable_rlv_bump_this_byte = 1; |
| 7607 | } |
| 7608 | } /* if (value_mask != 0) */ |
| 7609 | |
| 7610 | // optionally bump the delay value |
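| | // Illustrative example of the bump test: with enable_rldelay_bump
| | // == 3 (the DDR4 default except CN73xx), only delays whose low two
| | // bits are 11b (3, 7, 11, ...) are bumped by rldelay_bump_incr,
| | // i.e. +1 when RLEVEL_CTL[OFFSET] is 2 and -1 when it is 1.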
| 7611 | if (enable_rldelay_bump && !disable_rlv_bump_this_byte) { |
| 7612 | if ((new_byte & enable_rldelay_bump) == |
| 7613 | enable_rldelay_bump) { |
| 7614 | int bump_value = new_byte + rldelay_bump_incr; |
| 7615 | |
| 7616 | if (rl_print) { |
| 7617 | debug("N%d.LMC%d.R%d: RLVBUMP: Byte %d: CHANGING %d to %d (%s)\n", |
| 7618 | node, if_num, rankx, i, |
| 7619 | new_byte, bump_value, |
| 7620 | (value_mask & |
| 7621 | (1 << bump_value)) ? |
| 7622 | "PBM" : "NOPBM"); |
| 7623 | } |
| 7624 | new_byte = bump_value; |
| 7625 | } |
| 7626 | } |
| 7627 | |
| 7628 | // last checks for count-related purposes |
| 7629 | if (new_byte == best_byte && count_more > 0 && |
| 7630 | count_less == 0) { |
| 7631 | // we really should take best_byte + 1 |
| 7632 | if (rl_print) { |
| 7633 | debug("N%d.LMC%d.R%d: CADJMOR: Byte %d: CHANGING %d to %d\n", |
| 7634 | node, if_num, rankx, i, |
| 7635 | new_byte, best_byte + 1); |
| 7636 | }
| 7637 | new_byte = best_byte + 1;
| 7638 | } else if ((new_byte < best_byte) && (count_same > 0)) { |
| 7639 | // we really should take best_byte |
| 7640 | if (rl_print) { |
| 7641 | debug("N%d.LMC%d.R%d: CADJSAM: Byte %d: CHANGING %d to %d\n", |
| 7642 | node, if_num, rankx, i, |
| 7643 | new_byte, best_byte); |
| 7644 | }
| 7645 | new_byte = best_byte;
| 7646 | } else if (new_byte > best_byte) { |
| 7647 | if ((new_byte == (best_byte + 1)) && |
| 7648 | count_more == 0 && count_less > 0) { |
| 7649 | // we really should take best_byte |
| 7650 | if (rl_print) { |
| 7651 | debug("N%d.LMC%d.R%d: CADJLE1: Byte %d: CHANGING %d to %d\n", |
| 7652 | node, if_num, rankx, i, |
| 7653 | new_byte, best_byte); |
| 7654 | }
| 7655 | new_byte = best_byte;
| 7656 | } else if ((new_byte >= (best_byte + 2)) && |
| 7657 | ((count_more > 0) || (count_same > 0))) { |
| 7658 | if (rl_print) { |
| 7659 | debug("N%d.LMC%d.R%d: CADJLE2: Byte %d: CHANGING %d to %d\n", |
| 7660 | node, if_num, rankx, i, |
| 7661 | new_byte, best_byte + 1); |
| 7662 | }
| 7663 | new_byte = best_byte + 1;
| 7664 | } |
| 7665 | } |
| 7666 | |
| 7667 | if (rl_print) { |
| 7668 | debug("N%d.LMC%d.R%d: SUMMARY: Byte %d: orig %d now %d, more %d same %d less %d, using %d\n", |
| 7669 | node, if_num, rankx, i, orig_best_byte, |
| 7670 | best_byte, count_more, count_same, count_less, |
| 7671 | new_byte); |
| 7672 | } |
| 7673 | |
| 7674 | // update the byte with the new value (NOTE: orig value in |
| 7675 | // the CSR may not be current "best") |
| 7676 | upd_rl_rank(&rl_rank, i, new_byte); |
| 7677 | |
| 7678 | // save new best for neighbor use |
| 7679 | rank_best_bytes[i] = new_byte; |
| 7680 | } /* for (i = 0; i < 8+ecc_ena; i++) */ |
| 7681 | |
| 7682 | ////////////////// this is the end of the BEST BYTE LOOP |
| 7683 | |
| 7684 | if (saved_rl_rank.u64 != rl_rank.u64) { |
| 7685 | lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), |
| 7686 | rl_rank.u64); |
| 7687 | rl_rank.u64 = lmc_rd(priv, |
| 7688 | CVMX_LMCX_RLEVEL_RANKX(rankx, if_num)); |
| 7689 | debug("Adjusting Read-Leveling per-RANK settings.\n"); |
| 7690 | } else { |
| 7691 | debug("Not Adjusting Read-Leveling per-RANK settings.\n"); |
| 7692 | } |
| 7693 | display_rl_with_final(if_num, rl_rank, rankx); |
| 7694 | |
| 7695 | // FIXME: does this separator make the output a little easier to read?
| 7696 | if (rl_print > 0) |
| 7697 | debug("-----------\n"); |
| 7698 | |
| 7699 | #define RLEVEL_RANKX_EXTRAS_INCR 0 |
| 7700 | // if there are unused entries to be filled |
| 7701 | if ((rank_mask & 0x0f) != 0x0f) { |
| 7702 | // copy the current rank |
| 7703 | union cvmx_lmcx_rlevel_rankx temp_rl_rank = rl_rank; |
| 7704 | |
| 7705 | if (rankx < 3) { |
| 7706 | #if RLEVEL_RANKX_EXTRAS_INCR > 0 |
| 7707 | int byte, delay; |
| 7708 | |
| 7709 | // modify the copy in prep for writing to empty slot(s) |
| 7710 | for (byte = 0; byte < 9; byte++) { |
| 7711 | delay = get_rl_rank(&temp_rl_rank, byte) + |
| 7712 | RLEVEL_RANKX_EXTRAS_INCR; |
| 7713 | if (delay > RLEVEL_BYTE_MSK) |
| 7714 | delay = RLEVEL_BYTE_MSK; |
| 7715 | upd_rl_rank(&temp_rl_rank, byte, delay); |
| 7716 | } |
| 7717 | #endif |
| 7718 | |
| 7719 | // if rank 0, write rank 1 and rank 2 here if empty |
| 7720 | if (rankx == 0) { |
| 7721 | // check that rank 1 is empty |
| 7722 | if (!(rank_mask & (1 << 1))) { |
| 7723 | debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n", |
| 7724 | node, if_num, rankx, 1); |
| 7725 | lmc_wr(priv, |
| 7726 | CVMX_LMCX_RLEVEL_RANKX(1, |
| 7727 | if_num), |
| 7728 | temp_rl_rank.u64); |
| 7729 | } |
| 7730 | |
| 7731 | // check that rank 2 is empty |
| 7732 | if (!(rank_mask & (1 << 2))) { |
| 7733 | debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n", |
| 7734 | node, if_num, rankx, 2); |
| 7735 | lmc_wr(priv, |
| 7736 | CVMX_LMCX_RLEVEL_RANKX(2, |
| 7737 | if_num), |
| 7738 | temp_rl_rank.u64); |
| 7739 | } |
| 7740 | } |
| 7741 | |
| 7742 | // if ranks 0, 1 or 2, write rank 3 here if empty |
| 7743 | // check that rank 3 is empty |
| 7744 | if (!(rank_mask & (1 << 3))) { |
| 7745 | debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n", |
| 7746 | node, if_num, rankx, 3); |
| 7747 | lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(3, if_num), |
| 7748 | temp_rl_rank.u64); |
| 7749 | } |
| 7750 | } |
| 7751 | } |
| 7752 | } |
| 7753 | |
| 7754 | static void lmc_read_leveling(struct ddr_priv *priv) |
| 7755 | { |
| 7756 | struct rl_score rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4]; |
| 7757 | union cvmx_lmcx_control ctl; |
| 7758 | union cvmx_lmcx_config cfg; |
| 7759 | int rankx; |
| 7760 | char *s; |
| 7761 | int i; |
| 7762 | |
| 7763 | /* |
| 7764 | * 4.8.10 LMC Read Leveling |
| 7765 | * |
| 7766 | * LMC supports an automatic read-leveling separately per byte-lane |
| 7767 | * using the DDR3 multipurpose register predefined pattern for system |
| 7768 | * calibration defined in the JEDEC DDR3 specifications. |
| 7769 | * |
| 7770 | * All of DDR PLL, LMC CK, and LMC DRESET, and early LMC initializations |
| 7771 | * must be completed prior to starting this LMC read-leveling sequence. |
| 7772 | * |
| 7773 | * Software could simply write the desired read-leveling values into |
| 7774 | * LMC(0)_RLEVEL_RANK(0..3). This section describes a sequence that uses |
| 7775 | * LMC's auto read-leveling capabilities.
| 7776 | * |
| 7777 | * When LMC does the read-leveling sequence for a rank, it first enables |
| 7778 | * the DDR3 multipurpose register predefined pattern for system |
| 7779 | * calibration on the selected DRAM rank via a DDR3 MR3 write, then |
| 7780 | * executes 64 RD operations at different internal delay settings, then |
| 7781 | * disables the predefined pattern via another DDR3 MR3 write |
| 7782 | * operation. LMC determines the pass or fail of each of the 64 settings |
| 7783 | * independently for each byte lane, then writes appropriate |
| 7784 | * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] values for the rank. |
| 7785 | * |
| 7786 | * After read-leveling for a rank, software can read the 64 pass/fail |
| 7787 | * indications for one byte lane via LMC(0)_RLEVEL_DBG[BITMASK]. |
| 7788 | * Software can observe all pass/fail results for all byte lanes in a |
| 7789 | * rank via separate read-leveling sequences on the rank with different |
| 7790 | * LMC(0)_RLEVEL_CTL[BYTE] values. |
| 7791 | * |
| 7792 | * The 64 pass/fail results will typically have failures for the low |
| 7793 | * delays, followed by a run of some passing settings, followed by more |
| 7794 | * failures in the remaining high delays. LMC sets |
| 7795 | * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] to one of the passing settings. |
| 7796 | * First, LMC selects the longest run of successes in the 64 results. |
| 7797 | * (In the unlikely event that there is more than one longest run, LMC |
| 7798 | * selects the first one.) Then if LMC(0)_RLEVEL_CTL[OFFSET_EN] = 1 and |
| 7799 | * the selected run has more than LMC(0)_RLEVEL_CTL[OFFSET] successes, |
| 7800 | * LMC selects the last passing setting in the run minus |
| 7801 | * LMC(0)_RLEVEL_CTL[OFFSET]. Otherwise LMC selects the middle setting |
| 7802 | * in the run (rounding earlier when necessary). We expect the |
| 7803 | * read-leveling sequence to produce good results with the reset values |
| 7804 | * LMC(0)_RLEVEL_CTL [OFFSET_EN]=1, LMC(0)_RLEVEL_CTL[OFFSET] = 2. |
| 7805 | * |
| 7806 | * The read-leveling sequence has the following steps: |
| 7807 | * |
| 7808 | * 1. Select desired LMC(0)_RLEVEL_CTL[OFFSET_EN,OFFSET,BYTE] settings. |
| 7809 | * Do the remaining substeps 2-4 separately for each rank i with |
| 7810 | * attached DRAM. |
| 7811 | * |
| 7812 | * 2. Without changing any other fields in LMC(0)_CONFIG, |
| 7813 | * |
| 7814 | * o write LMC(0)_SEQ_CTL[SEQ_SEL] to select read-leveling |
| 7815 | * |
| 7816 | * o write LMC(0)_CONFIG[RANKMASK] = (1 << i) |
| 7817 | * |
| 7818 | * o write LMC(0)_SEQ_CTL[INIT_START] = 1 |
| 7819 | * |
| 7820 | * This initiates the previously-described read-leveling. |
| 7821 | * |
| 7822 | * 3. Wait until LMC(0)_RLEVEL_RANKi[STATUS] != 2 |
| 7823 | * |
| 7824 | * LMC will have updated LMC(0)_RLEVEL_RANKi[BYTE*] for all byte |
| 7825 | * lanes at this point. |
| 7826 | * |
| 7827 | * If ECC DRAM is not present (i.e. when DRAM is not attached to the |
| 7828 | * DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the DDR_DQS_<4>_* and |
| 7829 | * DDR_DQ<35:32> chip signals), write LMC(0)_RLEVEL_RANK*[BYTE8] = |
| 7830 | * LMC(0)_RLEVEL_RANK*[BYTE0]. Write LMC(0)_RLEVEL_RANK*[BYTE4] = |
| 7831 | * LMC(0)_RLEVEL_RANK*[BYTE0]. |
| 7832 | * |
| 7833 | * 4. If desired, consult LMC(0)_RLEVEL_DBG[BITMASK] and compare to |
| 7834 | * LMC(0)_RLEVEL_RANKi[BYTE*] for the lane selected by |
| 7835 | * LMC(0)_RLEVEL_CTL[BYTE]. If desired, modify |
| 7836 | * LMC(0)_RLEVEL_CTL[BYTE] to a new value and repeat so that all |
| 7837 | * BITMASKs can be observed. |
| 7838 | * |
| 7839 | * 5. Initialize LMC(0)_RLEVEL_RANK* values for all unused ranks. |
| 7840 | * |
| 7841 | * Let rank i be a rank with attached DRAM. |
| 7842 | * |
| 7843 | * For all ranks j that do not have attached DRAM, set |
| 7844 | * LMC(0)_RLEVEL_RANKj = LMC(0)_RLEVEL_RANKi. |
| 7845 | * |
| 7846 | * This read-leveling sequence can help select the proper CN70XX ODT |
| 7847 | * resistance value (LMC(0)_COMP_CTL2[RODT_CTL]). A hardware-generated |
| 7848 | * LMC(0)_RLEVEL_RANKi[BYTEj] value (for a used byte lane j) that is |
| 7849 | * drastically different from a neighboring LMC(0)_RLEVEL_RANKi[BYTEk] |
| 7850 | * (for a used byte lane k) can indicate that the CN70XX ODT value is |
| 7851 | * bad. It is possible to simultaneously optimize both |
| 7852 | * LMC(0)_COMP_CTL2[RODT_CTL] and LMC(0)_RLEVEL_RANKn[BYTE*] values by |
| 7853 | * performing this read-leveling sequence for several |
| 7854 | * LMC(0)_COMP_CTL2[RODT_CTL] values and selecting the one with the |
| 7855 | * best LMC(0)_RLEVEL_RANKn[BYTE*] profile for the ranks. |
| 7856 | */ |
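| | /*
| |  * Sketch of the per-rank handshake described above, reduced to
| |  * pseudo-code (field spellings follow the cn78xx union layout used
| |  * elsewhere in this file and are indicative only; the real sequence
| |  * is driven through ddr_init_seq()/rodt_loop() further below):
| |  *
| |  *   cfg.cn78xx.rankmask = 1 << rankx;
| |  *   lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
| |  *   kick off the read-leveling sequence (SEQ_SEL, INIT_START = 1)
| |  *   do {
| |  *           rl_rank.u64 = lmc_rd(priv,
| |  *                                CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
| |  *   } while (rl_rank.cn78xx.status == 2);   status 2 == in progress
| |  */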
| 7857 | |
| 7858 | rl_rodt_err = 0; |
| 7859 | rl_dbg_loops = 1; |
| 7860 | saved_int_zqcs_dis = 0; |
| 7861 | max_adj_rl_del_inc = 0; |
| 7862 | rl_print = RLEVEL_PRINTALL_DEFAULT; |
| 7863 | |
| 7864 | #ifdef ENABLE_HARDCODED_RLEVEL |
| 7865 | char part_number[21] = {0};
| 7866 | #endif /* ENABLE_HARDCODED_RLEVEL */ |
| 7867 | |
| 7868 | pbm_lowsum_limit = 5; // FIXME: is this a good default? |
| 7869 | // FIXME: PBM skip for RODT 240 and 34 |
| 7870 | pbm_rodt_skip = (1U << ddr4_rodt_ctl_240_ohm) | |
| 7871 | (1U << ddr4_rodt_ctl_34_ohm); |
| 7872 | |
| 7873 | disable_rank_majority = 0; // control rank majority processing |
| 7874 | |
| 7875 | // default bump mask for DDR4 is 11b (only delays ending in binary 11),
| 7876 | // except CN73xx which uses 1b (all odd delays); DISABLE for DDR3
| 7877 | rldelay_bump_incr = 0; |
| 7878 | disable_rlv_bump_this_byte = 0; |
| 7879 | |
| 7880 | enable_rldelay_bump = (ddr_type == DDR4_DRAM) ? |
| 7881 | ((octeon_is_cpuid(OCTEON_CN73XX)) ? 1 : 3) : 0; |
| 7882 | |
| 7883 | s = lookup_env(priv, "ddr_disable_rank_majority"); |
| 7884 | if (s) |
| 7885 | disable_rank_majority = !!simple_strtoul(s, NULL, 0); |
| 7886 | |
| 7887 | s = lookup_env(priv, "ddr_pbm_lowsum_limit"); |
| 7888 | if (s) |
| 7889 | pbm_lowsum_limit = simple_strtoul(s, NULL, 0); |
| 7890 | |
| 7891 | s = lookup_env(priv, "ddr_pbm_rodt_skip"); |
| 7892 | if (s) |
| 7893 | pbm_rodt_skip = simple_strtoul(s, NULL, 0); |
| 7894 | memset(rank_perf, 0, sizeof(rank_perf)); |
| 7895 | |
| 7896 | ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); |
| 7897 | save_ddr2t = ctl.cn78xx.ddr2t; |
| 7898 | |
| 7899 | cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num)); |
| 7900 | ecc_ena = cfg.cn78xx.ecc_ena; |
| 7901 | |
| 7902 | s = lookup_env(priv, "ddr_rlevel_2t"); |
| 7903 | if (s) |
| 7904 | ctl.cn78xx.ddr2t = simple_strtoul(s, NULL, 0); |
| 7905 | |
| 7906 | lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64); |
| 7907 | |
| 7908 | debug("LMC%d: Performing Read-Leveling\n", if_num); |
| 7909 | |
| 7910 | rl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num)); |
| 7911 | |
| 7912 | rl_samples = c_cfg->rlevel_average_loops; |
| 7913 | if (rl_samples == 0) { |
| 7914 | rl_samples = RLEVEL_SAMPLES_DEFAULT; |
| 7915 | // up the samples for these cases |
| 7916 | if (dimm_count == 1 || num_ranks == 1) |
| 7917 | rl_samples = rl_samples * 2 + 1; |
| 7918 | } |
| 7919 | |
| 7920 | rl_compute = c_cfg->rlevel_compute; |
| 7921 | rl_ctl.cn78xx.offset_en = c_cfg->offset_en; |
| 7922 | rl_ctl.cn78xx.offset = spd_rdimm |
| 7923 | ? c_cfg->offset_rdimm |
| 7924 | : c_cfg->offset_udimm; |
| 7925 | |
| 7926 | int value = 1; // should ALWAYS be set |
| 7927 | |
| 7928 | s = lookup_env(priv, "ddr_rlevel_delay_unload"); |
| 7929 | if (s) |
| 7930 | value = !!simple_strtoul(s, NULL, 0); |
| 7931 | rl_ctl.cn78xx.delay_unload_0 = value; |
| 7932 | rl_ctl.cn78xx.delay_unload_1 = value; |
| 7933 | rl_ctl.cn78xx.delay_unload_2 = value; |
| 7934 | rl_ctl.cn78xx.delay_unload_3 = value; |
| 7935 | |
| 7936 | // use OR_DIS=1 to try for better results |
| 7937 | rl_ctl.cn78xx.or_dis = 1; |
| 7938 | |
| 7939 | /* |
| 7940 | * If we will be switching to 32-bit mode, level based on only
| 7941 | * four bits, because there are only 4 ECC bits.
| 7942 | */ |
| 7943 | rl_ctl.cn78xx.bitmask = (if_64b) ? 0xFF : 0x0F; |
| 7944 | |
| 7945 | // allow overrides |
| 7946 | s = lookup_env(priv, "ddr_rlevel_ctl_or_dis"); |
| 7947 | if (s) |
| 7948 | rl_ctl.cn78xx.or_dis = simple_strtoul(s, NULL, 0); |
| 7949 | |
| 7950 | s = lookup_env(priv, "ddr_rlevel_ctl_bitmask"); |
| 7951 | if (s) |
| 7952 | rl_ctl.cn78xx.bitmask = simple_strtoul(s, NULL, 0); |
| 7953 | |
| 7954 | rl_comp_offs = spd_rdimm |
| 7955 | ? c_cfg->rlevel_comp_offset_rdimm |
| 7956 | : c_cfg->rlevel_comp_offset_udimm; |
| 7957 | s = lookup_env(priv, "ddr_rlevel_comp_offset"); |
| 7958 | if (s) |
| 7959 | rl_comp_offs = strtoul(s, NULL, 0); |
| 7960 | |
| 7961 | s = lookup_env(priv, "ddr_rlevel_offset"); |
| 7962 | if (s) |
| 7963 | rl_ctl.cn78xx.offset = simple_strtoul(s, NULL, 0); |
| 7964 | |
| 7965 | s = lookup_env(priv, "ddr_rlevel_offset_en"); |
| 7966 | if (s) |
| 7967 | rl_ctl.cn78xx.offset_en = simple_strtoul(s, NULL, 0); |
| 7968 | |
| 7969 | s = lookup_env(priv, "ddr_rlevel_ctl"); |
| 7970 | if (s) |
| 7971 | rl_ctl.u64 = simple_strtoul(s, NULL, 0); |
| 7972 | |
| 7973 | lmc_wr(priv, |
| 7974 | CVMX_LMCX_RLEVEL_CTL(if_num), |
| 7975 | rl_ctl.u64); |
| 7976 | |
| 7977 | // do this here so we can look at final RLEVEL_CTL[offset] setting... |
| 7978 | s = lookup_env(priv, "ddr_enable_rldelay_bump"); |
| 7979 | if (s) { |
| 7980 | // also use as mask bits |
| 7981 | enable_rldelay_bump = strtoul(s, NULL, 0); |
| 7982 | } |
| 7983 | |
| 7984 | if (enable_rldelay_bump != 0) |
| 7985 | rldelay_bump_incr = (rl_ctl.cn78xx.offset == 1) ? -1 : 1; |
| 7986 | |
| 7987 | s = lookup_env(priv, "ddr%d_rlevel_debug_loops", if_num); |
| 7988 | if (s) |
| 7989 | rl_dbg_loops = simple_strtoul(s, NULL, 0); |
| 7990 | |
| 7991 | s = lookup_env(priv, "ddr_rtt_nom_auto"); |
| 7992 | if (s) |
| 7993 | ddr_rtt_nom_auto = !!simple_strtoul(s, NULL, 0); |
| 7994 | |
| 7995 | s = lookup_env(priv, "ddr_rlevel_average"); |
| 7996 | if (s) |
| 7997 | rl_samples = simple_strtoul(s, NULL, 0); |
| 7998 | |
| 7999 | s = lookup_env(priv, "ddr_rlevel_compute"); |
| 8000 | if (s) |
| 8001 | rl_compute = simple_strtoul(s, NULL, 0); |
| 8002 | |
| 8003 | s = lookup_env(priv, "ddr_rlevel_printall"); |
| 8004 | if (s) |
| 8005 | rl_print = simple_strtoul(s, NULL, 0); |
| 8006 | |
| 8007 | debug("RLEVEL_CTL : 0x%016llx\n", |
| 8008 | rl_ctl.u64); |
| 8009 | debug("RLEVEL_OFFSET : %6d\n", |
| 8010 | rl_ctl.cn78xx.offset); |
| 8011 | debug("RLEVEL_OFFSET_EN : %6d\n", |
| 8012 | rl_ctl.cn78xx.offset_en); |
| 8013 | |
| 8014 | /* |
| 8015 | * The purpose for the indexed table is to sort the settings |
| 8016 | * by the ohm value to simplify the testing when incrementing |
| 8017 | * through the settings. (index => ohms) 1=120, 2=60, 3=40, |
| 8018 | * 4=30, 5=20 |
| 8019 | */ |
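| | /*
| |  * Example sweep with the defaults below (min_rtt_nom_idx = 1,
| |  * max_rtt_nom_idx = 5): the rtt_idx loop walks indices 1..5, i.e.
| |  * RTT_NOM values of 120, 60, 40, 30 and 20 ohms, from the weakest
| |  * to the strongest termination.
| |  */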
| 8020 | min_rtt_nom_idx = (c_cfg->min_rtt_nom_idx == 0) ? |
| 8021 | 1 : c_cfg->min_rtt_nom_idx; |
| 8022 | max_rtt_nom_idx = (c_cfg->max_rtt_nom_idx == 0) ? |
| 8023 | 5 : c_cfg->max_rtt_nom_idx; |
| 8024 | |
| 8025 | min_rodt_ctl = (c_cfg->min_rodt_ctl == 0) ? 1 : c_cfg->min_rodt_ctl; |
| 8026 | max_rodt_ctl = (c_cfg->max_rodt_ctl == 0) ? 5 : c_cfg->max_rodt_ctl; |
| 8027 | |
| 8028 | s = lookup_env(priv, "ddr_min_rodt_ctl"); |
| 8029 | if (s) |
| 8030 | min_rodt_ctl = simple_strtoul(s, NULL, 0); |
| 8031 | |
| 8032 | s = lookup_env(priv, "ddr_max_rodt_ctl"); |
| 8033 | if (s) |
| 8034 | max_rodt_ctl = simple_strtoul(s, NULL, 0); |
| 8035 | |
| 8036 | s = lookup_env(priv, "ddr_min_rtt_nom_idx"); |
| 8037 | if (s) |
| 8038 | min_rtt_nom_idx = simple_strtoul(s, NULL, 0); |
| 8039 | |
| 8040 | s = lookup_env(priv, "ddr_max_rtt_nom_idx"); |
| 8041 | if (s) |
| 8042 | max_rtt_nom_idx = simple_strtoul(s, NULL, 0); |
| 8043 | |
| 8044 | #ifdef ENABLE_HARDCODED_RLEVEL |
| 8045 | if (c_cfg->rl_tbl) { |
| 8046 | /* Check for hard-coded read-leveling settings */ |
| 8047 | get_dimm_part_number(part_number, &dimm_config_table[0], |
| 8048 | 0, ddr_type); |
| 8049 | for (rankx = 0; rankx < dimm_count * 4; rankx++) { |
| 8050 | if (!(rank_mask & (1 << rankx))) |
| 8051 | continue; |
| 8052 | |
| 8053 | rl_rank.u64 = lmc_rd(priv, |
| 8054 | CVMX_LMCX_RLEVEL_RANKX(rankx, |
| 8055 | if_num)); |
| 8056 | |
| 8057 | i = 0; |
| 8058 | while (c_cfg->rl_tbl[i].part) { |
| 8059 | debug("DIMM part number:\"%s\", SPD: \"%s\"\n", |
| 8060 | c_cfg->rl_tbl[i].part, part_number); |
| 8061 | if ((strcmp(part_number, |
| 8062 | c_cfg->rl_tbl[i].part) == 0) && |
| 8063 | (abs(c_cfg->rl_tbl[i].speed - |
| 8064 | 2 * ddr_hertz / (1000 * 1000)) < 10)) { |
| 8065 | debug("Using hard-coded read leveling for DIMM part number: \"%s\"\n", |
| 8066 | part_number); |
| 8067 | rl_rank.u64 = |
| 8068 | c_cfg->rl_tbl[i].rl_rank[if_num][rankx]; |
| 8069 | lmc_wr(priv, |
| 8070 | CVMX_LMCX_RLEVEL_RANKX(rankx, |
| 8071 | if_num), |
| 8072 | rl_rank.u64); |
| 8073 | rl_rank.u64 = |
| 8074 | lmc_rd(priv, |
| 8075 | CVMX_LMCX_RLEVEL_RANKX(rankx, |
| 8076 | if_num)); |
| 8077 | display_rl(if_num, rl_rank, rankx); |
| 8078 | /* Disable h/w read-leveling */ |
| 8079 | rl_dbg_loops = 0; |
| 8080 | break; |
| 8081 | } |
| 8082 | ++i; |
| 8083 | } |
| 8084 | } |
| 8085 | } |
| 8086 | #endif /* ENABLE_HARDCODED_RLEVEL */ |
| 8087 | |
| 8088 | max_adj_rl_del_inc = c_cfg->maximum_adjacent_rlevel_delay_increment; |
| 8089 | s = lookup_env(priv, "ddr_maximum_adjacent_rlevel_delay_increment"); |
| 8090 | if (s) |
| 8091 | max_adj_rl_del_inc = strtoul(s, NULL, 0); |
| 8092 | |
| 8093 | while (rl_dbg_loops--) { |
| 8094 | union cvmx_lmcx_modereg_params1 mp1; |
| 8095 | union cvmx_lmcx_comp_ctl2 cc2; |
| 8096 | |
| 8097 | /* Initialize the error scoreboard */ |
| 8098 | memset(rl_score, 0, sizeof(rl_score)); |
| 8099 | |
| 8100 | cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); |
| 8101 | saved_ddr__ptune = cc2.cn78xx.ddr__ptune; |
| 8102 | saved_ddr__ntune = cc2.cn78xx.ddr__ntune; |
| 8103 | |
| 8104 | /* Disable dynamic compensation settings */ |
| 8105 | if (rl_comp_offs != 0) { |
| 8106 | cc2.cn78xx.ptune = saved_ddr__ptune; |
| 8107 | cc2.cn78xx.ntune = saved_ddr__ntune; |
| 8108 | |
| 8109 | /* |
| 8110 | * Round up the ptune calculation to bias the odd |
| 8111 | * cases toward ptune |
| 8112 | */ |
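| | /*
| |  * Example: rl_comp_offs = 3 gives ptune += divide_roundup(3, 2)
| |  * = 2 and ntune -= 3 / 2 = 1, so the odd remainder is applied to
| |  * ptune as described above.
| |  */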
| 8113 | cc2.cn78xx.ptune += divide_roundup(rl_comp_offs, 2); |
| 8114 | cc2.cn78xx.ntune -= rl_comp_offs / 2; |
| 8115 | |
| 8116 | ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); |
| 8117 | saved_int_zqcs_dis = ctl.s.int_zqcs_dis; |
| 8118 | /* Disable ZQCS while in bypass. */ |
| 8119 | ctl.s.int_zqcs_dis = 1; |
| 8120 | lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64); |
| 8121 | |
| 8122 | cc2.cn78xx.byp = 1; /* Enable bypass mode */ |
| 8123 | lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64); |
| 8124 | lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); |
| 8125 | /* Read again */ |
| 8126 | cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); |
| 8127 | debug("DDR__PTUNE/DDR__NTUNE : %d/%d\n", |
| 8128 | cc2.cn78xx.ddr__ptune, cc2.cn78xx.ddr__ntune); |
| 8129 | } |
| 8130 | |
| 8131 | mp1.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num)); |
| 8132 | |
| 8133 | for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; |
| 8134 | ++rtt_idx) { |
| 8135 | rtt_nom = imp_val->rtt_nom_table[rtt_idx]; |
| 8136 | |
| 8137 | /* |
| 8138 | * When the read ODT mask is zero, dyn_rtt_nom_mask is
| 8139 | * zero and RTT_NOM will not be changing during
| 8140 | * read-leveling. Since the value is fixed we only need |
| 8141 | * to test it once. |
| 8142 | */ |
| 8143 | if (dyn_rtt_nom_mask == 0) { |
| 8144 | // flag not to print NOM ohms |
| 8145 | print_nom_ohms = -1; |
| 8146 | } else { |
| 8147 | if (dyn_rtt_nom_mask & 1) |
| 8148 | mp1.s.rtt_nom_00 = rtt_nom; |
| 8149 | if (dyn_rtt_nom_mask & 2) |
| 8150 | mp1.s.rtt_nom_01 = rtt_nom; |
| 8151 | if (dyn_rtt_nom_mask & 4) |
| 8152 | mp1.s.rtt_nom_10 = rtt_nom; |
| 8153 | if (dyn_rtt_nom_mask & 8) |
| 8154 | mp1.s.rtt_nom_11 = rtt_nom; |
| 8155 | // FIXME? rank 0 ohms always? |
| 8156 | print_nom_ohms = |
| 8157 | imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00]; |
| 8158 | } |
| 8159 | |
| 8160 | lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num), |
| 8161 | mp1.u64); |
| 8162 | |
| 8163 | if (print_nom_ohms >= 0 && rl_print > 1) { |
| 8164 | debug("\n"); |
| 8165 | debug("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", |
| 8166 | imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11], |
| 8167 | imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10], |
| 8168 | imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01], |
| 8169 | imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00], |
| 8170 | mp1.s.rtt_nom_11, |
| 8171 | mp1.s.rtt_nom_10, |
| 8172 | mp1.s.rtt_nom_01, |
| 8173 | mp1.s.rtt_nom_00); |
| 8174 | } |
| 8175 | |
| 8176 | ddr_init_seq(priv, rank_mask, if_num); |
| 8177 | |
| 8178 | // Try RANK outside RODT to rearrange the output... |
| 8179 | for (rankx = 0; rankx < dimm_count * 4; rankx++) { |
| 8180 | if (!(rank_mask & (1 << rankx))) |
| 8181 | continue; |
| 8182 | |
| 8183 | for (rodt_ctl = max_rodt_ctl; |
| 8184 | rodt_ctl >= min_rodt_ctl; --rodt_ctl) |
| 8185 | rodt_loop(priv, rankx, rl_score); |
| 8186 | } |
| 8187 | } |
| 8188 | |
| 8189 | /* Re-enable dynamic compensation settings. */ |
| 8190 | if (rl_comp_offs != 0) { |
| 8191 | cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); |
| 8192 | |
| 8193 | cc2.cn78xx.ptune = 0; |
| 8194 | cc2.cn78xx.ntune = 0; |
| 8195 | cc2.cn78xx.byp = 0; /* Disable bypass mode */ |
| 8196 | lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64); |
| 8197 | /* Read once */ |
| 8198 | lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); |
| 8199 | |
| 8200 | /* Read again */ |
| 8201 | cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); |
| 8202 | debug("DDR__PTUNE/DDR__NTUNE : %d/%d\n", |
| 8203 | cc2.cn78xx.ddr__ptune, cc2.cn78xx.ddr__ntune); |
| 8204 | |
| 8205 | ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); |
| 8206 | /* Restore original setting */ |
| 8207 | ctl.s.int_zqcs_dis = saved_int_zqcs_dis; |
| 8208 | lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64); |
| 8209 | } |
| 8210 | |
| 8211 | int override_compensation = 0; |
| 8212 | |
| 8213 | s = lookup_env(priv, "ddr__ptune"); |
| 8214 | if (s) |
| 8215 | saved_ddr__ptune = strtoul(s, NULL, 0); |
| 8216 | |
| 8217 | s = lookup_env(priv, "ddr__ntune"); |
| 8218 | if (s) { |
| 8219 | saved_ddr__ntune = strtoul(s, NULL, 0); |
| 8220 | override_compensation = 1; |
| 8221 | } |
| 8222 | |
| 8223 | if (override_compensation) { |
| 8224 | cc2.cn78xx.ptune = saved_ddr__ptune; |
| 8225 | cc2.cn78xx.ntune = saved_ddr__ntune; |
| 8226 | |
| 8227 | ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); |
| 8228 | saved_int_zqcs_dis = ctl.s.int_zqcs_dis; |
| 8229 | /* Disable ZQCS while in bypass. */ |
| 8230 | ctl.s.int_zqcs_dis = 1; |
| 8231 | lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64); |
| 8232 | |
| 8233 | cc2.cn78xx.byp = 1; /* Enable bypass mode */ |
| 8234 | lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64); |
| 8235 | /* Read again */ |
| 8236 | cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); |
| 8237 | |
| 8238 | debug("DDR__PTUNE/DDR__NTUNE : %d/%d\n", |
| 8239 | cc2.cn78xx.ptune, cc2.cn78xx.ntune); |
| 8240 | } |
| 8241 | |
| 8242 | /* Evaluation block */ |
| 8243 | /* Still at initial value? */ |
| 8244 | int best_rodt_score = DEFAULT_BEST_RANK_SCORE; |
| 8245 | int auto_rodt_ctl = 0; |
| 8246 | int auto_rtt_nom = 0; |
| 8247 | int rodt_score; |
| 8248 | |
| 8249 | rodt_row_skip_mask = 0; |
| 8250 | |
| 8251 | // just add specific RODT rows to the skip mask for DDR4 |
| 8252 | // at this time... |
| 8253 | if (ddr_type == DDR4_DRAM) { |
| 8254 | // skip RODT row 34 ohms for all DDR4 types |
| 8255 | rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_34_ohm); |
| 8256 | // skip RODT row 40 ohms for all DDR4 types |
| 8257 | rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_40_ohm); |
| 8258 | // For now, do not skip RODT row 40 or 48 ohm when |
| 8259 | // ddr_hertz is above 1075 MHz |
| 8260 | if (ddr_hertz > 1075000000) { |
| 8261 | // noskip RODT row 40 ohms |
| 8262 | rodt_row_skip_mask &= |
| 8263 | ~(1 << ddr4_rodt_ctl_40_ohm); |
| 8264 | // noskip RODT row 48 ohms |
| 8265 | rodt_row_skip_mask &= |
| 8266 | ~(1 << ddr4_rodt_ctl_48_ohm); |
| 8267 | } |
| 8268 | // For now, do not skip RODT row 48 ohm for 2Rx4 |
| 8269 | // stacked die DIMMs |
| 8270 | if (is_stacked_die && num_ranks == 2 && |
| 8271 | dram_width == 4) { |
| 8272 | // noskip RODT row 48 ohms |
| 8273 | rodt_row_skip_mask &= |
| 8274 | ~(1 << ddr4_rodt_ctl_48_ohm); |
| 8275 | } |
| 8276 | // for now, leave all rows eligible when we have |
| 8277 | // mini-DIMMs... |
| 8278 | if (spd_dimm_type == 5 || spd_dimm_type == 6) |
| 8279 | rodt_row_skip_mask = 0; |
| 8280 | // for now, leave all rows eligible when we have |
| 8281 | // a 2-slot 1-rank config |
| 8282 | if (dimm_count == 2 && num_ranks == 1) |
| 8283 | rodt_row_skip_mask = 0; |
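| | // Illustrative outcome: for a hypothetical DDR4-2400 setup
| | // (ddr_hertz = 1200000000, plain 2Rx8 RDIMMs in one slot), the
| | // mask ends up skipping only the 34 ohm row: 34 and 40 are added
| | // first, the >1075 MHz clause then removes 40 (and 48, which was
| | // never set), and none of the stacked-die, mini-DIMM or
| | // 2-slot/1-rank clauses apply.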
| 8284 | |
| 8285 | debug("Evaluating Read-Leveling Scoreboard for AUTO settings.\n"); |
| 8286 | for (rtt_idx = min_rtt_nom_idx; |
| 8287 | rtt_idx <= max_rtt_nom_idx; ++rtt_idx) { |
| 8288 | rtt_nom = imp_val->rtt_nom_table[rtt_idx]; |
| 8289 | |
| 8290 | for (rodt_ctl = max_rodt_ctl; |
| 8291 | rodt_ctl >= min_rodt_ctl; --rodt_ctl) { |
| 8292 | rodt_score = 0; |
| 8293 | for (rankx = 0; rankx < dimm_count * 4; |
| 8294 | rankx++) { |
| 8295 | if (!(rank_mask & (1 << rankx))) |
| 8296 | continue; |
| 8297 | |
| 8298 | debug("rl_score[rtt_nom=%d][rodt_ctl=%d][rankx=%d].score:%d\n", |
| 8299 | rtt_nom, rodt_ctl, rankx, |
| 8300 | rl_score[rtt_nom][rodt_ctl][rankx].score); |
| 8301 | rodt_score += |
| 8302 | rl_score[rtt_nom][rodt_ctl][rankx].score; |
| 8303 | } |
| 8304 | // FIXME: do we need to skip RODT rows |
| 8305 | // here, like we do below in the |
| 8306 | // by-RANK settings? |
| 8307 | |
| 8308 | /* |
| 8309 | * When using automatic ODT settings use |
| 8310 | * the ODT settings associated with the |
| 8311 | * best score for all of the tested ODT |
| 8312 | * combinations. |
| 8313 | */ |
| 8314 | |
| 8315 | if (rodt_score < best_rodt_score || |
| 8316 | (rodt_score == best_rodt_score && |
| 8317 | (imp_val->rodt_ohms[rodt_ctl] > |
| 8318 | imp_val->rodt_ohms[auto_rodt_ctl]))) { |
| 8319 | debug("AUTO: new best score for rodt:%d (%d), new score:%d, previous score:%d\n", |
| 8320 | rodt_ctl, |
| 8321 | imp_val->rodt_ohms[rodt_ctl], |
| 8322 | rodt_score, |
| 8323 | best_rodt_score); |
| 8324 | best_rodt_score = rodt_score; |
| 8325 | auto_rodt_ctl = rodt_ctl; |
| 8326 | auto_rtt_nom = rtt_nom; |
| 8327 | } |
| 8328 | } |
| 8329 | } |
| 8330 | |
| 8331 | mp1.u64 = lmc_rd(priv, |
| 8332 | CVMX_LMCX_MODEREG_PARAMS1(if_num)); |
| 8333 | |
| 8334 | if (ddr_rtt_nom_auto) { |
| 8335 | /* Store the automatically set RTT_NOM value */ |
| 8336 | if (dyn_rtt_nom_mask & 1) |
| 8337 | mp1.s.rtt_nom_00 = auto_rtt_nom; |
| 8338 | if (dyn_rtt_nom_mask & 2) |
| 8339 | mp1.s.rtt_nom_01 = auto_rtt_nom; |
| 8340 | if (dyn_rtt_nom_mask & 4) |
| 8341 | mp1.s.rtt_nom_10 = auto_rtt_nom; |
| 8342 | if (dyn_rtt_nom_mask & 8) |
| 8343 | mp1.s.rtt_nom_11 = auto_rtt_nom; |
| 8344 | } else { |
| 8345 | /* |
| 8346 | * restore the manual settings to the register |
| 8347 | */ |
| 8348 | mp1.s.rtt_nom_00 = default_rtt_nom[0]; |
| 8349 | mp1.s.rtt_nom_01 = default_rtt_nom[1]; |
| 8350 | mp1.s.rtt_nom_10 = default_rtt_nom[2]; |
| 8351 | mp1.s.rtt_nom_11 = default_rtt_nom[3]; |
| 8352 | } |
| 8353 | |
| 8354 | lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num), |
| 8355 | mp1.u64); |
| 8356 | debug("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", |
| 8357 | imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11], |
| 8358 | imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10], |
| 8359 | imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01], |
| 8360 | imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00], |
| 8361 | mp1.s.rtt_nom_11, |
| 8362 | mp1.s.rtt_nom_10, |
| 8363 | mp1.s.rtt_nom_01, |
| 8364 | mp1.s.rtt_nom_00); |
| 8365 | |
| 8366 | debug("RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", |
| 8367 | imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 3)], |
| 8368 | imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 2)], |
| 8369 | imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 1)], |
| 8370 | imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 0)], |
| 8371 | extr_wr(mp1.u64, 3), |
| 8372 | extr_wr(mp1.u64, 2), |
| 8373 | extr_wr(mp1.u64, 1), |
| 8374 | extr_wr(mp1.u64, 0)); |
| 8375 | |
| 8376 | debug("DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", |
| 8377 | imp_val->dic_ohms[mp1.s.dic_11], |
| 8378 | imp_val->dic_ohms[mp1.s.dic_10], |
| 8379 | imp_val->dic_ohms[mp1.s.dic_01], |
| 8380 | imp_val->dic_ohms[mp1.s.dic_00], |
| 8381 | mp1.s.dic_11, |
| 8382 | mp1.s.dic_10, |
| 8383 | mp1.s.dic_01, |
| 8384 | mp1.s.dic_00); |
| 8385 | |
| 8386 | if (ddr_type == DDR4_DRAM) { |
| 8387 | union cvmx_lmcx_modereg_params2 mp2; |
| 8388 | /* |
| 8389 | * We must read the CSR, and not depend on |
| 8390 | * odt_config[odt_idx].odt_mask2, since we could |
| 8391 | * have overridden values with envvars. |
| 8392 | * NOTE: this corrects the printout, since the |
| 8393 | * CSR is not written with the old values... |
| 8394 | */ |
| 8395 | mp2.u64 = lmc_rd(priv, |
| 8396 | CVMX_LMCX_MODEREG_PARAMS2(if_num)); |
| 8397 | |
| 8398 | debug("RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n", |
| 8399 | imp_val->rtt_nom_ohms[mp2.s.rtt_park_11], |
| 8400 | imp_val->rtt_nom_ohms[mp2.s.rtt_park_10], |
| 8401 | imp_val->rtt_nom_ohms[mp2.s.rtt_park_01], |
| 8402 | imp_val->rtt_nom_ohms[mp2.s.rtt_park_00], |
| 8403 | mp2.s.rtt_park_11, |
| 8404 | mp2.s.rtt_park_10, |
| 8405 | mp2.s.rtt_park_01, |
| 8406 | mp2.s.rtt_park_00); |
| 8407 | |
| 8408 | debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n", |
| 8409 | "VREF_RANGE", |
| 8410 | mp2.s.vref_range_11, |
| 8411 | mp2.s.vref_range_10, |
| 8412 | mp2.s.vref_range_01, |
| 8413 | mp2.s.vref_range_00); |
| 8414 | |
| 8415 | debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n", |
| 8416 | "VREF_VALUE", |
| 8417 | mp2.s.vref_value_11, |
| 8418 | mp2.s.vref_value_10, |
| 8419 | mp2.s.vref_value_01, |
| 8420 | mp2.s.vref_value_00); |
| 8421 | } |
| 8422 | |
| 8423 | cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); |
| 8424 | if (ddr_rodt_ctl_auto) { |
| 8425 | cc2.cn78xx.rodt_ctl = auto_rodt_ctl; |
| 8426 | } else { |
| 8427 | // back to the original setting |
| 8428 | cc2.cn78xx.rodt_ctl = default_rodt_ctl; |
| 8429 | } |
| 8430 | lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64); |
| 8431 | cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num)); |
| 8432 | debug("Read ODT_CTL : 0x%x (%d ohms)\n", |
| 8433 | cc2.cn78xx.rodt_ctl, |
| 8434 | imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]); |
| 8435 | |
| 8436 | /* |
| 8437 | * Use the delays associated with the best score for |
| 8438 | * each individual rank |
| 8439 | */ |
| 8440 | debug("Evaluating Read-Leveling Scoreboard for per-RANK settings.\n"); |
| 8441 | |
| 8442 | // this is the RANK MAJOR LOOP
| 8443 | for (rankx = 0; rankx < dimm_count * 4; rankx++) |
| 8444 | rank_major_loop(priv, rankx, rl_score); |
| 8445 | } /* Evaluation block */ |
| 8446 | } /* while(rl_dbg_loops--) */ |
| 8447 | |
| 8448 | ctl.cn78xx.ddr2t = save_ddr2t; |
| 8449 | lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64); |
| 8450 | ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); |
| 8451 | /* Display final 2T value */ |
| 8452 | debug("DDR2T : %6d\n", |
| 8453 | ctl.cn78xx.ddr2t); |
| 8454 | |
| 8455 | ddr_init_seq(priv, rank_mask, if_num); |
| 8456 | |
| 8457 | for (rankx = 0; rankx < dimm_count * 4; rankx++) { |
| 8458 | u64 value; |
| 8459 | int parameter_set = 0; |
| 8460 | |
| 8461 | if (!(rank_mask & (1 << rankx))) |
| 8462 | continue; |
| 8463 | |
| 8464 | rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, |
| 8465 | if_num)); |
| 8466 | |
| 8467 | for (i = 0; i < 9; ++i) { |
| 8468 | s = lookup_env(priv, "ddr%d_rlevel_rank%d_byte%d", |
| 8469 | if_num, rankx, i); |
| 8470 | if (s) { |
| 8471 | parameter_set |= 1; |
| 8472 | value = simple_strtoul(s, NULL, 0); |
| 8473 | |
| 8474 | upd_rl_rank(&rl_rank, i, value); |
| 8475 | } |
| 8476 | } |
| 8477 | |
| 8478 | s = lookup_env_ull(priv, "ddr%d_rlevel_rank%d", if_num, rankx); |
| 8479 | if (s) { |
| 8480 | parameter_set |= 1; |
| 8481 | value = simple_strtoull(s, NULL, 0); |
| 8482 | rl_rank.u64 = value; |
| 8483 | } |
| 8484 | |
| 8485 | if (parameter_set) { |
| 8486 | lmc_wr(priv, |
| 8487 | CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), |
| 8488 | rl_rank.u64); |
| 8489 | rl_rank.u64 = lmc_rd(priv, |
| 8490 | CVMX_LMCX_RLEVEL_RANKX(rankx, |
| 8491 | if_num)); |
| 8492 | display_rl(if_num, rl_rank, rankx); |
| 8493 | } |
| 8494 | } |
| 8495 | } |
| 8496 | |
| 8497 | int init_octeon3_ddr3_interface(struct ddr_priv *priv, |
| 8498 | struct ddr_conf *_ddr_conf, u32 _ddr_hertz, |
| 8499 | u32 cpu_hertz, u32 ddr_ref_hertz, int _if_num, |
| 8500 | u32 _if_mask) |
| 8501 | { |
| 8502 | union cvmx_lmcx_control ctrl; |
| 8503 | int ret; |
| 8504 | char *s; |
| 8505 | int i; |
| 8506 | |
| 8507 | if_num = _if_num; |
| 8508 | ddr_hertz = _ddr_hertz; |
| 8509 | ddr_conf = _ddr_conf; |
| 8510 | if_mask = _if_mask; |
| 8511 | odt_1rank_config = ddr_conf->odt_1rank_config; |
| 8512 | odt_2rank_config = ddr_conf->odt_2rank_config; |
| 8513 | odt_4rank_config = ddr_conf->odt_4rank_config; |
| 8514 | dimm_config_table = ddr_conf->dimm_config_table; |
| 8515 | c_cfg = &ddr_conf->custom_lmc_config; |
| 8516 | |
| 8517 | /* |
| 8518 | * Compute clock rates to the nearest picosecond. |
| 8519 | */ |
| 8520 | tclk_psecs = hertz_to_psecs(ddr_hertz); /* Clock in psecs */ |
| 8521 | eclk_psecs = hertz_to_psecs(cpu_hertz); /* Clock in psecs */ |
| 8522 | |
| 8523 | dimm_count = 0; |
| 8524 | /* Accumulate and report all the errors before giving up */ |
| 8525 | fatal_error = 0; |
| 8526 | |
| 8527 | /* Flag that indicates safe DDR settings should be used */ |
| 8528 | safe_ddr_flag = 0; |
| 8529 | if_64b = 1; /* Octeon II Default: 64bit interface width */ |
| 8530 | mem_size_mbytes = 0; |
| 8531 | bank_bits = 0; |
| 8532 | column_bits_start = 1; |
| 8533 | use_ecc = 1; |
| 8534 | min_cas_latency = 0, max_cas_latency = 0, override_cas_latency = 0; |
| 8535 | spd_package = 0; |
| 8536 | spd_rawcard = 0; |
| 8537 | spd_rawcard_aorb = 0; |
| 8538 | spd_rdimm_registers = 0; |
| 8539 | is_stacked_die = 0; |
| 8540 | is_3ds_dimm = 0; // 3DS |
| 8541 | lranks_per_prank = 1; // 3DS: logical ranks per package rank |
| 8542 | lranks_bits = 0; // 3DS: logical ranks bits |
| 8543 | die_capacity = 0; // in Mbits; only used for 3DS |
| 8544 | |
| 8545 | wl_mask_err = 0; |
| 8546 | dyn_rtt_nom_mask = 0; |
| 8547 | ddr_disable_chip_reset = 1; |
| 8548 | match_wl_rtt_nom = 0; |
| 8549 | |
| 8550 | internal_retries = 0; |
| 8551 | |
| 8552 | disable_deskew_training = 0; |
| 8553 | restart_if_dsk_incomplete = 0; |
| 8554 | last_lane = ((if_64b) ? 8 : 4) + use_ecc; |
| 8555 | |
| 8556 | disable_sequential_delay_check = 0; |
| 8557 | wl_print = WLEVEL_PRINTALL_DEFAULT; |
| 8558 | |
| 8559 | enable_by_rank_init = 1; // FIXME: default by-rank ON |
| 8560 | saved_rank_mask = 0; |
| 8561 | |
| 8562 | node = 0; |
| 8563 | |
| 8564 | memset(hwl_alts, 0, sizeof(hwl_alts)); |
| 8565 | |
| 8566 | /* |
| 8567 | * Initialize these to shut up the compiler. They are configured |
| 8568 | * and used only for DDR4 |
| 8569 | */ |
| 8570 | ddr4_trrd_lmin = 6000; |
| 8571 | ddr4_tccd_lmin = 6000; |
| 8572 | |
| 8573 | debug("\nInitializing node %d DDR interface %d, DDR Clock %d, DDR Reference Clock %d, CPUID 0x%08x\n", |
| 8574 | node, if_num, ddr_hertz, ddr_ref_hertz, read_c0_prid()); |
| 8575 | |
| 8576 | if (dimm_config_table[0].spd_addrs[0] == 0 && |
| 8577 | !dimm_config_table[0].spd_ptrs[0]) { |
| 8578 | printf("ERROR: No dimms specified in the dimm_config_table.\n"); |
| 8579 | return -1; |
| 8580 | } |
| 8581 | |
| 8582 | // allow some overrides to be done |
| 8583 | |
| 8584 | // this one controls several things related to DIMM geometry: HWL and RL |
| 8585 | disable_sequential_delay_check = c_cfg->disable_sequential_delay_check; |
| 8586 | s = lookup_env(priv, "ddr_disable_sequential_delay_check"); |
| 8587 | if (s) |
| 8588 | disable_sequential_delay_check = strtoul(s, NULL, 0); |
| 8589 | |
| 8590 | // this one controls whether chip RESET is done, or LMC init restarted |
| 8591 | // from step 6.9.6 |
| 8592 | s = lookup_env(priv, "ddr_disable_chip_reset"); |
| 8593 | if (s) |
| 8594 | ddr_disable_chip_reset = !!strtoul(s, NULL, 0); |
| 8595 | |
| 8596 | // this one controls whether Deskew Training is performed |
| 8597 | s = lookup_env(priv, "ddr_disable_deskew_training"); |
| 8598 | if (s) |
| 8599 | disable_deskew_training = !!strtoul(s, NULL, 0); |
| 8600 | |
| 8601 | if (ddr_verbose(priv)) { |
| 8602 | printf("DDR SPD Table:"); |
| 8603 | for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) { |
| 8604 | if (dimm_config_table[didx].spd_addrs[0] == 0) |
| 8605 | break; |
| 8606 | |
| 8607 | printf(" --ddr%dspd=0x%02x", if_num, |
| 8608 | dimm_config_table[didx].spd_addrs[0]); |
| 8609 | if (dimm_config_table[didx].spd_addrs[1] != 0) |
| 8610 | printf(",0x%02x", |
| 8611 | dimm_config_table[didx].spd_addrs[1]); |
| 8612 | } |
| 8613 | printf("\n"); |
| 8614 | } |
| 8615 | |
| 8616 | /* |
| 8617 | * Walk the DRAM Socket Configuration Table to see what is installed. |
| 8618 | */ |
| 8619 | for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) { |
| 8620 | /* Check for lower DIMM socket populated */ |
| 8621 | if (validate_dimm(priv, &dimm_config_table[didx], 0)) { |
| 8622 | if (ddr_verbose(priv)) |
| 8623 | report_dimm(&dimm_config_table[didx], 0, |
| 8624 | dimm_count, if_num); |
| 8625 | ++dimm_count; |
| 8626 | } else { |
| 8627 | break; |
| 8628 | } /* Finished when there is no lower DIMM */ |
| 8629 | } |
| 8630 | |
| 8631 | initialize_ddr_clock(priv, ddr_conf, cpu_hertz, ddr_hertz, |
| 8632 | ddr_ref_hertz, if_num, if_mask); |
| 8633 | |
| 8634 | if (!odt_1rank_config) |
| 8635 | odt_1rank_config = disable_odt_config; |
| 8636 | if (!odt_2rank_config) |
| 8637 | odt_2rank_config = disable_odt_config; |
| 8638 | if (!odt_4rank_config) |
| 8639 | odt_4rank_config = disable_odt_config; |
| 8640 | |
| 8641 | s = env_get("ddr_safe"); |
| 8642 | if (s) { |
| 8643 | safe_ddr_flag = !!simple_strtoul(s, NULL, 0); |
| 8644 | printf("Parameter found in environment. ddr_safe = %d\n", |
| 8645 | safe_ddr_flag); |
| 8646 | } |
| 8647 | |
| 8648 | if (dimm_count == 0) { |
| 8649 | printf("ERROR: DIMM 0 not detected.\n"); |
| 8650 | return -1;
| 8651 | } |
| 8652 | |
| 8653 | if (c_cfg->mode32b) |
| 8654 | if_64b = 0; |
| 8655 | |
| 8656 | s = lookup_env(priv, "if_64b"); |
| 8657 | if (s) |
| 8658 | if_64b = !!simple_strtoul(s, NULL, 0); |
| 8659 | |
| 8660 | if (if_64b == 1) { |
| 8661 | if (octeon_is_cpuid(OCTEON_CN70XX)) { |
| 8662 | printf("64-bit interface width is not supported for this Octeon model\n"); |
| 8663 | ++fatal_error; |
| 8664 | } |
| 8665 | } |
| 8666 | |
| 8667 | /* ddr_type only indicates DDR4 or DDR3 */ |
| 8668 | ddr_type = (read_spd(&dimm_config_table[0], 0, |
| 8669 | DDR4_SPD_KEY_BYTE_DEVICE_TYPE) == 0x0C) ? 4 : 3; |
| 8670 | debug("DRAM Device Type: DDR%d\n", ddr_type); |
| 8671 | |
| 8672 | if (ddr_type == DDR4_DRAM) { |
| 8673 | int spd_module_type; |
| 8674 | int asymmetric; |
| 8675 | const char *signal_load[4] = { "", "MLS", "3DS", "RSV" }; |
| 8676 | |
| 8677 | imp_val = &ddr4_impedence_val; |
| 8678 | |
| 8679 | spd_addr = |
| 8680 | read_spd(&dimm_config_table[0], 0, |
| 8681 | DDR4_SPD_ADDRESSING_ROW_COL_BITS); |
| 8682 | spd_org = |
| 8683 | read_spd(&dimm_config_table[0], 0, |
| 8684 | DDR4_SPD_MODULE_ORGANIZATION); |
| 8685 | spd_banks = |
| 8686 | 0xFF & read_spd(&dimm_config_table[0], 0, |
| 8687 | DDR4_SPD_DENSITY_BANKS); |
| 8688 | |
| 8689 | bank_bits = |
| 8690 | (2 + ((spd_banks >> 4) & 0x3)) + ((spd_banks >> 6) & 0x3); |
| 8691 | /* Controller can only address 4 bits. */ |
| 8692 | bank_bits = min((int)bank_bits, 4); |
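| | // Example decode (illustrative SPD byte): an 8Gb x8 DDR4 part with
| | // four bank groups of four banks reports spd_banks = 0x85, so
| | // bank_bits = (2 + 0) + 2 = 4, which is exactly the controller
| | // limit applied above.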
| 8693 | |
| 8694 | spd_package = |
| 8695 | 0xFF & read_spd(&dimm_config_table[0], 0,
| 8696 | DDR4_SPD_PACKAGE_TYPE); |
| 8697 | if (spd_package & 0x80) { // non-monolithic device |
| 8698 | is_stacked_die = ((spd_package & 0x73) == 0x11); |
| 8699 | debug("DDR4: Package Type 0x%02x (%s), %d die\n", |
| 8700 | spd_package, signal_load[(spd_package & 3)], |
| 8701 | ((spd_package >> 4) & 7) + 1); |
| 8702 | is_3ds_dimm = ((spd_package & 3) == 2); // is it 3DS? |
| 8703 | if (is_3ds_dimm) { // is it 3DS? |
| 8704 | lranks_per_prank = ((spd_package >> 4) & 7) + 1; |
| 8705 | // FIXME: should make sure it is only 2H or 4H |
| 8706 | // or 8H? |
| 8707 | lranks_bits = lranks_per_prank >> 1; |
| 8708 | if (lranks_bits == 4) |
| 8709 | lranks_bits = 3; |
| 8710 | } |
| 8711 | } else if (spd_package != 0) { |
| 8712 | // FIXME: print non-zero monolithic device definition |
| 8713 | debug("DDR4: Package Type MONOLITHIC: %d die, signal load %d\n", |
| 8714 | ((spd_package >> 4) & 7) + 1, (spd_package & 3)); |
| 8715 | } |
| 8716 | |
| 8717 | asymmetric = (spd_org >> 6) & 1; |
| 8718 | if (asymmetric) { |
| 8719 | int spd_secondary_pkg = |
| 8720 | read_spd(&dimm_config_table[0], 0, |
| 8721 | DDR4_SPD_SECONDARY_PACKAGE_TYPE); |
| 8722 | debug("DDR4: Module Organization: ASYMMETRICAL: Secondary Package Type 0x%02x\n", |
| 8723 | spd_secondary_pkg); |
| 8724 | } else { |
| 8725 | u64 bus_width = |
| 8726 | 8 << (0x07 & |
| 8727 | read_spd(&dimm_config_table[0], 0, |
| 8728 | DDR4_SPD_MODULE_MEMORY_BUS_WIDTH)); |
| 8729 | u64 ddr_width = 4 << ((spd_org >> 0) & 0x7); |
| 8730 | u64 module_cap; |
| 8731 | int shift = (spd_banks & 0x0F); |
| 8732 | |
| 8733 | die_capacity = (shift < 8) ? (256UL << shift) : |
| 8734 | ((12UL << (shift & 1)) << 10); |
| 8735 | debug("DDR4: Module Organization: SYMMETRICAL: capacity per die %d %cbit\n", |
| 8736 | (die_capacity > 512) ? (die_capacity >> 10) : |
| 8737 | die_capacity, (die_capacity > 512) ? 'G' : 'M'); |
| 8738 | module_cap = ((u64)die_capacity << 20) / 8UL * |
| 8739 | bus_width / ddr_width * |
| 8740 | (1UL + ((spd_org >> 3) & 0x7)); |
| 8741 | |
| 8742 | // is it 3DS? |
| 8743 | if (is_3ds_dimm) { |
| 8744 | module_cap *= (u64)(((spd_package >> 4) & 7) + |
| 8745 | 1); |
| 8746 | } |
| 8747 | debug("DDR4: Module Organization: SYMMETRICAL: capacity per module %lld GB\n", |
| 8748 | module_cap >> 30); |
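| | // Worked example (illustrative module): an 8Gb die (die_capacity
| | // = 8192 Mbit), 64-bit bus, x8 devices and 2 package ranks gives
| | // module_cap = 1 GB * (64 / 8) * 2 = 16 GB, so the debug line
| | // above reports 16.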
| 8749 | } |
| 8750 | |
| 8751 | spd_rawcard = |
| 8752 | 0xFF & read_spd(&dimm_config_table[0], 0, |
| 8753 | DDR4_SPD_REFERENCE_RAW_CARD); |
| 8754 | debug("DDR4: Reference Raw Card 0x%02x\n", spd_rawcard); |
| 8755 | |
| 8756 | spd_module_type = |
| 8757 | read_spd(&dimm_config_table[0], 0, |
| 8758 | DDR4_SPD_KEY_BYTE_MODULE_TYPE); |
| 8759 | if (spd_module_type & 0x80) { // HYBRID module |
| 8760 | debug("DDR4: HYBRID module, type %s\n", |
| 8761 | ((spd_module_type & 0x70) == |
| 8762 | 0x10) ? "NVDIMM" : "UNKNOWN"); |
| 8763 | } |
| 8764 | spd_thermal_sensor = |
| 8765 | read_spd(&dimm_config_table[0], 0, |
| 8766 | DDR4_SPD_MODULE_THERMAL_SENSOR); |
| 8767 | spd_dimm_type = spd_module_type & 0x0F; |
| 8768 | spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) || |
| 8769 | (spd_dimm_type == 8); |
| 8770 | if (spd_rdimm) { |
| 8771 | u16 spd_mfgr_id, spd_register_rev, spd_mod_attr; |
| 8772 | static const u16 manu_ids[4] = { |
| 8773 | 0xb380, 0x3286, 0x9780, 0xb304 |
| 8774 | }; |
| 8775 | static const char *manu_names[4] = { |
| 8776 | "XXX", "XXXXXXX", "XX", "XXXXX" |
| 8777 | }; |
| 8778 | int mc; |
| 8779 | |
| 8780 | spd_mfgr_id = |
| 8781 | (0xFFU & |
| 8782 | read_spd(&dimm_config_table[0], 0, |
| 8783 | DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB)) | |
| 8784 | ((0xFFU & |
| 8785 | read_spd(&dimm_config_table[0], 0, |
| 8786 | DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB)) |
| 8787 | << 8); |
| 8788 | spd_register_rev = |
| 8789 | 0xFFU & read_spd(&dimm_config_table[0], 0, |
| 8790 | DDR4_SPD_REGISTER_REVISION_NUMBER); |
| 8791 | for (mc = 0; mc < 4; mc++) |
| 8792 | if (manu_ids[mc] == spd_mfgr_id) |
| 8793 | break; |
| 8794 | |
| 8795 | debug("DDR4: RDIMM Register Manufacturer ID: %s, Revision: 0x%02x\n", |
| 8796 | (mc >= 4) ? "UNKNOWN" : manu_names[mc], |
| 8797 | spd_register_rev); |
| 8798 | |
| 8799 | // RAWCARD A or B must be bit 7=0 and bits 4-0 |
| 8800 | // either 00000(A) or 00001(B) |
| 8801 | spd_rawcard_aorb = ((spd_rawcard & 0x9fUL) <= 1); |
| 8802 | // RDIMM Module Attributes |
| 8803 | spd_mod_attr = |
| 8804 | 0xFFU & read_spd(&dimm_config_table[0], 0, |
| 8805 | DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE); |
| 8806 | spd_rdimm_registers = ((1 << (spd_mod_attr & 3)) >> 1); |
| 8807 | debug("DDR4: RDIMM Module Attributes (0x%02x): Register Type DDR4RCD%02d, DRAM rows %d, Registers %d\n", |
| 8808 | spd_mod_attr, (spd_mod_attr >> 4) + 1, |
| 8809 | ((1 << ((spd_mod_attr >> 2) & 3)) >> 1), |
| 8810 | spd_rdimm_registers); |
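| | // Example decode (illustrative byte): spd_mod_attr = 0x05 prints
| | // "Register Type DDR4RCD01, DRAM rows 1, Registers 1", since both
| | // two-bit count fields encode 0/1/2/4 via (1 << field) >> 1.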
| 8811 | } |
| 8812 | dimm_type_name = ddr4_dimm_types[spd_dimm_type]; |
| 8813 | } else { /* if (ddr_type == DDR4_DRAM) */ |
| 8814 | const char *signal_load[4] = { "UNK", "MLS", "SLS", "RSV" }; |
| 8815 | |
| 8816 | imp_val = &ddr3_impedence_val; |
| 8817 | |
| 8818 | spd_addr = |
| 8819 | read_spd(&dimm_config_table[0], 0, |
| 8820 | DDR3_SPD_ADDRESSING_ROW_COL_BITS); |
| 8821 | spd_org = |
| 8822 | read_spd(&dimm_config_table[0], 0, |
| 8823 | DDR3_SPD_MODULE_ORGANIZATION); |
| 8824 | spd_banks = |
| 8825 | read_spd(&dimm_config_table[0], 0, |
| 8826 | DDR3_SPD_DENSITY_BANKS) & 0xff; |
| 8827 | |
| 8828 | bank_bits = 3 + ((spd_banks >> 4) & 0x7); |
| 8829 | /* Controller can only address 3 bits. */ |
| 8830 | bank_bits = min((int)bank_bits, 3); |
| 8831 | spd_dimm_type = |
| 8832 | 0x0f & read_spd(&dimm_config_table[0], 0, |
| 8833 | DDR3_SPD_KEY_BYTE_MODULE_TYPE); |
| 8834 | spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) || |
| 8835 | (spd_dimm_type == 9); |
| 8836 | |
| 8837 | spd_package = |
| 8838 | 0xFF & read_spd(&dimm_config_table[0], 0, |
| 8839 | DDR3_SPD_SDRAM_DEVICE_TYPE); |
| 8840 | if (spd_package & 0x80) { // non-standard device |
| 8841 | debug("DDR3: Device Type 0x%02x (%s), %d die\n", |
| 8842 | spd_package, signal_load[(spd_package & 3)], |
| 8843 | ((1 << ((spd_package >> 4) & 7)) >> 1)); |
| 8844 | } else if (spd_package != 0) { |
| 8845 | // FIXME: print non-zero monolithic device definition |
| 8846 | debug("DDR3: Device Type MONOLITHIC: %d die, signal load %d\n", |
| 8847 | ((1 << ((spd_package >> 4) & 7)) >> 1),
| 8848 | (spd_package & 3)); |
| 8849 | } |
| 8850 | |
| 8851 | spd_rawcard = |
| 8852 | 0xFF & read_spd(&dimm_config_table[0], 0, |
| 8853 | DDR3_SPD_REFERENCE_RAW_CARD); |
| 8854 | debug("DDR3: Reference Raw Card 0x%02x\n", spd_rawcard); |
| 8855 | spd_thermal_sensor = |
| 8856 | read_spd(&dimm_config_table[0], 0, |
| 8857 | DDR3_SPD_MODULE_THERMAL_SENSOR); |
| 8858 | |
| 8859 | if (spd_rdimm) { |
| 8860 | int spd_mfgr_id, spd_register_rev, spd_mod_attr; |
| 8861 | |
| 8862 | spd_mfgr_id = |
| 8863 | (0xFFU & |
| 8864 | read_spd(&dimm_config_table[0], 0, |
| 8865 | DDR3_SPD_REGISTER_MANUFACTURER_ID_LSB)) | |
| 8866 | ((0xFFU & |
| 8867 | read_spd(&dimm_config_table[0], 0, |
| 8868 | DDR3_SPD_REGISTER_MANUFACTURER_ID_MSB)) |
| 8869 | << 8); |
| 8870 | spd_register_rev = |
| 8871 | 0xFFU & read_spd(&dimm_config_table[0], 0, |
| 8872 | DDR3_SPD_REGISTER_REVISION_NUMBER); |
| 8873 | debug("DDR3: RDIMM Register Manufacturer ID 0x%x Revision 0x%02x\n", |
| 8874 | spd_mfgr_id, spd_register_rev); |
| 8875 | // Module Attributes |
| 8876 | spd_mod_attr = |
| 8877 | 0xFFU & read_spd(&dimm_config_table[0], 0, |
| 8878 | DDR3_SPD_ADDRESS_MAPPING); |
| 8879 | spd_rdimm_registers = ((1 << (spd_mod_attr & 3)) >> 1); |
| 8880 | debug("DDR3: RDIMM Module Attributes (0x%02x): DRAM rows %d, Registers %d\n", |
| 8881 | spd_mod_attr, |
| 8882 | ((1 << ((spd_mod_attr >> 2) & 3)) >> 1), |
| 8883 | spd_rdimm_registers); |
| 8884 | } |
| 8885 | dimm_type_name = ddr3_dimm_types[spd_dimm_type]; |
| 8886 | } |
| 8887 | |
| 8888 | if (spd_thermal_sensor & 0x80) { |
| 8889 | debug("DDR%d: SPD: Thermal Sensor PRESENT\n", |
| 8890 | (ddr_type == DDR4_DRAM) ? 4 : 3); |
| 8891 | } |
| 8892 | |
| 8893 | debug("spd_addr : %#06x\n", spd_addr); |
| 8894 | debug("spd_org : %#06x\n", spd_org); |
| 8895 | debug("spd_banks : %#06x\n", spd_banks); |
| 8896 | |
| 8897 | row_bits = 12 + ((spd_addr >> 3) & 0x7); |
| 8898 | col_bits = 9 + ((spd_addr >> 0) & 0x7); |
| 8899 | |
| 8900 | num_ranks = 1 + ((spd_org >> 3) & 0x7); |
| 8901 | dram_width = 4 << ((spd_org >> 0) & 0x7); |
| 8902 | num_banks = 1 << bank_bits; |
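| | // Worked example (illustrative SPD bytes): spd_addr = 0x21 and
| | // spd_org = 0x09 decode to row_bits = 12 + 4 = 16, col_bits =
| | // 9 + 1 = 10, num_ranks = 1 + 1 = 2 and dram_width = 4 << 1 = 8,
| | // i.e. a 2-rank module built from x8 devices.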
| 8903 | |
| 8904 | s = lookup_env(priv, "ddr_num_ranks"); |
| 8905 | if (s) |
| 8906 | num_ranks = simple_strtoul(s, NULL, 0); |
| 8907 | |
| 8908 | s = lookup_env(priv, "ddr_enable_by_rank_init"); |
| 8909 | if (s) |
| 8910 | enable_by_rank_init = !!simple_strtoul(s, NULL, 0); |
| 8911 | |
| 8912 | // FIXME: for now, we can only handle a DDR4 2rank-1slot config |
| 8913 | // FIXME: also, by-rank init does not work correctly if 32-bit mode... |
| 8914 | if (enable_by_rank_init && (ddr_type != DDR4_DRAM || |
| 8915 | dimm_count != 1 || if_64b != 1 || |
| 8916 | num_ranks != 2)) |
| 8917 | enable_by_rank_init = 0; |
| 8918 | |
| 8919 | if (enable_by_rank_init) { |
| 8920 | struct dimm_odt_config *odt_config; |
| 8921 | union cvmx_lmcx_modereg_params1 mp1; |
| 8922 | union cvmx_lmcx_modereg_params2 modereg_params2; |
| 8923 | int by_rank_rodt, by_rank_wr, by_rank_park; |
| 8924 | |
| 8925 | // Do ODT settings changes which work best for 2R-1S configs |
| 8926 | debug("DDR4: 2R-1S special BY-RANK init ODT settings updated\n"); |
| 8927 | |
| 8928 | // setup for modifying config table values - 2 ranks and 1 DIMM |
| 8929 | odt_config = |
| 8930 | (struct dimm_odt_config *)&ddr_conf->odt_2rank_config[0]; |
| 8931 | |
| 8932 | // original was 80, first try was 60 |
| 8933 | by_rank_rodt = ddr4_rodt_ctl_48_ohm; |
| 8934 | s = lookup_env(priv, "ddr_by_rank_rodt"); |
| 8935 | if (s) |
| 8936 | by_rank_rodt = strtoul(s, NULL, 0); |
| 8937 | |
| 8938 | odt_config->qs_dic = /*RODT_CTL */ by_rank_rodt; |
| 8939 | |
| 8940 | // this is for MODEREG_PARAMS1 fields |
| 8941 | // fetch the original settings |
| 8942 | mp1.u64 = odt_config->modereg_params1.u64; |
| 8943 | |
| 8944 | by_rank_wr = ddr4_rttwr_80ohm; // originals were 240 |
| 8945 | s = lookup_env(priv, "ddr_by_rank_wr"); |
| 8946 | if (s) |
| 8947 | by_rank_wr = simple_strtoul(s, NULL, 0); |
| 8948 | |
| 8949 | // change specific settings here... |
| 8950 | insrt_wr(&mp1.u64, /*rank */ 00, by_rank_wr); |
| 8951 | insrt_wr(&mp1.u64, /*rank */ 01, by_rank_wr); |
| 8952 | |
| 8953 | // save final settings |
| 8954 | odt_config->modereg_params1.u64 = mp1.u64; |
| 8955 | |
| 8956 | // this is for MODEREG_PARAMS2 fields |
| 8957 | // fetch the original settings |
| 8958 | modereg_params2.u64 = odt_config->modereg_params2.u64; |
| 8959 | |
| 8960 | by_rank_park = ddr4_rttpark_none; // originals were 120 |
| 8961 | s = lookup_env(priv, "ddr_by_rank_park"); |
| 8962 | if (s) |
| 8963 | by_rank_park = simple_strtoul(s, NULL, 0); |
| 8964 | |
| 8965 | // change specific settings here... |
| 8966 | modereg_params2.s.rtt_park_00 = by_rank_park; |
| 8967 | modereg_params2.s.rtt_park_01 = by_rank_park; |
| 8968 | |
| 8969 | // save final settings |
| 8970 | odt_config->modereg_params2.u64 = modereg_params2.u64; |
| 8971 | } |
| 8972 | |
| 8973 | /* |
| 8974 | * FIX |
| 8975 | * Check that values are within some theoretical limits. |
| 8976 | * col_bits(min) = row_lsb(min) - bank_bits(max) - bus_bits(max) = |
| 8977 | * 14 - 3 - 4 = 7 |
| 8978 | * col_bits(max) = row_lsb(max) - bank_bits(min) - bus_bits(min) = |
| 8979 | * 18 - 2 - 3 = 13 |
| 8980 | */ |
| 8981 | if (col_bits > 13 || col_bits < 7) { |
| 8982 | printf("Unsupported number of Col Bits: %d\n", col_bits); |
| 8983 | ++fatal_error; |
| 8984 | } |
| 8985 | |
| 8986 | /* |
| 8987 | * FIX |
| 8988 | * Check that values are within some theoretical limits. |
| 8989 | * row_bits(min) = pbank_lsb(min) - row_lsb(max) - rank_bits = |
| 8990 | * 26 - 18 - 1 = 7 |
| 8991 | * row_bits(max) = pbank_lsb(max) - row_lsb(min) - rank_bits = |
| 8992 | * 33 - 14 - 1 = 18 |
| 8993 | */ |
| 8994 | if (row_bits > 18 || row_bits < 7) { |
| 8995 | printf("Unsupported number of Row Bits: %d\n", row_bits); |
| 8996 | ++fatal_error; |
| 8997 | } |
| 8998 | |
| 8999 | s = lookup_env(priv, "ddr_rdimm_ena"); |
| 9000 | if (s) |
| 9001 | spd_rdimm = !!simple_strtoul(s, NULL, 0); |
| 9002 | |
| 9003 | wl_loops = WLEVEL_LOOPS_DEFAULT; |
| 9004 | // accept generic or interface-specific override |
| 9005 | s = lookup_env(priv, "ddr_wlevel_loops"); |
| 9006 | if (!s) |
| 9007 | s = lookup_env(priv, "ddr%d_wlevel_loops", if_num); |
| 9008 | |
| 9009 | if (s) |
| 9010 | wl_loops = strtoul(s, NULL, 0); |
| 9011 | |
| 9012 | s = lookup_env(priv, "ddr_ranks"); |
| 9013 | if (s) |
| 9014 | num_ranks = simple_strtoul(s, NULL, 0); |
| 9015 | |
| 9016 | bunk_enable = (num_ranks > 1); |
| 9017 | |
| 9018 | if (octeon_is_cpuid(OCTEON_CN7XXX)) |
| 9019 | column_bits_start = 3; |
| 9020 | else |
| 9021 | printf("ERROR: Unsupported Octeon model: 0x%x\n", |
| 9022 | read_c0_prid()); |
| 9023 | |
| 9024 | row_lsb = column_bits_start + col_bits + bank_bits - (!if_64b); |
| 9025 | debug("row_lsb = column_bits_start + col_bits + bank_bits = %d\n", |
| 9026 | row_lsb); |
| 9027 | |
| 9028 | pbank_lsb = row_lsb + row_bits + bunk_enable; |
| 9029 | debug("pbank_lsb = row_lsb + row_bits + bunk_enable = %d\n", pbank_lsb); |
| 9030 | |
| 9031 | if (lranks_per_prank > 1) { |
| 9032 | pbank_lsb = row_lsb + row_bits + lranks_bits + bunk_enable; |
| 9033 | debug("DDR4: 3DS: pbank_lsb = (%d row_lsb) + (%d row_bits) + (%d lranks_bits) + (%d bunk_enable) = %d\n", |
| 9034 | row_lsb, row_bits, lranks_bits, bunk_enable, pbank_lsb); |
| 9035 | } |
| 9036 | |
| 9037 | mem_size_mbytes = dimm_count * ((1ull << pbank_lsb) >> 20); |
| 9038 | if (num_ranks == 4) { |
| 9039 | /* |
| 9040 | * Quad rank dimm capacity is equivalent to two dual-rank |
| 9041 | * dimms. |
| 9042 | */ |
| 9043 | mem_size_mbytes *= 2; |
| 9044 | } |
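| | /* |
| | * Illustration with made-up (not SPD-derived) values: col_bits = 10, |
| | * bank_bits = 4, row_bits = 15, a 64-bit bus and two ranks give |
| | * row_lsb = 3 + 10 + 4 = 17 and pbank_lsb = 17 + 15 + 1 = 33, |
| | * i.e. (1ull << 33) >> 20 = 8192 MiB per DIMM. |
| | */ |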
| 9045 | |
| 9046 | /* |
| 9047 | * Mask with 1 bits set for each active rank, allowing 2 bits |
| 9048 | * per dimm. This makes later calculations simpler, as a variety |
| 9049 | * of CSRs use this layout. This init needs to be updated for dual |
| 9050 | * configs (i.e. non-identical DIMMs). |
| 9051 | * |
| 9052 | * Bit 0 = dimm0, rank 0 |
| 9053 | * Bit 1 = dimm0, rank 1 |
| 9054 | * Bit 2 = dimm1, rank 0 |
| 9055 | * Bit 3 = dimm1, rank 1 |
| 9056 | * ... |
| 9057 | */ |
| 9058 | rank_mask = 0x1; |
| 9059 | if (num_ranks > 1) |
| 9060 | rank_mask = 0x3; |
| 9061 | if (num_ranks > 2) |
| 9062 | rank_mask = 0xf; |
| 9063 | |
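| | /* |
| | * Example (hypothetical population): one dual-rank DIMM gives |
| | * rank_mask = 0x3; two dual-rank DIMMs give 0x3 | (0x3 << 2) = 0xf. |
| | */ |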
| 9064 | for (i = 1; i < dimm_count; i++) |
| 9065 | rank_mask |= ((rank_mask & 0x3) << (2 * i)); |
| 9066 | |
| 9067 | /* |
| 9068 | * If we are booting from RAM, the DRAM controller is |
| 9069 | * already set up. Just return the memory size |
| 9070 | */ |
| 9071 | if (priv->flags & FLAG_RAM_RESIDENT) { |
| 9072 | debug("Ram Boot: Skipping LMC config\n"); |
| 9073 | return mem_size_mbytes; |
| 9074 | } |
| 9075 | |
| 9076 | if (ddr_type == DDR4_DRAM) { |
| 9077 | spd_ecc = |
| 9078 | !!(read_spd |
| 9079 | (&dimm_config_table[0], 0, |
| 9080 | DDR4_SPD_MODULE_MEMORY_BUS_WIDTH) & 8); |
| 9081 | } else { |
| 9082 | spd_ecc = |
| 9083 | !!(read_spd |
| 9084 | (&dimm_config_table[0], 0, |
| 9085 | DDR3_SPD_MEMORY_BUS_WIDTH) & 8); |
| 9086 | } |
| 9087 | |
| 9088 | char rank_spec[8]; |
| 9089 | |
| 9090 | printable_rank_spec(rank_spec, num_ranks, dram_width, spd_package); |
| 9091 | debug("Summary: %d %s%s %s %s, row bits=%d, col bits=%d, bank bits=%d\n", |
| 9092 | dimm_count, dimm_type_name, (dimm_count > 1) ? "s" : "", |
| 9093 | rank_spec, |
| 9094 | (spd_ecc) ? "ECC" : "non-ECC", row_bits, col_bits, bank_bits); |
| 9095 | |
| 9096 | if (ddr_type == DDR4_DRAM) { |
| 9097 | spd_cas_latency = |
| 9098 | ((0xff & |
| 9099 | read_spd(&dimm_config_table[0], 0, |
| 9100 | DDR4_SPD_CAS_LATENCIES_BYTE0)) << 0); |
| 9101 | spd_cas_latency |= |
| 9102 | ((0xff & |
| 9103 | read_spd(&dimm_config_table[0], 0, |
| 9104 | DDR4_SPD_CAS_LATENCIES_BYTE1)) << 8); |
| 9105 | spd_cas_latency |= |
| 9106 | ((0xff & |
| 9107 | read_spd(&dimm_config_table[0], 0, |
| 9108 | DDR4_SPD_CAS_LATENCIES_BYTE2)) << 16); |
| 9109 | spd_cas_latency |= |
| 9110 | ((0xff & |
| 9111 | read_spd(&dimm_config_table[0], 0, |
| 9112 | DDR4_SPD_CAS_LATENCIES_BYTE3)) << 24); |
| 9113 | } else { |
| 9114 | spd_cas_latency = |
| 9115 | 0xff & read_spd(&dimm_config_table[0], 0, |
| 9116 | DDR3_SPD_CAS_LATENCIES_LSB); |
| 9117 | spd_cas_latency |= |
| 9118 | ((0xff & |
| 9119 | read_spd(&dimm_config_table[0], 0, |
| 9120 | DDR3_SPD_CAS_LATENCIES_MSB)) << 8); |
| 9121 | } |
| 9122 | debug("spd_cas_latency : %#06x\n", spd_cas_latency); |
| 9123 | |
| 9124 | if (ddr_type == DDR4_DRAM) { |
| 9125 | /* |
| 9126 | * No other values for DDR4 MTB and FTB are specified at the |
| 9127 | * current time so don't bother reading them. Can't speculate |
| 9128 | * how new values will be represented. |
| 9129 | */ |
| 9130 | int spdmtb = 125; |
| 9131 | int spdftb = 1; |
| 9132 | |
| 9133 | taamin = spdmtb * read_spd(&dimm_config_table[0], 0, |
| 9134 | DDR4_SPD_MIN_CAS_LATENCY_TAAMIN) + |
| 9135 | spdftb * (signed char)read_spd(&dimm_config_table[0], |
| 9136 | 0, DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN); |
| 9137 | |
| 9138 | ddr4_tckavgmin = spdmtb * read_spd(&dimm_config_table[0], 0, |
| 9139 | DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN) + |
| 9140 | spdftb * (signed char)read_spd(&dimm_config_table[0], 0, |
| 9141 | DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN); |
| 9142 | |
| 9143 | ddr4_tckavgmax = spdmtb * read_spd(&dimm_config_table[0], 0, |
| 9144 | DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX) + |
| 9145 | spdftb * (signed char)read_spd(&dimm_config_table[0], 0, |
| 9146 | DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX); |
| 9147 | |
| 9148 | ddr4_trdcmin = spdmtb * read_spd(&dimm_config_table[0], 0, |
| 9149 | DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN) + |
| 9150 | spdftb * (signed char)read_spd(&dimm_config_table[0], 0, |
| 9151 | DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN); |
| 9152 | |
| 9153 | ddr4_trpmin = spdmtb * read_spd(&dimm_config_table[0], 0, |
| 9154 | DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN) + |
| 9155 | spdftb * (signed char)read_spd(&dimm_config_table[0], 0, |
| 9156 | DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN); |
| 9157 | |
| 9158 | ddr4_trasmin = spdmtb * |
| 9159 | (((read_spd |
| 9160 | (&dimm_config_table[0], 0, |
| 9161 | DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8) + |
| 9162 | (read_spd |
| 9163 | (&dimm_config_table[0], 0, |
| 9164 | DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN) & 0xff)); |
| 9165 | |
| 9166 | ddr4_trcmin = spdmtb * |
| 9167 | ((((read_spd |
| 9168 | (&dimm_config_table[0], 0, |
| 9169 | DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) >> 4) & 0xf) << |
| 9170 | 8) + (read_spd |
| 9171 | (&dimm_config_table[0], 0, |
| 9172 | DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN) & |
| 9173 | 0xff)) |
| 9174 | + spdftb * (signed char)read_spd(&dimm_config_table[0], |
| 9175 | 0, |
| 9176 | DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN); |
| 9177 | |
| 9178 | ddr4_trfc1min = spdmtb * (((read_spd(&dimm_config_table[0], 0, |
| 9179 | DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN) & 0xff) << |
| 9180 | 8) + (read_spd(&dimm_config_table[0], 0, |
| 9181 | DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN) & 0xff)); |
| 9182 | |
| 9183 | ddr4_trfc2min = spdmtb * (((read_spd(&dimm_config_table[0], 0, |
| 9184 | DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN) & 0xff) << |
| 9185 | 8) + (read_spd(&dimm_config_table[0], 0, |
| 9186 | DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN) & 0xff)); |
| 9187 | |
| 9188 | ddr4_trfc4min = spdmtb * (((read_spd(&dimm_config_table[0], 0, |
| 9189 | DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN) & 0xff) << |
| 9190 | 8) + (read_spd(&dimm_config_table[0], 0, |
| 9191 | DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN) & 0xff)); |
| 9192 | |
| 9193 | ddr4_tfawmin = spdmtb * (((read_spd(&dimm_config_table[0], 0, |
| 9194 | DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN) & 0xf) << |
| 9195 | 8) + (read_spd(&dimm_config_table[0], 0, |
| 9196 | DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN) & 0xff)); |
| 9197 | |
| 9198 | ddr4_trrd_smin = spdmtb * read_spd(&dimm_config_table[0], 0, |
| 9199 | DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN) + |
| 9200 | spdftb * (signed char)read_spd(&dimm_config_table[0], 0, |
| 9201 | DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN); |
| 9202 | |
| 9203 | ddr4_trrd_lmin = spdmtb * read_spd(&dimm_config_table[0], 0, |
| 9204 | DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN) + |
| 9205 | spdftb * (signed char)read_spd(&dimm_config_table[0], 0, |
| 9206 | DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN); |
| 9207 | |
| 9208 | ddr4_tccd_lmin = spdmtb * read_spd(&dimm_config_table[0], 0, |
| 9209 | DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN) + |
| 9210 | spdftb * (signed char)read_spd(&dimm_config_table[0], 0, |
| 9211 | DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN); |
| 9212 | |
| 9213 | debug("%-45s : %6d ps\n", "Medium Timebase (MTB)", spdmtb); |
| 9214 | debug("%-45s : %6d ps\n", "Fine Timebase (FTB)", spdftb); |
| 9215 | |
| 9216 | debug("%-45s : %6d ps (%ld MT/s)\n", |
| 9217 | "SDRAM Minimum Cycle Time (tCKAVGmin)", ddr4_tckavgmin, |
| 9218 | pretty_psecs_to_mts(ddr4_tckavgmin)); |
| 9219 | debug("%-45s : %6d ps\n", |
| 9220 | "SDRAM Maximum Cycle Time (tCKAVGmax)", ddr4_tckavgmax); |
| 9221 | debug("%-45s : %6d ps\n", "Minimum CAS Latency Time (taamin)", |
| 9222 | taamin); |
| 9223 | debug("%-45s : %6d ps\n", |
| 9224 | "Minimum RAS to CAS Delay Time (tRCDmin)", ddr4_trdcmin); |
| 9225 | debug("%-45s : %6d ps\n", |
| 9226 | "Minimum Row Precharge Delay Time (tRPmin)", ddr4_trpmin); |
| 9227 | debug("%-45s : %6d ps\n", |
| 9228 | "Minimum Active to Precharge Delay (tRASmin)", |
| 9229 | ddr4_trasmin); |
| 9230 | debug("%-45s : %6d ps\n", |
| 9231 | "Minimum Active to Active/Refr. Delay (tRCmin)", |
| 9232 | ddr4_trcmin); |
| 9233 | debug("%-45s : %6d ps\n", |
| 9234 | "Minimum Refresh Recovery Delay (tRFC1min)", |
| 9235 | ddr4_trfc1min); |
| 9236 | debug("%-45s : %6d ps\n", |
| 9237 | "Minimum Refresh Recovery Delay (tRFC2min)", |
| 9238 | ddr4_trfc2min); |
| 9239 | debug("%-45s : %6d ps\n", |
| 9240 | "Minimum Refresh Recovery Delay (tRFC4min)", |
| 9241 | ddr4_trfc4min); |
| 9242 | debug("%-45s : %6d ps\n", |
| 9243 | "Minimum Four Activate Window Time (tFAWmin)", |
| 9244 | ddr4_tfawmin); |
| 9245 | debug("%-45s : %6d ps\n", |
| 9246 | "Minimum Act. to Act. Delay (tRRD_Smin)", ddr4_trrd_smin); |
| 9247 | debug("%-45s : %6d ps\n", |
| 9248 | "Minimum Act. to Act. Delay (tRRD_Lmin)", ddr4_trrd_lmin); |
| 9249 | debug("%-45s : %6d ps\n", |
| 9250 | "Minimum CAS to CAS Delay Time (tCCD_Lmin)", |
| 9251 | ddr4_tccd_lmin); |
| 9252 | |
| 9253 | #define DDR4_TWR 15000 |
| 9254 | #define DDR4_TWTR_S 2500 |
| 9255 | |
| 9256 | tckmin = ddr4_tckavgmin; |
| 9257 | twr = DDR4_TWR; |
| 9258 | trcd = ddr4_trdcmin; |
| 9259 | trrd = ddr4_trrd_smin; |
| 9260 | trp = ddr4_trpmin; |
| 9261 | tras = ddr4_trasmin; |
| 9262 | trc = ddr4_trcmin; |
| 9263 | trfc = ddr4_trfc1min; |
| 9264 | twtr = DDR4_TWTR_S; |
| 9265 | tfaw = ddr4_tfawmin; |
| 9266 | |
| 9267 | if (spd_rdimm) { |
| 9268 | spd_addr_mirror = read_spd(&dimm_config_table[0], 0, |
| 9269 | DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM) & |
| 9270 | 0x1; |
| 9271 | } else { |
| 9272 | spd_addr_mirror = read_spd(&dimm_config_table[0], 0, |
| 9273 | DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE) & 0x1; |
| 9274 | } |
| 9275 | debug("spd_addr_mirror : %#06x\n", spd_addr_mirror); |
| 9276 | } else { |
| 9277 | spd_mtb_dividend = |
| 9278 | 0xff & read_spd(&dimm_config_table[0], 0, |
| 9279 | DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND); |
| 9280 | spd_mtb_divisor = |
| 9281 | 0xff & read_spd(&dimm_config_table[0], 0, |
| 9282 | DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR); |
| 9283 | spd_tck_min = |
| 9284 | 0xff & read_spd(&dimm_config_table[0], 0, |
| 9285 | DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN); |
| 9286 | spd_taa_min = |
| 9287 | 0xff & read_spd(&dimm_config_table[0], 0, |
| 9288 | DDR3_SPD_MIN_CAS_LATENCY_TAAMIN); |
| 9289 | |
| 9290 | spd_twr = |
| 9291 | 0xff & read_spd(&dimm_config_table[0], 0, |
| 9292 | DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN); |
| 9293 | spd_trcd = |
| 9294 | 0xff & read_spd(&dimm_config_table[0], 0, |
| 9295 | DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN); |
| 9296 | spd_trrd = |
| 9297 | 0xff & read_spd(&dimm_config_table[0], 0, |
| 9298 | DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN); |
| 9299 | spd_trp = |
| 9300 | 0xff & read_spd(&dimm_config_table[0], 0, |
| 9301 | DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN); |
| 9302 | spd_tras = |
| 9303 | 0xff & read_spd(&dimm_config_table[0], 0, |
| 9304 | DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN); |
| 9305 | spd_tras |= |
| 9306 | ((0xff & |
| 9307 | read_spd(&dimm_config_table[0], 0, |
| 9308 | DDR3_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8); |
| 9309 | spd_trc = |
| 9310 | 0xff & read_spd(&dimm_config_table[0], 0, |
| 9311 | DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN); |
| 9312 | spd_trc |= |
| 9313 | ((0xff & |
| 9314 | read_spd(&dimm_config_table[0], 0, |
| 9315 | DDR3_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf0) << 4); |
| 9316 | spd_trfc = |
| 9317 | 0xff & read_spd(&dimm_config_table[0], 0, |
| 9318 | DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN); |
| 9319 | spd_trfc |= |
| 9320 | ((0xff & |
| 9321 | read_spd(&dimm_config_table[0], 0, |
| 9322 | DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN)) << |
| 9323 | 8); |
| 9324 | spd_twtr = |
| 9325 | 0xff & read_spd(&dimm_config_table[0], 0, |
| 9326 | DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN); |
| 9327 | spd_trtp = |
| 9328 | 0xff & read_spd(&dimm_config_table[0], 0, |
| 9329 | DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN); |
| 9330 | spd_tfaw = |
| 9331 | 0xff & read_spd(&dimm_config_table[0], 0, |
| 9332 | DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN); |
| 9333 | spd_tfaw |= |
| 9334 | ((0xff & |
| 9335 | read_spd(&dimm_config_table[0], 0, |
| 9336 | DDR3_SPD_UPPER_NIBBLE_TFAW) & 0xf) << 8); |
| 9337 | spd_addr_mirror = |
| 9338 | 0xff & read_spd(&dimm_config_table[0], 0, |
| 9339 | DDR3_SPD_ADDRESS_MAPPING) & 0x1; |
| 9340 | /* Only address mirror unbuffered dimms. */ |
| 9341 | spd_addr_mirror = spd_addr_mirror && !spd_rdimm; |
| 9342 | ftb_dividend = |
| 9343 | read_spd(&dimm_config_table[0], 0, |
| 9344 | DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) >> 4; |
| 9345 | ftb_divisor = |
| 9346 | read_spd(&dimm_config_table[0], 0, |
| 9347 | DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) & 0xf; |
| 9348 | /* Make sure that it is not 0 */ |
| 9349 | ftb_divisor = (ftb_divisor == 0) ? 1 : ftb_divisor; |
| 9350 | |
| 9351 | debug("spd_twr : %#06x\n", spd_twr); |
| 9352 | debug("spd_trcd : %#06x\n", spd_trcd); |
| 9353 | debug("spd_trrd : %#06x\n", spd_trrd); |
| 9354 | debug("spd_trp : %#06x\n", spd_trp); |
| 9355 | debug("spd_tras : %#06x\n", spd_tras); |
| 9356 | debug("spd_trc : %#06x\n", spd_trc); |
| 9357 | debug("spd_trfc : %#06x\n", spd_trfc); |
| 9358 | debug("spd_twtr : %#06x\n", spd_twtr); |
| 9359 | debug("spd_trtp : %#06x\n", spd_trtp); |
| 9360 | debug("spd_tfaw : %#06x\n", spd_tfaw); |
| 9361 | debug("spd_addr_mirror : %#06x\n", spd_addr_mirror); |
| 9362 | |
| 9363 | mtb_psec = spd_mtb_dividend * 1000 / spd_mtb_divisor; |
| 9364 | taamin = mtb_psec * spd_taa_min; |
| 9365 | taamin += ftb_dividend * |
| 9366 | (signed char)read_spd(&dimm_config_table[0], |
| 9367 | 0, DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN) / |
| 9368 | ftb_divisor; |
| 9369 | tckmin = mtb_psec * spd_tck_min; |
| 9370 | tckmin += ftb_dividend * |
| 9371 | (signed char)read_spd(&dimm_config_table[0], |
| 9372 | 0, DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN) / |
| 9373 | ftb_divisor; |
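| | /* |
| | * For example, the common DDR3 MTB of 1/8 ns (dividend 1, divisor 8) |
| | * gives mtb_psec = 125; an SPD tCKmin byte of 10 then yields |
| | * tckmin = 1250 ps, i.e. DDR3-1600 (ignoring the FTB correction). |
| | */ |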
| 9374 | |
| 9375 | twr = spd_twr * mtb_psec; |
| 9376 | trcd = spd_trcd * mtb_psec; |
| 9377 | trrd = spd_trrd * mtb_psec; |
| 9378 | trp = spd_trp * mtb_psec; |
| 9379 | tras = spd_tras * mtb_psec; |
| 9380 | trc = spd_trc * mtb_psec; |
| 9381 | trfc = spd_trfc * mtb_psec; |
| 9382 | if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) && trfc < 260000) { |
| 9383 | // default to this - because it works... |
| 9384 | int new_trfc = 260000; |
| 9385 | |
| 9386 | s = env_get("ddr_trfc"); |
| 9387 | if (s) { |
| 9388 | new_trfc = simple_strtoul(s, NULL, 0); |
| 9389 | printf("Parameter found in environment. ddr_trfc = %d\n", |
| 9390 | new_trfc); |
| 9391 | if (new_trfc < 160000 || new_trfc > 260000) { |
| 9392 | // back to default if out of range |
| 9393 | new_trfc = 260000; |
| 9394 | } |
| 9395 | } |
| 9396 | debug("N%d.LMC%d: Adjusting tRFC from %d to %d, for CN78XX Pass 2.x\n", |
| 9397 | node, if_num, trfc, new_trfc); |
| 9398 | trfc = new_trfc; |
| 9399 | } |
| 9400 | |
| 9401 | twtr = spd_twtr * mtb_psec; |
| 9402 | trtp = spd_trtp * mtb_psec; |
| 9403 | tfaw = spd_tfaw * mtb_psec; |
| 9404 | |
| 9405 | debug("Medium Timebase (MTB) : %6d ps\n", |
| 9406 | mtb_psec); |
| 9407 | debug("Minimum Cycle Time (tckmin) : %6d ps (%ld MT/s)\n", |
| 9408 | tckmin, pretty_psecs_to_mts(tckmin)); |
| 9409 | debug("Minimum CAS Latency Time (taamin) : %6d ps\n", |
| 9410 | taamin); |
| 9411 | debug("Write Recovery Time (tWR) : %6d ps\n", |
| 9412 | twr); |
| 9413 | debug("Minimum RAS to CAS delay (tRCD) : %6d ps\n", |
| 9414 | trcd); |
| 9415 | debug("Minimum Row Active to Row Active delay (tRRD) : %6d ps\n", |
| 9416 | trrd); |
| 9417 | debug("Minimum Row Precharge Delay (tRP) : %6d ps\n", |
| 9418 | trp); |
| 9419 | debug("Minimum Active to Precharge (tRAS) : %6d ps\n", |
| 9420 | tras); |
| 9421 | debug("Minimum Active to Active/Refresh Delay (tRC) : %6d ps\n", |
| 9422 | trc); |
| 9423 | debug("Minimum Refresh Recovery Delay (tRFC) : %6d ps\n", |
| 9424 | trfc); |
| 9425 | debug("Internal write to read command delay (tWTR) : %6d ps\n", |
| 9426 | twtr); |
| 9427 | debug("Min Internal Rd to Precharge Cmd Delay (tRTP) : %6d ps\n", |
| 9428 | trtp); |
| 9429 | debug("Minimum Four Activate Window Delay (tFAW) : %6d ps\n", |
| 9430 | tfaw); |
| 9431 | } |
| 9432 | |
| 9433 | /* |
| 9434 | * When the cycle time is within 1 psec of the minimum, accept it |
| 9435 | * as a slight rounding error and adjust it to exactly the minimum |
| 9436 | * cycle time. This avoids an unnecessary warning. |
| 9437 | */ |
| 9438 | if (abs(tclk_psecs - tckmin) < 2) |
| 9439 | tclk_psecs = tckmin; |
| 9440 | |
| 9441 | if (tclk_psecs < (u64)tckmin) { |
| 9442 | printf("WARNING!!!!: DDR Clock Rate (tCLK: %ld) exceeds DIMM specifications (tckmin: %ld)!!!!\n", |
| 9443 | tclk_psecs, (ulong)tckmin); |
| 9444 | } |
| 9445 | |
| 9446 | debug("DDR Clock Rate (tCLK) : %6ld ps\n", |
| 9447 | tclk_psecs); |
| 9448 | debug("Core Clock Rate (eCLK) : %6ld ps\n", |
| 9449 | eclk_psecs); |
| 9450 | |
| 9451 | s = env_get("ddr_use_ecc"); |
| 9452 | if (s) { |
| 9453 | use_ecc = !!simple_strtoul(s, NULL, 0); |
| 9454 | printf("Parameter found in environment. ddr_use_ecc = %d\n", |
| 9455 | use_ecc); |
| 9456 | } |
| 9457 | use_ecc = use_ecc && spd_ecc; |
| 9458 | |
| 9459 | if_bytemask = if_64b ? (use_ecc ? 0x1ff : 0xff) |
| 9460 | : (use_ecc ? 0x01f : 0x0f); |
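| | /* |
| | * 0x1ff selects all eight data bytes plus the ECC byte on a 64-bit |
| | * interface; 0x0f selects four data bytes without ECC on a 32-bit one. |
| | */ |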
| 9461 | |
| 9462 | debug("DRAM Interface width: %d bits %s bytemask 0x%03x\n", |
| 9463 | if_64b ? 64 : 32, use_ecc ? "+ECC" : "", if_bytemask); |
| 9464 | |
| 9465 | debug("\n------ Board Custom Configuration Settings ------\n"); |
| 9466 | debug("%-45s : %d\n", "MIN_RTT_NOM_IDX ", c_cfg->min_rtt_nom_idx); |
| 9467 | debug("%-45s : %d\n", "MAX_RTT_NOM_IDX ", c_cfg->max_rtt_nom_idx); |
| 9468 | debug("%-45s : %d\n", "MIN_RODT_CTL ", c_cfg->min_rodt_ctl); |
| 9469 | debug("%-45s : %d\n", "MAX_RODT_CTL ", c_cfg->max_rodt_ctl); |
| 9470 | debug("%-45s : %d\n", "MIN_CAS_LATENCY ", c_cfg->min_cas_latency); |
| 9471 | debug("%-45s : %d\n", "OFFSET_EN ", c_cfg->offset_en); |
| 9472 | debug("%-45s : %d\n", "OFFSET_UDIMM ", c_cfg->offset_udimm); |
| 9473 | debug("%-45s : %d\n", "OFFSET_RDIMM ", c_cfg->offset_rdimm); |
| 9474 | debug("%-45s : %d\n", "DDR_RTT_NOM_AUTO ", c_cfg->ddr_rtt_nom_auto); |
| 9475 | debug("%-45s : %d\n", "DDR_RODT_CTL_AUTO ", c_cfg->ddr_rodt_ctl_auto); |
| 9476 | if (spd_rdimm) |
| 9477 | debug("%-45s : %d\n", "RLEVEL_COMP_OFFSET", |
| 9478 | c_cfg->rlevel_comp_offset_rdimm); |
| 9479 | else |
| 9480 | debug("%-45s : %d\n", "RLEVEL_COMP_OFFSET", |
| 9481 | c_cfg->rlevel_comp_offset_udimm); |
| 9482 | debug("%-45s : %d\n", "RLEVEL_COMPUTE ", c_cfg->rlevel_compute); |
| 9483 | debug("%-45s : %d\n", "DDR2T_UDIMM ", c_cfg->ddr2t_udimm); |
| 9484 | debug("%-45s : %d\n", "DDR2T_RDIMM ", c_cfg->ddr2t_rdimm); |
| 9485 | debug("%-45s : %d\n", "FPRCH2 ", c_cfg->fprch2); |
| 9486 | debug("%-45s : %d\n", "PTUNE_OFFSET ", c_cfg->ptune_offset); |
| 9487 | debug("%-45s : %d\n", "NTUNE_OFFSET ", c_cfg->ntune_offset); |
| 9488 | debug("-------------------------------------------------\n"); |
| 9489 | |
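| | /* |
| | * Example with typical numbers: taamin = 13750 ps (DDR3-1600 CL11) |
| | * at tclk_psecs = 1250 ps gives a desired CAS latency of 11. |
| | */ |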
| 9490 | cl = divide_roundup(taamin, tclk_psecs); |
| 9491 | |
| 9492 | debug("Desired CAS Latency : %6d\n", cl); |
| 9493 | |
| 9494 | min_cas_latency = c_cfg->min_cas_latency; |
| 9495 | |
| 9496 | s = lookup_env(priv, "ddr_min_cas_latency"); |
| 9497 | if (s) |
| 9498 | min_cas_latency = simple_strtoul(s, NULL, 0); |
| 9499 | |
| 9500 | debug("CAS Latencies supported in DIMM :"); |
| 9501 | base_cl = (ddr_type == DDR4_DRAM) ? 7 : 4; |
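| | /* |
| | * In the (assumed) low-CL range, the SPD CAS latency bitmap starts at |
| | * CL = 7 for DDR4 and CL = 4 for DDR3, so bit i maps to CL = i + base_cl. |
| | */ |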
| 9502 | for (i = 0; i < 32; ++i) { |
| 9503 | if ((spd_cas_latency >> i) & 1) { |
| 9504 | debug(" %d", i + base_cl); |
| 9505 | max_cas_latency = i + base_cl; |
| 9506 | if (min_cas_latency == 0) |
| 9507 | min_cas_latency = i + base_cl; |
| 9508 | } |
| 9509 | } |
| 9510 | debug("\n"); |
| 9511 | |
| 9512 | /* |
| 9513 | * Use relaxed timing when running slower than the minimum |
| 9514 | * supported speed. Adjust timing to match the smallest supported |
| 9515 | * CAS Latency. |
| 9516 | */ |
| 9517 | if (min_cas_latency > cl) { |
| 9518 | ulong adjusted_tclk = taamin / min_cas_latency; |
| 9519 | |
| 9520 | cl = min_cas_latency; |
| 9521 | debug("Slow clock speed. Adjusting timing: tClk = %ld, Adjusted tClk = %ld\n", |
| 9522 | tclk_psecs, adjusted_tclk); |
| 9523 | tclk_psecs = adjusted_tclk; |
| 9524 | } |
| 9525 | |
| 9526 | s = env_get("ddr_cas_latency"); |
| 9527 | if (s) { |
| 9528 | override_cas_latency = simple_strtoul(s, NULL, 0); |
| 9529 | printf("Parameter found in environment. ddr_cas_latency = %d\n", |
| 9530 | override_cas_latency); |
| 9531 | } |
| 9532 | |
| 9533 | /* Make sure that the selected cas latency is legal */ |
| 9534 | for (i = (cl - base_cl); i < 32; ++i) { |
| 9535 | if ((spd_cas_latency >> i) & 1) { |
| 9536 | cl = i + base_cl; |
| 9537 | break; |
| 9538 | } |
| 9539 | } |
| 9540 | |
| 9541 | if (max_cas_latency < cl) |
| 9542 | cl = max_cas_latency; |
| 9543 | |
| 9544 | if (override_cas_latency != 0) |
| 9545 | cl = override_cas_latency; |
| 9546 | |
| 9547 | debug("CAS Latency : %6d\n", cl); |
| 9548 | |
| 9549 | if ((cl * tckmin) > 20000) { |
| 9550 | debug("(CLactual * tckmin) = %d exceeds 20 ns\n", |
| 9551 | (cl * tckmin)); |
| 9552 | } |
| 9553 | |
| 9554 | if (tclk_psecs < (ulong)tckmin) { |
| 9555 | printf("WARNING!!!!!!: DDR3 Clock Rate (tCLK: %ld) exceeds DIMM specifications (tckmin:%ld)!!!!!!!!\n", |
| 9556 | tclk_psecs, (ulong)tckmin); |
| 9557 | } |
| 9558 | |
| 9559 | if (num_banks != 4 && num_banks != 8 && num_banks != 16) { |
| 9560 | printf("Unsupported number of banks %d. Must be 4, 8 or 16.\n", |
| 9561 | num_banks); |
| 9562 | ++fatal_error; |
| 9563 | } |
| 9564 | |
| 9565 | if (num_ranks != 1 && num_ranks != 2 && num_ranks != 4) { |
| 9566 | printf("Unsupported number of ranks: %d\n", num_ranks); |
| 9567 | ++fatal_error; |
| 9568 | } |
| 9569 | |
| 9570 | if (octeon_is_cpuid(OCTEON_CN78XX) || |
| 9571 | octeon_is_cpuid(OCTEON_CN73XX) || |
| 9572 | octeon_is_cpuid(OCTEON_CNF75XX)) { |
| 9573 | if (dram_width != 8 && dram_width != 16 && dram_width != 4) { |
| 9574 | printf("Unsupported SDRAM Width, %d. Must be 4, 8 or 16.\n", |
| 9575 | dram_width); |
| 9576 | ++fatal_error; |
| 9577 | } |
| 9578 | } else if (dram_width != 8 && dram_width != 16) { |
| 9579 | printf("Unsupported SDRAM Width, %d. Must be 8 or 16.\n", |
| 9580 | dram_width); |
| 9581 | ++fatal_error; |
| 9582 | } |
| 9583 | |
| 9584 | /* |
| 9585 | * Bail out here if things are not copacetic. |
| 9586 | */ |
| 9587 | if (fatal_error) |
| 9588 | return (-1); |
| 9589 | |
| 9590 | /* |
| 9591 | * 4.8.4 LMC RESET Initialization |
| 9592 | * |
| 9593 | * The purpose of this step is to assert/deassert the RESET# pin at the |
| 9594 | * DDR3/DDR4 parts. |
| 9595 | * |
| 9596 | * This LMC RESET step is done for all enabled LMCs. |
| 9597 | */ |
| 9598 | perform_lmc_reset(priv, node, if_num); |
| 9599 | |
| 9600 | // Make sure scrambling is disabled during init... |
| 9601 | ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num)); |
| 9602 | ctrl.s.scramble_ena = 0; |
| 9603 | lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64); |
| 9604 | |
| 9605 | lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num), 0); |
| 9606 | lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num), 0); |
| 9607 | if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) |
| 9608 | lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num), 0); |
| 9609 | |
| 9610 | odt_idx = min(dimm_count - 1, 3); |
| 9611 | |
| 9612 | switch (num_ranks) { |
| 9613 | case 1: |
| 9614 | odt_config = odt_1rank_config; |
| 9615 | break; |
| 9616 | case 2: |
| 9617 | odt_config = odt_2rank_config; |
| 9618 | break; |
| 9619 | case 4: |
| 9620 | odt_config = odt_4rank_config; |
| 9621 | break; |
| 9622 | default: |
| 9623 | odt_config = disable_odt_config; |
| 9624 | printf("Unsupported number of ranks: %d\n", num_ranks); |
| 9625 | ++fatal_error; |
| 9626 | } |
| 9627 | |
| 9628 | /* |
| 9629 | * 4.8.5 Early LMC Initialization |
| 9630 | * |
| 9631 | * All of DDR PLL, LMC CK, and LMC DRESET initializations must be |
| 9632 | * completed prior to starting this LMC initialization sequence. |
| 9633 | * |
| 9634 | * Perform the following five substeps for early LMC initialization: |
| 9635 | * |
| 9636 | * 1. Software must ensure there are no pending DRAM transactions. |
| 9637 | * |
| 9638 | * 2. Write LMC(0)_CONFIG, LMC(0)_CONTROL, LMC(0)_TIMING_PARAMS0, |
| 9639 | * LMC(0)_TIMING_PARAMS1, LMC(0)_MODEREG_PARAMS0, |
| 9640 | * LMC(0)_MODEREG_PARAMS1, LMC(0)_DUAL_MEMCFG, LMC(0)_NXM, |
| 9641 | * LMC(0)_WODT_MASK, LMC(0)_RODT_MASK, LMC(0)_COMP_CTL2, |
| 9642 | * LMC(0)_PHY_CTL, LMC(0)_DIMM0/1_PARAMS, and LMC(0)_DIMM_CTL with |
| 9643 | * appropriate values. All sections in this chapter can be used to |
| 9644 | * derive proper register settings. |
| 9645 | */ |
| 9646 | |
| 9647 | /* LMC(0)_CONFIG */ |
| 9648 | lmc_config(priv); |
| 9649 | |
| 9650 | /* LMC(0)_CONTROL */ |
| 9651 | lmc_control(priv); |
| 9652 | |
| 9653 | /* LMC(0)_TIMING_PARAMS0 */ |
| 9654 | lmc_timing_params0(priv); |
| 9655 | |
| 9656 | /* LMC(0)_TIMING_PARAMS1 */ |
| 9657 | lmc_timing_params1(priv); |
| 9658 | |
| 9659 | /* LMC(0)_TIMING_PARAMS2 */ |
| 9660 | lmc_timing_params2(priv); |
| 9661 | |
| 9662 | /* LMC(0)_MODEREG_PARAMS0 */ |
| 9663 | lmc_modereg_params0(priv); |
| 9664 | |
| 9665 | /* LMC(0)_MODEREG_PARAMS1 */ |
| 9666 | lmc_modereg_params1(priv); |
| 9667 | |
| 9668 | /* LMC(0)_MODEREG_PARAMS2 */ |
| 9669 | lmc_modereg_params2(priv); |
| 9670 | |
| 9671 | /* LMC(0)_MODEREG_PARAMS3 */ |
| 9672 | lmc_modereg_params3(priv); |
| 9673 | |
| 9674 | /* LMC(0)_NXM */ |
| 9675 | lmc_nxm(priv); |
| 9676 | |
| 9677 | /* LMC(0)_WODT_MASK */ |
| 9678 | lmc_wodt_mask(priv); |
| 9679 | |
| 9680 | /* LMC(0)_RODT_MASK */ |
| 9681 | lmc_rodt_mask(priv); |
| 9682 | |
| 9683 | /* LMC(0)_COMP_CTL2 */ |
| 9684 | lmc_comp_ctl2(priv); |
| 9685 | |
| 9686 | /* LMC(0)_PHY_CTL */ |
| 9687 | lmc_phy_ctl(priv); |
| 9688 | |
| 9689 | /* LMC(0)_EXT_CONFIG */ |
| 9690 | lmc_ext_config(priv); |
| 9691 | |
| 9692 | /* LMC(0)_EXT_CONFIG2 */ |
| 9693 | lmc_ext_config2(priv); |
| 9694 | |
| 9695 | /* LMC(0)_DIMM0/1_PARAMS */ |
| 9696 | lmc_dimm01_params(priv); |
| 9697 | |
| 9698 | ret = lmc_rank_init(priv); |
| 9699 | if (ret < 0) |
| 9700 | return 0; /* 0 indicates problem */ |
| 9701 | |
| 9702 | lmc_config_2(priv); |
| 9703 | |
| 9704 | lmc_write_leveling(priv); |
| 9705 | |
| 9706 | lmc_read_leveling(priv); |
| 9707 | |
| 9708 | lmc_workaround(priv); |
| 9709 | |
| 9710 | ret = lmc_sw_write_leveling(priv); |
| 9711 | if (ret < 0) |
| 9712 | return 0; /* 0 indicates problem */ |
| 9713 | |
| 9714 | // This sometimes causes stack overflow crashes, so |
| 9715 | // display only for DDR4 RDIMMs. |
| 9716 | if (ddr_type == DDR4_DRAM && spd_rdimm) { |
| 9717 | int i; |
| 9718 | |
| 9719 | for (i = 0; i < 3; i += 2) // just pages 0 and 2 for now.. |
| 9720 | display_mpr_page(priv, rank_mask, if_num, i); |
| 9721 | } |
| 9722 | |
| 9723 | lmc_dll(priv); |
| 9724 | |
| 9725 | lmc_workaround_2(priv); |
| 9726 | |
| 9727 | lmc_final(priv); |
| 9728 | |
| 9729 | lmc_scrambling(priv); |
| 9730 | |
| 9731 | return mem_size_mbytes; |
| 9732 | } |
| 9733 | |
| 9734 | ///// HW-assist byte DLL offset tuning ////// |
| 9735 | |
| 9736 | static int cvmx_dram_get_num_lmc(struct ddr_priv *priv) |
| 9737 | { |
| 9738 | union cvmx_lmcx_dll_ctl2 lmcx_dll_ctl2; |
| 9739 | |
| 9740 | if (octeon_is_cpuid(OCTEON_CN70XX)) |
| 9741 | return 1; |
| 9742 | |
| 9743 | if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX)) { |
| 9744 | // sample LMC1 |
| 9745 | lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(1)); |
| 9746 | if (lmcx_dll_ctl2.cn78xx.intf_en) |
| 9747 | return 2; |
| 9748 | else |
| 9749 | return 1; |
| 9750 | } |
| 9751 | |
| 9752 | // for CN78XX, LMCs are always active in pairs, and always LMC0/1 |
| 9753 | // so, we sample LMC2 to see if 2 and 3 are active |
| 9754 | lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(2)); |
| 9755 | if (lmcx_dll_ctl2.cn78xx.intf_en) |
| 9756 | return 4; |
| 9757 | else |
| 9758 | return 2; |
| 9759 | } |
| 9760 | |
| 9761 | // We have to define these here, even though they are already defined in the BDK |
| 9762 | |
| 9763 | // all DDR3, and DDR4 x16 today, use only 3 bank bits; |
| 9764 | // DDR4 x4 and x8 always have 4 bank bits |
| 9765 | // NOTE: this will change in the future, when DDR4 x16 devices can |
| 9766 | // come with 16 banks!! FIXME!! |
| 9767 | static int cvmx_dram_get_num_bank_bits(struct ddr_priv *priv, int lmc) |
| 9768 | { |
| 9769 | union cvmx_lmcx_dll_ctl2 lmcx_dll_ctl2; |
| 9770 | union cvmx_lmcx_config lmcx_config; |
| 9771 | union cvmx_lmcx_ddr_pll_ctl lmcx_ddr_pll_ctl; |
| 9772 | int bank_width; |
| 9773 | |
| 9774 | // can always read this |
| 9775 | lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(lmc)); |
| 9776 | |
| 9777 | if (lmcx_dll_ctl2.cn78xx.dreset) // check LMCn |
| 9778 | return 0; |
| 9779 | |
| 9780 | lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc)); |
| 9781 | lmcx_ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(lmc)); |
| 9782 | |
| 9783 | bank_width = ((lmcx_ddr_pll_ctl.s.ddr4_mode != 0) && |
| 9784 | (lmcx_config.s.bg2_enable)) ? 4 : 3; |
| 9785 | |
| 9786 | return bank_width; |
| 9787 | } |
| 9788 | |
| 9789 | #define EXTRACT(v, lsb, width) (((v) >> (lsb)) & ((1ull << (width)) - 1)) |
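| | /* For example, EXTRACT(0x1234, 4, 8) == 0x23, i.e. bits <11:4> of the value. */ |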
| 9790 | #define ADDRESS_HOLE 0x10000000ULL |
| 9791 | |
| 9792 | static void cvmx_dram_address_extract_info(struct ddr_priv *priv, u64 address, |
| 9793 | int *node, int *lmc, int *dimm, |
| 9794 | int *prank, int *lrank, int *bank, |
| 9795 | int *row, int *col) |
| 9796 | { |
| 9797 | int bank_lsb, xbits; |
| 9798 | union cvmx_l2c_ctl l2c_ctl; |
| 9799 | union cvmx_lmcx_config lmcx_config; |
| 9800 | union cvmx_lmcx_control lmcx_control; |
| 9801 | union cvmx_lmcx_ext_config ext_config; |
| 9802 | int bitno = (octeon_is_cpuid(OCTEON_CN7XXX)) ? 20 : 18; |
| 9803 | int bank_width; |
| 9804 | int dimm_lsb; |
| 9805 | int dimm_width; |
| 9806 | int prank_lsb, lrank_lsb; |
| 9807 | int prank_width, lrank_width; |
| 9808 | int row_lsb; |
| 9809 | int row_width; |
| 9810 | int col_hi_lsb; |
| 9811 | int col_hi_width; |
| 9812 | int col_hi; |
| 9813 | |
| 9814 | if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX)) |
| 9815 | bitno = 18; |
| 9816 | |
| 9817 | *node = EXTRACT(address, 40, 2); /* Address bits [41:40] */ |
| 9818 | |
| 9819 | address &= (1ULL << 40) - 1; // lop off any node bits or above |
| 9820 | if (address >= ADDRESS_HOLE) // adjust down if at HOLE or above |
| 9821 | address -= ADDRESS_HOLE; |
| 9822 | |
| 9823 | /* Determine the LMC controllers */ |
Stefan Roese | b0f4ba0 | 2020-12-11 17:05:56 +0100 | [diff] [blame] | 9824 | l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL_REL); |
Aaron Williams | 61674a1 | 2020-09-02 08:29:07 +0200 | [diff] [blame] | 9825 | |
| 9826 | /* xbits depends on number of LMCs */ |
| 9827 | xbits = cvmx_dram_get_num_lmc(priv) >> 1; // 4->2, 2->1, 1->0 |
| 9828 | bank_lsb = 7 + xbits; |
| 9829 | |
| 9830 | /* LMC number is probably aliased */ |
| 9831 | if (l2c_ctl.s.disidxalias) { |
| 9832 | *lmc = EXTRACT(address, 7, xbits); |
| 9833 | } else { |
| 9834 | *lmc = EXTRACT(address, 7, xbits) ^ |
| 9835 | EXTRACT(address, bitno, xbits) ^ |
| 9836 | EXTRACT(address, 12, xbits); |
| 9837 | } |
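| | /* |
| | * For instance, with four LMCs xbits = 2 and the LMC number comes from |
| | * address bits <8:7>, XORed with bits <21:20> and <13:12> unless index |
| | * aliasing is disabled (DISIDXALIAS set). |
| | */ |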
| 9838 | |
| 9839 | /* Figure out the bank field width */ |
| 9840 | lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(*lmc)); |
| 9841 | ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(*lmc)); |
| 9842 | bank_width = cvmx_dram_get_num_bank_bits(priv, *lmc); |
| 9843 | |
| 9844 | /* Extract additional info from the LMC_CONFIG CSR */ |
| 9845 | dimm_lsb = 28 + lmcx_config.s.pbank_lsb + xbits; |
| 9846 | dimm_width = 40 - dimm_lsb; |
| 9847 | prank_lsb = dimm_lsb - lmcx_config.s.rank_ena; |
| 9848 | prank_width = dimm_lsb - prank_lsb; |
| 9849 | lrank_lsb = prank_lsb - ext_config.s.dimm0_cid; |
| 9850 | lrank_width = prank_lsb - lrank_lsb; |
| 9851 | row_lsb = 14 + lmcx_config.s.row_lsb + xbits; |
| 9852 | row_width = lrank_lsb - row_lsb; |
| 9853 | col_hi_lsb = bank_lsb + bank_width; |
| 9854 | col_hi_width = row_lsb - col_hi_lsb; |
| 9855 | |
| 9856 | /* Extract the parts of the address */ |
| 9857 | *dimm = EXTRACT(address, dimm_lsb, dimm_width); |
| 9858 | *prank = EXTRACT(address, prank_lsb, prank_width); |
| 9859 | *lrank = EXTRACT(address, lrank_lsb, lrank_width); |
| 9860 | *row = EXTRACT(address, row_lsb, row_width); |
| 9861 | |
| 9862 | /* bank calculation may be aliased... */ |
| 9863 | lmcx_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(*lmc)); |
| 9864 | if (lmcx_control.s.xor_bank) { |
| 9865 | *bank = EXTRACT(address, bank_lsb, bank_width) ^ |
| 9866 | EXTRACT(address, 12 + xbits, bank_width); |
| 9867 | } else { |
| 9868 | *bank = EXTRACT(address, bank_lsb, bank_width); |
| 9869 | } |
| 9870 | |
| 9871 | /* LMC number already extracted */ |
| 9872 | col_hi = EXTRACT(address, col_hi_lsb, col_hi_width); |
| 9873 | *col = EXTRACT(address, 3, 4) | (col_hi << 4); |
| 9874 | /* Bus byte is address bits [2:0]. Unused here */ |
| 9875 | } |
| 9876 | |
| 9877 | // end of added workarounds |
| 9878 | |
| 9879 | // NOTE: "mode" argument: |
| 9880 | // DBTRAIN_TEST: for testing using GP patterns, includes ECC |
| 9881 | // DBTRAIN_DBI: for DBI deskew training behavior (uses GP patterns) |
| 9882 | // DBTRAIN_LFSR: for testing using LFSR patterns, includes ECC |
| 9883 | // NOTE: trust the caller to specify the correct/supported mode |
| 9884 | // |
| 9885 | static int test_dram_byte_hw(struct ddr_priv *priv, int if_num, u64 p, |
| 9886 | int mode, u64 *xor_data) |
| 9887 | { |
| 9888 | u64 p1; |
| 9889 | u64 k; |
| 9890 | int errors = 0; |
| 9891 | |
| 9892 | u64 mpr_data0, mpr_data1; |
| 9893 | u64 bad_bits[2] = { 0, 0 }; |
| 9894 | |
| 9895 | int node_address, lmc, dimm; |
| 9896 | int prank, lrank; |
| 9897 | int bank, row, col; |
| 9898 | int save_or_dis; |
| 9899 | int byte; |
| 9900 | int ba_loop, ba_bits; |
| 9901 | |
| 9902 | union cvmx_lmcx_rlevel_ctl rlevel_ctl; |
| 9903 | union cvmx_lmcx_dbtrain_ctl dbtrain_ctl; |
| 9904 | union cvmx_lmcx_phy_ctl phy_ctl; |
| 9905 | |
| 9906 | int biter_errs; |
| 9907 | |
| 9908 | // FIXME: K iterations set to 4 for now. |
| 9909 | // FIXME: decrement to increase iterations. |
| 9910 | // FIXME: must be no less than 22 to stay above an LMC hash field. |
| 9911 | int kshift = 27; |
| 9912 | |
| 9913 | const char *s; |
| 9914 | int node = 0; |
| 9915 | |
| 9916 | // allow override default setting for kshift |
| 9917 | s = env_get("ddr_tune_set_kshift"); |
| 9918 | if (s) { |
| 9919 | int temp = simple_strtoul(s, NULL, 0); |
| 9920 | |
| 9921 | if (temp < 22 || temp > 28) { |
| 9922 | debug("N%d.LMC%d: ILLEGAL override of kshift to %d, using default %d\n", |
| 9923 | node, if_num, temp, kshift); |
| 9924 | } else { |
| 9925 | debug("N%d.LMC%d: overriding kshift (%d) to %d\n", |
| 9926 | node, if_num, kshift, temp); |
| 9927 | kshift = temp; |
| 9928 | } |
| 9929 | } |
| 9930 | |
| 9931 | /* |
| 9932 | * 1) Make sure that RLEVEL_CTL[OR_DIS] = 0. |
| 9933 | */ |
| 9934 | rlevel_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num)); |
| 9935 | save_or_dis = rlevel_ctl.s.or_dis; |
| 9936 | /* or_dis must be disabled for this sequence */ |
| 9937 | rlevel_ctl.s.or_dis = 0; |
| 9938 | lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64); |
| 9939 | |
| 9940 | /* |
| 9941 | * NOTE: this step done in the calling routine(s)... |
| 9942 | * 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern |
| 9943 | * of choice. |
Heinrich Schuchardt | 2e66ecb | 2022-01-16 23:11:58 +0100 | [diff] [blame] | 9944 | * a. GENERAL_PURPOSE0[DATA<63:0>] - sets the initial lower |
Aaron Williams | 61674a1 | 2020-09-02 08:29:07 +0200 | [diff] [blame] | 9945 | * (rising edge) 64 bits of data. |
Heinrich Schuchardt | 2e66ecb | 2022-01-16 23:11:58 +0100 | [diff] [blame] | 9946 | * b. GENERAL_PURPOSE1[DATA<63:0>] - sets the initial upper |
Aaron Williams | 61674a1 | 2020-09-02 08:29:07 +0200 | [diff] [blame] | 9947 | * (falling edge) 64 bits of data. |
Heinrich Schuchardt | 2e66ecb | 2022-01-16 23:11:58 +0100 | [diff] [blame] | 9948 | * c. GENERAL_PURPOSE2[DATA<15:0>] - sets the initial lower |
Aaron Williams | 61674a1 | 2020-09-02 08:29:07 +0200 | [diff] [blame] | 9949 | * (rising edge <7:0>) and upper (falling edge <15:8>) ECC data. |
| 9950 | */ |
| 9951 | |
| 9952 | // final address must include LMC and node |
| 9953 | p |= (if_num << 7); /* Map address into proper interface */ |
| 9954 | p |= (u64)node << CVMX_NODE_MEM_SHIFT; // map to node |
| 9955 | |
| 9956 | /* |
| 9957 | * Add base offset to both test regions to not clobber u-boot stuff |
| 9958 | * when running from L2 for NAND boot. |
| 9959 | */ |
| 9960 | p += 0x20000000; // offset to 512MB, ie above THE HOLE!!! |
| 9961 | p |= 1ull << 63; // needed for OCTEON |
| 9962 | |
| 9963 | errors = 0; |
| 9964 | |
| 9965 | cvmx_dram_address_extract_info(priv, p, &node_address, &lmc, &dimm, |
| 9966 | &prank, &lrank, &bank, &row, &col); |
| 9967 | debug("%s: START at A:0x%012llx, N%d L%d D%d/%d R%d B%1x Row:%05x Col:%05x\n", |
| 9968 | __func__, p, node_address, lmc, dimm, prank, lrank, bank, |
| 9969 | row, col); |
| 9970 | |
| 9971 | // only check once per call, and ignore if no match... |
| 9972 | if ((int)node != node_address) { |
| 9973 | printf("ERROR: Node address mismatch\n"); |
| 9974 | return 0; |
| 9975 | } |
| 9976 | if (lmc != if_num) { |
| 9977 | printf("ERROR: LMC address mismatch\n"); |
| 9978 | return 0; |
| 9979 | } |
| 9980 | |
| 9981 | /* |
| 9982 | * 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically clears this as |
Heinrich Schuchardt | 2e66ecb | 2022-01-16 23:11:58 +0100 | [diff] [blame] | 9983 | * it's a one-shot operation). This is to get into the habit of |
| 9984 | * resetting PHY's SILO to the original 0 location. |
Aaron Williams | 61674a1 | 2020-09-02 08:29:07 +0200 | [diff] [blame] | 9985 | */ |
| 9986 | phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); |
| 9987 | phy_ctl.s.phy_reset = 1; |
| 9988 | lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); |
| 9989 | |
| 9990 | /* |
| 9991 | * Walk through a range of addresses avoiding bits that alias |
| 9992 | * interfaces on the CN88XX. |
| 9993 | */ |
| 9994 | |
| 9995 | // FIXME: we want to keep the K increment from affecting the |
| 9996 | // LMC via hash, so keep it above bit 21; we also want to keep k |
| 9997 | // less than the base offset of bit 29 (512MB). |
| 9998 | |
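| | // With the default kshift of 27, this walks 2^29 / 2^27 = 4 addresses |
| | // spaced 128 MB apart within the 512 MB test window. |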
| 9999 | for (k = 0; k < (1UL << 29); k += (1UL << kshift)) { |
| 10000 | // FIXME: the sequence will iterate over 1/2 cacheline |
| 10001 | // FIXME: for each unit specified in "read_cmd_count", |
| 10002 | // FIXME: so we set up each sequence to do the maximum |
| 10003 | // number of cachelines it can |
| 10004 | |
| 10005 | p1 = p + k; |
| 10006 | |
| 10007 | cvmx_dram_address_extract_info(priv, p1, &node_address, &lmc, |
| 10008 | &dimm, &prank, &lrank, &bank, |
| 10009 | &row, &col); |
| 10010 | |
| 10011 | /* |
| 10012 | * 2) Setup the fields of the CSR DBTRAIN_CTL as follows: |
| 10013 | * a. COL, ROW, BA, BG, PRANK points to the starting point |
| 10014 | * of the address. |
| 10015 | * You can just set them to all 0. |
Heinrich Schuchardt | 2e66ecb | 2022-01-16 23:11:58 +0100 | [diff] [blame] | 10016 | * b. RW_TRAIN - set this to 1. |
| 10017 | * c. TCCD_L - set this to 0. |
| 10018 | * d. READ_CMD_COUNT - instructs the sequence how many |
| 10019 | * writes/reads to perform. |
| 10020 | * It is a 5-bit field, so set it to 31 for the maximum number of r/w. |
| 10021 | */ |
| 10022 | dbtrain_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DBTRAIN_CTL(if_num)); |
| 10023 | dbtrain_ctl.s.column_a = col; |
| 10024 | dbtrain_ctl.s.row_a = row; |
| 10025 | dbtrain_ctl.s.bg = (bank >> 2) & 3; |
| 10026 | dbtrain_ctl.s.prank = (dimm * 2) + prank; // FIXME? |
| 10027 | dbtrain_ctl.s.lrank = lrank; // FIXME? |
| 10028 | dbtrain_ctl.s.activate = (mode == DBTRAIN_DBI); |
| 10029 | dbtrain_ctl.s.write_ena = 1; |
| 10030 | dbtrain_ctl.s.read_cmd_count = 31; // max count pass 1.x |
| 10031 | if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) || |
| 10032 | octeon_is_cpuid(OCTEON_CNF75XX)) { |
| 10033 | // max count on chips that support it |
| 10034 | dbtrain_ctl.s.cmd_count_ext = 3; |
| 10035 | } else { |
| 10036 | // max count pass 1.x |
| 10037 | dbtrain_ctl.s.cmd_count_ext = 0; |
| 10038 | } |
| 10039 | |
| 10040 | dbtrain_ctl.s.rw_train = 1; |
| 10041 | dbtrain_ctl.s.tccd_sel = (mode == DBTRAIN_DBI); |
| 10042 | // LFSR should only be on when chip supports it... |
| 10043 | dbtrain_ctl.s.lfsr_pattern_sel = (mode == DBTRAIN_LFSR) ? 1 : 0; |
| 10044 | |
| 10045 | biter_errs = 0; |
| 10046 | |
| 10047 | // for each address, iterate over the 4 "banks" in the BA |
| 10048 | for (ba_loop = 0, ba_bits = bank & 3; |
| 10049 | ba_loop < 4; ba_loop++, ba_bits = (ba_bits + 1) & 3) { |
| 10050 | dbtrain_ctl.s.ba = ba_bits; |
| 10051 | lmc_wr(priv, CVMX_LMCX_DBTRAIN_CTL(if_num), |
| 10052 | dbtrain_ctl.u64); |
| 10053 | |
| 10054 | /* |
| 10055 | * We will use the RW_TRAINING sequence (14) for |
| 10056 | * this task. |
| 10057 | * |
| 10058 | * 4) Kick off the sequence (SEQ_CTL[SEQ_SEL] = 14, |
| 10059 | * SEQ_CTL[INIT_START] = 1). |
| 10060 | * 5) Poll on SEQ_CTL[SEQ_COMPLETE] for completion. |
| 10061 | */ |
| 10062 | oct3_ddr3_seq(priv, prank, if_num, 14); |
| 10063 | |
| 10064 | /* |
| 10065 | * 6) Read MPR_DATA0 and MPR_DATA1 for results. |
Heinrich Schuchardt | 2e66ecb | 2022-01-16 23:11:58 +0100 | [diff] [blame] | 10066 | * a. MPR_DATA0[MPR_DATA<63:0>] - comparison results |
Aaron Williams | 61674a1 | 2020-09-02 08:29:07 +0200 | [diff] [blame] | 10067 | * for DQ63:DQ0. (1 means MATCH, 0 means FAIL). |
Heinrich Schuchardt | 2e66ecb | 2022-01-16 23:11:58 +0100 | [diff] [blame] | 10068 | * b. MPR_DATA1[MPR_DATA<7:0>] - comparison results |
Aaron Williams | 61674a1 | 2020-09-02 08:29:07 +0200 | [diff] [blame] | 10069 | * for ECC bit7:0. |
| 10070 | */ |
| 10071 | mpr_data0 = lmc_rd(priv, CVMX_LMCX_MPR_DATA0(if_num)); |
| 10072 | mpr_data1 = lmc_rd(priv, CVMX_LMCX_MPR_DATA1(if_num)); |
| 10073 | |
| 10074 | /* |
| 10075 | * 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically |
Heinrich Schuchardt | 2e66ecb | 2022-01-16 23:11:58 +0100 | [diff] [blame] | 10076 | * clears this as it's a one-shot operation). |
| 10077 | * This is to get into the habit of resetting PHY's |
Aaron Williams | 61674a1 | 2020-09-02 08:29:07 +0200 | [diff] [blame] | 10078 | * SILO to the original 0 location. |
| 10079 | */ |
| 10080 | phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); |
| 10081 | phy_ctl.s.phy_reset = 1; |
| 10082 | lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64); |
| 10083 | |
| 10084 | // bypass any error checking or updating when DBI mode |
| 10085 | if (mode == DBTRAIN_DBI) |
| 10086 | continue; |
| 10087 | |
| 10088 | // data bytes |
| 10089 | if (~mpr_data0) { |
| 10090 | for (byte = 0; byte < 8; byte++) { |
| 10091 | if ((~mpr_data0 >> (8 * byte)) & 0xffUL) |
| 10092 | biter_errs |= (1 << byte); |
| 10093 | } |
| 10094 | // accumulate bad bits |
| 10095 | bad_bits[0] |= ~mpr_data0; |
| 10096 | } |
| 10097 | |
| 10098 | // include ECC byte errors |
| 10099 | if (~mpr_data1 & 0xffUL) { |
| 10100 | biter_errs |= (1 << 8); |
| 10101 | bad_bits[1] |= ~mpr_data1 & 0xffUL; |
| 10102 | } |
| 10103 | } |
| 10104 | |
| 10105 | errors |= biter_errs; |
| 10106 | } /* end for (k=...) */ |
| 10107 | |
| 10108 | rlevel_ctl.s.or_dis = save_or_dis; |
| 10109 | lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64); |
| 10110 | |
| 10111 | // send the bad bits back... |
| 10112 | if (mode != DBTRAIN_DBI && xor_data) { |
| 10113 | xor_data[0] = bad_bits[0]; |
| 10114 | xor_data[1] = bad_bits[1]; |
| 10115 | } |
| 10116 | |
| 10117 | return errors; |
| 10118 | } |
| 10119 | |
| 10120 | // setup default for byte test pattern array |
| 10121 | // take these from the HRM section 6.9.13 |
| 10122 | static const u64 byte_pattern_0[] = { |
| 10123 | 0xFFAAFFFFFF55FFFFULL, // GP0 |
| 10124 | 0x55555555AAAAAAAAULL, // GP1 |
| 10125 | 0xAA55AAAAULL, // GP2 |
| 10126 | }; |
| 10127 | |
| 10128 | static const u64 byte_pattern_1[] = { |
| 10129 | 0xFBF7EFDFBF7FFEFDULL, // GP0 |
| 10130 | 0x0F1E3C78F0E1C387ULL, // GP1 |
| 10131 | 0xF0E1BF7FULL, // GP2 |
| 10132 | }; |
| 10133 | |
| 10134 | // this is from Andrew via LFSR with PRBS=0xFFFFAAAA |
| 10135 | static const u64 byte_pattern_2[] = { |
| 10136 | 0xEE55AADDEE55AADDULL, // GP0 |
| 10137 | 0x55AADDEE55AADDEEULL, // GP1 |
| 10138 | 0x55EEULL, // GP2 |
| 10139 | }; |
| 10140 | |
| 10141 | // this is from Mike via LFSR with PRBS=0x4A519909 |
| 10142 | static const u64 byte_pattern_3[] = { |
| 10143 | 0x0088CCEE0088CCEEULL, // GP0 |
| 10144 | 0xBB552211BB552211ULL, // GP1 |
| 10145 | 0xBB00ULL, // GP2 |
| 10146 | }; |
| 10147 | |
| 10148 | static const u64 *byte_patterns[4] = { |
| 10149 | byte_pattern_0, byte_pattern_1, byte_pattern_2, byte_pattern_3 |
| 10150 | }; |
| 10151 | |
| 10152 | static const u32 lfsr_patterns[4] = { |
| 10153 | 0xFFFFAAAAUL, 0x06000000UL, 0xAAAAFFFFUL, 0x4A519909UL |
| 10154 | }; |
| 10155 | |
| 10156 | #define NUM_BYTE_PATTERNS 4 |
| 10157 | |
| 10158 | #define DEFAULT_BYTE_BURSTS 32 // compromise between time and rigor |
| 10159 | |
| 10160 | static void setup_hw_pattern(struct ddr_priv *priv, int lmc, |
| 10161 | const u64 *pattern_p) |
| 10162 | { |
| 10163 | /* |
| 10164 | * 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern |
| 10165 | * of choice. |
Heinrich Schuchardt | 2e66ecb | 2022-01-16 23:11:58 +0100 | [diff] [blame] | 10166 | * a. GENERAL_PURPOSE0[DATA<63:0>] - sets the initial lower |
Aaron Williams | 61674a1 | 2020-09-02 08:29:07 +0200 | [diff] [blame] | 10167 | * (rising edge) 64 bits of data. |
Heinrich Schuchardt | 2e66ecb | 2022-01-16 23:11:58 +0100 | [diff] [blame] | 10168 | * b. GENERAL_PURPOSE1[DATA<63:0>] - sets the initial upper |
Aaron Williams | 61674a1 | 2020-09-02 08:29:07 +0200 | [diff] [blame] | 10169 | * (falling edge) 64 bits of data. |
Heinrich Schuchardt | 2e66ecb | 2022-01-16 23:11:58 +0100 | [diff] [blame] | 10170 | * c. GENERAL_PURPOSE2[DATA<15:0>] - sets the initial lower |
Aaron Williams | 61674a1 | 2020-09-02 08:29:07 +0200 | [diff] [blame] | 10171 | * (rising edge <7:0>) and upper |
| 10172 | * (falling edge <15:8>) ECC data. |
| 10173 | */ |
| 10174 | lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(lmc), pattern_p[0]); |
| 10175 | lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE1(lmc), pattern_p[1]); |
| 10176 | lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE2(lmc), pattern_p[2]); |
| 10177 | } |
| 10178 | |
| 10179 | static void setup_lfsr_pattern(struct ddr_priv *priv, int lmc, u32 data) |
| 10180 | { |
| 10181 | union cvmx_lmcx_char_ctl char_ctl; |
| 10182 | u32 prbs; |
| 10183 | const char *s; |
| 10184 | |
| 10185 | s = env_get("ddr_lfsr_prbs"); |
| 10186 | if (s) |
| 10187 | prbs = simple_strtoul(s, NULL, 0); |
| 10188 | else |
| 10189 | prbs = data; |
| 10190 | |
| 10191 | /* |
| 10192 | * 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1 |
| 10193 | * here data comes from the LFSR generating a PRBS pattern |
| 10194 | * CHAR_CTL.EN = 0 |
| 10195 | * CHAR_CTL.SEL = 0; // for PRBS |
| 10196 | * CHAR_CTL.DR = 1; |
| 10197 | * CHAR_CTL.PRBS = setup for whatever type of PRBS to send |
| 10198 | * CHAR_CTL.SKEW_ON = 1; |
| 10199 | */ |
| 10200 | char_ctl.u64 = lmc_rd(priv, CVMX_LMCX_CHAR_CTL(lmc)); |
| 10201 | char_ctl.s.en = 0; |
| 10202 | char_ctl.s.sel = 0; |
| 10203 | char_ctl.s.dr = 1; |
| 10204 | char_ctl.s.prbs = prbs; |
| 10205 | char_ctl.s.skew_on = 1; |
| 10206 | lmc_wr(priv, CVMX_LMCX_CHAR_CTL(lmc), char_ctl.u64); |
| 10207 | } |
| 10208 | |
| 10209 | static int choose_best_hw_patterns(int lmc, int mode) |
| 10210 | { |
| 10211 | int new_mode = mode; |
| 10212 | const char *s; |
| 10213 | |
| 10214 | switch (mode) { |
| 10215 | case DBTRAIN_TEST: // always choose LFSR if chip supports it |
| 10216 | if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) { |
| 10217 | int lfsr_enable = 1; |
| 10218 | |
| 10219 | s = env_get("ddr_allow_lfsr"); |
| 10220 | if (s) { |
| 10221 | // override? |
| 10222 | lfsr_enable = !!strtoul(s, NULL, 0); |
| 10223 | } |
| 10224 | |
| 10225 | if (lfsr_enable) |
| 10226 | new_mode = DBTRAIN_LFSR; |
| 10227 | } |
| 10228 | break; |
| 10229 | |
| 10230 | case DBTRAIN_DBI: // possibly can allow LFSR use? |
| 10231 | break; |
| 10232 | |
| 10233 | case DBTRAIN_LFSR: // forced already |
| 10234 | if (!octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) { |
| 10235 | debug("ERROR: illegal HW assist mode %d\n", mode); |
| 10236 | new_mode = DBTRAIN_TEST; |
| 10237 | } |
| 10238 | break; |
| 10239 | |
| 10240 | default: |
| 10241 | debug("ERROR: unknown HW assist mode %d\n", mode); |
| 10242 | } |
| 10243 | |
| 10244 | if (new_mode != mode) |
| 10245 | debug("%s: changing mode %d to %d\n", __func__, mode, new_mode); |
| 10246 | |
| 10247 | return new_mode; |
| 10248 | } |
| 10249 | |
| 10250 | int run_best_hw_patterns(struct ddr_priv *priv, int lmc, u64 phys_addr, |
| 10251 | int mode, u64 *xor_data) |
| 10252 | { |
| 10253 | int pattern; |
| 10254 | const u64 *pattern_p; |
| 10255 | int errs, errors = 0; |
| 10256 | |
| 10257 | // FIXME? always choose LFSR if chip supports it??? |
| 10258 | mode = choose_best_hw_patterns(lmc, mode); |
| 10259 | |
| 10260 | for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) { |
| 10261 | if (mode == DBTRAIN_LFSR) { |
| 10262 | setup_lfsr_pattern(priv, lmc, lfsr_patterns[pattern]); |
| 10263 | } else { |
| 10264 | pattern_p = byte_patterns[pattern]; |
| 10265 | setup_hw_pattern(priv, lmc, pattern_p); |
| 10266 | } |
| 10267 | errs = test_dram_byte_hw(priv, lmc, phys_addr, mode, xor_data); |
| 10268 | |
| 10269 | debug("%s: PATTERN %d at A:0x%012llx errors 0x%x\n", |
| 10270 | __func__, pattern, phys_addr, errs); |
| 10271 | |
| 10272 | errors |= errs; |
| 10273 | } |
| 10274 | |
| 10275 | return errors; |
| 10276 | } |
| 10277 | |
| 10278 | static void hw_assist_test_dll_offset(struct ddr_priv *priv, |
| 10279 | int dll_offset_mode, int lmc, |
| 10280 | int bytelane, |
| 10281 | int if_64b, |
| 10282 | u64 dram_tune_rank_offset, |
| 10283 | int dram_tune_byte_bursts) |
| 10284 | { |
| 10285 | int byte_offset, new_best_offset[9]; |
| 10286 | int rank_delay_start[4][9]; |
| 10287 | int rank_delay_count[4][9]; |
| 10288 | int rank_delay_best_start[4][9]; |
| 10289 | int rank_delay_best_count[4][9]; |
| 10290 | int errors[4], off_errors, tot_errors; |
| 10291 | int rank_mask, rankx, active_ranks; |
| 10292 | int pattern; |
| 10293 | const u64 *pattern_p; |
| 10294 | int byte; |
| 10295 | char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write"; |
| 10296 | int pat_best_offset[9]; |
| 10297 | u64 phys_addr; |
| 10298 | int pat_beg, pat_end; |
| 10299 | int rank_beg, rank_end; |
| 10300 | int byte_lo, byte_hi; |
| 10301 | union cvmx_lmcx_config lmcx_config; |
| 10302 | u64 hw_rank_offset; |
| 10303 | int num_lmcs = cvmx_dram_get_num_lmc(priv); |
| 10304 | // FIXME? always choose LFSR if chip supports it??? |
| 10305 | int mode = choose_best_hw_patterns(lmc, DBTRAIN_TEST); |
| 10306 | int node = 0; |
| 10307 | |
| 10308 | if (bytelane == 0x0A) { // all bytelanes |
| 10309 | byte_lo = 0; |
| 10310 | byte_hi = 8; |
| 10311 | } else { // just 1 |
| 10312 | byte_lo = bytelane; |
| 10313 | byte_hi = bytelane; |
| 10314 | } |
| 10315 | |
| 10316 | lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc)); |
| 10317 | rank_mask = lmcx_config.s.init_status; |
| 10318 | |
| 10319 | // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs |
| 10320 | hw_rank_offset = |
| 10321 | 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + |
| 10322 | (num_lmcs / 2)); |
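| | /* |
| | * Illustration with assumed field values: pbank_lsb = 5, rank_ena = 1 |
| | * and two LMCs (num_lmcs / 2 = 1) give a rank-to-rank offset of |
| | * 1ull << (28 + 5 - 1 + 1) = 8 GiB. |
| | */ |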
| 10323 | |
| 10324 | debug("N%d: %s: starting LMC%d with rank offset 0x%016llx\n", |
| 10325 | node, __func__, lmc, (unsigned long long)hw_rank_offset); |
| 10326 | |
| 10327 | // start of pattern loop |
| 10328 | // we do the set of tests for each pattern supplied... |
| 10329 | |
| 10330 | memset(new_best_offset, 0, sizeof(new_best_offset)); |
| 10331 | for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) { |
| 10332 | memset(pat_best_offset, 0, sizeof(pat_best_offset)); |
| 10333 | |
| 10334 | if (mode == DBTRAIN_TEST) { |
| 10335 | pattern_p = byte_patterns[pattern]; |
| 10336 | setup_hw_pattern(priv, lmc, pattern_p); |
| 10337 | } else { |
| 10338 | setup_lfsr_pattern(priv, lmc, lfsr_patterns[pattern]); |
| 10339 | } |
| 10340 | |
| 10341 | // now loop through all legal values for the DLL byte offset... |
| 10342 | |
| 10343 | #define BYTE_OFFSET_INCR 3 // FIXME: make this tunable? |
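| | // With the default increment of 3, the sweep below visits offsets |
| | // -63, -60, ..., +60, +63: 43 sample points per pattern |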
| 10344 | |
| 10345 | tot_errors = 0; |
| 10346 | |
| 10347 | memset(rank_delay_count, 0, sizeof(rank_delay_count)); |
| 10348 | memset(rank_delay_start, 0, sizeof(rank_delay_start)); |
| 10349 | memset(rank_delay_best_count, 0, sizeof(rank_delay_best_count)); |
| 10350 | memset(rank_delay_best_start, 0, sizeof(rank_delay_best_start)); |
| 10351 | |
| 10352 | for (byte_offset = -63; byte_offset < 64; |
| 10353 | byte_offset += BYTE_OFFSET_INCR) { |
| 10354 | // do the setup on the active LMC |
| 10355 | // set the bytelanes DLL offsets |
| 10356 | change_dll_offset_enable(priv, lmc, 0); |
| 10357 | // FIXME? bytelane? |
| 10358 | load_dll_offset(priv, lmc, dll_offset_mode, |
| 10359 | byte_offset, bytelane); |
| 10360 | change_dll_offset_enable(priv, lmc, 1); |
| 10361 | |
| 10362 | //bdk_watchdog_poke(); |
| 10363 | |
| 10364 | // run the test on each rank |
| 10365 | // only 1 call per rank should be enough, let the |
| 10366 | // bursts, loops, etc, control the load... |
| 10367 | |
| 10368 | // errors for this byte_offset, all ranks |
| 10369 | off_errors = 0; |
| 10370 | |
| 10371 | active_ranks = 0; |
| 10372 | |
| 10373 | for (rankx = 0; rankx < 4; rankx++) { |
| 10374 | if (!(rank_mask & (1 << rankx))) |
| 10375 | continue; |
| 10376 | |
| 10377 | phys_addr = hw_rank_offset * active_ranks; |
| 10378 | // FIXME: now done by test_dram_byte_hw() |
| 10379 | //phys_addr |= (lmc << 7); |
| 10380 | //phys_addr |= (u64)node << CVMX_NODE_MEM_SHIFT; |
| 10381 | |
| 10382 | active_ranks++; |
| 10383 | |
| 10384 | // NOTE: return is now a bitmask of the |
| 10385 | // erroring bytelanes. |
| 10386 | errors[rankx] = |
| 10387 | test_dram_byte_hw(priv, lmc, phys_addr, |
| 10388 | mode, NULL); |
| 10389 | |
| 10390 | // process any errors in the bytelane(s) that |
| 10391 | // are being tested |
| 10392 | for (byte = byte_lo; byte <= byte_hi; byte++) { |
| 10393 | // check errors |
| 10394 | // yes, an error in the byte lane in |
| 10395 | // this rank |
| 10396 | if (errors[rankx] & (1 << byte)) { |
| 10397 | off_errors |= (1 << byte); |
| 10398 | |
| 10399 | debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: Address 0x%012llx errors\n", |
| 10400 | node, lmc, rankx, byte, |
| 10401 | mode_str, byte_offset, |
| 10402 | phys_addr); |
| 10403 | |
| 10404 | // had started run |
| 10405 | if (rank_delay_count |
| 10406 | [rankx][byte] > 0) { |
| 10407 | debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: stopping a run here\n", |
| 10408 | node, lmc, rankx, |
| 10409 | byte, mode_str, |
| 10410 | byte_offset); |
| 10411 | // stop now |
| 10412 | rank_delay_count |
| 10413 | [rankx][byte] = |
| 10414 | 0; |
| 10415 | } |
| 10416 | // FIXME: else had not started |
| 10417 | // run - nothing else to do? |
| 10418 | } else { |
| 10419 | // no error in the byte lane |
| 10420 | // first success, set run start |
| 10421 | if (rank_delay_count[rankx] |
| 10422 | [byte] == 0) { |
| 10423 | debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: starting a run here\n", |
| 10424 | node, lmc, rankx, |
| 10425 | byte, mode_str, |
| 10426 | byte_offset); |
| 10427 | rank_delay_start[rankx] |
| 10428 | [byte] = |
| 10429 | byte_offset; |
| 10430 | } |
| 10431 | // bump run length |
| 10432 | rank_delay_count[rankx][byte] |
| 10433 | += BYTE_OFFSET_INCR; |
| 10434 | |
| 10435 | // is this now the biggest |
| 10436 | // window? |
| 10437 | if (rank_delay_count[rankx] |
| 10438 | [byte] > |
| 10439 | rank_delay_best_count[rankx] |
| 10440 | [byte]) { |
| 10441 | rank_delay_best_count |
| 10442 | [rankx][byte] = |
| 10443 | rank_delay_count |
| 10444 | [rankx][byte]; |
| 10445 | rank_delay_best_start |
| 10446 | [rankx][byte] = |
| 10447 | rank_delay_start |
| 10448 | [rankx][byte]; |
| 10449 | debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: updating best to %d/%d\n", |
| 10450 | node, lmc, rankx, |
| 10451 | byte, mode_str, |
| 10452 | byte_offset, |
| 10453 | rank_delay_best_start |
| 10454 | [rankx][byte], |
| 10455 | rank_delay_best_count |
| 10456 | [rankx][byte]); |
| 10457 | } |
| 10458 | } |
| 10459 | } |
| 10460 | } /* for (rankx = 0; rankx < 4; rankx++) */ |
| 10461 | |
| 10462 | tot_errors |= off_errors; |
| 10463 | } |
| 10464 | |
| 10465 | // set the bytelanes DLL offsets all back to 0 |
| 10466 | change_dll_offset_enable(priv, lmc, 0); |
| 10467 | load_dll_offset(priv, lmc, dll_offset_mode, 0, bytelane); |
| 10468 | change_dll_offset_enable(priv, lmc, 1); |
| 10469 | |
| 10470 | // now choose the best byte_offsets for this pattern |
| 10471 | // according to the best windows of the tested ranks |
| 10472 | // calculate offset by constructing an average window |
| 10473 | // from the rank windows |
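| | // Illustration with made-up windows: if rank 0 passed from -12 to |
| | // +18 and rank 1 from -6 to +24, the common window is -6..+18 and |
| | // the chosen offset is its midpoint, (+18 + -6) / 2 = +6 |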
| 10474 | for (byte = byte_lo; byte <= byte_hi; byte++) { |
| 10475 | pat_beg = -999; |
| 10476 | pat_end = 999; |
| 10477 | |
| 10478 | for (rankx = 0; rankx < 4; rankx++) { |
| 10479 | if (!(rank_mask & (1 << rankx))) |
| 10480 | continue; |
| 10481 | |
| 10482 | rank_beg = rank_delay_best_start[rankx][byte]; |
| 10483 | pat_beg = max(pat_beg, rank_beg); |
| 10484 | rank_end = rank_beg + |
| 10485 | rank_delay_best_count[rankx][byte] - |
| 10486 | BYTE_OFFSET_INCR; |
| 10487 | pat_end = min(pat_end, rank_end); |
| 10488 | |
| 10489 | debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test: Rank Window %3d:%3d\n", |
| 10490 | node, lmc, rankx, byte, mode_str, |
| 10491 | rank_beg, rank_end); |
| 10492 | |
| 10493 | } /* for (rankx = 0; rankx < 4; rankx++) */ |
| 10494 | |
| 10495 | pat_best_offset[byte] = (pat_end + pat_beg) / 2; |
| 10496 | |
| 10497 | // sum the pattern averages |
| 10498 | new_best_offset[byte] += pat_best_offset[byte]; |
| 10499 | } |
| 10500 | |
| 10501 | // now print them on 1 line, descending order... |
| 10502 | debug("N%d.LMC%d: HW DLL %s Offset Pattern %d :", |
| 10503 | node, lmc, mode_str, pattern); |
| 10504 | for (byte = byte_hi; byte >= byte_lo; --byte) |
| 10505 | debug(" %4d", pat_best_offset[byte]); |
| 10506 | debug("\n"); |
| 10507 | } |
| 10508 | // end of pattern loop |
| 10509 | |
| 10510 | debug("N%d.LMC%d: HW DLL %s Offset Average : ", node, lmc, mode_str); |
| 10511 | |
| 10512 | // print in descending byte index order |
| 10513 | for (byte = byte_hi; byte >= byte_lo; --byte) { |
| 10514 | // create the new average NINT |
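| | // (nearest-integer rounding: e.g. a sum of 14 over 3 patterns |
| | // averages to 5 rather than truncating to 4) |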
| 10515 | new_best_offset[byte] = divide_nint(new_best_offset[byte], |
| 10516 | NUM_BYTE_PATTERNS); |
| 10517 | |
| 10518 | // print the best offsets from all patterns |
| 10519 | |
| 10520 | // print just the offset of all the bytes |
| 10521 | if (bytelane == 0x0A) |
| 10522 | debug("%4d ", new_best_offset[byte]); |
| 10523 | else // print the bytelanes also |
| 10524 | debug("(byte %d) %4d ", byte, new_best_offset[byte]); |
| 10525 | |
| 10526 | // done with testing, load up the best offsets we found... |
| 10527 | // disable offsets while we load... |
| 10528 | change_dll_offset_enable(priv, lmc, 0); |
| 10529 | load_dll_offset(priv, lmc, dll_offset_mode, |
| 10530 | new_best_offset[byte], byte); |
| 10531 | // re-enable the offsets now that we are done loading |
| 10532 | change_dll_offset_enable(priv, lmc, 1); |
| 10533 | } |
| 10534 | |
| 10535 | debug("\n"); |
| 10536 | } |
| 10537 | |
| 10538 | /* |
| 10539 | * Automatically adjust the DLL offset for the selected bytelane using |
| 10540 | * hardware-assist |
| 10541 | */ |
| 10542 | static int perform_HW_dll_offset_tuning(struct ddr_priv *priv, |
| 10543 | int dll_offset_mode, int bytelane) |
| 10544 | { |
| 10545 | int if_64b; |
| 10546 | int save_ecc_ena[4]; |
| 10547 | union cvmx_lmcx_config lmc_config; |
| 10548 | int lmc, num_lmcs = cvmx_dram_get_num_lmc(priv); |
| 10549 | const char *s; |
| 10550 | int loops = 1, loop; |
| 10551 | int by; |
| 10552 | u64 dram_tune_rank_offset; |
| 10553 | int dram_tune_byte_bursts = DEFAULT_BYTE_BURSTS; |
| 10554 | int node = 0; |
| 10555 | |
| 10556 | // see if we want to do the tuning more than once per LMC... |
| 10557 | s = env_get("ddr_tune_ecc_loops"); |
| 10558 | if (s) |
| 10559 | loops = strtoul(s, NULL, 0); |
| 10560 | |
| 10561 | // allow override of the test repeats (bursts) |
| 10562 | s = env_get("ddr_tune_byte_bursts"); |
| 10563 | if (s) |
| 10564 | dram_tune_byte_bursts = strtoul(s, NULL, 10); |
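| | // Both knobs can be set from the U-Boot prompt, e.g. (values are |
| | // illustrative only): |
| | //   => setenv ddr_tune_ecc_loops 3 |
| | //   => setenv ddr_tune_byte_bursts 16 |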
| 10565 | |
| 10566 | // print current working values |
| 10567 | debug("N%d: H/W Tuning for bytelane %d will use %d loops, %d bursts, and %d patterns.\n", |
| 10568 | node, bytelane, loops, dram_tune_byte_bursts, NUM_BYTE_PATTERNS); |
| 10569 | |
| 10570 | // FIXME? get flag from LMC0 only |
| 10571 | lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(0)); |
| 10572 | if_64b = !lmc_config.s.mode32b; |
| 10573 | |
| 10574 | // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs |
| 10575 | dram_tune_rank_offset = |
| 10576 | 1ull << (28 + lmc_config.s.pbank_lsb - lmc_config.s.rank_ena + |
| 10577 | (num_lmcs / 2)); |
| 10578 | |
| 10579 | // do once for each active LMC |
| 10580 | |
| 10581 | for (lmc = 0; lmc < num_lmcs; lmc++) { |
| 10582 | debug("N%d: H/W Tuning: starting LMC%d bytelane %d tune.\n", |
| 10583 | node, lmc, bytelane); |
| 10584 | |
| 10585 | /* Enable ECC for the HW tests */ |
| 10586 | // NOTE: we do enable ECC, but the HW tests used will not |
| 10587 | // generate "visible" errors |
| 10588 | lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc)); |
| 10589 | save_ecc_ena[lmc] = lmc_config.s.ecc_ena; |
| 10590 | lmc_config.s.ecc_ena = 1; |
| 10591 | lmc_wr(priv, CVMX_LMCX_CONFIG(lmc), lmc_config.u64); |
| 10592 | lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc)); |
| 10593 | |
| 10594 | // testing is done on a single LMC at a time |
| 10595 | // FIXME: for now, loop here to show what happens multiple times |
| 10596 | for (loop = 0; loop < loops; loop++) { |
| 10597 | /* Perform DLL offset tuning */ |
| 10598 | hw_assist_test_dll_offset(priv, 2 /* 2=read */, lmc, |
| 10599 | bytelane, |
| 10600 | if_64b, dram_tune_rank_offset, |
| 10601 | dram_tune_byte_bursts); |
| 10602 | } |
| 10603 | |
| 10604 | // perform cleanup on active LMC |
| 10605 | debug("N%d: H/W Tuning: finishing LMC%d bytelane %d tune.\n", |
| 10606 | node, lmc, bytelane); |
| 10607 | |
| 10608 | /* Restore ECC for DRAM tests */ |
| 10609 | lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc)); |
| 10610 | lmc_config.s.ecc_ena = save_ecc_ena[lmc]; |
| 10611 | lmc_wr(priv, CVMX_LMCX_CONFIG(lmc), lmc_config.u64); |
| 10612 | lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc)); |
| 10613 | |
| 10614 | // finally, see if there are any read offset overrides |
| 10615 | // after tuning |
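| | // (e.g. "setenv ddr0_tune_byte3 6", an illustrative value, would |
| | // force the LMC0 bytelane 3 read DLL offset to 6 once tuning is done) |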
| 10616 | for (by = 0; by < 9; by++) { |
| 10617 | s = lookup_env(priv, "ddr%d_tune_byte%d", lmc, by); |
| 10618 | if (s) { |
| 10619 | int dllro = strtoul(s, NULL, 10); |
| 10620 | |
| 10621 | change_dll_offset_enable(priv, lmc, 0); |
| 10622 | load_dll_offset(priv, lmc, 2, dllro, by); |
| 10623 | change_dll_offset_enable(priv, lmc, 1); |
| 10624 | } |
| 10625 | } |
| 10626 | |
| 10627 | } /* for (lmc = 0; lmc < num_lmcs; lmc++) */ |
| 10628 | |
| 10629 | // finish up... |
| 10630 | |
| 10631 | return 0; |
| 10632 | |
| 10633 | } /* perform_HW_dll_offset_tuning */ |
| 10634 | |
| 10635 | // this routine simply makes the calls to the tuning routine and returns |
| 10636 | // any errors |
| 10637 | static int cvmx_tune_node(struct ddr_priv *priv) |
| 10638 | { |
| 10639 | int errs, tot_errs; |
| 10640 | int do_dllwo = 0; // default to NO |
| 10641 | const char *str; |
| 10642 | int node = 0; |
| 10643 | |
| 10644 | // Automatically tune the data and ECC byte DLL read offsets |
| 10645 | debug("N%d: Starting DLL Read Offset Tuning for LMCs\n", node); |
| 10646 | errs = perform_HW_dll_offset_tuning(priv, 2, 0x0A /* all bytelanes */); |
| 10647 | debug("N%d: Finished DLL Read Offset Tuning for LMCs, %d errors\n", |
| 10648 | node, errs); |
| 10649 | tot_errs = errs; |
| 10650 | |
| 10651 | // disabled by default for now, does not seem to be needed? |
| 10652 | // Automatically tune the data and ECC byte DLL write offsets |
| 10653 | // allow override of default setting |
| 10654 | str = env_get("ddr_tune_write_offsets"); |
| 10655 | if (str) |
| 10656 | do_dllwo = !!strtoul(str, NULL, 0); |
| 10657 | if (do_dllwo) { |
| 10658 | debug("N%d: Starting DLL Write Offset Tuning for LMCs\n", node); |
| 10659 | errs = |
| 10660 | perform_HW_dll_offset_tuning(priv, 1, |
| 10661 | 0x0A /* all bytelanes */); |
| 10662 | debug("N%d: Finished DLL Write Offset Tuning for LMCs, %d errors\n", |
| 10663 | node, errs); |
| 10664 | tot_errs += errs; |
| 10665 | } |
| 10666 | |
| 10667 | return tot_errs; |
| 10668 | } |
| 10669 | |
| 10670 | // this routine makes the calls to the tuning routines when criteria are met |
| 10671 | // intended to be called for automated tuning, to apply filtering... |
| 10672 | |
| 10673 | #define IS_DDR4 1 |
| 10674 | #define IS_DDR3 0 |
| 10675 | #define IS_RDIMM 1 |
| 10676 | #define IS_UDIMM 0 |
| 10677 | #define IS_1SLOT 1 |
| 10678 | #define IS_2SLOT 0 |
| 10679 | |
| 10680 | // FIXME: DDR3 is not tuned |
| 10681 | static const u32 ddr_speed_filter[2][2][2] = { |
| 10682 | [IS_DDR4] = { |
| 10683 | [IS_RDIMM] = { |
| 10684 | [IS_1SLOT] = 940, |
| 10685 | [IS_2SLOT] = 800}, |
| 10686 | [IS_UDIMM] = { |
| 10687 | [IS_1SLOT] = 1050, |
| 10688 | [IS_2SLOT] = 940}, |
| 10689 | }, |
| 10690 | [IS_DDR3] = { |
| 10691 | [IS_RDIMM] = { |
| 10692 | [IS_1SLOT] = 0, // disabled |
| 10693 | [IS_2SLOT] = 0 // disabled |
| 10694 | }, |
| 10695 | [IS_UDIMM] = { |
| 10696 | [IS_1SLOT] = 0, // disabled |
| 10697 | [IS_2SLOT] = 0 // disabled |
| 10698 | } |
| 10699 | } |
| 10700 | }; |
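| | /* |
| |  * Example lookup: a DDR4 RDIMM system with both slots populated uses |
| |  * the 800 MHz threshold, so an 880 MHz interface is eligible for |
| |  * tuning while a 667 MHz one is not. |
| |  */ |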
| 10701 | |
| 10702 | void cvmx_maybe_tune_node(struct ddr_priv *priv, u32 ddr_speed) |
| 10703 | { |
| 10704 | const char *s; |
| 10705 | union cvmx_lmcx_config lmc_config; |
| 10706 | union cvmx_lmcx_control lmc_control; |
| 10707 | union cvmx_lmcx_ddr_pll_ctl lmc_ddr_pll_ctl; |
| 10708 | int is_ddr4; |
| 10709 | int is_rdimm; |
| 10710 | int is_1slot; |
| 10711 | int do_tune = 0; |
| 10712 | u32 ddr_min_speed; |
| 10713 | int node = 0; |
| 10714 | |
| 10715 | // scale it down from Hz to MHz |
| 10716 | ddr_speed = divide_nint(ddr_speed, 1000000); |
| 10717 | |
| 10718 | // FIXME: allow an override here so that all configs can be tuned |
| 10719 | // or none |
| 10720 | // If the envvar is defined, always either force it or avoid it |
| 10721 | // accordingly |
| 10722 | s = env_get("ddr_tune_all_configs"); |
| 10723 | if (s) { |
| 10724 | do_tune = !!strtoul(s, NULL, 0); |
| 10725 | printf("N%d: DRAM auto-tuning %s.\n", node, |
| 10726 | (do_tune) ? "forced" : "disabled"); |
| 10727 | if (do_tune) |
| 10728 | cvmx_tune_node(priv); |
| 10729 | |
| 10730 | return; |
| 10731 | } |
| 10732 | |
| 10733 | // filter the tuning calls here... |
| 10734 | // determine if we should/can run automatically for this configuration |
| 10735 | // |
| 10736 | // FIXME: tune only when the configuration indicates it will help: |
| 10737 | // DDR type, RDIMM or UDIMM, 1-slot or 2-slot, and speed |
| 10738 | // |
| 10739 | lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(0)); // sample LMC0 |
| 10740 | lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(0)); // sample LMC0 |
| 10741 | // sample LMC0 |
| 10742 | lmc_ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0)); |
| 10743 | |
| 10744 | is_ddr4 = (lmc_ddr_pll_ctl.s.ddr4_mode != 0); |
| 10745 | is_rdimm = (lmc_control.s.rdimm_ena != 0); |
| 10746 | // HACK, should do better |
| 10747 | is_1slot = (lmc_config.s.init_status < 4); |
| 10748 | |
| 10749 | ddr_min_speed = ddr_speed_filter[is_ddr4][is_rdimm][is_1slot]; |
| 10750 | do_tune = ((ddr_min_speed != 0) && (ddr_speed > ddr_min_speed)); |
| 10751 | |
| 10752 | debug("N%d: DDR%d %cDIMM %d-slot at %d MHz %s eligible for auto-tuning.\n", |
| 10753 | node, (is_ddr4) ? 4 : 3, (is_rdimm) ? 'R' : 'U', |
| 10754 | (is_1slot) ? 1 : 2, ddr_speed, (do_tune) ? "is" : "is not"); |
| 10755 | |
| 10756 | // call the tuning routine, filtering is done... |
| 10757 | if (do_tune) |
| 10758 | cvmx_tune_node(priv); |
| 10759 | } |
| 10760 | |
| 10761 | /* |
| 10762 | * first pattern example: |
| 10763 | * GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff; |
| 10764 | * GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff; |
| 10765 |  * GENERAL_PURPOSE2.DATA == 16'h0000; |
| 10766 | */ |
| 10767 | |
| 10768 | static const u64 dbi_pattern[3] = { |
| 10769 | 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000ULL }; |
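| | /* |
| |  * This is option (1) from the comment in the routine below: the two |
| |  * 64-bit words give the data bytes the alternating 0x00/0xff (clk/2) |
| |  * pattern, and the 16-bit third value (presumably covering the ECC |
| |  * lanes) is held at zero. |
| |  */ |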
| 10770 | |
| 10771 | // Perform switchover to DBI |
| 10772 | static void cvmx_dbi_switchover_interface(struct ddr_priv *priv, int lmc) |
| 10773 | { |
| 10774 | union cvmx_lmcx_modereg_params0 modereg_params0; |
| 10775 | union cvmx_lmcx_modereg_params3 modereg_params3; |
| 10776 | union cvmx_lmcx_phy_ctl phy_ctl; |
| 10777 | union cvmx_lmcx_config lmcx_config; |
| 10778 | union cvmx_lmcx_ddr_pll_ctl ddr_pll_ctl; |
| 10779 | int rank_mask, rankx, active_ranks; |
| 10780 | u64 phys_addr, rank_offset; |
| 10781 | int num_lmcs, errors; |
| 10782 | int dbi_settings[9], byte, unlocked, retries; |
| 10783 | int ecc_ena; |
| 10784 | int rank_max = 1; // FIXME: make this 4 to try all the ranks |
| 10785 | int node = 0; |
| 10786 | |
| 10787 | ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0)); |
| 10788 | |
| 10789 | lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc)); |
| 10790 | rank_mask = lmcx_config.s.init_status; |
| 10791 | ecc_ena = lmcx_config.s.ecc_ena; |
| 10792 | |
| 10793 | // FIXME: must filter out any non-supported configs |
| 10794 | // i.e. no DDR3, no x4 devices |
| 10795 | if (ddr_pll_ctl.s.ddr4_mode == 0 || lmcx_config.s.mode_x4dev == 1) { |
| 10796 | debug("N%d.LMC%d: DBI switchover: inappropriate device; EXITING...\n", |
| 10797 | node, lmc); |
| 10798 | return; |
| 10799 | } |
| 10800 | |
| 10801 | // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs |
| 10802 | num_lmcs = cvmx_dram_get_num_lmc(priv); |
| 10803 | rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - |
| 10804 | lmcx_config.s.rank_ena + (num_lmcs / 2)); |
| 10805 | |
| 10806 | debug("N%d.LMC%d: DBI switchover: rank mask 0x%x, rank size 0x%016llx.\n", |
| 10807 | node, lmc, rank_mask, (unsigned long long)rank_offset); |
| 10808 | |
| 10809 | /* |
| 10810 |  * 1. conduct the current init sequence as usual, all the way |
| 10811 |  * through software write leveling. |
| 10812 | */ |
| 10813 | |
| 10814 | read_dac_dbi_settings(priv, lmc, /*DBI*/ 0, dbi_settings); |
| 10815 | |
| 10816 | display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, dbi_settings, |
| 10817 | " INIT"); |
| 10818 | |
| 10819 | /* |
| 10820 | * 2. set DBI related CSRs as below and issue MR write. |
| 10821 | * MODEREG_PARAMS3.WR_DBI=1 |
| 10822 | * MODEREG_PARAMS3.RD_DBI=1 |
| 10823 | * PHY_CTL.DBI_MODE_ENA=1 |
| 10824 | */ |
| 10825 | modereg_params0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(lmc)); |
| 10826 | |
| 10827 | modereg_params3.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS3(lmc)); |
| 10828 | modereg_params3.s.wr_dbi = 1; |
| 10829 | modereg_params3.s.rd_dbi = 1; |
| 10830 | lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS3(lmc), modereg_params3.u64); |
| 10831 | |
| 10832 | phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(lmc)); |
| 10833 | phy_ctl.s.dbi_mode_ena = 1; |
| 10834 | lmc_wr(priv, CVMX_LMCX_PHY_CTL(lmc), phy_ctl.u64); |
| 10835 | |
| 10836 | /* |
| 10837 |  * there are two options for data to send. Let's start with (1) |
| 10838 | * and could move to (2) in the future: |
| 10839 | * |
| 10840 | * 1) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 0 (or for older chips where |
| 10841 |  * this does not exist) set data directly in these registers. |
| 10842 | * this will yield a clk/2 pattern: |
| 10843 | * GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff; |
| 10844 | * GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff; |
| 10845 |  * GENERAL_PURPOSE2.DATA == 16'h0000; |
| 10846 | * 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1 |
| 10847 | * here data comes from the LFSR generating a PRBS pattern |
| 10848 | * CHAR_CTL.EN = 0 |
| 10849 | * CHAR_CTL.SEL = 0; // for PRBS |
| 10850 | * CHAR_CTL.DR = 1; |
| 10851 | * CHAR_CTL.PRBS = setup for whatever type of PRBS to send |
| 10852 | * CHAR_CTL.SKEW_ON = 1; |
| 10853 | */ |
| 10854 | lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(lmc), dbi_pattern[0]); |
| 10855 | lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE1(lmc), dbi_pattern[1]); |
| 10856 | lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE2(lmc), dbi_pattern[2]); |
| 10857 | |
| 10858 | /* |
| 10859 | * 3. adjust cas_latency (only necessary if RD_DBI is set). |
| 10860 | * here is my code for doing this: |
| 10861 | * |
| 10862 | * if (csr_model.MODEREG_PARAMS3.RD_DBI.value == 1) begin |
| 10863 | * case (csr_model.MODEREG_PARAMS0.CL.value) |
| 10864 | * 0,1,2,3,4: csr_model.MODEREG_PARAMS0.CL.value += 2; |
| 10865 | * // CL 9-13 -> 11-15 |
| 10866 | * 5: begin |
| 10867 |  * // CL=14, CWL=10,12 gets +2, CWL=11,14 gets +3 |
| 10868 | * if((csr_model.MODEREG_PARAMS0.CWL.value==1 || |
| 10869 | * csr_model.MODEREG_PARAMS0.CWL.value==3)) |
| 10870 | * csr_model.MODEREG_PARAMS0.CL.value = 7; // 14->16 |
| 10871 | * else |
| 10872 | * csr_model.MODEREG_PARAMS0.CL.value = 13; // 14->17 |
| 10873 | * end |
| 10874 | * 6: csr_model.MODEREG_PARAMS0.CL.value = 8; // 15->18 |
| 10875 | * 7: csr_model.MODEREG_PARAMS0.CL.value = 14; // 16->19 |
| 10876 | * 8: csr_model.MODEREG_PARAMS0.CL.value = 15; // 18->21 |
| 10877 | * default: |
| 10878 | * `cn_fatal(("Error mem_cfg (%s) CL (%d) with RD_DBI=1, |
| 10879 | * I am not sure what to do.", |
| 10880 | * mem_cfg, csr_model.MODEREG_PARAMS3.RD_DBI.value)) |
| 10881 | * endcase |
| 10882 | * end |
| 10883 | */ |
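| | /* |
| |  * Background: enabling read DBI on DDR4 adds two or three clocks of |
| |  * read latency depending on the starting CL/CWL, which is why the |
| |  * CL encoding is bumped per the mapping quoted above. |
| |  */ |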
| 10884 | |
| 10885 | if (modereg_params3.s.rd_dbi == 1) { |
| 10886 | int old_cl, new_cl, old_cwl; |
| 10887 | |
| 10888 | old_cl = modereg_params0.s.cl; |
| 10889 | old_cwl = modereg_params0.s.cwl; |
| 10890 | |
| 10891 | switch (old_cl) { |
| 10892 | case 0: |
| 10893 | case 1: |
| 10894 | case 2: |
| 10895 | case 3: |
| 10896 | case 4: |
| 10897 | new_cl = old_cl + 2; |
| 10898 | break; // 9-13->11-15 |
| 10899 | // CL=14, CWL=10,12 gets +2, CWL=11,14 gets +3 |
| 10900 | case 5: |
| 10901 | new_cl = ((old_cwl == 1) || (old_cwl == 3)) ? 7 : 13; |
| 10902 | break; |
| 10903 | case 6: |
| 10904 | new_cl = 8; |
| 10905 | break; // 15->18 |
| 10906 | case 7: |
| 10907 | new_cl = 14; |
| 10908 | break; // 16->19 |
| 10909 | case 8: |
| 10910 | new_cl = 15; |
| 10911 | break; // 18->21 |
| 10912 | default: |
| 10913 | printf("ERROR: Bad CL value (%d) for DBI switchover.\n", |
| 10914 | old_cl); |
| 10915 | // FIXME: need to error exit here... |
| 10916 | old_cl = -1; |
| 10917 | new_cl = -1; |
| 10918 | break; |
| 10919 | } |
| 10920 | debug("N%d.LMC%d: DBI switchover: CL ADJ: old_cl 0x%x, old_cwl 0x%x, new_cl 0x%x.\n", |
| 10921 | node, lmc, old_cl, old_cwl, new_cl); |
| 10922 | modereg_params0.s.cl = new_cl; |
| 10923 | lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(lmc), |
| 10924 | modereg_params0.u64); |
| 10925 | } |
| 10926 | |
| 10927 | /* |
| 10928 | * 4. issue MRW to MR0 (CL) and MR5 (DBI), using LMC sequence |
| 10929 | * SEQ_CTL[SEQ_SEL] = MRW. |
| 10930 | */ |
| 10931 | // Use the default values from the CSR fields |
| 10932 | // also, do B-sides for RDIMMs... |
| 10933 | |
| 10934 | for (rankx = 0; rankx < 4; rankx++) { |
| 10935 | if (!(rank_mask & (1 << rankx))) |
| 10936 | continue; |
| 10937 | |
| 10938 | // for RDIMMs, B-side writes should get done automatically |
| 10939 | // when the A-side is written |
| 10940 | ddr4_mrw(priv, lmc, rankx, -1 /* use_default */, |
| 10941 | 0 /*MRreg */, 0 /*A-side */); /* MR0 */ |
| 10942 | ddr4_mrw(priv, lmc, rankx, -1 /* use_default */, |
| 10943 | 5 /*MRreg */, 0 /*A-side */); /* MR5 */ |
| 10944 | } |
| 10945 | |
| 10946 | /* |
| 10947 | * 5. conduct DBI bit deskew training via the General Purpose |
| 10948 | * R/W sequence (dbtrain). may need to run this over and over to get |
| 10949 | * a lock (I need up to 5 in simulation): |
| 10950 | * SEQ_CTL[SEQ_SEL] = RW_TRAINING (15) |
| 10951 | * DBTRAIN_CTL.CMD_COUNT_EXT = all 1's |
| 10952 | * DBTRAIN_CTL.READ_CMD_COUNT = all 1's |
| 10953 | * DBTRAIN_CTL.TCCD_SEL = set according to MODEREG_PARAMS3[TCCD_L] |
| 10954 | * DBTRAIN_CTL.RW_TRAIN = 1 |
| 10955 |  * DBTRAIN_CTL.READ_DQ_COUNT = don't care |
| 10956 | * DBTRAIN_CTL.WRITE_ENA = 1; |
| 10957 | * DBTRAIN_CTL.ACTIVATE = 1; |
| 10958 | * DBTRAIN_CTL LRANK, PRANK, ROW_A, BG, BA, COLUMN_A = set to a |
| 10959 | * valid address |
| 10960 | */ |
| 10961 | |
| 10962 | // NOW - do the training |
| 10963 | debug("N%d.LMC%d: DBI switchover: TRAINING begins...\n", node, lmc); |
| 10964 | |
| 10965 | active_ranks = 0; |
| 10966 | for (rankx = 0; rankx < rank_max; rankx++) { |
| 10967 | if (!(rank_mask & (1 << rankx))) |
| 10968 | continue; |
| 10969 | |
| 10970 | phys_addr = rank_offset * active_ranks; |
| 10971 | // FIXME: now done by test_dram_byte_hw() |
| 10972 | |
| 10973 | active_ranks++; |
| 10974 | |
| 10975 | retries = 0; |
| 10976 | |
| 10977 | restart_training: |
| 10978 | |
| 10979 | // NOTE: return is a bitmask of the erroring bytelanes - |
| 10980 | // we only print it |
| 10981 | errors = |
| 10982 | test_dram_byte_hw(priv, lmc, phys_addr, DBTRAIN_DBI, NULL); |
| 10983 | |
| 10984 | debug("N%d.LMC%d: DBI switchover: TEST: rank %d, phys_addr 0x%llx, errors 0x%x.\n", |
| 10985 | node, lmc, rankx, (unsigned long long)phys_addr, errors); |
| 10986 | |
| 10987 | // NEXT - check for locking |
| 10988 | unlocked = 0; |
| 10989 | read_dac_dbi_settings(priv, lmc, /*DBI*/ 0, dbi_settings); |
| 10990 | |
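| | // bit 0 of each per-byte setting appears to be the DBI deskew lock |
| | // flag; count the tested bytes whose lock bit is still clear |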
| 10991 | for (byte = 0; byte < (8 + ecc_ena); byte++) |
| 10992 | unlocked += (dbi_settings[byte] & 1) ^ 1; |
| 10993 | |
| 10994 | // FIXME: print out the DBI settings array after each rank? |
| 10995 | if (rank_max > 1) // only when doing more than 1 rank |
| 10996 | display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, |
| 10997 | dbi_settings, " RANK"); |
| 10998 | |
| 10999 | if (unlocked > 0) { |
| 11000 | debug("N%d.LMC%d: DBI switchover: LOCK: %d still unlocked.\n", |
| 11001 | node, lmc, unlocked); |
| 11002 | retries++; |
| 11003 | if (retries < 10) { |
| 11004 | goto restart_training; |
| 11005 | } else { |
| 11006 | debug("N%d.LMC%d: DBI switchover: LOCK: %d retries exhausted.\n", |
| 11007 | node, lmc, retries); |
| 11008 | } |
| 11009 | } |
| 11010 | } /* for (rankx = 0; rankx < rank_max; rankx++) */ |
| 11011 | |
| 11012 | // print out the final DBI settings array |
| 11013 | display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, dbi_settings, |
| 11014 | "FINAL"); |
| 11015 | } |
| 11016 | |
| 11017 | void cvmx_dbi_switchover(struct ddr_priv *priv) |
| 11018 | { |
| 11019 | int lmc; |
| 11020 | int num_lmcs = cvmx_dram_get_num_lmc(priv); |
| 11021 | |
| 11022 | for (lmc = 0; lmc < num_lmcs; lmc++) |
| 11023 | cvmx_dbi_switchover_interface(priv, lmc); |
| 11024 | } |