/************************************************************************
 *
 * Copyright (c) 2013-2015 Intel Corporation.
 *
 * This program and the accompanying materials
 * are licensed and made available under the terms and conditions of the BSD License
 * which accompanies this distribution.  The full text of the license may be found at
 * http://opensource.org/licenses/bsd-license.php
 *
 * THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
 * WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
 *
 ***************************************************************************/

#include "mrc.h"
#include "memory_options.h"

#include "meminit_utils.h"
#include "hte.h"
#include "io.h"

// Defined outside this file; called by find_rising_edge() before sampling.
void select_hte(
    MRCParams_t *mrc_params);

// Flag handed to HteMemOp() by sample_dqs().  find_rising_edge() sets it to 1
// right after select_hte(); sample_dqs() clears it after each HteMemOp() call.
static uint8_t first_run = 0;

// Translation table used by set_vref()/get_vref(): the array index is the
// linear Vref setting (0 = lowest ... 63 = highest) and the entry is the 6-bit
// code placed in bits [7:2] of B0VREFCTL/B1VREFCTL.
const uint8_t vref_codes[64] =
{ // lowest to highest
    0x3F, 0x3E, 0x3D, 0x3C, 0x3B, 0x3A, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30, // 00 - 15
    0x2F, 0x2E, 0x2D, 0x2C, 0x2B, 0x2A, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, // 16 - 31
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, // 32 - 47
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F  // 48 - 63
};

#ifdef EMU
// Track current post code for debugging purpose
uint32_t PostCode;
#endif

// set_rcvn:
//
// This function will program the RCVEN delays.
43 // (currently doesn't comprehend rank) 44 void set_rcvn( 45 uint8_t channel, 46 uint8_t rank, 47 uint8_t byte_lane, 48 uint32_t pi_count) 49 { 50 uint32_t reg; 51 uint32_t msk; 52 uint32_t tempD; 53 54 ENTERFN(); 55 DPF(D_TRN, "Rcvn ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count); 56 57 // RDPTR (1/2 MCLK, 64 PIs) 58 // BL0 -> B01PTRCTL0[11:08] (0x0-0xF) 59 // BL1 -> B01PTRCTL0[23:20] (0x0-0xF) 60 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET); 61 msk = (byte_lane & BIT0) ? (BIT23 | BIT22 | BIT21 | BIT20) : (BIT11 | BIT10 | BIT9 | BIT8); 62 tempD = (byte_lane & BIT0) ? ((pi_count / HALF_CLK) << 20) : ((pi_count / HALF_CLK) << 8); 63 isbM32m(DDRPHY, reg, tempD, msk); 64 65 // Adjust PI_COUNT 66 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; 67 68 // PI (1/64 MCLK, 1 PIs) 69 // BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F) 70 // BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F) 71 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); 72 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET)); 73 msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24); 74 tempD = pi_count << 24; 75 isbM32m(DDRPHY, reg, tempD, msk); 76 77 // DEADBAND 78 // BL0/1 -> B01DBCTL1[08/11] (+1 select) 79 // BL0/1 -> B01DBCTL1[02/05] (enable) 80 reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET); 81 msk = 0x00; 82 tempD = 0x00; 83 // enable 84 msk |= (byte_lane & BIT0) ? (BIT5) : (BIT2); 85 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) 86 { 87 tempD |= msk; 88 } 89 // select 90 msk |= (byte_lane & BIT0) ? 
(BIT11) : (BIT8); 91 if (pi_count < EARLY_DB) 92 { 93 tempD |= msk; 94 } 95 isbM32m(DDRPHY, reg, tempD, msk); 96 97 // error check 98 if (pi_count > 0x3F) 99 { 100 training_message(channel, rank, byte_lane); 101 post_code(0xEE, 0xE0); 102 } 103 104 LEAVEFN(); 105 return; 106 } 107 108 // get_rcvn: 109 // 110 // This function will return the current RCVEN delay on the given channel, rank, byte_lane as an absolute PI count. 111 // (currently doesn't comprehend rank) 112 uint32_t get_rcvn( 113 uint8_t channel, 114 uint8_t rank, 115 uint8_t byte_lane) 116 { 117 uint32_t reg; 118 uint32_t tempD; 119 uint32_t pi_count; 120 121 ENTERFN(); 122 123 // RDPTR (1/2 MCLK, 64 PIs) 124 // BL0 -> B01PTRCTL0[11:08] (0x0-0xF) 125 // BL1 -> B01PTRCTL0[23:20] (0x0-0xF) 126 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET); 127 tempD = isbR32m(DDRPHY, reg); 128 tempD >>= (byte_lane & BIT0) ? (20) : (8); 129 tempD &= 0xF; 130 131 // Adjust PI_COUNT 132 pi_count = tempD * HALF_CLK; 133 134 // PI (1/64 MCLK, 1 PIs) 135 // BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F) 136 // BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F) 137 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); 138 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET)); 139 tempD = isbR32m(DDRPHY, reg); 140 tempD >>= 24; 141 tempD &= 0x3F; 142 143 // Adjust PI_COUNT 144 pi_count += tempD; 145 146 LEAVEFN(); 147 return pi_count; 148 } 149 150 // set_rdqs: 151 // 152 // This function will program the RDQS delays based on an absolute amount of PIs. 
153 // (currently doesn't comprehend rank) 154 void set_rdqs( 155 uint8_t channel, 156 uint8_t rank, 157 uint8_t byte_lane, 158 uint32_t pi_count) 159 { 160 uint32_t reg; 161 uint32_t msk; 162 uint32_t tempD; 163 164 ENTERFN(); 165 DPF(D_TRN, "Rdqs ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count); 166 167 // PI (1/128 MCLK) 168 // BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47) 169 // BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47) 170 reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE); 171 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET)); 172 msk = (BIT6 | BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0); 173 tempD = pi_count << 0; 174 isbM32m(DDRPHY, reg, tempD, msk); 175 176 // error check (shouldn't go above 0x3F) 177 if (pi_count > 0x47) 178 { 179 training_message(channel, rank, byte_lane); 180 post_code(0xEE, 0xE1); 181 } 182 183 LEAVEFN(); 184 return; 185 } 186 187 // get_rdqs: 188 // 189 // This function will return the current RDQS delay on the given channel, rank, byte_lane as an absolute PI count. 190 // (currently doesn't comprehend rank) 191 uint32_t get_rdqs( 192 uint8_t channel, 193 uint8_t rank, 194 uint8_t byte_lane) 195 { 196 uint32_t reg; 197 uint32_t tempD; 198 uint32_t pi_count; 199 200 ENTERFN(); 201 202 // PI (1/128 MCLK) 203 // BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47) 204 // BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47) 205 reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE); 206 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET)); 207 tempD = isbR32m(DDRPHY, reg); 208 209 // Adjust PI_COUNT 210 pi_count = tempD & 0x7F; 211 212 LEAVEFN(); 213 return pi_count; 214 } 215 216 // set_wdqs: 217 // 218 // This function will program the WDQS delays based on an absolute amount of PIs. 
219 // (currently doesn't comprehend rank) 220 void set_wdqs( 221 uint8_t channel, 222 uint8_t rank, 223 uint8_t byte_lane, 224 uint32_t pi_count) 225 { 226 uint32_t reg; 227 uint32_t msk; 228 uint32_t tempD; 229 230 ENTERFN(); 231 DPF(D_TRN, "Wdqs ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count); 232 233 // RDPTR (1/2 MCLK, 64 PIs) 234 // BL0 -> B01PTRCTL0[07:04] (0x0-0xF) 235 // BL1 -> B01PTRCTL0[19:16] (0x0-0xF) 236 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET); 237 msk = (byte_lane & BIT0) ? (BIT19 | BIT18 | BIT17 | BIT16) : (BIT7 | BIT6 | BIT5 | BIT4); 238 tempD = pi_count / HALF_CLK; 239 tempD <<= (byte_lane & BIT0) ? (16) : (4); 240 isbM32m(DDRPHY, reg, tempD, msk); 241 242 // Adjust PI_COUNT 243 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; 244 245 // PI (1/64 MCLK, 1 PIs) 246 // BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F) 247 // BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F) 248 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); 249 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET)); 250 msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16); 251 tempD = pi_count << 16; 252 isbM32m(DDRPHY, reg, tempD, msk); 253 254 // DEADBAND 255 // BL0/1 -> B01DBCTL1[07/10] (+1 select) 256 // BL0/1 -> B01DBCTL1[01/04] (enable) 257 reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET); 258 msk = 0x00; 259 tempD = 0x00; 260 // enable 261 msk |= (byte_lane & BIT0) ? (BIT4) : (BIT1); 262 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) 263 { 264 tempD |= msk; 265 } 266 // select 267 msk |= (byte_lane & BIT0) ? 
(BIT10) : (BIT7); 268 if (pi_count < EARLY_DB) 269 { 270 tempD |= msk; 271 } 272 isbM32m(DDRPHY, reg, tempD, msk); 273 274 // error check 275 if (pi_count > 0x3F) 276 { 277 training_message(channel, rank, byte_lane); 278 post_code(0xEE, 0xE2); 279 } 280 281 LEAVEFN(); 282 return; 283 } 284 285 // get_wdqs: 286 // 287 // This function will return the amount of WDQS delay on the given channel, rank, byte_lane as an absolute PI count. 288 // (currently doesn't comprehend rank) 289 uint32_t get_wdqs( 290 uint8_t channel, 291 uint8_t rank, 292 uint8_t byte_lane) 293 { 294 uint32_t reg; 295 uint32_t tempD; 296 uint32_t pi_count; 297 298 ENTERFN(); 299 300 // RDPTR (1/2 MCLK, 64 PIs) 301 // BL0 -> B01PTRCTL0[07:04] (0x0-0xF) 302 // BL1 -> B01PTRCTL0[19:16] (0x0-0xF) 303 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET); 304 tempD = isbR32m(DDRPHY, reg); 305 tempD >>= (byte_lane & BIT0) ? (16) : (4); 306 tempD &= 0xF; 307 308 // Adjust PI_COUNT 309 pi_count = (tempD * HALF_CLK); 310 311 // PI (1/64 MCLK, 1 PIs) 312 // BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F) 313 // BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F) 314 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); 315 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET)); 316 tempD = isbR32m(DDRPHY, reg); 317 tempD >>= 16; 318 tempD &= 0x3F; 319 320 // Adjust PI_COUNT 321 pi_count += tempD; 322 323 LEAVEFN(); 324 return pi_count; 325 } 326 327 // set_wdq: 328 // 329 // This function will program the WDQ delays based on an absolute number of PIs. 
330 // (currently doesn't comprehend rank) 331 void set_wdq( 332 uint8_t channel, 333 uint8_t rank, 334 uint8_t byte_lane, 335 uint32_t pi_count) 336 { 337 uint32_t reg; 338 uint32_t msk; 339 uint32_t tempD; 340 341 ENTERFN(); 342 DPF(D_TRN, "Wdq ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count); 343 344 // RDPTR (1/2 MCLK, 64 PIs) 345 // BL0 -> B01PTRCTL0[03:00] (0x0-0xF) 346 // BL1 -> B01PTRCTL0[15:12] (0x0-0xF) 347 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET); 348 msk = (byte_lane & BIT0) ? (BIT15 | BIT14 | BIT13 | BIT12) : (BIT3 | BIT2 | BIT1 | BIT0); 349 tempD = pi_count / HALF_CLK; 350 tempD <<= (byte_lane & BIT0) ? (12) : (0); 351 isbM32m(DDRPHY, reg, tempD, msk); 352 353 // Adjust PI_COUNT 354 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; 355 356 // PI (1/64 MCLK, 1 PIs) 357 // BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F) 358 // BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F) 359 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); 360 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET)); 361 msk = (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8); 362 tempD = pi_count << 8; 363 isbM32m(DDRPHY, reg, tempD, msk); 364 365 // DEADBAND 366 // BL0/1 -> B01DBCTL1[06/09] (+1 select) 367 // BL0/1 -> B01DBCTL1[00/03] (enable) 368 reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET); 369 msk = 0x00; 370 tempD = 0x00; 371 // enable 372 msk |= (byte_lane & BIT0) ? (BIT3) : (BIT0); 373 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) 374 { 375 tempD |= msk; 376 } 377 // select 378 msk |= (byte_lane & BIT0) ? 
(BIT9) : (BIT6); 379 if (pi_count < EARLY_DB) 380 { 381 tempD |= msk; 382 } 383 isbM32m(DDRPHY, reg, tempD, msk); 384 385 // error check 386 if (pi_count > 0x3F) 387 { 388 training_message(channel, rank, byte_lane); 389 post_code(0xEE, 0xE3); 390 } 391 392 LEAVEFN(); 393 return; 394 } 395 396 // get_wdq: 397 // 398 // This function will return the amount of WDQ delay on the given channel, rank, byte_lane as an absolute PI count. 399 // (currently doesn't comprehend rank) 400 uint32_t get_wdq( 401 uint8_t channel, 402 uint8_t rank, 403 uint8_t byte_lane) 404 { 405 uint32_t reg; 406 uint32_t tempD; 407 uint32_t pi_count; 408 409 ENTERFN(); 410 411 // RDPTR (1/2 MCLK, 64 PIs) 412 // BL0 -> B01PTRCTL0[03:00] (0x0-0xF) 413 // BL1 -> B01PTRCTL0[15:12] (0x0-0xF) 414 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET); 415 tempD = isbR32m(DDRPHY, reg); 416 tempD >>= (byte_lane & BIT0) ? (12) : (0); 417 tempD &= 0xF; 418 419 // Adjust PI_COUNT 420 pi_count = (tempD * HALF_CLK); 421 422 // PI (1/64 MCLK, 1 PIs) 423 // BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F) 424 // BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F) 425 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); 426 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET)); 427 tempD = isbR32m(DDRPHY, reg); 428 tempD >>= 8; 429 tempD &= 0x3F; 430 431 // Adjust PI_COUNT 432 pi_count += tempD; 433 434 LEAVEFN(); 435 return pi_count; 436 } 437 438 // set_wcmd: 439 // 440 // This function will program the WCMD delays based on an absolute number of PIs. 
441 void set_wcmd( 442 uint8_t channel, 443 uint32_t pi_count) 444 { 445 uint32_t reg; 446 uint32_t msk; 447 uint32_t tempD; 448 449 ENTERFN(); 450 // RDPTR (1/2 MCLK, 64 PIs) 451 // CMDPTRREG[11:08] (0x0-0xF) 452 reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET); 453 msk = (BIT11 | BIT10 | BIT9 | BIT8); 454 tempD = pi_count / HALF_CLK; 455 tempD <<= 8; 456 isbM32m(DDRPHY, reg, tempD, msk); 457 458 // Adjust PI_COUNT 459 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; 460 461 // PI (1/64 MCLK, 1 PIs) 462 // CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused) 463 // CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused) 464 // CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused) 465 // CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused) 466 // CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused) 467 // CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F) 468 // CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused) 469 // CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused) 470 reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET); 471 472 msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24) | (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16) 473 | (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8) | (BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0); 474 475 tempD = (pi_count << 24) | (pi_count << 16) | (pi_count << 8) | (pi_count << 0); 476 477 isbM32m(DDRPHY, reg, tempD, msk); 478 reg = CMDDLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); // PO 479 isbM32m(DDRPHY, reg, tempD, msk); 480 481 // DEADBAND 482 // CMDCFGREG0[17] (+1 select) 483 // CMDCFGREG0[16] (enable) 484 reg = CMDCFGREG0 + (channel * DDRIOCCC_CH_OFFSET); 485 msk = 0x00; 486 tempD = 0x00; 487 // enable 488 msk |= BIT16; 489 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) 490 { 491 tempD |= msk; 492 } 493 // select 494 msk |= BIT17; 495 if (pi_count < EARLY_DB) 496 { 497 tempD |= msk; 498 } 499 isbM32m(DDRPHY, reg, tempD, msk); 500 501 // error check 502 if (pi_count > 0x3F) 503 { 504 post_code(0xEE, 0xE4); 505 } 506 507 LEAVEFN(); 508 return; 509 } 510 511 // 
get_wcmd: 512 // 513 // This function will return the amount of WCMD delay on the given channel as an absolute PI count. 514 uint32_t get_wcmd( 515 uint8_t channel) 516 { 517 uint32_t reg; 518 uint32_t tempD; 519 uint32_t pi_count; 520 521 ENTERFN(); 522 // RDPTR (1/2 MCLK, 64 PIs) 523 // CMDPTRREG[11:08] (0x0-0xF) 524 reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET); 525 tempD = isbR32m(DDRPHY, reg); 526 tempD >>= 8; 527 tempD &= 0xF; 528 529 // Adjust PI_COUNT 530 pi_count = tempD * HALF_CLK; 531 532 // PI (1/64 MCLK, 1 PIs) 533 // CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused) 534 // CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused) 535 // CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused) 536 // CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused) 537 // CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused) 538 // CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F) 539 // CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused) 540 // CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused) 541 reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET); 542 tempD = isbR32m(DDRPHY, reg); 543 tempD >>= 16; 544 tempD &= 0x3F; 545 546 // Adjust PI_COUNT 547 pi_count += tempD; 548 549 LEAVEFN(); 550 return pi_count; 551 } 552 553 // set_wclk: 554 // 555 // This function will program the WCLK delays based on an absolute number of PIs. 
556 void set_wclk( 557 uint8_t channel, 558 uint8_t rank, 559 uint32_t pi_count) 560 { 561 uint32_t reg; 562 uint32_t msk; 563 uint32_t tempD; 564 565 ENTERFN(); 566 // RDPTR (1/2 MCLK, 64 PIs) 567 // CCPTRREG[15:12] -> CLK1 (0x0-0xF) 568 // CCPTRREG[11:08] -> CLK0 (0x0-0xF) 569 reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); 570 msk = (BIT15 | BIT14 | BIT13 | BIT12) | (BIT11 | BIT10 | BIT9 | BIT8); 571 tempD = ((pi_count / HALF_CLK) << 12) | ((pi_count / HALF_CLK) << 8); 572 isbM32m(DDRPHY, reg, tempD, msk); 573 574 // Adjust PI_COUNT 575 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; 576 577 // PI (1/64 MCLK, 1 PIs) 578 // ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F) 579 // ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F) 580 reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0); 581 reg += (channel * DDRIOCCC_CH_OFFSET); 582 msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16) | (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8); 583 tempD = (pi_count << 16) | (pi_count << 8); 584 isbM32m(DDRPHY, reg, tempD, msk); 585 reg = (rank) ? (ECCB1DLLPICODER1) : (ECCB1DLLPICODER1); 586 reg += (channel * DDRIOCCC_CH_OFFSET); 587 isbM32m(DDRPHY, reg, tempD, msk); 588 reg = (rank) ? (ECCB1DLLPICODER2) : (ECCB1DLLPICODER2); 589 reg += (channel * DDRIOCCC_CH_OFFSET); 590 isbM32m(DDRPHY, reg, tempD, msk); 591 reg = (rank) ? (ECCB1DLLPICODER3) : (ECCB1DLLPICODER3); 592 reg += (channel * DDRIOCCC_CH_OFFSET); 593 isbM32m(DDRPHY, reg, tempD, msk); 594 595 // DEADBAND 596 // CCCFGREG1[11:08] (+1 select) 597 // CCCFGREG1[03:00] (enable) 598 reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET); 599 msk = 0x00; 600 tempD = 0x00; 601 // enable 602 msk |= (BIT3 | BIT2 | BIT1 | BIT0); // only ??? matters 603 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) 604 { 605 tempD |= msk; 606 } 607 // select 608 msk |= (BIT11 | BIT10 | BIT9 | BIT8); // only ??? 
matters 609 if (pi_count < EARLY_DB) 610 { 611 tempD |= msk; 612 } 613 isbM32m(DDRPHY, reg, tempD, msk); 614 615 // error check 616 if (pi_count > 0x3F) 617 { 618 post_code(0xEE, 0xE5); 619 } 620 621 LEAVEFN(); 622 return; 623 } 624 625 // get_wclk: 626 // 627 // This function will return the amout of WCLK delay on the given channel, rank as an absolute PI count. 628 uint32_t get_wclk( 629 uint8_t channel, 630 uint8_t rank) 631 { 632 uint32_t reg; 633 uint32_t tempD; 634 uint32_t pi_count; 635 636 ENTERFN(); 637 // RDPTR (1/2 MCLK, 64 PIs) 638 // CCPTRREG[15:12] -> CLK1 (0x0-0xF) 639 // CCPTRREG[11:08] -> CLK0 (0x0-0xF) 640 reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); 641 tempD = isbR32m(DDRPHY, reg); 642 tempD >>= (rank) ? (12) : (8); 643 tempD &= 0xF; 644 645 // Adjust PI_COUNT 646 pi_count = tempD * HALF_CLK; 647 648 // PI (1/64 MCLK, 1 PIs) 649 // ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F) 650 // ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F) 651 reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0); 652 reg += (channel * DDRIOCCC_CH_OFFSET); 653 tempD = isbR32m(DDRPHY, reg); 654 tempD >>= (rank) ? (16) : (8); 655 tempD &= 0x3F; 656 657 pi_count += tempD; 658 659 LEAVEFN(); 660 return pi_count; 661 } 662 663 // set_wctl: 664 // 665 // This function will program the WCTL delays based on an absolute number of PIs. 
666 // (currently doesn't comprehend rank) 667 void set_wctl( 668 uint8_t channel, 669 uint8_t rank, 670 uint32_t pi_count) 671 { 672 uint32_t reg; 673 uint32_t msk; 674 uint32_t tempD; 675 676 ENTERFN(); 677 678 // RDPTR (1/2 MCLK, 64 PIs) 679 // CCPTRREG[31:28] (0x0-0xF) 680 // CCPTRREG[27:24] (0x0-0xF) 681 reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); 682 msk = (BIT31 | BIT30 | BIT29 | BIT28) | (BIT27 | BIT26 | BIT25 | BIT24); 683 tempD = ((pi_count / HALF_CLK) << 28) | ((pi_count / HALF_CLK) << 24); 684 isbM32m(DDRPHY, reg, tempD, msk); 685 686 // Adjust PI_COUNT 687 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; 688 689 // PI (1/64 MCLK, 1 PIs) 690 // ECCB1DLLPICODER?[29:24] (0x00-0x3F) 691 // ECCB1DLLPICODER?[29:24] (0x00-0x3F) 692 reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); 693 msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24); 694 tempD = (pi_count << 24); 695 isbM32m(DDRPHY, reg, tempD, msk); 696 reg = ECCB1DLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET); 697 isbM32m(DDRPHY, reg, tempD, msk); 698 reg = ECCB1DLLPICODER2 + (channel * DDRIOCCC_CH_OFFSET); 699 isbM32m(DDRPHY, reg, tempD, msk); 700 reg = ECCB1DLLPICODER3 + (channel * DDRIOCCC_CH_OFFSET); 701 isbM32m(DDRPHY, reg, tempD, msk); 702 703 // DEADBAND 704 // CCCFGREG1[13:12] (+1 select) 705 // CCCFGREG1[05:04] (enable) 706 reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET); 707 msk = 0x00; 708 tempD = 0x00; 709 // enable 710 msk |= (BIT5 | BIT4); // only ??? matters 711 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) 712 { 713 tempD |= msk; 714 } 715 // select 716 msk |= (BIT13 | BIT12); // only ??? matters 717 if (pi_count < EARLY_DB) 718 { 719 tempD |= msk; 720 } 721 isbM32m(DDRPHY, reg, tempD, msk); 722 723 // error check 724 if (pi_count > 0x3F) 725 { 726 post_code(0xEE, 0xE6); 727 } 728 729 LEAVEFN(); 730 return; 731 } 732 733 // get_wctl: 734 // 735 // This function will return the amount of WCTL delay on the given channel, rank as an absolute PI count. 
736 // (currently doesn't comprehend rank) 737 uint32_t get_wctl( 738 uint8_t channel, 739 uint8_t rank) 740 { 741 uint32_t reg; 742 uint32_t tempD; 743 uint32_t pi_count; 744 745 ENTERFN(); 746 747 // RDPTR (1/2 MCLK, 64 PIs) 748 // CCPTRREG[31:28] (0x0-0xF) 749 // CCPTRREG[27:24] (0x0-0xF) 750 reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); 751 tempD = isbR32m(DDRPHY, reg); 752 tempD >>= 24; 753 tempD &= 0xF; 754 755 // Adjust PI_COUNT 756 pi_count = tempD * HALF_CLK; 757 758 // PI (1/64 MCLK, 1 PIs) 759 // ECCB1DLLPICODER?[29:24] (0x00-0x3F) 760 // ECCB1DLLPICODER?[29:24] (0x00-0x3F) 761 reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); 762 tempD = isbR32m(DDRPHY, reg); 763 tempD >>= 24; 764 tempD &= 0x3F; 765 766 // Adjust PI_COUNT 767 pi_count += tempD; 768 769 LEAVEFN(); 770 return pi_count; 771 } 772 773 // set_vref: 774 // 775 // This function will program the internal Vref setting in a given byte lane in a given channel. 776 void set_vref( 777 uint8_t channel, 778 uint8_t byte_lane, 779 uint32_t setting) 780 { 781 uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL); 782 783 ENTERFN(); 784 DPF(D_TRN, "Vref ch%d ln%d : val=%03X\n", channel, byte_lane, setting); 785 786 isbM32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)), 787 (vref_codes[setting] << 2), (BIT7 | BIT6 | BIT5 | BIT4 | BIT3 | BIT2)); 788 //isbM32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)), (setting<<2), (BIT7|BIT6|BIT5|BIT4|BIT3|BIT2)); 789 // need to wait ~300ns for Vref to settle (check that this is necessary) 790 delay_n(300); 791 // ??? may need to clear pointers ??? 792 LEAVEFN(); 793 return; 794 } 795 796 // get_vref: 797 // 798 // This function will return the internal Vref setting for the given channel, byte_lane; 799 uint32_t get_vref( 800 uint8_t channel, 801 uint8_t byte_lane) 802 { 803 uint8_t j; 804 uint32_t ret_val = sizeof(vref_codes) / 2; 805 uint32_t reg = (byte_lane & 0x1) ? 
(B1VREFCTL) : (B0VREFCTL); 806 807 uint32_t tempD; 808 809 ENTERFN(); 810 tempD = isbR32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET))); 811 tempD >>= 2; 812 tempD &= 0x3F; 813 for (j = 0; j < sizeof(vref_codes); j++) 814 { 815 if (vref_codes[j] == tempD) 816 { 817 ret_val = j; 818 break; 819 } 820 } 821 LEAVEFN(); 822 return ret_val; 823 } 824 825 // clear_pointers: 826 // 827 // This function will be used to clear the pointers in a given byte lane in a given channel. 828 void clear_pointers( 829 void) 830 { 831 uint8_t channel_i; 832 uint8_t bl_i; 833 834 ENTERFN(); 835 for (channel_i = 0; channel_i < NUM_CHANNELS; channel_i++) 836 { 837 for (bl_i = 0; bl_i < NUM_BYTE_LANES; bl_i++) 838 { 839 isbM32m(DDRPHY, (B01PTRCTL1 + (channel_i * DDRIODQ_CH_OFFSET) + ((bl_i >> 1) * DDRIODQ_BL_OFFSET)), ~(BIT8), 840 (BIT8)); 841 //delay_m(1); // DEBUG 842 isbM32m(DDRPHY, (B01PTRCTL1 + (channel_i * DDRIODQ_CH_OFFSET) + ((bl_i >> 1) * DDRIODQ_BL_OFFSET)), (BIT8), 843 (BIT8)); 844 } 845 } 846 LEAVEFN(); 847 return; 848 } 849 850 // void enable_cache: 851 void enable_cache( 852 void) 853 { 854 // Cache control not used in Quark MRC 855 return; 856 } 857 858 // void disable_cache: 859 void disable_cache( 860 void) 861 { 862 // Cache control not used in Quark MRC 863 return; 864 } 865 866 // Send DRAM command, data should be formated 867 // using DCMD_Xxxx macro or emrsXCommand structure. 868 static void dram_init_command( 869 uint32_t data) 870 { 871 Wr32(DCMD, 0, data); 872 } 873 874 // find_rising_edge: 875 // 876 // This function will find the rising edge transition on RCVN or WDQS. 
// The search runs in three phases:
//   1. coarse sampling: program delay[], delay[] + SAMPLE_DLY and
//      delay[] + 2 * SAMPLE_DLY on every byte lane and sample DQS each time,
//   2. per byte lane, turn the three samples into a starting delay and a
//      search direction,
//   3. step each byte lane one PI at a time until every lane has crossed its edge.
//
//   mrc_params - supplies channel_width; also passed to select_hte()/sample_dqs()
//   delay      - in/out: per byte lane starting delay on entry, edge position on exit
//   channel    - channel under training
//   rank       - rank under training
//   rcvn       - true: train RCVN with set_rcvn(); false: train WDQS with set_wdqs()
void find_rising_edge(
    MRCParams_t *mrc_params,
    uint32_t delay[],
    uint8_t channel,
    uint8_t rank,
    bool rcvn)
{

// NOTE: these macros are not #undef'd, so they stay defined for the rest of the file.
#define SAMPLE_CNT 3   // number of sample points
#define SAMPLE_DLY 26  // number of PIs to increment per sample
#define FORWARD true   // indicates to increase delays when looking for edge
#define BACKWARD false  // indicates to decrease delays when looking for edge

  bool all_edges_found; // determines stop condition
  bool direction[NUM_BYTE_LANES]; // direction indicator
  uint8_t sample_i; // sample counter
  uint8_t bl_i; // byte lane counter
  uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 2 : 1; // byte lane divisor
  uint32_t sample_result[SAMPLE_CNT]; // results of "sample_dqs()"
  uint32_t tempD; // temporary DWORD
  uint32_t transition_pattern;

  ENTERFN();

  // select hte and request initial configuration
  select_hte(mrc_params);
  // consumed (and cleared) by the first HteMemOp() call inside sample_dqs()
  first_run = 1;

  // Take 3 sample points (T1,T2,T3) to obtain a transition pattern.
  for (sample_i = 0; sample_i < SAMPLE_CNT; sample_i++)
  {
    // program the desired delays for sample
    for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)
    {
      // increase sample delay by 26 PI (0.2 CLK)
      if (rcvn)
      {
        set_rcvn(channel, rank, bl_i, delay[bl_i] + (sample_i * SAMPLE_DLY));
      }
      else
      {
        set_wdqs(channel, rank, bl_i, delay[bl_i] + (sample_i * SAMPLE_DLY));
      }
    } // bl_i loop
    // take samples (Tsample_i)
    sample_result[sample_i] = sample_dqs(mrc_params, channel, rank, rcvn);

    DPF(D_TRN, "Find rising edge %s ch%d rnk%d: #%d dly=%d dqs=%02X\n",
        (rcvn ? "RCVN" : "WDQS"), channel, rank,
        sample_i, sample_i * SAMPLE_DLY, sample_result[sample_i]);

  } // sample_i loop

  // This pattern will help determine where we landed and ultimately how to place RCVEN/WDQS.
  for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)
  {
    // build "transition_pattern" (MSB is 1st sample)
    // bit "bl_i" of each sample result is this byte lane's sampled level
    transition_pattern = 0x00;
    for (sample_i = 0; sample_i < SAMPLE_CNT; sample_i++)
    {
      transition_pattern |= ((sample_result[sample_i] & (1 << bl_i)) >> bl_i) << (SAMPLE_CNT - 1 - sample_i);
    } // sample_i loop

    DPF(D_TRN, "=== transition pattern %d\n", transition_pattern);

    // set up to look for rising edge based on "transition_pattern"
    switch (transition_pattern)
    {
    case 0x00: // sampled 0->0->0
      // move forward from T3 looking for 0->1
      delay[bl_i] += 2 * SAMPLE_DLY;
      direction[bl_i] = FORWARD;
      break;
    case 0x01: // sampled 0->0->1
    case 0x05: // sampled 1->0->1 (bad duty cycle) *HSD#237503*
      // move forward from T2 looking for 0->1
      delay[bl_i] += 1 * SAMPLE_DLY;
      direction[bl_i] = FORWARD;
      break;
// HSD#237503
//      case 0x02: // sampled 0->1->0 (bad duty cycle)
//        training_message(channel, rank, bl_i);
//        post_code(0xEE, 0xE8);
//        break;
    case 0x02: // sampled 0->1->0 (bad duty cycle) *HSD#237503*
    case 0x03: // sampled 0->1->1
      // move forward from T1 looking for 0->1
      delay[bl_i] += 0 * SAMPLE_DLY;
      direction[bl_i] = FORWARD;
      break;
    case 0x04: // sampled 1->0->0 (assumes BL8, HSD#234975)
      // move forward from T3 looking for 0->1
      delay[bl_i] += 2 * SAMPLE_DLY;
      direction[bl_i] = FORWARD;
      break;
// HSD#237503
//      case 0x05: // sampled 1->0->1 (bad duty cycle)
//        training_message(channel, rank, bl_i);
//        post_code(0xEE, 0xE9);
//        break;
    case 0x06: // sampled 1->1->0
    case 0x07: // sampled 1->1->1
      // move backward from T1 looking for 1->0
      delay[bl_i] += 0 * SAMPLE_DLY;
      direction[bl_i] = BACKWARD;
      break;
    default:
      // Not reachable while SAMPLE_CNT is 3: all eight 3-bit patterns are
      // handled above.  Note that direction[bl_i] is left unset on this path.
      post_code(0xEE, 0xEE);
      break;
    } // transition_pattern switch
    // program delays
    if (rcvn)
    {
      set_rcvn(channel, rank, bl_i, delay[bl_i]);
    }
    else
    {
      set_wdqs(channel, rank, bl_i, delay[bl_i]);
    }
  } // bl_i loop

  // Based on the observed transition pattern on the byte lane,
  // begin looking for a rising edge with single PI granularity.
  // NOTE(review): this loop has no iteration limit, and "delay[bl_i] -= 1" on a
  // uint32_t wraps if a BACKWARD lane reaches 0 while still sampling '1'.
  do
  {
    all_edges_found = true; // assume all byte lanes passed
    tempD = sample_dqs(mrc_params, channel, rank, rcvn); // take a sample
    // check all each byte lane for proper edge
    for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)
    {
      if (tempD & (1 << bl_i))
      {
        // sampled "1"
        if (direction[bl_i] == BACKWARD)
        {
          // keep looking for edge on this byte lane
          all_edges_found = false;
          delay[bl_i] -= 1;
          if (rcvn)
          {
            set_rcvn(channel, rank, bl_i, delay[bl_i]);
          }
          else
          {
            set_wdqs(channel, rank, bl_i, delay[bl_i]);
          }
        }
      }
      else
      {
        // sampled "0"
        if (direction[bl_i] == FORWARD)
        {
          // keep looking for edge on this byte lane
          all_edges_found = false;
          delay[bl_i] += 1;
          if (rcvn)
          {
            set_rcvn(channel, rank, bl_i, delay[bl_i]);
          }
          else
          {
            set_wdqs(channel, rank, bl_i, delay[bl_i]);
          }
        }
      }
    } // bl_i loop
  } while (!all_edges_found);

  // restore DDR idle state
  dram_init_command(DCMD_PREA(rank));

  // NOTE(review): always prints entries 0..3, even when fewer byte lanes were
  // trained (x16) - assumes the caller's array has at least 4 entries; confirm.
  DPF(D_TRN, "Delay %03X %03X %03X %03X\n",
      delay[0], delay[1], delay[2], delay[3]);

  LEAVEFN();
  return;
}

// sample_dqs:
//
// This function will sample the DQTRAINSTS registers in the given channel/rank SAMPLE_SIZE times looking for a valid '0' or '1'.
// It will return an encoded DWORD in which each bit corresponds to the sampled value on the byte lane.
1060 uint32_t sample_dqs( 1061 MRCParams_t *mrc_params, 1062 uint8_t channel, 1063 uint8_t rank, 1064 bool rcvn) 1065 { 1066 uint8_t j; // just a counter 1067 uint8_t bl_i; // which BL in the module (always 2 per module) 1068 uint8_t bl_grp; // which BL module 1069 uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 2 : 1; // byte lane divisor 1070 uint32_t msk[2]; // BLx in module 1071 uint32_t sampled_val[SAMPLE_SIZE]; // DQTRAINSTS register contents for each sample 1072 uint32_t num_0s; // tracks the number of '0' samples 1073 uint32_t num_1s; // tracks the number of '1' samples 1074 uint32_t ret_val = 0x00; // assume all '0' samples 1075 uint32_t address = get_addr(mrc_params, channel, rank); 1076 1077 // initialise "msk[]" 1078 msk[0] = (rcvn) ? (BIT1) : (BIT9); // BL0 1079 msk[1] = (rcvn) ? (BIT0) : (BIT8); // BL1 1080 1081 1082 // cycle through each byte lane group 1083 for (bl_grp = 0; bl_grp < (NUM_BYTE_LANES / bl_divisor) / 2; bl_grp++) 1084 { 1085 // take SAMPLE_SIZE samples 1086 for (j = 0; j < SAMPLE_SIZE; j++) 1087 { 1088 HteMemOp(address, first_run, rcvn?0:1); 1089 first_run = 0; 1090 1091 // record the contents of the proper DQTRAINSTS register 1092 sampled_val[j] = isbR32m(DDRPHY, (DQTRAINSTS + (bl_grp * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET))); 1093 } 1094 // look for a majority value ( (SAMPLE_SIZE/2)+1 ) on the byte lane 1095 // and set that value in the corresponding "ret_val" bit 1096 for (bl_i = 0; bl_i < 2; bl_i++) 1097 { 1098 num_0s = 0x00; // reset '0' tracker for byte lane 1099 num_1s = 0x00; // reset '1' tracker for byte lane 1100 for (j = 0; j < SAMPLE_SIZE; j++) 1101 { 1102 if (sampled_val[j] & msk[bl_i]) 1103 { 1104 num_1s++; 1105 } 1106 else 1107 { 1108 num_0s++; 1109 } 1110 } 1111 if (num_1s > num_0s) 1112 { 1113 ret_val |= (1 << (bl_i + (bl_grp * 2))); 1114 } 1115 } 1116 } 1117 1118 // "ret_val.0" contains the status of BL0 1119 // "ret_val.1" contains the status of BL1 1120 // "ret_val.2" contains the status of 
BL2 1121 // etc. 1122 return ret_val; 1123 } 1124 1125 // get_addr: 1126 // 1127 // This function will return a 32 bit address in the desired channel and rank. 1128 uint32_t get_addr( 1129 MRCParams_t *mrc_params, 1130 uint8_t channel, 1131 uint8_t rank) 1132 { 1133 uint32_t offset = 0x02000000; // 32MB 1134 1135 // Begin product specific code 1136 if (channel > 0) 1137 { 1138 DPF(D_ERROR, "ILLEGAL CHANNEL\n"); 1139 DEAD_LOOP(); 1140 } 1141 1142 if (rank > 1) 1143 { 1144 DPF(D_ERROR, "ILLEGAL RANK\n"); 1145 DEAD_LOOP(); 1146 } 1147 1148 // use 256MB lowest density as per DRP == 0x0003 1149 offset += rank * (256 * 1024 * 1024); 1150 1151 return offset; 1152 } 1153 1154 // byte_lane_mask: 1155 // 1156 // This function will return a 32 bit mask that will be used to check for byte lane failures. 1157 uint32_t byte_lane_mask( 1158 MRCParams_t *mrc_params) 1159 { 1160 uint32_t j; 1161 uint32_t ret_val = 0x00; 1162 1163 // set "ret_val" based on NUM_BYTE_LANES such that you will check only BL0 in "result" 1164 // (each bit in "result" represents a byte lane) 1165 for (j = 0; j < MAX_BYTE_LANES; j += NUM_BYTE_LANES) 1166 { 1167 ret_val |= (1 << ((j / NUM_BYTE_LANES) * NUM_BYTE_LANES)); 1168 } 1169 1170 // HSD#235037 1171 // need to adjust the mask for 16-bit mode 1172 if (mrc_params->channel_width == x16) 1173 { 1174 ret_val |= (ret_val << 2); 1175 } 1176 1177 return ret_val; 1178 } 1179 1180 1181 // read_tsc: 1182 // 1183 // This function will do some assembly to return TSC register contents as a uint64_t. 
1184 uint64_t read_tsc( 1185 void) 1186 { 1187 volatile uint64_t tsc; // EDX:EAX 1188 1189 #if defined (SIM) || defined (GCC) 1190 volatile uint32_t tscH; // EDX 1191 volatile uint32_t tscL;// EAX 1192 1193 asm("rdtsc":"=a"(tscL),"=d"(tscH)); 1194 tsc = tscH; 1195 tsc = (tsc<<32)|tscL; 1196 #else 1197 tsc = __rdtsc(); 1198 #endif 1199 1200 return tsc; 1201 } 1202 1203 // get_tsc_freq: 1204 // 1205 // This function returns the TSC frequency in MHz 1206 uint32_t get_tsc_freq( 1207 void) 1208 { 1209 static uint32_t freq[] = 1210 { 533, 400, 200, 100 }; 1211 uint32_t fuse; 1212 #if 0 1213 fuse = (isbR32m(FUSE, 0) >> 12) & (BIT1|BIT0); 1214 #else 1215 // todo!!! Fixed 533MHz for emulation or debugging 1216 fuse = 0; 1217 #endif 1218 return freq[fuse]; 1219 } 1220 1221 #ifndef SIM 1222 // delay_n: 1223 // 1224 // This is a simple delay function. 1225 // It takes "nanoseconds" as a parameter. 1226 void delay_n( 1227 uint32_t nanoseconds) 1228 { 1229 // 1000 MHz clock has 1ns period --> no conversion required 1230 uint64_t final_tsc = read_tsc(); 1231 final_tsc += ((get_tsc_freq() * (nanoseconds)) / 1000); 1232 1233 while (read_tsc() < final_tsc) 1234 ; 1235 return; 1236 } 1237 #endif 1238 1239 // delay_u: 1240 // 1241 // This is a simple delay function. 1242 // It takes "microseconds as a parameter. 1243 void delay_u( 1244 uint32_t microseconds) 1245 { 1246 // 64 bit math is not an option, just use loops 1247 while (microseconds--) 1248 { 1249 delay_n(1000); 1250 } 1251 return; 1252 } 1253 1254 // delay_m: 1255 // 1256 // This is a simple delay function. 1257 // It takes "milliseconds" as a parameter. 1258 void delay_m( 1259 uint32_t milliseconds) 1260 { 1261 // 64 bit math is not an option, just use loops 1262 while (milliseconds--) 1263 { 1264 delay_u(1000); 1265 } 1266 return; 1267 } 1268 1269 // delay_s: 1270 // 1271 // This is a simple delay function. 1272 // It takes "seconds" as a parameter. 
1273 void delay_s( 1274 uint32_t seconds) 1275 { 1276 // 64 bit math is not an option, just use loops 1277 while (seconds--) 1278 { 1279 delay_m(1000); 1280 } 1281 return; 1282 } 1283 1284 // post_code: 1285 // 1286 // This function will output the POST CODE to the four 7-Segment LED displays. 1287 void post_code( 1288 uint8_t major, 1289 uint8_t minor) 1290 { 1291 #ifdef EMU 1292 // Update global variable for execution tracking in debug env 1293 PostCode = ((major << 8) | minor); 1294 #endif 1295 1296 // send message to UART 1297 DPF(D_INFO, "POST: 0x%01X%02X\n", major, minor); 1298 1299 // error check: 1300 if (major == 0xEE) 1301 { 1302 // todo!!! Consider updating error status and exit MRC 1303 #ifdef SIM 1304 // enable Ctrl-C handling 1305 for(;;) delay_n(100); 1306 #else 1307 DEAD_LOOP(); 1308 #endif 1309 } 1310 } 1311 1312 void training_message( 1313 uint8_t channel, 1314 uint8_t rank, 1315 uint8_t byte_lane) 1316 { 1317 // send message to UART 1318 DPF(D_INFO, "CH%01X RK%01X BL%01X\n", channel, rank, byte_lane); 1319 return; 1320 } 1321 1322 void print_timings( 1323 MRCParams_t *mrc_params) 1324 { 1325 uint8_t algo_i; 1326 uint8_t channel_i; 1327 uint8_t rank_i; 1328 uint8_t bl_i; 1329 uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 
2 : 1; 1330 1331 DPF(D_INFO, "\n---------------------------"); 1332 DPF(D_INFO, "\nALGO[CH:RK] BL0 BL1 BL2 BL3"); 1333 DPF(D_INFO, "\n==========================="); 1334 for (algo_i = 0; algo_i < eMAX_ALGOS; algo_i++) 1335 { 1336 for (channel_i = 0; channel_i < NUM_CHANNELS; channel_i++) 1337 { 1338 if (mrc_params->channel_enables & (1 << channel_i)) 1339 { 1340 for (rank_i = 0; rank_i < NUM_RANKS; rank_i++) 1341 { 1342 if (mrc_params->rank_enables & (1 << rank_i)) 1343 { 1344 switch (algo_i) 1345 { 1346 case eRCVN: 1347 DPF(D_INFO, "\nRCVN[%02d:%02d]", channel_i, rank_i); 1348 break; 1349 case eWDQS: 1350 DPF(D_INFO, "\nWDQS[%02d:%02d]", channel_i, rank_i); 1351 break; 1352 case eWDQx: 1353 DPF(D_INFO, "\nWDQx[%02d:%02d]", channel_i, rank_i); 1354 break; 1355 case eRDQS: 1356 DPF(D_INFO, "\nRDQS[%02d:%02d]", channel_i, rank_i); 1357 break; 1358 case eVREF: 1359 DPF(D_INFO, "\nVREF[%02d:%02d]", channel_i, rank_i); 1360 break; 1361 case eWCMD: 1362 DPF(D_INFO, "\nWCMD[%02d:%02d]", channel_i, rank_i); 1363 break; 1364 case eWCTL: 1365 DPF(D_INFO, "\nWCTL[%02d:%02d]", channel_i, rank_i); 1366 break; 1367 case eWCLK: 1368 DPF(D_INFO, "\nWCLK[%02d:%02d]", channel_i, rank_i); 1369 break; 1370 default: 1371 break; 1372 } // algo_i switch 1373 for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++) 1374 { 1375 switch (algo_i) 1376 { 1377 case eRCVN: 1378 DPF(D_INFO, " %03d", get_rcvn(channel_i, rank_i, bl_i)); 1379 break; 1380 case eWDQS: 1381 DPF(D_INFO, " %03d", get_wdqs(channel_i, rank_i, bl_i)); 1382 break; 1383 case eWDQx: 1384 DPF(D_INFO, " %03d", get_wdq(channel_i, rank_i, bl_i)); 1385 break; 1386 case eRDQS: 1387 DPF(D_INFO, " %03d", get_rdqs(channel_i, rank_i, bl_i)); 1388 break; 1389 case eVREF: 1390 DPF(D_INFO, " %03d", get_vref(channel_i, bl_i)); 1391 break; 1392 case eWCMD: 1393 DPF(D_INFO, " %03d", get_wcmd(channel_i)); 1394 break; 1395 case eWCTL: 1396 DPF(D_INFO, " %03d", get_wctl(channel_i, rank_i)); 1397 break; 1398 case eWCLK: 1399 DPF(D_INFO, " 
%03d", get_wclk(channel_i, rank_i)); 1400 break; 1401 default: 1402 break; 1403 } // algo_i switch 1404 } // bl_i loop 1405 } // if rank_i enabled 1406 } // rank_i loop 1407 } // if channel_i enabled 1408 } // channel_i loop 1409 } // algo_i loop 1410 DPF(D_INFO, "\n---------------------------"); 1411 DPF(D_INFO, "\n"); 1412 return; 1413 } 1414 1415 // 32 bit LFSR with characteristic polynomial: X^32 + X^22 +X^2 + X^1 1416 // The function takes pointer to previous 32 bit value and modifies it to next value. 1417 void lfsr32( 1418 uint32_t *lfsr_ptr) 1419 { 1420 uint32_t bit; 1421 uint32_t lfsr; 1422 uint32_t i; 1423 1424 lfsr = *lfsr_ptr; 1425 1426 for (i = 0; i < 32; i++) 1427 { 1428 bit = 1 ^ (lfsr & BIT0); 1429 bit = bit ^ ((lfsr & BIT1) >> 1); 1430 bit = bit ^ ((lfsr & BIT2) >> 2); 1431 bit = bit ^ ((lfsr & BIT22) >> 22); 1432 1433 lfsr = ((lfsr >> 1) | (bit << 31)); 1434 } 1435 1436 *lfsr_ptr = lfsr; 1437 return; 1438 } 1439 1440 // The purpose of this function is to ensure the SEC comes out of reset 1441 // and IA initiates the SEC enabling Memory Scrambling. 1442 void enable_scrambling( 1443 MRCParams_t *mrc_params) 1444 { 1445 uint32_t lfsr = 0; 1446 uint8_t i; 1447 1448 if (mrc_params->scrambling_enables == 0) 1449 return; 1450 1451 ENTERFN(); 1452 1453 // 32 bit seed is always stored in BIOS NVM. 1454 lfsr = mrc_params->timings.scrambler_seed; 1455 1456 if (mrc_params->boot_mode == bmCold) 1457 { 1458 // factory value is 0 and in first boot, a clock based seed is loaded. 1459 if (lfsr == 0) 1460 { 1461 lfsr = read_tsc() & 0x0FFFFFFF; // get seed from system clock and make sure it is not all 1's 1462 } 1463 // need to replace scrambler 1464 // get next 32bit LFSR 16 times which is the last part of the previous scrambler vector. 1465 else 1466 { 1467 for (i = 0; i < 16; i++) 1468 { 1469 lfsr32(&lfsr); 1470 } 1471 } 1472 mrc_params->timings.scrambler_seed = lfsr; // save new seed. 
1473 } // if (cold_boot) 1474 1475 // In warm boot or S3 exit, we have the previous seed. 1476 // In cold boot, we have the last 32bit LFSR which is the new seed. 1477 lfsr32(&lfsr); // shift to next value 1478 isbW32m(MCU, SCRMSEED, (lfsr & 0x0003FFFF)); 1479 for (i = 0; i < 2; i++) 1480 { 1481 isbW32m(MCU, SCRMLO + i, (lfsr & 0xAAAAAAAA)); 1482 } 1483 1484 LEAVEFN(); 1485 return; 1486 } 1487 1488 // This function will store relevant timing data 1489 // This data will be used on subsequent boots to speed up boot times 1490 // and is required for Suspend To RAM capabilities. 1491 void store_timings( 1492 MRCParams_t *mrc_params) 1493 { 1494 uint8_t ch, rk, bl; 1495 MrcTimings_t *mt = &mrc_params->timings; 1496 1497 for (ch = 0; ch < NUM_CHANNELS; ch++) 1498 { 1499 for (rk = 0; rk < NUM_RANKS; rk++) 1500 { 1501 for (bl = 0; bl < NUM_BYTE_LANES; bl++) 1502 { 1503 mt->rcvn[ch][rk][bl] = get_rcvn(ch, rk, bl); // RCVN 1504 mt->rdqs[ch][rk][bl] = get_rdqs(ch, rk, bl); // RDQS 1505 mt->wdqs[ch][rk][bl] = get_wdqs(ch, rk, bl); // WDQS 1506 mt->wdq[ch][rk][bl] = get_wdq(ch, rk, bl); // WDQ 1507 if (rk == 0) 1508 { 1509 mt->vref[ch][bl] = get_vref(ch, bl); // VREF (RANK0 only) 1510 } 1511 } 1512 mt->wctl[ch][rk] = get_wctl(ch, rk); // WCTL 1513 } 1514 mt->wcmd[ch] = get_wcmd(ch); // WCMD 1515 } 1516 1517 // need to save for a case of changing frequency after warm reset 1518 mt->ddr_speed = mrc_params->ddr_speed; 1519 1520 return; 1521 } 1522 1523 // This function will retrieve relevant timing data 1524 // This data will be used on subsequent boots to speed up boot times 1525 // and is required for Suspend To RAM capabilities. 
1526 void restore_timings( 1527 MRCParams_t *mrc_params) 1528 { 1529 uint8_t ch, rk, bl; 1530 const MrcTimings_t *mt = &mrc_params->timings; 1531 1532 for (ch = 0; ch < NUM_CHANNELS; ch++) 1533 { 1534 for (rk = 0; rk < NUM_RANKS; rk++) 1535 { 1536 for (bl = 0; bl < NUM_BYTE_LANES; bl++) 1537 { 1538 set_rcvn(ch, rk, bl, mt->rcvn[ch][rk][bl]); // RCVN 1539 set_rdqs(ch, rk, bl, mt->rdqs[ch][rk][bl]); // RDQS 1540 set_wdqs(ch, rk, bl, mt->wdqs[ch][rk][bl]); // WDQS 1541 set_wdq(ch, rk, bl, mt->wdq[ch][rk][bl]); // WDQ 1542 if (rk == 0) 1543 { 1544 set_vref(ch, bl, mt->vref[ch][bl]); // VREF (RANK0 only) 1545 } 1546 } 1547 set_wctl(ch, rk, mt->wctl[ch][rk]); // WCTL 1548 } 1549 set_wcmd(ch, mt->wcmd[ch]); // WCMD 1550 } 1551 1552 return; 1553 } 1554 1555 // Configure default settings normally set as part of read training 1556 // Some defaults have to be set earlier as they may affect earlier 1557 // training steps. 1558 void default_timings( 1559 MRCParams_t *mrc_params) 1560 { 1561 uint8_t ch, rk, bl; 1562 1563 for (ch = 0; ch < NUM_CHANNELS; ch++) 1564 { 1565 for (rk = 0; rk < NUM_RANKS; rk++) 1566 { 1567 for (bl = 0; bl < NUM_BYTE_LANES; bl++) 1568 { 1569 set_rdqs(ch, rk, bl, 24); // RDQS 1570 if (rk == 0) 1571 { 1572 set_vref(ch, bl, 32); // VREF (RANK0 only) 1573 } 1574 } 1575 } 1576 } 1577 1578 return; 1579 } 1580 1581