Home | History | Annotate | Download | only in Pei
      1 /************************************************************************
      2  *
      3  * Copyright (c) 2013-2015 Intel Corporation.
      4  *
      5 * This program and the accompanying materials
      6 * are licensed and made available under the terms and conditions of the BSD License
      7 * which accompanies this distribution.  The full text of the license may be found at
      8 * http://opensource.org/licenses/bsd-license.php
      9 *
     10 * THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
     11 * WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
     12  *
     13  ***************************************************************************/
     14 
     15 #include "mrc.h"
     16 #include "memory_options.h"
     17 
     18 #include "meminit_utils.h"
     19 #include "hte.h"
     20 #include "io.h"
     21 
     22 void select_hte(
     23     MRCParams_t *mrc_params);
     24 
     25 static uint8_t first_run = 0;
     26 
     27 const uint8_t vref_codes[64] =
     28 { // lowest to highest
     29     0x3F, 0x3E, 0x3D, 0x3C, 0x3B, 0x3A, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30, // 00 - 15
     30     0x2F, 0x2E, 0x2D, 0x2C, 0x2B, 0x2A, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, // 16 - 31
     31     0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, // 32 - 47
     32     0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F  // 48 - 63
     33 };
     34 
     35 #ifdef EMU
     36 // Track current post code for debugging purpose
     37 uint32_t PostCode;
     38 #endif
     39 
     40 // set_rcvn:
     41 //
     42 // This function will program the RCVEN delays.
     43 // (currently doesn't comprehend rank)
     44 void set_rcvn(
     45     uint8_t channel,
     46     uint8_t rank,
     47     uint8_t byte_lane,
     48     uint32_t pi_count)
     49 {
     50   uint32_t reg;
     51   uint32_t msk;
     52   uint32_t tempD;
     53 
     54   ENTERFN();
     55   DPF(D_TRN, "Rcvn ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);
     56 
     57   // RDPTR (1/2 MCLK, 64 PIs)
     58   // BL0 -> B01PTRCTL0[11:08] (0x0-0xF)
     59   // BL1 -> B01PTRCTL0[23:20] (0x0-0xF)
     60   reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
     61   msk = (byte_lane & BIT0) ? (BIT23 | BIT22 | BIT21 | BIT20) : (BIT11 | BIT10 | BIT9 | BIT8);
     62   tempD = (byte_lane & BIT0) ? ((pi_count / HALF_CLK) << 20) : ((pi_count / HALF_CLK) << 8);
     63   isbM32m(DDRPHY, reg, tempD, msk);
     64 
     65   // Adjust PI_COUNT
     66   pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
     67 
     68   // PI (1/64 MCLK, 1 PIs)
     69   // BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)
     70   // BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)
     71   reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
     72   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
     73   msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24);
     74   tempD = pi_count << 24;
     75   isbM32m(DDRPHY, reg, tempD, msk);
     76 
     77   // DEADBAND
     78   // BL0/1 -> B01DBCTL1[08/11] (+1 select)
     79   // BL0/1 -> B01DBCTL1[02/05] (enable)
     80   reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
     81   msk = 0x00;
     82   tempD = 0x00;
     83   // enable
     84   msk |= (byte_lane & BIT0) ? (BIT5) : (BIT2);
     85   if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
     86   {
     87     tempD |= msk;
     88   }
     89   // select
     90   msk |= (byte_lane & BIT0) ? (BIT11) : (BIT8);
     91   if (pi_count < EARLY_DB)
     92   {
     93     tempD |= msk;
     94   }
     95   isbM32m(DDRPHY, reg, tempD, msk);
     96 
     97   // error check
     98   if (pi_count > 0x3F)
     99   {
    100     training_message(channel, rank, byte_lane);
    101     post_code(0xEE, 0xE0);
    102   }
    103 
    104   LEAVEFN();
    105   return;
    106 }
    107 
    108 // get_rcvn:
    109 //
    110 // This function will return the current RCVEN delay on the given channel, rank, byte_lane as an absolute PI count.
    111 // (currently doesn't comprehend rank)
    112 uint32_t get_rcvn(
    113     uint8_t channel,
    114     uint8_t rank,
    115     uint8_t byte_lane)
    116 {
    117   uint32_t reg;
    118   uint32_t tempD;
    119   uint32_t pi_count;
    120 
    121   ENTERFN();
    122 
    123   // RDPTR (1/2 MCLK, 64 PIs)
    124   // BL0 -> B01PTRCTL0[11:08] (0x0-0xF)
    125   // BL1 -> B01PTRCTL0[23:20] (0x0-0xF)
    126   reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
    127   tempD = isbR32m(DDRPHY, reg);
    128   tempD >>= (byte_lane & BIT0) ? (20) : (8);
    129   tempD &= 0xF;
    130 
    131   // Adjust PI_COUNT
    132   pi_count = tempD * HALF_CLK;
    133 
    134   // PI (1/64 MCLK, 1 PIs)
    135   // BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)
    136   // BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)
    137   reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
    138   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
    139   tempD = isbR32m(DDRPHY, reg);
    140   tempD >>= 24;
    141   tempD &= 0x3F;
    142 
    143   // Adjust PI_COUNT
    144   pi_count += tempD;
    145 
    146   LEAVEFN();
    147   return pi_count;
    148 }
    149 
    150 // set_rdqs:
    151 //
    152 // This function will program the RDQS delays based on an absolute amount of PIs.
    153 // (currently doesn't comprehend rank)
    154 void set_rdqs(
    155     uint8_t channel,
    156     uint8_t rank,
    157     uint8_t byte_lane,
    158     uint32_t pi_count)
    159 {
    160   uint32_t reg;
    161   uint32_t msk;
    162   uint32_t tempD;
    163 
    164   ENTERFN();
    165   DPF(D_TRN, "Rdqs ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);
    166 
    167   // PI (1/128 MCLK)
    168   // BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)
    169   // BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)
    170   reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE);
    171   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
    172   msk = (BIT6 | BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0);
    173   tempD = pi_count << 0;
    174   isbM32m(DDRPHY, reg, tempD, msk);
    175 
    176   // error check (shouldn't go above 0x3F)
    177   if (pi_count > 0x47)
    178   {
    179     training_message(channel, rank, byte_lane);
    180     post_code(0xEE, 0xE1);
    181   }
    182 
    183   LEAVEFN();
    184   return;
    185 }
    186 
    187 // get_rdqs:
    188 //
    189 // This function will return the current RDQS delay on the given channel, rank, byte_lane as an absolute PI count.
    190 // (currently doesn't comprehend rank)
    191 uint32_t get_rdqs(
    192     uint8_t channel,
    193     uint8_t rank,
    194     uint8_t byte_lane)
    195 {
    196   uint32_t reg;
    197   uint32_t tempD;
    198   uint32_t pi_count;
    199 
    200   ENTERFN();
    201 
    202   // PI (1/128 MCLK)
    203   // BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)
    204   // BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)
    205   reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE);
    206   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
    207   tempD = isbR32m(DDRPHY, reg);
    208 
    209   // Adjust PI_COUNT
    210   pi_count = tempD & 0x7F;
    211 
    212   LEAVEFN();
    213   return pi_count;
    214 }
    215 
    216 // set_wdqs:
    217 //
    218 // This function will program the WDQS delays based on an absolute amount of PIs.
    219 // (currently doesn't comprehend rank)
    220 void set_wdqs(
    221     uint8_t channel,
    222     uint8_t rank,
    223     uint8_t byte_lane,
    224     uint32_t pi_count)
    225 {
    226   uint32_t reg;
    227   uint32_t msk;
    228   uint32_t tempD;
    229 
    230   ENTERFN();
    231   DPF(D_TRN, "Wdqs ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);
    232 
    233   // RDPTR (1/2 MCLK, 64 PIs)
    234   // BL0 -> B01PTRCTL0[07:04] (0x0-0xF)
    235   // BL1 -> B01PTRCTL0[19:16] (0x0-0xF)
    236   reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
    237   msk = (byte_lane & BIT0) ? (BIT19 | BIT18 | BIT17 | BIT16) : (BIT7 | BIT6 | BIT5 | BIT4);
    238   tempD = pi_count / HALF_CLK;
    239   tempD <<= (byte_lane & BIT0) ? (16) : (4);
    240   isbM32m(DDRPHY, reg, tempD, msk);
    241 
    242   // Adjust PI_COUNT
    243   pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
    244 
    245   // PI (1/64 MCLK, 1 PIs)
    246   // BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)
    247   // BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)
    248   reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
    249   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
    250   msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16);
    251   tempD = pi_count << 16;
    252   isbM32m(DDRPHY, reg, tempD, msk);
    253 
    254   // DEADBAND
    255   // BL0/1 -> B01DBCTL1[07/10] (+1 select)
    256   // BL0/1 -> B01DBCTL1[01/04] (enable)
    257   reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
    258   msk = 0x00;
    259   tempD = 0x00;
    260   // enable
    261   msk |= (byte_lane & BIT0) ? (BIT4) : (BIT1);
    262   if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
    263   {
    264     tempD |= msk;
    265   }
    266   // select
    267   msk |= (byte_lane & BIT0) ? (BIT10) : (BIT7);
    268   if (pi_count < EARLY_DB)
    269   {
    270     tempD |= msk;
    271   }
    272   isbM32m(DDRPHY, reg, tempD, msk);
    273 
    274   // error check
    275   if (pi_count > 0x3F)
    276   {
    277     training_message(channel, rank, byte_lane);
    278     post_code(0xEE, 0xE2);
    279   }
    280 
    281   LEAVEFN();
    282   return;
    283 }
    284 
    285 // get_wdqs:
    286 //
    287 // This function will return the amount of WDQS delay on the given channel, rank, byte_lane as an absolute PI count.
    288 // (currently doesn't comprehend rank)
    289 uint32_t get_wdqs(
    290     uint8_t channel,
    291     uint8_t rank,
    292     uint8_t byte_lane)
    293 {
    294   uint32_t reg;
    295   uint32_t tempD;
    296   uint32_t pi_count;
    297 
    298   ENTERFN();
    299 
    300   // RDPTR (1/2 MCLK, 64 PIs)
    301   // BL0 -> B01PTRCTL0[07:04] (0x0-0xF)
    302   // BL1 -> B01PTRCTL0[19:16] (0x0-0xF)
    303   reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
    304   tempD = isbR32m(DDRPHY, reg);
    305   tempD >>= (byte_lane & BIT0) ? (16) : (4);
    306   tempD &= 0xF;
    307 
    308   // Adjust PI_COUNT
    309   pi_count = (tempD * HALF_CLK);
    310 
    311   // PI (1/64 MCLK, 1 PIs)
    312   // BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)
    313   // BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)
    314   reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
    315   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
    316   tempD = isbR32m(DDRPHY, reg);
    317   tempD >>= 16;
    318   tempD &= 0x3F;
    319 
    320   // Adjust PI_COUNT
    321   pi_count += tempD;
    322 
    323   LEAVEFN();
    324   return pi_count;
    325 }
    326 
    327 // set_wdq:
    328 //
    329 // This function will program the WDQ delays based on an absolute number of PIs.
    330 // (currently doesn't comprehend rank)
    331 void set_wdq(
    332     uint8_t channel,
    333     uint8_t rank,
    334     uint8_t byte_lane,
    335     uint32_t pi_count)
    336 {
    337   uint32_t reg;
    338   uint32_t msk;
    339   uint32_t tempD;
    340 
    341   ENTERFN();
    342   DPF(D_TRN, "Wdq ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);
    343 
    344   // RDPTR (1/2 MCLK, 64 PIs)
    345   // BL0 -> B01PTRCTL0[03:00] (0x0-0xF)
    346   // BL1 -> B01PTRCTL0[15:12] (0x0-0xF)
    347   reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
    348   msk = (byte_lane & BIT0) ? (BIT15 | BIT14 | BIT13 | BIT12) : (BIT3 | BIT2 | BIT1 | BIT0);
    349   tempD = pi_count / HALF_CLK;
    350   tempD <<= (byte_lane & BIT0) ? (12) : (0);
    351   isbM32m(DDRPHY, reg, tempD, msk);
    352 
    353   // Adjust PI_COUNT
    354   pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
    355 
    356   // PI (1/64 MCLK, 1 PIs)
    357   // BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)
    358   // BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)
    359   reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
    360   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
    361   msk = (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8);
    362   tempD = pi_count << 8;
    363   isbM32m(DDRPHY, reg, tempD, msk);
    364 
    365   // DEADBAND
    366   // BL0/1 -> B01DBCTL1[06/09] (+1 select)
    367   // BL0/1 -> B01DBCTL1[00/03] (enable)
    368   reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
    369   msk = 0x00;
    370   tempD = 0x00;
    371   // enable
    372   msk |= (byte_lane & BIT0) ? (BIT3) : (BIT0);
    373   if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
    374   {
    375     tempD |= msk;
    376   }
    377   // select
    378   msk |= (byte_lane & BIT0) ? (BIT9) : (BIT6);
    379   if (pi_count < EARLY_DB)
    380   {
    381     tempD |= msk;
    382   }
    383   isbM32m(DDRPHY, reg, tempD, msk);
    384 
    385   // error check
    386   if (pi_count > 0x3F)
    387   {
    388     training_message(channel, rank, byte_lane);
    389     post_code(0xEE, 0xE3);
    390   }
    391 
    392   LEAVEFN();
    393   return;
    394 }
    395 
    396 // get_wdq:
    397 //
    398 // This function will return the amount of WDQ delay on the given channel, rank, byte_lane as an absolute PI count.
    399 // (currently doesn't comprehend rank)
    400 uint32_t get_wdq(
    401     uint8_t channel,
    402     uint8_t rank,
    403     uint8_t byte_lane)
    404 {
    405   uint32_t reg;
    406   uint32_t tempD;
    407   uint32_t pi_count;
    408 
    409   ENTERFN();
    410 
    411   // RDPTR (1/2 MCLK, 64 PIs)
    412   // BL0 -> B01PTRCTL0[03:00] (0x0-0xF)
    413   // BL1 -> B01PTRCTL0[15:12] (0x0-0xF)
    414   reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);
    415   tempD = isbR32m(DDRPHY, reg);
    416   tempD >>= (byte_lane & BIT0) ? (12) : (0);
    417   tempD &= 0xF;
    418 
    419   // Adjust PI_COUNT
    420   pi_count = (tempD * HALF_CLK);
    421 
    422   // PI (1/64 MCLK, 1 PIs)
    423   // BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)
    424   // BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)
    425   reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);
    426   reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));
    427   tempD = isbR32m(DDRPHY, reg);
    428   tempD >>= 8;
    429   tempD &= 0x3F;
    430 
    431   // Adjust PI_COUNT
    432   pi_count += tempD;
    433 
    434   LEAVEFN();
    435   return pi_count;
    436 }
    437 
    438 // set_wcmd:
    439 //
    440 // This function will program the WCMD delays based on an absolute number of PIs.
    441 void set_wcmd(
    442     uint8_t channel,
    443     uint32_t pi_count)
    444 {
    445   uint32_t reg;
    446   uint32_t msk;
    447   uint32_t tempD;
    448 
    449   ENTERFN();
    450   // RDPTR (1/2 MCLK, 64 PIs)
    451   // CMDPTRREG[11:08] (0x0-0xF)
    452   reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET);
    453   msk = (BIT11 | BIT10 | BIT9 | BIT8);
    454   tempD = pi_count / HALF_CLK;
    455   tempD <<= 8;
    456   isbM32m(DDRPHY, reg, tempD, msk);
    457 
    458   // Adjust PI_COUNT
    459   pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
    460 
    461   // PI (1/64 MCLK, 1 PIs)
    462   // CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)
    463   // CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)
    464   // CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)
    465   // CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)
    466   // CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)
    467   // CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)
    468   // CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)
    469   // CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)
    470   reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);
    471 
    472   msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24) | (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16)
    473       | (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8) | (BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0);
    474 
    475   tempD = (pi_count << 24) | (pi_count << 16) | (pi_count << 8) | (pi_count << 0);
    476 
    477   isbM32m(DDRPHY, reg, tempD, msk);
    478   reg = CMDDLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); // PO
    479   isbM32m(DDRPHY, reg, tempD, msk);
    480 
    481   // DEADBAND
    482   // CMDCFGREG0[17] (+1 select)
    483   // CMDCFGREG0[16] (enable)
    484   reg = CMDCFGREG0 + (channel * DDRIOCCC_CH_OFFSET);
    485   msk = 0x00;
    486   tempD = 0x00;
    487   // enable
    488   msk |= BIT16;
    489   if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
    490   {
    491     tempD |= msk;
    492   }
    493   // select
    494   msk |= BIT17;
    495   if (pi_count < EARLY_DB)
    496   {
    497     tempD |= msk;
    498   }
    499   isbM32m(DDRPHY, reg, tempD, msk);
    500 
    501   // error check
    502   if (pi_count > 0x3F)
    503   {
    504     post_code(0xEE, 0xE4);
    505   }
    506 
    507   LEAVEFN();
    508   return;
    509 }
    510 
    511 // get_wcmd:
    512 //
    513 // This function will return the amount of WCMD delay on the given channel as an absolute PI count.
    514 uint32_t get_wcmd(
    515     uint8_t channel)
    516 {
    517   uint32_t reg;
    518   uint32_t tempD;
    519   uint32_t pi_count;
    520 
    521   ENTERFN();
    522   // RDPTR (1/2 MCLK, 64 PIs)
    523   // CMDPTRREG[11:08] (0x0-0xF)
    524   reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET);
    525   tempD = isbR32m(DDRPHY, reg);
    526   tempD >>= 8;
    527   tempD &= 0xF;
    528 
    529   // Adjust PI_COUNT
    530   pi_count = tempD * HALF_CLK;
    531 
    532   // PI (1/64 MCLK, 1 PIs)
    533   // CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)
    534   // CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)
    535   // CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)
    536   // CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)
    537   // CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)
    538   // CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)
    539   // CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)
    540   // CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)
    541   reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);
    542   tempD = isbR32m(DDRPHY, reg);
    543   tempD >>= 16;
    544   tempD &= 0x3F;
    545 
    546   // Adjust PI_COUNT
    547   pi_count += tempD;
    548 
    549   LEAVEFN();
    550   return pi_count;
    551 }
    552 
    553 // set_wclk:
    554 //
    555 // This function will program the WCLK delays based on an absolute number of PIs.
    556 void set_wclk(
    557     uint8_t channel,
    558     uint8_t rank,
    559     uint32_t pi_count)
    560 {
    561   uint32_t reg;
    562   uint32_t msk;
    563   uint32_t tempD;
    564 
    565   ENTERFN();
    566   // RDPTR (1/2 MCLK, 64 PIs)
    567   // CCPTRREG[15:12] -> CLK1 (0x0-0xF)
    568   // CCPTRREG[11:08] -> CLK0 (0x0-0xF)
    569   reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
    570   msk = (BIT15 | BIT14 | BIT13 | BIT12) | (BIT11 | BIT10 | BIT9 | BIT8);
    571   tempD = ((pi_count / HALF_CLK) << 12) | ((pi_count / HALF_CLK) << 8);
    572   isbM32m(DDRPHY, reg, tempD, msk);
    573 
    574   // Adjust PI_COUNT
    575   pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
    576 
    577   // PI (1/64 MCLK, 1 PIs)
    578   // ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)
    579   // ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)
    580   reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0);
    581   reg += (channel * DDRIOCCC_CH_OFFSET);
    582   msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16) | (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8);
    583   tempD = (pi_count << 16) | (pi_count << 8);
    584   isbM32m(DDRPHY, reg, tempD, msk);
    585   reg = (rank) ? (ECCB1DLLPICODER1) : (ECCB1DLLPICODER1);
    586   reg += (channel * DDRIOCCC_CH_OFFSET);
    587   isbM32m(DDRPHY, reg, tempD, msk);
    588   reg = (rank) ? (ECCB1DLLPICODER2) : (ECCB1DLLPICODER2);
    589   reg += (channel * DDRIOCCC_CH_OFFSET);
    590   isbM32m(DDRPHY, reg, tempD, msk);
    591   reg = (rank) ? (ECCB1DLLPICODER3) : (ECCB1DLLPICODER3);
    592   reg += (channel * DDRIOCCC_CH_OFFSET);
    593   isbM32m(DDRPHY, reg, tempD, msk);
    594 
    595   // DEADBAND
    596   // CCCFGREG1[11:08] (+1 select)
    597   // CCCFGREG1[03:00] (enable)
    598   reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET);
    599   msk = 0x00;
    600   tempD = 0x00;
    601   // enable
    602   msk |= (BIT3 | BIT2 | BIT1 | BIT0); // only ??? matters
    603   if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
    604   {
    605     tempD |= msk;
    606   }
    607   // select
    608   msk |= (BIT11 | BIT10 | BIT9 | BIT8); // only ??? matters
    609   if (pi_count < EARLY_DB)
    610   {
    611     tempD |= msk;
    612   }
    613   isbM32m(DDRPHY, reg, tempD, msk);
    614 
    615   // error check
    616   if (pi_count > 0x3F)
    617   {
    618     post_code(0xEE, 0xE5);
    619   }
    620 
    621   LEAVEFN();
    622   return;
    623 }
    624 
    625 // get_wclk:
    626 //
    627 // This function will return the amout of WCLK delay on the given channel, rank as an absolute PI count.
    628 uint32_t get_wclk(
    629     uint8_t channel,
    630     uint8_t rank)
    631 {
    632   uint32_t reg;
    633   uint32_t tempD;
    634   uint32_t pi_count;
    635 
    636   ENTERFN();
    637   // RDPTR (1/2 MCLK, 64 PIs)
    638   // CCPTRREG[15:12] -> CLK1 (0x0-0xF)
    639   // CCPTRREG[11:08] -> CLK0 (0x0-0xF)
    640   reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
    641   tempD = isbR32m(DDRPHY, reg);
    642   tempD >>= (rank) ? (12) : (8);
    643   tempD &= 0xF;
    644 
    645   // Adjust PI_COUNT
    646   pi_count = tempD * HALF_CLK;
    647 
    648   // PI (1/64 MCLK, 1 PIs)
    649   // ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)
    650   // ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)
    651   reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0);
    652   reg += (channel * DDRIOCCC_CH_OFFSET);
    653   tempD = isbR32m(DDRPHY, reg);
    654   tempD >>= (rank) ? (16) : (8);
    655   tempD &= 0x3F;
    656 
    657   pi_count += tempD;
    658 
    659   LEAVEFN();
    660   return pi_count;
    661 }
    662 
    663 // set_wctl:
    664 //
    665 // This function will program the WCTL delays based on an absolute number of PIs.
    666 // (currently doesn't comprehend rank)
    667 void set_wctl(
    668     uint8_t channel,
    669     uint8_t rank,
    670     uint32_t pi_count)
    671 {
    672   uint32_t reg;
    673   uint32_t msk;
    674   uint32_t tempD;
    675 
    676   ENTERFN();
    677 
    678   // RDPTR (1/2 MCLK, 64 PIs)
    679   // CCPTRREG[31:28] (0x0-0xF)
    680   // CCPTRREG[27:24] (0x0-0xF)
    681   reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
    682   msk = (BIT31 | BIT30 | BIT29 | BIT28) | (BIT27 | BIT26 | BIT25 | BIT24);
    683   tempD = ((pi_count / HALF_CLK) << 28) | ((pi_count / HALF_CLK) << 24);
    684   isbM32m(DDRPHY, reg, tempD, msk);
    685 
    686   // Adjust PI_COUNT
    687   pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;
    688 
    689   // PI (1/64 MCLK, 1 PIs)
    690   // ECCB1DLLPICODER?[29:24] (0x00-0x3F)
    691   // ECCB1DLLPICODER?[29:24] (0x00-0x3F)
    692   reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET);
    693   msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24);
    694   tempD = (pi_count << 24);
    695   isbM32m(DDRPHY, reg, tempD, msk);
    696   reg = ECCB1DLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);
    697   isbM32m(DDRPHY, reg, tempD, msk);
    698   reg = ECCB1DLLPICODER2 + (channel * DDRIOCCC_CH_OFFSET);
    699   isbM32m(DDRPHY, reg, tempD, msk);
    700   reg = ECCB1DLLPICODER3 + (channel * DDRIOCCC_CH_OFFSET);
    701   isbM32m(DDRPHY, reg, tempD, msk);
    702 
    703   // DEADBAND
    704   // CCCFGREG1[13:12] (+1 select)
    705   // CCCFGREG1[05:04] (enable)
    706   reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET);
    707   msk = 0x00;
    708   tempD = 0x00;
    709   // enable
    710   msk |= (BIT5 | BIT4); // only ??? matters
    711   if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
    712   {
    713     tempD |= msk;
    714   }
    715   // select
    716   msk |= (BIT13 | BIT12); // only ??? matters
    717   if (pi_count < EARLY_DB)
    718   {
    719     tempD |= msk;
    720   }
    721   isbM32m(DDRPHY, reg, tempD, msk);
    722 
    723   // error check
    724   if (pi_count > 0x3F)
    725   {
    726     post_code(0xEE, 0xE6);
    727   }
    728 
    729   LEAVEFN();
    730   return;
    731 }
    732 
    733 // get_wctl:
    734 //
    735 // This function will return the amount of WCTL delay on the given channel, rank as an absolute PI count.
    736 // (currently doesn't comprehend rank)
    737 uint32_t get_wctl(
    738     uint8_t channel,
    739     uint8_t rank)
    740 {
    741   uint32_t reg;
    742   uint32_t tempD;
    743   uint32_t pi_count;
    744 
    745   ENTERFN();
    746 
    747   // RDPTR (1/2 MCLK, 64 PIs)
    748   // CCPTRREG[31:28] (0x0-0xF)
    749   // CCPTRREG[27:24] (0x0-0xF)
    750   reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);
    751   tempD = isbR32m(DDRPHY, reg);
    752   tempD >>= 24;
    753   tempD &= 0xF;
    754 
    755   // Adjust PI_COUNT
    756   pi_count = tempD * HALF_CLK;
    757 
    758   // PI (1/64 MCLK, 1 PIs)
    759   // ECCB1DLLPICODER?[29:24] (0x00-0x3F)
    760   // ECCB1DLLPICODER?[29:24] (0x00-0x3F)
    761   reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET);
    762   tempD = isbR32m(DDRPHY, reg);
    763   tempD >>= 24;
    764   tempD &= 0x3F;
    765 
    766   // Adjust PI_COUNT
    767   pi_count += tempD;
    768 
    769   LEAVEFN();
    770   return pi_count;
    771 }
    772 
    773 // set_vref:
    774 //
    775 // This function will program the internal Vref setting in a given byte lane in a given channel.
    776 void set_vref(
    777     uint8_t channel,
    778     uint8_t byte_lane,
    779     uint32_t setting)
    780 {
    781   uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL);
    782 
    783   ENTERFN();
    784   DPF(D_TRN, "Vref ch%d ln%d : val=%03X\n", channel, byte_lane, setting);
    785 
    786   isbM32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)),
    787       (vref_codes[setting] << 2), (BIT7 | BIT6 | BIT5 | BIT4 | BIT3 | BIT2));
    788   //isbM32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)), (setting<<2), (BIT7|BIT6|BIT5|BIT4|BIT3|BIT2));
    789   // need to wait ~300ns for Vref to settle (check that this is necessary)
    790   delay_n(300);
    791   // ??? may need to clear pointers ???
    792   LEAVEFN();
    793   return;
    794 }
    795 
    796 // get_vref:
    797 //
    798 // This function will return the internal Vref setting for the given channel, byte_lane;
    799 uint32_t get_vref(
    800     uint8_t channel,
    801     uint8_t byte_lane)
    802 {
    803   uint8_t j;
    804   uint32_t ret_val = sizeof(vref_codes) / 2;
    805   uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL);
    806 
    807   uint32_t tempD;
    808 
    809   ENTERFN();
    810   tempD = isbR32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)));
    811   tempD >>= 2;
    812   tempD &= 0x3F;
    813   for (j = 0; j < sizeof(vref_codes); j++)
    814   {
    815     if (vref_codes[j] == tempD)
    816     {
    817       ret_val = j;
    818       break;
    819     }
    820   }
    821   LEAVEFN();
    822   return ret_val;
    823 }
    824 
    825 // clear_pointers:
    826 //
    827 // This function will be used to clear the pointers in a given byte lane in a given channel.
    828 void clear_pointers(
    829     void)
    830 {
    831   uint8_t channel_i;
    832   uint8_t bl_i;
    833 
    834   ENTERFN();
    835   for (channel_i = 0; channel_i < NUM_CHANNELS; channel_i++)
    836   {
    837     for (bl_i = 0; bl_i < NUM_BYTE_LANES; bl_i++)
    838     {
    839       isbM32m(DDRPHY, (B01PTRCTL1 + (channel_i * DDRIODQ_CH_OFFSET) + ((bl_i >> 1) * DDRIODQ_BL_OFFSET)), ~(BIT8),
    840           (BIT8));
    841       //delay_m(1); // DEBUG
    842       isbM32m(DDRPHY, (B01PTRCTL1 + (channel_i * DDRIODQ_CH_OFFSET) + ((bl_i >> 1) * DDRIODQ_BL_OFFSET)), (BIT8),
    843           (BIT8));
    844     }
    845   }
    846   LEAVEFN();
    847   return;
    848 }
    849 
    850 // void enable_cache:
    851 void enable_cache(
    852     void)
    853 {
    854   // Cache control not used in Quark MRC
    855   return;
    856 }
    857 
    858 // void disable_cache:
    859 void disable_cache(
    860     void)
    861 {
    862   // Cache control not used in Quark MRC
    863   return;
    864 }
    865 
    866 // Send DRAM command, data should be formated
    867 // using DCMD_Xxxx macro or emrsXCommand structure.
    868 static void dram_init_command(
    869     uint32_t data)
    870 {
    871   Wr32(DCMD, 0, data);
    872 }
    873 
    874 // find_rising_edge:
    875 //
    876 // This function will find the rising edge transition on RCVN or WDQS.
    877 void find_rising_edge(
    878     MRCParams_t *mrc_params,
    879     uint32_t delay[],
    880     uint8_t channel,
    881     uint8_t rank,
    882     bool rcvn)
    883 {
    884 
    885 #define SAMPLE_CNT 3   // number of sample points
    886 #define SAMPLE_DLY 26  // number of PIs to increment per sample
    887 #define FORWARD true   // indicates to increase delays when looking for edge
    888 #define BACKWARD false // indicates to decrease delays when looking for edge
    889 
    890   bool all_edges_found; // determines stop condition
    891   bool direction[NUM_BYTE_LANES]; // direction indicator
    892   uint8_t sample_i; // sample counter
    893   uint8_t bl_i; // byte lane counter
    894   uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 2 : 1; // byte lane divisor
    895   uint32_t sample_result[SAMPLE_CNT]; // results of "sample_dqs()"
    896   uint32_t tempD; // temporary DWORD
    897   uint32_t transition_pattern;
    898 
    899   ENTERFN();
    900 
    901   // select hte and request initial configuration
    902   select_hte(mrc_params);
    903   first_run = 1;
    904 
    905   // Take 3 sample points (T1,T2,T3) to obtain a transition pattern.
    906   for (sample_i = 0; sample_i < SAMPLE_CNT; sample_i++)
    907   {
    908     // program the desired delays for sample
    909     for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)
    910     {
    911       // increase sample delay by 26 PI (0.2 CLK)
    912       if (rcvn)
    913       {
    914         set_rcvn(channel, rank, bl_i, delay[bl_i] + (sample_i * SAMPLE_DLY));
    915       }
    916       else
    917       {
    918         set_wdqs(channel, rank, bl_i, delay[bl_i] + (sample_i * SAMPLE_DLY));
    919       }
    920     } // bl_i loop
    921     // take samples (Tsample_i)
    922     sample_result[sample_i] = sample_dqs(mrc_params, channel, rank, rcvn);
    923 
    924     DPF(D_TRN, "Find rising edge %s ch%d rnk%d: #%d dly=%d dqs=%02X\n",
    925         (rcvn ? "RCVN" : "WDQS"), channel, rank,
    926         sample_i, sample_i * SAMPLE_DLY, sample_result[sample_i]);
    927 
    928   } // sample_i loop
    929 
    930   // This pattern will help determine where we landed and ultimately how to place RCVEN/WDQS.
    931   for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)
    932   {
    933     // build "transition_pattern" (MSB is 1st sample)
    934     transition_pattern = 0x00;
    935     for (sample_i = 0; sample_i < SAMPLE_CNT; sample_i++)
    936     {
    937       transition_pattern |= ((sample_result[sample_i] & (1 << bl_i)) >> bl_i) << (SAMPLE_CNT - 1 - sample_i);
    938     } // sample_i loop
    939 
    940     DPF(D_TRN, "=== transition pattern %d\n", transition_pattern);
    941 
    942     // set up to look for rising edge based on "transition_pattern"
    943     switch (transition_pattern)
    944     {
    945     case 0x00: // sampled 0->0->0
    946       // move forward from T3 looking for 0->1
    947       delay[bl_i] += 2 * SAMPLE_DLY;
    948       direction[bl_i] = FORWARD;
    949       break;
    950     case 0x01: // sampled 0->0->1
    951     case 0x05: // sampled 1->0->1 (bad duty cycle) *HSD#237503*
    952       // move forward from T2 looking for 0->1
    953       delay[bl_i] += 1 * SAMPLE_DLY;
    954       direction[bl_i] = FORWARD;
    955       break;
    956 // HSD#237503
    957 //      case 0x02: // sampled 0->1->0 (bad duty cycle)
    958 //        training_message(channel, rank, bl_i);
    959 //        post_code(0xEE, 0xE8);
    960 //        break;
    961     case 0x02: // sampled 0->1->0 (bad duty cycle) *HSD#237503*
    962     case 0x03: // sampled 0->1->1
    963       // move forward from T1 looking for 0->1
    964       delay[bl_i] += 0 * SAMPLE_DLY;
    965       direction[bl_i] = FORWARD;
    966       break;
    967     case 0x04: // sampled 1->0->0 (assumes BL8, HSD#234975)
    968       // move forward from T3 looking for 0->1
    969       delay[bl_i] += 2 * SAMPLE_DLY;
    970       direction[bl_i] = FORWARD;
    971       break;
    972 // HSD#237503
    973 //      case 0x05: // sampled 1->0->1 (bad duty cycle)
    974 //        training_message(channel, rank, bl_i);
    975 //        post_code(0xEE, 0xE9);
    976 //        break;
    977     case 0x06: // sampled 1->1->0
    978     case 0x07: // sampled 1->1->1
    979       // move backward from T1 looking for 1->0
    980       delay[bl_i] += 0 * SAMPLE_DLY;
    981       direction[bl_i] = BACKWARD;
    982       break;
    983     default:
    984       post_code(0xEE, 0xEE);
    985       break;
    986     } // transition_pattern switch
    987     // program delays
    988     if (rcvn)
    989     {
    990       set_rcvn(channel, rank, bl_i, delay[bl_i]);
    991     }
    992     else
    993     {
    994       set_wdqs(channel, rank, bl_i, delay[bl_i]);
    995     }
    996   } // bl_i loop
    997 
    998   // Based on the observed transition pattern on the byte lane,
    999   // begin looking for a rising edge with single PI granularity.
   1000   do
   1001   {
   1002     all_edges_found = true; // assume all byte lanes passed
   1003     tempD = sample_dqs(mrc_params, channel, rank, rcvn); // take a sample
   1004     // check all each byte lane for proper edge
   1005     for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)
   1006     {
   1007       if (tempD & (1 << bl_i))
   1008       {
   1009         // sampled "1"
   1010         if (direction[bl_i] == BACKWARD)
   1011         {
   1012           // keep looking for edge on this byte lane
   1013           all_edges_found = false;
   1014           delay[bl_i] -= 1;
   1015           if (rcvn)
   1016           {
   1017             set_rcvn(channel, rank, bl_i, delay[bl_i]);
   1018           }
   1019           else
   1020           {
   1021             set_wdqs(channel, rank, bl_i, delay[bl_i]);
   1022           }
   1023         }
   1024       }
   1025       else
   1026       {
   1027         // sampled "0"
   1028         if (direction[bl_i] == FORWARD)
   1029         {
   1030           // keep looking for edge on this byte lane
   1031           all_edges_found = false;
   1032           delay[bl_i] += 1;
   1033           if (rcvn)
   1034           {
   1035             set_rcvn(channel, rank, bl_i, delay[bl_i]);
   1036           }
   1037           else
   1038           {
   1039             set_wdqs(channel, rank, bl_i, delay[bl_i]);
   1040           }
   1041         }
   1042       }
   1043     } // bl_i loop
   1044   } while (!all_edges_found);
   1045 
   1046   // restore DDR idle state
   1047   dram_init_command(DCMD_PREA(rank));
   1048 
   1049   DPF(D_TRN, "Delay %03X %03X %03X %03X\n",
   1050       delay[0], delay[1], delay[2], delay[3]);
   1051 
   1052   LEAVEFN();
   1053   return;
   1054 }
   1055 
   1056 // sample_dqs:
   1057 //
   1058 // This function will sample the DQTRAINSTS registers in the given channel/rank SAMPLE_SIZE times looking for a valid '0' or '1'.
   1059 // It will return an encoded DWORD in which each bit corresponds to the sampled value on the byte lane.
   1060 uint32_t sample_dqs(
   1061     MRCParams_t *mrc_params,
   1062     uint8_t channel,
   1063     uint8_t rank,
   1064     bool rcvn)
   1065 {
   1066   uint8_t j; // just a counter
   1067   uint8_t bl_i; // which BL in the module (always 2 per module)
   1068   uint8_t bl_grp; // which BL module
   1069   uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 2 : 1; // byte lane divisor
   1070   uint32_t msk[2]; // BLx in module
   1071   uint32_t sampled_val[SAMPLE_SIZE]; // DQTRAINSTS register contents for each sample
   1072   uint32_t num_0s; // tracks the number of '0' samples
   1073   uint32_t num_1s; // tracks the number of '1' samples
   1074   uint32_t ret_val = 0x00; // assume all '0' samples
   1075   uint32_t address = get_addr(mrc_params, channel, rank);
   1076 
   1077   // initialise "msk[]"
   1078   msk[0] = (rcvn) ? (BIT1) : (BIT9); // BL0
   1079   msk[1] = (rcvn) ? (BIT0) : (BIT8); // BL1
   1080 
   1081 
   1082   // cycle through each byte lane group
   1083   for (bl_grp = 0; bl_grp < (NUM_BYTE_LANES / bl_divisor) / 2; bl_grp++)
   1084   {
   1085     // take SAMPLE_SIZE samples
   1086     for (j = 0; j < SAMPLE_SIZE; j++)
   1087     {
   1088       HteMemOp(address, first_run, rcvn?0:1);
   1089       first_run = 0;
   1090 
   1091       // record the contents of the proper DQTRAINSTS register
   1092       sampled_val[j] = isbR32m(DDRPHY, (DQTRAINSTS + (bl_grp * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET)));
   1093     }
   1094     // look for a majority value ( (SAMPLE_SIZE/2)+1 ) on the byte lane
   1095     // and set that value in the corresponding "ret_val" bit
   1096     for (bl_i = 0; bl_i < 2; bl_i++)
   1097     {
   1098       num_0s = 0x00; // reset '0' tracker for byte lane
   1099       num_1s = 0x00; // reset '1' tracker for byte lane
   1100       for (j = 0; j < SAMPLE_SIZE; j++)
   1101       {
   1102         if (sampled_val[j] & msk[bl_i])
   1103         {
   1104           num_1s++;
   1105         }
   1106         else
   1107         {
   1108           num_0s++;
   1109         }
   1110       }
   1111       if (num_1s > num_0s)
   1112       {
   1113         ret_val |= (1 << (bl_i + (bl_grp * 2)));
   1114       }
   1115     }
   1116   }
   1117 
   1118   // "ret_val.0" contains the status of BL0
   1119   // "ret_val.1" contains the status of BL1
   1120   // "ret_val.2" contains the status of BL2
   1121   // etc.
   1122   return ret_val;
   1123 }
   1124 
   1125 // get_addr:
   1126 //
   1127 // This function will return a 32 bit address in the desired channel and rank.
   1128 uint32_t get_addr(
   1129     MRCParams_t *mrc_params,
   1130     uint8_t channel,
   1131     uint8_t rank)
   1132 {
   1133   uint32_t offset = 0x02000000; // 32MB
   1134 
   1135   // Begin product specific code
   1136   if (channel > 0)
   1137   {
   1138     DPF(D_ERROR, "ILLEGAL CHANNEL\n");
   1139     DEAD_LOOP();
   1140   }
   1141 
   1142   if (rank > 1)
   1143   {
   1144     DPF(D_ERROR, "ILLEGAL RANK\n");
   1145     DEAD_LOOP();
   1146   }
   1147 
   1148   // use 256MB lowest density as per DRP == 0x0003
   1149   offset += rank * (256 * 1024 * 1024);
   1150 
   1151   return offset;
   1152 }
   1153 
   1154 // byte_lane_mask:
   1155 //
   1156 // This function will return a 32 bit mask that will be used to check for byte lane failures.
   1157 uint32_t byte_lane_mask(
   1158     MRCParams_t *mrc_params)
   1159 {
   1160   uint32_t j;
   1161   uint32_t ret_val = 0x00;
   1162 
   1163   // set "ret_val" based on NUM_BYTE_LANES such that you will check only BL0 in "result"
   1164   // (each bit in "result" represents a byte lane)
   1165   for (j = 0; j < MAX_BYTE_LANES; j += NUM_BYTE_LANES)
   1166   {
   1167     ret_val |= (1 << ((j / NUM_BYTE_LANES) * NUM_BYTE_LANES));
   1168   }
   1169 
   1170   // HSD#235037
   1171   // need to adjust the mask for 16-bit mode
   1172   if (mrc_params->channel_width == x16)
   1173   {
   1174     ret_val |= (ret_val << 2);
   1175   }
   1176 
   1177   return ret_val;
   1178 }
   1179 
   1180 
   1181 // read_tsc:
   1182 //
   1183 // This function will do some assembly to return TSC register contents as a uint64_t.
   1184 uint64_t read_tsc(
   1185     void)
   1186 {
   1187   volatile uint64_t tsc;  // EDX:EAX
   1188 
   1189 #if defined (SIM) || defined (GCC)
   1190   volatile uint32_t tscH; // EDX
   1191   volatile uint32_t tscL;// EAX
   1192 
   1193   asm("rdtsc":"=a"(tscL),"=d"(tscH));
   1194   tsc = tscH;
   1195   tsc = (tsc<<32)|tscL;
   1196 #else
   1197   tsc = __rdtsc();
   1198 #endif
   1199 
   1200   return tsc;
   1201 }
   1202 
   1203 // get_tsc_freq:
   1204 //
   1205 // This function returns the TSC frequency in MHz
   1206 uint32_t get_tsc_freq(
   1207     void)
   1208 {
   1209   static uint32_t freq[] =
   1210   { 533, 400, 200, 100 };
   1211   uint32_t fuse;
   1212 #if 0
   1213   fuse = (isbR32m(FUSE, 0) >> 12) & (BIT1|BIT0);
   1214 #else
   1215   // todo!!! Fixed 533MHz for emulation or debugging
   1216   fuse = 0;
   1217 #endif
   1218   return freq[fuse];
   1219 }
   1220 
   1221 #ifndef SIM
   1222 // delay_n:
   1223 //
   1224 // This is a simple delay function.
   1225 // It takes "nanoseconds" as a parameter.
   1226 void delay_n(
   1227     uint32_t nanoseconds)
   1228 {
   1229   // 1000 MHz clock has 1ns period --> no conversion required
   1230   uint64_t final_tsc = read_tsc();
   1231   final_tsc += ((get_tsc_freq() * (nanoseconds)) / 1000);
   1232 
   1233   while (read_tsc() < final_tsc)
   1234     ;
   1235   return;
   1236 }
   1237 #endif
   1238 
   1239 // delay_u:
   1240 //
   1241 // This is a simple delay function.
   1242 // It takes "microseconds as a parameter.
   1243 void delay_u(
   1244     uint32_t microseconds)
   1245 {
   1246   // 64 bit math is not an option, just use loops
   1247   while (microseconds--)
   1248   {
   1249     delay_n(1000);
   1250   }
   1251   return;
   1252 }
   1253 
   1254 // delay_m:
   1255 //
   1256 // This is a simple delay function.
   1257 // It takes "milliseconds" as a parameter.
   1258 void delay_m(
   1259     uint32_t milliseconds)
   1260 {
   1261   // 64 bit math is not an option, just use loops
   1262   while (milliseconds--)
   1263   {
   1264     delay_u(1000);
   1265   }
   1266   return;
   1267 }
   1268 
   1269 // delay_s:
   1270 //
   1271 // This is a simple delay function.
   1272 // It takes "seconds" as a parameter.
   1273 void delay_s(
   1274     uint32_t seconds)
   1275 {
   1276   // 64 bit math is not an option, just use loops
   1277   while (seconds--)
   1278   {
   1279     delay_m(1000);
   1280   }
   1281   return;
   1282 }
   1283 
   1284 // post_code:
   1285 //
   1286 // This function will output the POST CODE to the four 7-Segment LED displays.
   1287 void post_code(
   1288     uint8_t major,
   1289     uint8_t minor)
   1290 {
   1291 #ifdef EMU
   1292   // Update global variable for execution tracking in debug env
   1293   PostCode = ((major << 8) | minor);
   1294 #endif
   1295 
   1296   // send message to UART
   1297   DPF(D_INFO, "POST: 0x%01X%02X\n", major, minor);
   1298 
   1299   // error check:
   1300   if (major == 0xEE)
   1301   {
   1302     // todo!!! Consider updating error status and exit MRC
   1303 #ifdef SIM
   1304     // enable Ctrl-C handling
   1305     for(;;) delay_n(100);
   1306 #else
   1307     DEAD_LOOP();
   1308 #endif
   1309   }
   1310 }
   1311 
   1312 void training_message(
   1313     uint8_t channel,
   1314     uint8_t rank,
   1315     uint8_t byte_lane)
   1316 {
   1317   // send message to UART
   1318   DPF(D_INFO, "CH%01X RK%01X BL%01X\n", channel, rank, byte_lane);
   1319   return;
   1320 }
   1321 
   1322 void print_timings(
   1323     MRCParams_t *mrc_params)
   1324 {
   1325   uint8_t algo_i;
   1326   uint8_t channel_i;
   1327   uint8_t rank_i;
   1328   uint8_t bl_i;
   1329   uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 2 : 1;
   1330 
   1331   DPF(D_INFO, "\n---------------------------");
   1332   DPF(D_INFO, "\nALGO[CH:RK] BL0 BL1 BL2 BL3");
   1333   DPF(D_INFO, "\n===========================");
   1334   for (algo_i = 0; algo_i < eMAX_ALGOS; algo_i++)
   1335   {
   1336     for (channel_i = 0; channel_i < NUM_CHANNELS; channel_i++)
   1337     {
   1338       if (mrc_params->channel_enables & (1 << channel_i))
   1339       {
   1340         for (rank_i = 0; rank_i < NUM_RANKS; rank_i++)
   1341         {
   1342           if (mrc_params->rank_enables & (1 << rank_i))
   1343           {
   1344             switch (algo_i)
   1345             {
   1346             case eRCVN:
   1347               DPF(D_INFO, "\nRCVN[%02d:%02d]", channel_i, rank_i);
   1348               break;
   1349             case eWDQS:
   1350               DPF(D_INFO, "\nWDQS[%02d:%02d]", channel_i, rank_i);
   1351               break;
   1352             case eWDQx:
   1353               DPF(D_INFO, "\nWDQx[%02d:%02d]", channel_i, rank_i);
   1354               break;
   1355             case eRDQS:
   1356               DPF(D_INFO, "\nRDQS[%02d:%02d]", channel_i, rank_i);
   1357               break;
   1358             case eVREF:
   1359               DPF(D_INFO, "\nVREF[%02d:%02d]", channel_i, rank_i);
   1360               break;
   1361             case eWCMD:
   1362               DPF(D_INFO, "\nWCMD[%02d:%02d]", channel_i, rank_i);
   1363               break;
   1364             case eWCTL:
   1365               DPF(D_INFO, "\nWCTL[%02d:%02d]", channel_i, rank_i);
   1366               break;
   1367             case eWCLK:
   1368               DPF(D_INFO, "\nWCLK[%02d:%02d]", channel_i, rank_i);
   1369               break;
   1370             default:
   1371               break;
   1372             } // algo_i switch
   1373             for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)
   1374             {
   1375               switch (algo_i)
   1376               {
   1377               case eRCVN:
   1378                 DPF(D_INFO, " %03d", get_rcvn(channel_i, rank_i, bl_i));
   1379                 break;
   1380               case eWDQS:
   1381                 DPF(D_INFO, " %03d", get_wdqs(channel_i, rank_i, bl_i));
   1382                 break;
   1383               case eWDQx:
   1384                 DPF(D_INFO, " %03d", get_wdq(channel_i, rank_i, bl_i));
   1385                 break;
   1386               case eRDQS:
   1387                 DPF(D_INFO, " %03d", get_rdqs(channel_i, rank_i, bl_i));
   1388                 break;
   1389               case eVREF:
   1390                 DPF(D_INFO, " %03d", get_vref(channel_i, bl_i));
   1391                 break;
   1392               case eWCMD:
   1393                 DPF(D_INFO, " %03d", get_wcmd(channel_i));
   1394                 break;
   1395               case eWCTL:
   1396                 DPF(D_INFO, " %03d", get_wctl(channel_i, rank_i));
   1397                 break;
   1398               case eWCLK:
   1399                 DPF(D_INFO, " %03d", get_wclk(channel_i, rank_i));
   1400                 break;
   1401               default:
   1402                 break;
   1403               } // algo_i switch
   1404             } // bl_i loop
   1405           } // if rank_i enabled
   1406         } // rank_i loop
   1407       } // if channel_i enabled
   1408     } // channel_i loop
   1409   } // algo_i loop
   1410   DPF(D_INFO, "\n---------------------------");
   1411   DPF(D_INFO, "\n");
   1412   return;
   1413 }
   1414 
   1415 // 32 bit LFSR with characteristic polynomial:  X^32 + X^22 +X^2 + X^1
   1416 // The function takes pointer to previous 32 bit value and modifies it to next value.
   1417 void lfsr32(
   1418     uint32_t *lfsr_ptr)
   1419 {
   1420   uint32_t bit;
   1421   uint32_t lfsr;
   1422   uint32_t i;
   1423 
   1424   lfsr = *lfsr_ptr;
   1425 
   1426   for (i = 0; i < 32; i++)
   1427   {
   1428     bit = 1 ^ (lfsr & BIT0);
   1429     bit = bit ^ ((lfsr & BIT1) >> 1);
   1430     bit = bit ^ ((lfsr & BIT2) >> 2);
   1431     bit = bit ^ ((lfsr & BIT22) >> 22);
   1432 
   1433     lfsr = ((lfsr >> 1) | (bit << 31));
   1434   }
   1435 
   1436   *lfsr_ptr = lfsr;
   1437   return;
   1438 }
   1439 
   1440 // The purpose of this function is to ensure the SEC comes out of reset
   1441 // and IA initiates the SEC enabling Memory Scrambling.
   1442 void enable_scrambling(
   1443     MRCParams_t *mrc_params)
   1444 {
   1445   uint32_t lfsr = 0;
   1446   uint8_t i;
   1447 
   1448   if (mrc_params->scrambling_enables == 0)
   1449     return;
   1450 
   1451   ENTERFN();
   1452 
   1453   // 32 bit seed is always stored in BIOS NVM.
   1454   lfsr = mrc_params->timings.scrambler_seed;
   1455 
   1456   if (mrc_params->boot_mode == bmCold)
   1457   {
   1458     // factory value is 0 and in first boot, a clock based seed is loaded.
   1459     if (lfsr == 0)
   1460     {
   1461       lfsr = read_tsc() & 0x0FFFFFFF; // get seed from system clock and make sure it is not all 1's
   1462     }
   1463     // need to replace scrambler
   1464     // get next 32bit LFSR 16 times which is the last part of the previous scrambler vector.
   1465     else
   1466     {
   1467       for (i = 0; i < 16; i++)
   1468       {
   1469         lfsr32(&lfsr);
   1470       }
   1471     }
   1472     mrc_params->timings.scrambler_seed = lfsr;  // save new seed.
   1473   } // if (cold_boot)
   1474 
   1475   // In warm boot or S3 exit, we have the previous seed.
   1476   // In cold boot, we have the last 32bit LFSR which is the new seed.
   1477   lfsr32(&lfsr); // shift to next value
   1478   isbW32m(MCU, SCRMSEED, (lfsr & 0x0003FFFF));
   1479   for (i = 0; i < 2; i++)
   1480   {
   1481     isbW32m(MCU, SCRMLO + i, (lfsr & 0xAAAAAAAA));
   1482   }
   1483 
   1484   LEAVEFN();
   1485   return;
   1486 }
   1487 
   1488 // This function will store relevant timing data
   1489 // This data will be used on subsequent boots to speed up boot times
   1490 // and is required for Suspend To RAM capabilities.
   1491 void store_timings(
   1492     MRCParams_t *mrc_params)
   1493 {
   1494   uint8_t ch, rk, bl;
   1495   MrcTimings_t *mt = &mrc_params->timings;
   1496 
   1497   for (ch = 0; ch < NUM_CHANNELS; ch++)
   1498   {
   1499     for (rk = 0; rk < NUM_RANKS; rk++)
   1500     {
   1501       for (bl = 0; bl < NUM_BYTE_LANES; bl++)
   1502       {
   1503         mt->rcvn[ch][rk][bl] = get_rcvn(ch, rk, bl); // RCVN
   1504         mt->rdqs[ch][rk][bl] = get_rdqs(ch, rk, bl); // RDQS
   1505         mt->wdqs[ch][rk][bl] = get_wdqs(ch, rk, bl); // WDQS
   1506         mt->wdq[ch][rk][bl] = get_wdq(ch, rk, bl);  // WDQ
   1507         if (rk == 0)
   1508         {
   1509           mt->vref[ch][bl] = get_vref(ch, bl);  // VREF (RANK0 only)
   1510         }
   1511       }
   1512       mt->wctl[ch][rk] = get_wctl(ch, rk); // WCTL
   1513     }
   1514     mt->wcmd[ch] = get_wcmd(ch); // WCMD
   1515   }
   1516 
   1517   // need to save for a case of changing frequency after warm reset
   1518   mt->ddr_speed = mrc_params->ddr_speed;
   1519 
   1520   return;
   1521 }
   1522 
   1523 // This function will retrieve relevant timing data
   1524 // This data will be used on subsequent boots to speed up boot times
   1525 // and is required for Suspend To RAM capabilities.
   1526 void restore_timings(
   1527     MRCParams_t *mrc_params)
   1528 {
   1529   uint8_t ch, rk, bl;
   1530   const MrcTimings_t *mt = &mrc_params->timings;
   1531 
   1532   for (ch = 0; ch < NUM_CHANNELS; ch++)
   1533   {
   1534     for (rk = 0; rk < NUM_RANKS; rk++)
   1535     {
   1536       for (bl = 0; bl < NUM_BYTE_LANES; bl++)
   1537       {
   1538         set_rcvn(ch, rk, bl, mt->rcvn[ch][rk][bl]); // RCVN
   1539         set_rdqs(ch, rk, bl, mt->rdqs[ch][rk][bl]); // RDQS
   1540         set_wdqs(ch, rk, bl, mt->wdqs[ch][rk][bl]); // WDQS
   1541         set_wdq(ch, rk, bl, mt->wdq[ch][rk][bl]);  // WDQ
   1542         if (rk == 0)
   1543         {
   1544           set_vref(ch, bl, mt->vref[ch][bl]); // VREF (RANK0 only)
   1545         }
   1546       }
   1547       set_wctl(ch, rk, mt->wctl[ch][rk]); // WCTL
   1548     }
   1549     set_wcmd(ch, mt->wcmd[ch]); // WCMD
   1550   }
   1551 
   1552   return;
   1553 }
   1554 
   1555 // Configure default settings normally set as part of read training
   1556 // Some defaults have to be set earlier as they may affect earlier
   1557 // training steps.
   1558 void default_timings(
   1559     MRCParams_t *mrc_params)
   1560 {
   1561   uint8_t ch, rk, bl;
   1562 
   1563   for (ch = 0; ch < NUM_CHANNELS; ch++)
   1564   {
   1565     for (rk = 0; rk < NUM_RANKS; rk++)
   1566     {
   1567       for (bl = 0; bl < NUM_BYTE_LANES; bl++)
   1568       {
   1569         set_rdqs(ch, rk, bl, 24); // RDQS
   1570         if (rk == 0)
   1571         {
   1572           set_vref(ch, bl, 32); // VREF (RANK0 only)
   1573         }
   1574       }
   1575     }
   1576   }
   1577 
   1578   return;
   1579 }
   1580 
   1581