summaryrefslogtreecommitdiffstats
path: root/src/northbridge/intel/nehalem/raminit.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/northbridge/intel/nehalem/raminit.c')
-rw-r--r--src/northbridge/intel/nehalem/raminit.c5019
1 files changed, 5019 insertions, 0 deletions
diff --git a/src/northbridge/intel/nehalem/raminit.c b/src/northbridge/intel/nehalem/raminit.c
new file mode 100644
index 0000000..19af3bb
--- /dev/null
+++ b/src/northbridge/intel/nehalem/raminit.c
@@ -0,0 +1,5019 @@
+/*
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2013 Vladimir Serbinenko.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
/* Please don't remove this. It's needed to do debugging
   and reverse engineering to support more Nehalem variants in the future. */
+#ifndef REAL
+#define REAL 1
+#endif
+
+#if REAL
+#include <console/console.h>
+#include <string.h>
+#include <arch/hlt.h>
+#include <arch/io.h>
+#include <cpu/x86/msr.h>
+#include <cbmem.h>
+#include <arch/cbfs.h>
+#include <cbfs.h>
+#include <ip_checksum.h>
+#include <pc80/mc146818rtc.h>
+#include <device/pci_def.h>
+#include <arch/cpu.h>
+#include <spd.h>
+#include "raminit.h"
+#include <timestamp.h>
+#include <cpu/x86/mtrr.h>
+#include <cpu/intel/speedstep.h>
+#include <cpu/intel/turbo.h>
+#endif
+
+#if !REAL
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef u32 device_t;
+#endif
+
+#include "nehalem.h"
+
+#include "southbridge/intel/ibexpeak/me.h"
+
+#if REAL
+#include <delay.h>
+#endif
+
+#define NORTHBRIDGE PCI_DEV(0, 0, 0)
+#define SOUTHBRIDGE PCI_DEV(0, 0x1f, 0)
+#define GMA PCI_DEV (0, 0x2, 0x0)
+#define HECIDEV PCI_DEV(0, 0x16, 0)
+#define HECIBAR 0x10
+
+#define FOR_ALL_RANKS \
+ for (channel = 0; channel < NUM_CHANNELS; channel++) \
+ for (slot = 0; slot < NUM_SLOTS; slot++) \
+ for (rank = 0; rank < NUM_RANKS; rank++)
+
+#define FOR_POPULATED_RANKS \
+ for (channel = 0; channel < NUM_CHANNELS; channel++) \
+ for (slot = 0; slot < NUM_SLOTS; slot++) \
+ for (rank = 0; rank < NUM_RANKS; rank++) \
+ if (info->populated_ranks[channel][slot][rank])
+
+#define FOR_POPULATED_RANKS_BACKWARDS \
+ for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) \
+ for (slot = 0; slot < NUM_SLOTS; slot++) \
+ for (rank = 0; rank < NUM_RANKS; rank++) \
+ if (info->populated_ranks[channel][slot][rank])
+
+/* [REG_178][CHANNEL][2 * SLOT + RANK][LANE] */
+typedef struct {
+ u8 smallest;
+ u8 largest;
+} timing_bounds_t[2][2][2][9];
+
+struct ram_training {
+ /* [TM][CHANNEL][SLOT][RANK][LANE] */
+ u16 lane_timings[4][2][2][2][9];
+ u16 reg_178;
+ u16 reg_10b;
+
+ u8 reg178_center;
+ u8 reg178_smallest;
+ u8 reg178_largest;
+ timing_bounds_t timing_bounds[2];
+ u16 timing_offset[2][2][2][9];
+ u16 timing2_offset[2][2][2][9];
+ u16 timing2_bounds[2][2][2][9][2];
+};
+
+#if !REAL
+#include "raminit_fake.c"
+#else
+
+#include <lib.h> /* Prototypes */
+
/*
 * Thin MMIO accessors for the MCH (memory controller hub) BAR window.
 * The MCHBAR* lvalue macros come from nehalem.h.
 */
static inline void write_mchbar32(u32 addr, u32 val)
{
	MCHBAR32(addr) = val;
}

static inline void write_mchbar16(u32 addr, u16 val)
{
	MCHBAR16(addr) = val;
}

static inline void write_mchbar8(u32 addr, u8 val)
{
	MCHBAR8(addr) = val;
}


static inline u32 read_mchbar32(u32 addr)
{
	return MCHBAR32(addr);
}

static inline u16 read_mchbar16(u32 addr)
{
	return MCHBAR16(addr);
}

static inline u8 read_mchbar8(u32 addr)
{
	return MCHBAR8(addr);
}

/* Identical to read_mchbar8; presumably kept separate so the !REAL
   trace build can log one and not the other -- TODO confirm. */
static inline u8 read_mchbar8_bypass(u32 addr)
{
	return MCHBAR8(addr);
}

/* Flush the cache line containing the given linear address. */
static void clflush(u32 addr)
{
	asm volatile ("clflush (%0)"::"r" (addr));
}
+
/* 128-bit value as two 64-bit halves (lo holds the low quadword). */
typedef struct _u128 {
	u64 lo;
	u64 hi;
} u128;

/* Perform one aligned 128-bit load (movdqa) from ADDR and store the
   result into out[0] (low qword) / out[1] (high qword).  xmm0 is saved
   to 'stor' and restored afterwards so the caller's SSE state is left
   untouched.  Using movdqa issues a single 16-byte bus read, which is
   what the training code needs. */
static void read128(u32 addr, u64 * out)
{
	u128 ret;
	u128 stor;
	asm volatile ("movdqu %%xmm0, %0\n"
		      "movdqa (%2), %%xmm0\n"
		      "movdqu %%xmm0, %1\n"
		      "movdqu %0, %%xmm0":"+m" (stor), "=m"(ret):"r"(addr));
	out[0] = ret.lo;
	out[1] = ret.hi;
}
+
+#endif
+
+/* OK */
/* Write VAL (BITS wide, with FLAG placed one bit above a fixed marker
   bit) to indirect register ADDR through the MCHBAR 0x1d0/0x1d4
   command/data mailbox.  Bit 23 of 0x1d0 is polled as the busy flag;
   0x40000000 is the write-command trigger. */
static void write_1d0(u32 val, u16 addr, int bits, int flag)
{
	write_mchbar32(0x1d0, 0);
	while (read_mchbar32(0x1d0) & 0x800000) ;
	write_mchbar32(0x1d4,
		       (val & ((1 << bits) - 1)) | (2 << bits) | (flag <<
								  bits));
	write_mchbar32(0x1d0, 0x40000000 | addr);
	while (read_mchbar32(0x1d0) & 0x800000) ;
}
+
+/* OK */
/* Read indirect register ADDR through the 0x1d0 mailbox and return its
   low SPLIT bits.  The effective command address is 0x361 - addr plus a
   2-bit field read back from MCHBAR 0x246 (presumably a calibration
   offset -- TODO confirm); 0x80000000 is the read-command trigger.  The
   two dummy writes to 0x33d appear to reset the mailbox state after a
   read -- TODO confirm. */
static u16 read_1d0(u16 addr, int split)
{
	u32 val;
	write_mchbar32(0x1d0, 0);
	while (read_mchbar32(0x1d0) & 0x800000) ;
	write_mchbar32(0x1d0,
		       0x80000000 | (((read_mchbar8(0x246) >> 2) & 3) +
				     0x361 - addr));
	while (read_mchbar32(0x1d0) & 0x800000) ;
	val = read_mchbar32(0x1d8);
	write_1d0(0, 0x33d, 0, 0);
	write_1d0(0, 0x33d, 0, 0);
	val &= ((1 << split) - 1);
	// printk (BIOS_ERR, "R1D0C [%x] => %x\n", addr, val);
	return val;
}
+
/* Store fence; compiled out in the !REAL simulation build. */
static void sfence(void)
{
#if REAL
	asm volatile ("sfence");
#endif
}
+
+static inline u16 get_lane_offset(int slot, int rank, int lane)
+{
+ return 0x124 * lane + ((lane & 4) ? 0x23e : 0) + 11 * rank + 22 * slot -
+ 0x452 * (lane == 8);
+}
+
+static inline u16 get_timing_register_addr(int lane, int tm, int slot, int rank)
+{
+ const u16 offs[] = { 0x1d, 0xa8, 0xe6, 0x5c };
+ return get_lane_offset(slot, rank, lane) + offs[(tm + 3) % 4];
+}
+
#if REAL
/* Debug hook: returns its argument unchanged.  Re-enabling the printk
   logs every gav()-wrapped register read with its source line, which is
   how this driver was reverse engineered. */
static u32 gav_real(int line, u32 in)
{
	// printk (BIOS_DEBUG, "%d: GAV: %x\n", line, in);
	return in;
}

#define gav(x) gav_real (__LINE__, (x))
#endif
/* All state carried through raminit: detected module geometry, chosen
   timings, training results and platform revision quirks. */
struct raminfo {
	u16 clock_speed_index;	/* clock_speed (REAL, not DDR) / 133.(3) - 3 */
	u16 fsb_frequency;	/* in 1.(1)/2 MHz. */
	u8 is_x16_module[2][2];	/* [CHANNEL][SLOT] */
	u8 density[2][2];	/* [CHANNEL][SLOT] */
	u8 populated_ranks[2][2][2];	/* [CHANNEL][SLOT][RANK] */
	int rank_start[2][2][2];	/* [CHANNEL][SLOT][RANK] */
	u8 cas_latency;		/* chosen CL, see calculate_timings() */
	/* Board-specific lane delays; origin not visible in this file. */
	u8 board_lane_delay[9];
	u8 use_ecc;
	u8 revision;		/* host bridge revision ID */
	u8 max_supported_clock_speed_index;
	u8 uma_enabled;
	u8 spd[2][2][151];	/* [CHANNEL][SLOT][BYTE] */
	u8 silicon_revision;
	/* Presumably bit (2 * slot + rank) per channel -- TODO confirm. */
	u8 populated_ranks_mask[2];
	u8 max_slots_used_in_channel;
	u8 mode4030[2];		/* set by compute_derived_timings() */
	u16 avg4044[2];		/* set by compute_derived_timings() */
	u16 max4048[2];		/* set by compute_derived_timings() */
	unsigned total_memory_mb;
	unsigned interleaved_part_mb;
	unsigned non_interleaved_part_mb;

	u32 heci_bar;
	u64 heci_uma_addr;
	unsigned memory_reserved_for_heci_mb;

	struct ram_training training;
	/* Last 0x500 mailbox command per channel (read vs. write);
	   used by write_500() to insert the required dummy write. */
	u32 last_500_command[2];

	u8 reg2ca9_bit0;
	u8 reg274265[2][3];	/* [CHANNEL][REGISTER] */
	u32 delay46_ps[2];
	u32 delay54_ps[2];
	u8 revision_flag_1;
	u8 some_delay_1_cycle_floor;
	u8 some_delay_2_halfcycles_ceil;
	u8 some_delay_3_ps_rounded;

	const struct ram_training *cached_training;
};
+
+static void
+write_500(struct raminfo *info, int channel, u32 val, u16 addr, int bits,
+ int flag);
+
+/* OK */
/* Read indirect per-channel register ADDR via the 0x500 mailbox
   (channel stride is 1 KiB).  Mirrors read_1d0() but with base address
   0xb88 and a per-channel 0x246 calibration field.  Records the command
   type in last_500_command so write_500() can insert its post-read
   dummy write.  Returns the low SPLIT bits of the result. */
static u16
read_500(struct raminfo *info, int channel, u16 addr, int split)
{
	u32 val;
	info->last_500_command[channel] = 0x80000000;
	write_mchbar32(0x500 + (channel << 10), 0);
	while (read_mchbar32(0x500 + (channel << 10)) & 0x800000) ;
	write_mchbar32(0x500 + (channel << 10),
		       0x80000000 |
		       (((read_mchbar8(0x246 + (channel << 10)) >> 2) &
			 3) + 0xb88 - addr));
	while (read_mchbar32(0x500 + (channel << 10)) & 0x800000) ;
	val = read_mchbar32(0x508 + (channel << 10));
	return val & ((1 << split) - 1);
}
+
+/* OK */
/* Write VAL (BITS wide, FLAG in the next bit up) to indirect
   per-channel register ADDR via the 0x500/0x504 mailbox.  If the
   previous mailbox command on this channel was a read, a dummy write to
   0xb61 is issued first -- presumably required to rearm the mailbox;
   TODO confirm. */
static void
write_500(struct raminfo *info, int channel, u32 val, u16 addr, int bits,
	  int flag)
{
	if (info->last_500_command[channel] == 0x80000000) {
		info->last_500_command[channel] = 0x40000000;
		write_500(info, channel, 0, 0xb61, 0, 0);
	}
	write_mchbar32(0x500 + (channel << 10), 0);
	while (read_mchbar32(0x500 + (channel << 10)) & 0x800000) ;
	write_mchbar32(0x504 + (channel << 10),
		       (val & ((1 << bits) - 1)) | (2 << bits) | (flag <<
								  bits));
	write_mchbar32(0x500 + (channel << 10), 0x40000000 | addr);
	while (read_mchbar32(0x500 + (channel << 10)) & 0x800000) ;
}
+
/* Quick data-lane test of the rank mapped at (rank << 28), touching the
   first 256 bytes.  Returns an 8-bit OK mask: bits 0-3 are the byte
   lanes of the even dword of each pair, bits 4-7 of the odd dword; a
   cleared bit means at least one mismatch on that lane.  The pattern is
   derived from the bit mask 0xf00fc33c (bit i selects all-ones vs.
   all-zeros for dword pair i). */
static int rw_test(int rank)
{
	const u32 mask = 0xf00fc33c;
	int ok = 0xff;
	int i;
	/* Zero 64 dwords, then read them back (values only logged). */
	for (i = 0; i < 64; i++)
		write32((rank << 28) | (i << 2), 0);
	sfence();
	for (i = 0; i < 64; i++)
		gav(read32((rank << 28) | (i << 2)));
	sfence();
	/* Write the pattern to each dword pair. */
	for (i = 0; i < 32; i++) {
		u32 pat = (((mask >> i) & 1) ? 0xffffffff : 0);
		write32((rank << 28) | (i << 3), pat);
		write32((rank << 28) | (i << 3) | 4, pat);
	}
	sfence();
	/* Verify byte-by-byte, clearing the lane bit on mismatch. */
	for (i = 0; i < 32; i++) {
		u8 pat = (((mask >> i) & 1) ? 0xff : 0);
		int j;
		u32 val;
		gav(val = read32((rank << 28) | (i << 3)));
		for (j = 0; j < 4; j++)
			if (((val >> (j * 8)) & 0xff) != pat)
				ok &= ~(1 << j);
		gav(val = read32((rank << 28) | (i << 3) | 4));
		for (j = 0; j < 4; j++)
			if (((val >> (j * 8)) & 0xff) != pat)
				ok &= ~(16 << j);
	}
	sfence();
	/* Leave the tested region zeroed again. */
	for (i = 0; i < 64; i++)
		write32((rank << 28) | (i << 2), 0);
	sfence();
	for (i = 0; i < 64; i++)
		gav(read32((rank << 28) | (i << 2)));

	return ok;
}
+
+static void
+program_timings(struct raminfo *info, u16 base, int channel, int slot, int rank)
+{
+ int lane;
+ for (lane = 0; lane < 8; lane++) {
+ write_500(info, channel,
+ base +
+ info->training.
+ lane_timings[2][channel][slot][rank][lane],
+ get_timing_register_addr(lane, 2, slot, rank), 9, 0);
+ write_500(info, channel,
+ base +
+ info->training.
+ lane_timings[3][channel][slot][rank][lane],
+ get_timing_register_addr(lane, 3, slot, rank), 9, 0);
+ }
+}
+
/* Program the per-channel 0x26c/0x268/0x2b9 register group from SI.
   The meaning of SI is not established here -- it looks like an
   interval count mirrored into two registers; TODO confirm. */
static void write_26c(int channel, u16 si)
{
	write_mchbar32(0x26c + (channel << 10), 0x03243f35);
	write_mchbar32(0x268 + (channel << 10), 0xcfc00000 | (si << 9));
	write_mchbar16(0x2b9 + (channel << 10), si);
}
+
/* Issue command ADDR through the per-channel 0x580 register: arm with
   0x8493c012 | addr, set bit 0 to start, poll bit 16 for completion,
   then clear bit 0 again.  Returns the completed register value. */
static u32 get_580(int channel, u8 addr)
{
	u32 ret;
	gav(read_1d0(0x142, 3));
	write_mchbar8(0x5ff, 0x0);	/* OK */
	write_mchbar8(0x5ff, 0x80);	/* OK */
	write_mchbar32(0x580 + (channel << 10), 0x8493c012 | addr);
	write_mchbar8(0x580 + (channel << 10),
		      read_mchbar8(0x580 + (channel << 10)) | 1);
	while (!((ret = read_mchbar32(0x580 + (channel << 10))) & 0x10000)) ;
	write_mchbar8(0x580 + (channel << 10),
		      read_mchbar8(0x580 + (channel << 10)) & ~1);
	return ret;
}
+
/* Nonzero would select a precomputed-configuration path; this port
   always computes settings from scratch.  NOTE(review): purpose
   inferred from the name -- confirm against the users of this flag. */
const int cached_config = 0;
+
+#define NUM_CHANNELS 2
+#define NUM_SLOTS 2
+#define NUM_RANKS 2
+#define RANK_SHIFT 28
+#define CHANNEL_SHIFT 10
+
+#include "raminit_tables.c"
+
/* Per-rank lane-timing training step (sub-step purposes inferred from
   register usage -- TODO confirm):
   1. Re-program timing registers 1 and 2 for all lanes.
   2. Enter training mode (0x103), program timing register 0, then for
      tm 1..2 replay the lane timings and fire a 0x580 command for this
      rank.
   3. Read the trained tm2 values back and derive tm3 = tm2 + 0x20. */
static void seq9(struct raminfo *info, int channel, int slot, int rank)
{
	int i, lane;

	for (i = 0; i < 2; i++)
		for (lane = 0; lane < 8; lane++)
			write_500(info, channel,
				  info->training.lane_timings[i +
							      1][channel][slot]
				  [rank][lane], get_timing_register_addr(lane,
									 i + 1,
									 slot,
									 rank),
				  9, 0);

	write_1d0(1, 0x103, 6, 1);
	for (lane = 0; lane < 8; lane++)
		write_500(info, channel,
			  info->training.
			  lane_timings[0][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 0, slot, rank), 9, 0);

	for (i = 0; i < 2; i++) {
		for (lane = 0; lane < 8; lane++)
			write_500(info, channel,
				  info->training.lane_timings[i +
							      1][channel][slot]
				  [rank][lane], get_timing_register_addr(lane,
									 i + 1,
									 slot,
									 rank),
				  9, 0);
		gav(get_580(channel, ((i + 1) << 2) | (rank << 5)));
	}

	gav(read_1d0(0x142, 3));	// = 0x10408118
	write_mchbar8(0x5ff, 0x0);	/* OK */
	write_mchbar8(0x5ff, 0x80);	/* OK */
	write_1d0(0x2, 0x142, 3, 1);
	for (lane = 0; lane < 8; lane++) {
		// printk (BIOS_ERR, "before: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]);
		info->training.lane_timings[2][channel][slot][rank][lane] =
		    read_500(info, channel,
			     get_timing_register_addr(lane, 2, slot, rank), 9);
		//printk (BIOS_ERR, "after: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]);
		info->training.lane_timings[3][channel][slot][rank][lane] =
		    info->training.lane_timings[2][channel][slot][rank][lane] +
		    0x20;
	}
}
+
+static int count_ranks_in_channel(struct raminfo *info, int channel)
+{
+ int slot, rank;
+ int res = 0;
+ for (slot = 0; slot < NUM_SLOTS; slot++)
+ for (rank = 0; rank < NUM_SLOTS; rank++)
+ res += info->populated_ranks[channel][slot][rank];
+ return res;
+}
+
/* Bring up one populated rank: run the seq9 training step, program its
   timings with a +0x80 bias, read/write-test it (skipped on S3 resume,
   when memory contents must be preserved), then program the final
   timings and clear the training-mode registers. */
static void
config_rank(struct raminfo *info, int s3resume, int channel, int slot, int rank)
{
	int add;

	write_1d0(0, 0x178, 7, 1);
	seq9(info, channel, slot, rank);
	program_timings(info, 0x80, channel, slot, rank);

	/* Channel 0 ranks are tested past channel 1's in the physical
	   address space, hence the offset -- TODO confirm the mapping. */
	if (channel == 0)
		add = count_ranks_in_channel(info, 1);
	else
		add = 0;
	if (!s3resume)
		gav(rw_test(rank + add));
	program_timings(info, 0x00, channel, slot, rank);
	if (!s3resume)
		gav(rw_test(rank + add));
	if (!s3resume)
		gav(rw_test(rank + add));
	write_1d0(0, 0x142, 3, 1);
	write_1d0(0, 0x103, 6, 1);

	gav(get_580(channel, 0xc | (rank << 5)));
	gav(read_1d0(0x142, 3));

	write_mchbar8(0x5ff, 0x0);	/* OK */
	write_mchbar8(0x5ff, 0x80);	/* OK */
}
+
+static void set_4cf(struct raminfo *info, int channel, u8 val)
+{
+ gav(read_500(info, channel, 0x4cf, 4)); // = 0xc2300cf9
+ write_500(info, channel, val, 0x4cf, 4, 1);
+ gav(read_500(info, channel, 0x659, 4)); // = 0x80300839
+ write_500(info, channel, val, 0x659, 4, 1);
+ gav(read_500(info, channel, 0x697, 4)); // = 0x80300839
+ write_500(info, channel, val, 0x697, 4, 1);
+}
+
/* Program the per-channel 0x32c/0x334/0x33c/0x344/0x34a register groups
   (four 0x44-byte sub-groups per channel).  When ZERO is set most
   values are cleared instead.  Before writing, the current settings are
   read out through the 0x138/0x13c window into vd8 (captured and
   logged; the values are otherwise unused).  Finally bit 0 of 0x130
   kicks the update and is polled until the hardware clears it. */
static void set_334(int zero)
{
	int j, k, channel;
	const u32 val3[] = { 0x2a2b2a2b, 0x26272627, 0x2e2f2e2f, 0x2a2b };
	u32 vd8[2][16];

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		for (j = 0; j < 4; j++) {
			u32 a = (j == 1) ? 0x29292929 : 0x31313131;
			/* Sub-group 3 values are only 16 bits wide. */
			u32 lmask = (j == 3) ? 0xffff : 0xffffffff;
			u16 c;
			if ((j == 0 || j == 3) && zero)
				c = 0;
			else if (j == 3)
				c = 0x5f;
			else
				c = 0x5f5f;

			for (k = 0; k < 2; k++) {
				write_mchbar32(0x138 + 8 * k,
					       (channel << 26) | (j << 24));
				gav(vd8[1][(channel << 3) | (j << 1) | k] =
				    read_mchbar32(0x138 + 8 * k));
				gav(vd8[0][(channel << 3) | (j << 1) | k] =
				    read_mchbar32(0x13c + 8 * k));
			}

			write_mchbar32(0x334 + (channel << 10) + (j * 0x44),
				       zero ? 0 : val3[j]);
			write_mchbar32(0x32c + (channel << 10) + (j * 0x44),
				       zero ? 0 : (0x18191819 & lmask));
			write_mchbar16(0x34a + (channel << 10) + (j * 0x44), c);
			write_mchbar32(0x33c + (channel << 10) + (j * 0x44),
				       zero ? 0 : (a & lmask));
			write_mchbar32(0x344 + (channel << 10) + (j * 0x44),
				       zero ? 0 : (a & lmask));
		}
	}

	/* Commit: set bit 0 and wait for the hardware to clear it. */
	write_mchbar32(0x130, read_mchbar32(0x130) | 1);	/* OK */
	while (read_mchbar8(0x130) & 1) ;	/* OK */
}
+
+static void rmw_1d0(u16 addr, u32 and, u32 or, int split, int flag)
+{
+ u32 v;
+ v = read_1d0(addr, split);
+ write_1d0((v & and) | or, addr, split, flag);
+}
+
+static int find_highest_bit_set(u16 val)
+{
+ int i;
+ for (i = 15; i >= 0; i--)
+ if (val & (1 << i))
+ return i;
+ return -1;
+}
+
+static int find_lowest_bit_set32(u32 val)
+{
+ int i;
+ for (i = 0; i < 32; i++)
+ if (val & (1 << i))
+ return i;
+ return -1;
+}
+
/* NOTE: classic double-evaluation macros -- arguments must be free of
   side effects (every call site in this file passes plain expressions). */
#define max(a,b) (((a) > (b)) ? (a) : (b))
#define min(a,b) (((a) < (b)) ? (a) : (b))
+
/* Byte offsets into the DDR3 SPD EEPROM image. */
enum {
	DEVICE_TYPE = 2,
	MODULE_TYPE = 3,
	DENSITY = 4,
	RANKS_AND_DQ = 7,
	MEMORY_BUS_WIDTH = 8,
	TIMEBASE_DIVIDEND = 10,
	TIMEBASE_DIVISOR = 11,
	CYCLETIME = 12,		/* tCKmin in timebase units */

	CAS_LATENCIES_LSB = 14,
	CAS_LATENCIES_MSB = 15,
	CAS_LATENCY_TIME = 16,	/* tAAmin in timebase units */
	THERMAL_AND_REFRESH = 31,
	REFERENCE_RAW_CARD_USED = 62,
	RANK1_ADDRESS_MAPPING = 63
};
+
/* Derive the common clock speed index and CAS latency from the SPD data
   of all populated modules, clamped by the controller's maximum
   supported clock.  Results: info->clock_speed_index, info->cas_latency. */
static void calculate_timings(struct raminfo *info)
{
	unsigned cycletime;
	unsigned cas_latency_time;
	unsigned supported_cas_latencies;
	unsigned channel, slot;
	unsigned clock_speed_index;
	unsigned min_cas_latency;
	unsigned cas_latency;
	unsigned max_clock_index;

	/* Find common CAS latency */
	/* SPD bytes 14/15: bit n set means CL (n + 4) is supported; the
	   "* 2" re-bases the mask so bit m means CL (m + 3), matching
	   the "+ 3" where the mask is consumed below. */
	supported_cas_latencies = 0x3fe;
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			if (info->populated_ranks[channel][slot][0])
				supported_cas_latencies &=
				    2 *
				    (info->
				     spd[channel][slot][CAS_LATENCIES_LSB] |
				     (info->
				      spd[channel][slot][CAS_LATENCIES_MSB] <<
				      8));

	max_clock_index = min(3, info->max_supported_clock_speed_index);

	cycletime = min_cycletime[max_clock_index];
	cas_latency_time = min_cas_latency_time[max_clock_index];

	/* Worst-case tCK and tAA over all populated modules, in ps
	   (SPD timebase = dividend / divisor ns; 1000 scales to ps). */
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			if (info->populated_ranks[channel][slot][0]) {
				unsigned timebase;
				timebase =
				    1000 *
				    info->
				    spd[channel][slot][TIMEBASE_DIVIDEND] /
				    info->spd[channel][slot][TIMEBASE_DIVISOR];
				cycletime =
				    max(cycletime,
					timebase *
					info->spd[channel][slot][CYCLETIME]);
				cas_latency_time =
				    max(cas_latency_time,
					timebase *
					info->
					spd[channel][slot][CAS_LATENCY_TIME]);
			}
	/* Pick the fastest standard clock whose cycle time satisfies the
	   required one.  NOTE(review): if cycletime exceeds
	   min_cycletime[0] this underflows clock_speed_index -- assumed
	   unreachable with valid DDR3 SPD data; confirm. */
	for (clock_speed_index = 0; clock_speed_index < 3; clock_speed_index++) {
		if (cycletime == min_cycletime[clock_speed_index])
			break;
		if (cycletime > min_cycletime[clock_speed_index]) {
			clock_speed_index--;
			cycletime = min_cycletime[clock_speed_index];
			break;
		}
	}
	/* Minimum CL at the chosen clock, then walk the supported CLs
	   from highest to lowest, stopping at the first one that does
	   not exceed that minimum. */
	min_cas_latency = (cas_latency_time + cycletime - 1) / cycletime;
	cas_latency = 0;
	while (supported_cas_latencies) {
		cas_latency = find_highest_bit_set(supported_cas_latencies) + 3;
		if (cas_latency <= min_cas_latency)
			break;
		supported_cas_latencies &=
		    ~(1 << find_highest_bit_set(supported_cas_latencies));
	}

	if (cas_latency != min_cas_latency && clock_speed_index)
		clock_speed_index--;

	/* Sanity limit: CL * tCK beyond 20 ns means no workable config. */
	if (cas_latency * min_cycletime[clock_speed_index] > 20000)
		die("Couldn't configure DRAM");
	info->clock_speed_index = clock_speed_index;
	info->cas_latency = cas_latency;
}
+
/* Program the initial per-lane timing registers (tm 0-3) and the
   related per-rank registers from the reverse-engineered lookup tables
   (u8_FFFD*/u16_ffd*), biased by the values computed in
   compute_derived_timings().  "Extended silicon revision" 4 marks
   rev-0 silicon with SO-DIMMs whose raw card needs extra offsets. */
static void program_base_timings(struct raminfo *info)
{
	unsigned channel;
	unsigned slot, rank, lane;
	unsigned extended_silicon_revision;
	int i;

	extended_silicon_revision = info->silicon_revision;
	if (info->silicon_revision == 0)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				if ((info->
				     spd[channel][slot][MODULE_TYPE] & 0xF) ==
				    3)
					extended_silicon_revision = 4;

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		/* NOTE(review): rank loop bound uses NUM_SLOTS; harmless
		   only because NUM_SLOTS == NUM_RANKS == 2. */
		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_SLOTS; rank++) {
				int card_timing_2;
				if (!info->populated_ranks[channel][slot][rank])
					continue;

				for (lane = 0; lane < 9; lane++) {
					int tm_reg;
					int card_timing;

					/* Raw-card specific extra delay
					   (SO-DIMM raw cards 3 and 5). */
					card_timing = 0;
					if ((info->
					     spd[channel][slot][MODULE_TYPE] &
					     0xF) == 3) {
						int reference_card;
						reference_card =
						    info->
						    spd[channel][slot]
						    [REFERENCE_RAW_CARD_USED] &
						    0x1f;
						if (reference_card == 3)
							card_timing =
							    u16_ffd1188[0][lane]
							    [info->
							     clock_speed_index];
						if (reference_card == 5)
							card_timing =
							    u16_ffd1188[1][lane]
							    [info->
							     clock_speed_index];
					}

					info->training.
					    lane_timings[0][channel][slot][rank]
					    [lane] =
					    u8_FFFD1218[info->
							clock_speed_index];
					info->training.
					    lane_timings[1][channel][slot][rank]
					    [lane] = 256;

					for (tm_reg = 2; tm_reg < 4; tm_reg++)
						info->training.
						    lane_timings[tm_reg]
						    [channel][slot][rank][lane]
						    =
						    u8_FFFD1240[channel]
						    [extended_silicon_revision]
						    [lane][2 * slot +
							   rank][info->
								 clock_speed_index]
						    + info->max4048[channel]
						    +
						    u8_FFFD0C78[channel]
						    [extended_silicon_revision]
						    [info->
						     mode4030[channel]][slot]
						    [rank][info->
							   clock_speed_index]
						    + card_timing;
					for (tm_reg = 0; tm_reg < 4; tm_reg++)
						write_500(info, channel,
							  info->training.
							  lane_timings[tm_reg]
							  [channel][slot][rank]
							  [lane],
							  get_timing_register_addr
							  (lane, tm_reg, slot,
							   rank), 9, 0);
				}

				card_timing_2 = 0;
				if (!(extended_silicon_revision != 4
				      || (info->
					  populated_ranks_mask[channel] & 5) ==
				      5)) {
					if ((info->
					     spd[channel][slot]
					     [REFERENCE_RAW_CARD_USED] & 0x1F)
					    == 3)
						card_timing_2 =
						    u16_FFFE0EB8[0][info->
								    clock_speed_index];
					if ((info->
					     spd[channel][slot]
					     [REFERENCE_RAW_CARD_USED] & 0x1F)
					    == 5)
						card_timing_2 =
						    u16_FFFE0EB8[1][info->
								    clock_speed_index];
				}

				for (i = 0; i < 3; i++)
					write_500(info, channel,
						  (card_timing_2 +
						   info->max4048[channel]
						   +
						   u8_FFFD0EF8[channel]
						   [extended_silicon_revision]
						   [info->
						    mode4030[channel]][info->
								       clock_speed_index]),
						  u16_fffd0c50[i][slot][rank],
						  8, 1);
				write_500(info, channel,
					  (info->max4048[channel] +
					   u8_FFFD0C78[channel]
					   [extended_silicon_revision][info->
								       mode4030
								       [channel]]
					   [slot][rank][info->
							clock_speed_index]),
					  u16_fffd0c70[slot][rank], 7, 1);
			}
		if (!info->populated_ranks_mask[channel])
			continue;
		/* Per-channel (not per-rank) registers. */
		for (i = 0; i < 3; i++)
			write_500(info, channel,
				  (info->max4048[channel] +
				   info->avg4044[channel]
				   +
				   u8_FFFD17E0[channel]
				   [extended_silicon_revision][info->
							       mode4030
							       [channel]][info->
									  clock_speed_index]),
				  u16_fffd0c68[i], 8, 1);
	}
}
+
/* FSB cycle time in ps. */
static unsigned int fsbcycle_ps(struct raminfo *info)
{
	return 900000 / info->fsb_frequency;
}

/* The time of DDR transfer in ps. */
static unsigned int halfcycle_ps(struct raminfo *info)
{
	return 3750 / (info->clock_speed_index + 3);
}

/* The time of clock cycle in ps. */
static unsigned int cycle_ps(struct raminfo *info)
{
	return 2 * halfcycle_ps(info);
}

/* Frequency in 1.(1)=10/9 MHz units. */
static unsigned frequency_11(struct raminfo *info)
{
	return (info->clock_speed_index + 3) * 120;
}

/* Frequency in 0.1 MHz units. */
static unsigned frequency_01(struct raminfo *info)
{
	return 100 * frequency_11(info) / 9;
}

/* Convert picoseconds to DDR half-cycles (rounding down). */
static unsigned ps_to_halfcycles(struct raminfo *info, unsigned int ps)
{
	return (frequency_11(info) * 2) * ps / 900000;
}

/* Convert nanoseconds to full clock cycles (rounding down). */
static unsigned ns_to_cycles(struct raminfo *info, unsigned int ns)
{
	return (frequency_11(info)) * ns / 900;
}
+
/* Compute the intermediate delays used throughout the timing tables
   ("some_delay_*" names kept from the reverse engineering), program the
   0x244 per-channel registers, and fill in mode4030/avg4044/max4048.
   The exact hardware meaning of most quantities is unknown; comments
   describe the arithmetic, not the silicon. */
static void compute_derived_timings(struct raminfo *info)
{
	unsigned channel, slot, rank;
	int extended_silicon_revision;
	int some_delay_1_ps;
	int some_delay_2_ps;
	int some_delay_2_halfcycles_ceil;
	int some_delay_2_halfcycles_floor;
	int some_delay_3_ps;
	int some_delay_3_halfcycles;
	int some_delay_3_ps_rounded;
	int some_delay_1_cycle_ceil;
	int some_delay_1_cycle_floor;

	some_delay_3_halfcycles = 0;
	some_delay_3_ps_rounded = 0;
	extended_silicon_revision = info->silicon_revision;
	if (!info->silicon_revision)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				if ((info->
				     spd[channel][slot][MODULE_TYPE] & 0xF) ==
				    3)
					extended_silicon_revision = 4;
	if (info->board_lane_delay[7] < 5)
		info->board_lane_delay[7] = 5;
	info->revision_flag_1 = 2;
	if (info->silicon_revision == 2 || info->silicon_revision == 3)
		info->revision_flag_1 = 0;
	/* NOTE(review): the "< 8" check below is subsumed by "< 16";
	   kept as-is, it is harmless. */
	if (info->revision < 16)
		info->revision_flag_1 = 0;

	if (info->revision < 8)
		info->revision_flag_1 = 0;
	if (info->revision >= 8 && (info->silicon_revision == 0
				    || info->silicon_revision == 1))
		some_delay_2_ps = 735;
	else
		some_delay_2_ps = 750;

	if (info->revision >= 0x10 && (info->silicon_revision == 0
				       || info->silicon_revision == 1))
		some_delay_1_ps = 3929;
	else
		some_delay_1_ps = 3490;

	/* Floor/ceil of delay 1 in full cycles; on an exact multiple the
	   floor is decremented instead of the ceil being incremented. */
	some_delay_1_cycle_floor = some_delay_1_ps / cycle_ps(info);
	some_delay_1_cycle_ceil = some_delay_1_ps / cycle_ps(info);
	if (some_delay_1_ps % cycle_ps(info))
		some_delay_1_cycle_ceil++;
	else
		some_delay_1_cycle_floor--;
	info->some_delay_1_cycle_floor = some_delay_1_cycle_floor;
	if (info->revision_flag_1)
		some_delay_2_ps = halfcycle_ps(info) >> 6;
	some_delay_2_ps +=
	    max(some_delay_1_ps - 30,
		2 * halfcycle_ps(info) * (some_delay_1_cycle_ceil - 1) + 1000) +
	    375;
	some_delay_3_ps =
	    halfcycle_ps(info) - some_delay_2_ps % halfcycle_ps(info);
	if (info->revision_flag_1) {
		if (some_delay_3_ps < 150)
			some_delay_3_halfcycles = 0;
		else
			some_delay_3_halfcycles =
			    (some_delay_3_ps << 6) / halfcycle_ps(info);
		some_delay_3_ps_rounded =
		    halfcycle_ps(info) * some_delay_3_halfcycles >> 6;
	}
	some_delay_2_halfcycles_ceil =
	    (some_delay_2_ps + halfcycle_ps(info) - 1) / halfcycle_ps(info) -
	    2 * (some_delay_1_cycle_ceil - 1);
	if (info->revision_flag_1 && some_delay_3_ps < 150)
		some_delay_2_halfcycles_ceil++;
	some_delay_2_halfcycles_floor = some_delay_2_halfcycles_ceil;
	if (info->revision < 0x10)
		some_delay_2_halfcycles_floor =
		    some_delay_2_halfcycles_ceil - 1;
	if (!info->revision_flag_1)
		some_delay_2_halfcycles_floor++;
	info->some_delay_2_halfcycles_ceil = some_delay_2_halfcycles_ceil;
	info->some_delay_3_ps_rounded = some_delay_3_ps_rounded;
	/* Two slots used in a channel iff both slots of either channel
	   have their rank 0 populated. */
	if ((info->populated_ranks[0][0][0] && info->populated_ranks[0][1][0])
	    || (info->populated_ranks[1][0][0]
		&& info->populated_ranks[1][1][0]))
		info->max_slots_used_in_channel = 2;
	else
		info->max_slots_used_in_channel = 1;
	for (channel = 0; channel < 2; channel++)
		write_mchbar32(0x244 + (channel << 10),
			       ((info->revision < 8) ? 1 : 0x200)
			       | ((2 - info->max_slots_used_in_channel) << 17) |
			       (channel << 21) | (info->
						  some_delay_1_cycle_floor <<
						  18) | 0x9510);
	if (info->max_slots_used_in_channel == 1) {
		info->mode4030[0] = (count_ranks_in_channel(info, 0) == 2);
		info->mode4030[1] = (count_ranks_in_channel(info, 1) == 2);
	} else {
		info->mode4030[0] = ((count_ranks_in_channel(info, 0) == 1) || (count_ranks_in_channel(info, 0) == 2)) ? 2 : 3;	/* 2 if 1 or 2 ranks */
		info->mode4030[1] = ((count_ranks_in_channel(info, 1) == 1)
				     || (count_ranks_in_channel(info, 1) ==
					 2)) ? 2 : 3;
	}
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		int max_of_unk;
		int min_of_unk_2;

		int i, count;
		int sum;

		if (!info->populated_ranks_mask[channel])
			continue;

		max_of_unk = 0;
		min_of_unk_2 = 32767;

		sum = 0;
		count = 0;
		for (i = 0; i < 3; i++) {
			int unk1;
			if (info->revision < 8)
				unk1 =
				    u8_FFFD1891[0][channel][info->
							    clock_speed_index]
				    [i];
			else if (!
				 (info->revision >= 0x10
				  || info->revision_flag_1))
				unk1 =
				    u8_FFFD1891[1][channel][info->
							    clock_speed_index]
				    [i];
			else
				unk1 = 0;
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++) {
					int a = 0;
					int b = 0;

					if (!info->
					    populated_ranks[channel][slot]
					    [rank])
						continue;
					/* Raw-card specific a/b offsets,
					   only for ext. revision 4. */
					if (extended_silicon_revision == 4
					    && (info->
						populated_ranks_mask[channel] &
						5) != 5) {
						if ((info->
						     spd[channel][slot]
						     [REFERENCE_RAW_CARD_USED] &
						     0x1F) == 3) {
							a = u16_ffd1178[0]
							    [info->
							     clock_speed_index];
							b = u16_fe0eb8[0][info->
									  clock_speed_index];
						} else
						    if ((info->
							 spd[channel][slot]
							 [REFERENCE_RAW_CARD_USED]
							 & 0x1F) == 5) {
							a = u16_ffd1178[1]
							    [info->
							     clock_speed_index];
							b = u16_fe0eb8[1][info->
									  clock_speed_index];
						}
					}
					min_of_unk_2 = min(min_of_unk_2, a);
					min_of_unk_2 = min(min_of_unk_2, b);
					/* NOTE(review): count only grows
					   for rank 0; a channel populated
					   only in rank 1 would divide by
					   zero below -- presumably
					   impossible here; confirm. */
					if (rank == 0) {
						sum += a;
						count++;
					}
					{
						int t;
						t = b +
						    u8_FFFD0EF8[channel]
						    [extended_silicon_revision]
						    [info->
						     mode4030[channel]][info->
									clock_speed_index];
						if (unk1 >= t)
							max_of_unk =
							    max(max_of_unk,
								unk1 - t);
					}
				}
			{
				int t =
				    u8_FFFD17E0[channel]
				    [extended_silicon_revision][info->
								mode4030
								[channel]]
				    [info->clock_speed_index] + min_of_unk_2;
				if (unk1 >= t)
					max_of_unk = max(max_of_unk, unk1 - t);
			}
		}

		info->avg4044[channel] = sum / count;
		info->max4048[channel] = max_of_unk;
	}
}
+
/* Issue one JEDEC mode-register-set command: the MRS payload rides on
   the address lines of a dummy read.  ADDR3 selects the bank-address
   bits (written into 0x271/0x671), VALUE is the mode-register payload.
   For odd ranks on modules with mirrored rank-1 address mapping (SPD
   byte 63 bit 0), the affected bank and address bits are swapped to
   undo the mirroring. */
static void jedec_read(struct raminfo *info,
		       int channel, int slot, int rank,
		       int total_rank, u8 addr3, unsigned int value)
{
	/* Handle mirrored mapping. */
	if ((rank & 1) && (info->spd[channel][slot][RANK1_ADDRESS_MAPPING] & 1))
		addr3 =
		    (addr3 & 0xCF) | ((addr3 & 0x10) << 1) | ((addr3 >> 1) &
							      0x10);
	write_mchbar8(0x271, addr3 | (read_mchbar8(0x271) & 0xC1));
	write_mchbar8(0x671, addr3 | (read_mchbar8(0x671) & 0xC1));

	/* Handle mirrored mapping. */
	if ((rank & 1) && (info->spd[channel][slot][RANK1_ADDRESS_MAPPING] & 1))
		value =
		    (value & ~0x1f8) | ((value >> 1) & 0xa8) | ((value & 0xa8)
								<< 1);

	/* The dummy read that carries the MRS payload. */
	read32((value << 3) | (total_rank << 28));

	/* Restore command mode (value 2) and issue a second dummy read. */
	write_mchbar8(0x271, (read_mchbar8(0x271) & 0xC3) | 2);
	write_mchbar8(0x671, (read_mchbar8(0x671) & 0xC3) | 2);

	read32(total_rank << 28);
}
+
/* DDR3 MR1 field encodings: RTT_nom (ODT) and output driver strength. */
enum {
	MR1_RZQ12 = 512,
	MR1_RZQ2 = 64,
	MR1_RZQ4 = 4,
	MR1_ODS34OHM = 2
};

/* DDR3 MR0 field encodings. */
enum {
	MR0_BT_INTERLEAVED = 8,
	MR0_DLL_RESET_ON = 256
};

/* DDR3 MR2 field encodings: dynamic ODT (RTT_WR). */
enum {
	MR2_RTT_WR_DISABLED = 0,
	MR2_RZQ2 = 1 << 10
};
+
/* Run the JEDEC DDR3 initialization (mode-register-set) sequence on
   every populated rank: MR2, MR3, MR1, then MR0 with DLL reset. */
static void jedec_init(struct raminfo *info)
{
	int write_recovery;
	int channel, slot, rank;
	int total_rank;
	int dll_on;
	int self_refresh_temperature;
	int auto_self_refresh;

	auto_self_refresh = 1;
	self_refresh_temperature = 1;
	/* Derive the MR0 write-recovery field from board lane delay 3. */
	if (info->board_lane_delay[3] <= 10) {
		if (info->board_lane_delay[3] <= 8)
			write_recovery = info->board_lane_delay[3] - 4;
		else
			write_recovery = 5;
	} else {
		write_recovery = 6;
	}
	/* ASR / SRT (MR2) only if every populated DIMM supports them
	   (SPD byte 31). */
	FOR_POPULATED_RANKS {
		auto_self_refresh &=
		    (info->spd[channel][slot][THERMAL_AND_REFRESH] >> 2) & 1;
		self_refresh_temperature &=
		    info->spd[channel][slot][THERMAL_AND_REFRESH] & 1;
	}
	if (auto_self_refresh == 1)
		self_refresh_temperature = 0;

	dll_on = ((info->silicon_revision != 2 && info->silicon_revision != 3)
		  || (info->populated_ranks[0][0][0]
		      && info->populated_ranks[0][1][0])
		  || (info->populated_ranks[1][0][0]
		      && info->populated_ranks[1][1][0]));

	total_rank = 0;

	for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) {
		int rtt, rtt_wr = MR2_RTT_WR_DISABLED;
		int rzq_reg58e;

		/* Select ODT strengths based on stepping, clock and
		   channel population. */
		if (info->silicon_revision == 2 || info->silicon_revision == 3) {
			rzq_reg58e = 64;
			rtt = MR1_RZQ2;
			if (info->clock_speed_index != 0) {
				rzq_reg58e = 4;
				if (info->populated_ranks_mask[channel] == 3)
					rtt = MR1_RZQ4;
			}
		} else {
			if ((info->populated_ranks_mask[channel] & 5) == 5) {
				rtt = MR1_RZQ12;
				rzq_reg58e = 64;
				rtt_wr = MR2_RZQ2;
			} else {
				rzq_reg58e = 4;
				rtt = MR1_RZQ4;
			}
		}

		write_mchbar16(0x588 + (channel << 10), 0x0);
		write_mchbar16(0x58a + (channel << 10), 0x4);
		write_mchbar16(0x58c + (channel << 10), rtt | MR1_ODS34OHM);
		write_mchbar16(0x58e + (channel << 10), rzq_reg58e | 0x82);
		write_mchbar16(0x590 + (channel << 10), 0x1282);

		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_RANKS; rank++)
				if (info->populated_ranks[channel][slot][rank]) {
					/* MR2 (addr3 0x28). */
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x28,
						   rtt_wr | (info->
							     clock_speed_index
							     << 3)
						   | (auto_self_refresh << 6) |
						   (self_refresh_temperature <<
						    7));
					/* MR3 (addr3 0x38). */
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x38, 0);
					/* MR1 (addr3 0x18). */
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x18,
						   rtt | MR1_ODS34OHM);
					/* MR0 (addr3 6), with DLL reset. */
					jedec_read(info, channel, slot, rank,
						   total_rank, 6,
						   (dll_on << 12) |
						   (write_recovery << 9)
						   | ((info->cas_latency - 4) <<
						      4) | MR0_BT_INTERLEAVED |
						   MR0_DLL_RESET_ON);
					total_rank++;
				}
	}
}
+
/* Program the per-rank address map (0x200/0x208 register groups) and
   the global interleaved/non-interleaved split (0x100/0x104).  With
   PRE_JEDEC set, every populated rank is mapped as a fixed 256 MB so
   the JEDEC init sequence can address it; afterwards the real sizes
   from SPD density and module width are used. */
static void program_modules_memory_map(struct raminfo *info, int pre_jedec)
{
	unsigned channel, slot, rank;
	unsigned int total_mb[2] = { 0, 0 };	/* total memory per channel in MB */
	unsigned int channel_0_non_interleaved;

	FOR_ALL_RANKS {
		if (info->populated_ranks[channel][slot][rank]) {
			total_mb[channel] +=
			    pre_jedec ? 256 : (256 << info->
					       density[channel][slot] >> info->
					       is_x16_module[channel][slot]);
			write_mchbar8(0x208 + rank + 2 * slot + (channel << 10),
				      (pre_jedec ? (1 | ((1 + 1) << 1))
				       : (info->
					  is_x16_module[channel][slot] |
					  ((info->density[channel][slot] +
					    1) << 1))) | 0x80);
		}
		/* Running upper bound for this channel, in 64 MB units
		   (written for every rank slot, populated or not). */
		write_mchbar16(0x200 + (channel << 10) + 4 * slot + 2 * rank,
			       total_mb[channel] >> 6);
	}

	info->total_memory_mb = total_mb[0] + total_mb[1];

	/* The interleaved region spans both channels symmetrically; the
	   remainder of the larger channel is non-interleaved. */
	info->interleaved_part_mb =
	    pre_jedec ? 0 : 2 * min(total_mb[0], total_mb[1]);
	info->non_interleaved_part_mb =
	    total_mb[0] + total_mb[1] - info->interleaved_part_mb;
	channel_0_non_interleaved = total_mb[0] - info->interleaved_part_mb / 2;
	write_mchbar32(0x100,
		       channel_0_non_interleaved | (info->
						    non_interleaved_part_mb <<
						    16));
	if (!pre_jedec)
		write_mchbar16(0x104, info->interleaved_part_mb);
}
+
/*
 * Program board- and channel-level timing registers derived from the
 * board_lane_delay table, CAS latency, clock speed index and silicon
 * revision.  The MCHBAR offsets and raw hex constants below come from
 * reverse engineering of vendor firmware; their exact meaning is mostly
 * unknown, so the statement order must be preserved.
 */
static void program_board_delay(struct raminfo *info)
{
	int cas_latency_shift;
	int some_delay_ns;
	int some_delay_3_half_cycles;

	unsigned channel, i;
	int high_multiplier;
	int lane_3_delay;
	int cas_latency_derived;

	high_multiplier = 0;
	some_delay_ns = 200;
	some_delay_3_half_cycles = 4;
	/* Silicon revisions 0/1 need an extra CAS-latency adjustment,
	   except on pre-rev-8 steppings where it is forced back to 0. */
	cas_latency_shift = info->silicon_revision == 0
	    || info->silicon_revision == 1 ? 1 : 0;
	if (info->revision < 8) {
		some_delay_ns = 600;
		cas_latency_shift = 0;
	}
	{
		int speed_bit;
		speed_bit =
		    ((info->clock_speed_index > 1
		      || (info->silicon_revision != 2
			  && info->silicon_revision != 3))) ^ (info->revision >=
							       0x10);
		write_500(info, 0, speed_bit | ((!info->use_ecc) << 1), 0x60e,
			  3, 1);
		write_500(info, 1, speed_bit | ((!info->use_ecc) << 1), 0x60e,
			  3, 1);
		if (info->revision >= 0x10 && info->clock_speed_index <= 1
		    && (info->silicon_revision == 2
			|| info->silicon_revision == 3))
			rmw_1d0(0x116, 5, 2, 4, 1);
	}
	write_mchbar32(0x120,
		       (1 << (info->max_slots_used_in_channel + 28)) |
		       0x188e7f9f);

	write_mchbar8(0x124,
		      info->board_lane_delay[4] +
		      ((frequency_01(info) + 999) / 1000));
	write_mchbar16(0x125, 0x1360);
	write_mchbar8(0x127, 0x40);
	/* Slow FSB relative to memory clock: compute an extra delay in
	   half-cycles, clamped to 7. */
	if (info->fsb_frequency < frequency_11(info) / 2) {
		unsigned some_delay_2_half_cycles;
		high_multiplier = 1;
		some_delay_2_half_cycles = ps_to_halfcycles(info,
							    ((3 *
							      fsbcycle_ps(info))
							     >> 1) +
							    (halfcycle_ps(info)
							     *
							     reg178_min[info->
									clock_speed_index]
							     >> 6)
							    +
							    4 *
							    halfcycle_ps(info)
							    + 2230);
		some_delay_3_half_cycles =
		    min((some_delay_2_half_cycles +
			 (frequency_11(info) * 2) * (28 -
						     some_delay_2_half_cycles) /
			 (frequency_11(info) * 2 -
			  4 * (info->fsb_frequency))) >> 3, 7);
	}
	if (read_mchbar8(0x2ca9) & 1)
		some_delay_3_half_cycles = 3;
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		write_mchbar32(0x220 + (channel << 10),
			       read_mchbar32(0x220 +
					     (channel << 10)) | 0x18001117);
		/* Several fields derived from CAS latency, slot count and
		   clock speed index; field meanings unknown. */
		write_mchbar32(0x224 + (channel << 10),
			       (info->max_slots_used_in_channel - 1)
			       |
			       ((info->cas_latency - 5 -
				 info->clock_speed_index) << 21)
			       |
			       ((info->max_slots_used_in_channel +
				 info->cas_latency - cas_latency_shift -
				 4) << 16)
			       | ((info->cas_latency - cas_latency_shift - 4) <<
				  26)
			       |
			       ((info->cas_latency - info->clock_speed_index +
				 info->max_slots_used_in_channel - 6) << 8));
		write_mchbar32(0x228 + (channel << 10),
			       info->max_slots_used_in_channel);
		write_mchbar8(0x239 + (channel << 10), 32);
		write_mchbar32(0x248 + (channel << 10),
			       (high_multiplier << 24) |
			       (some_delay_3_half_cycles << 25) | 0x840000);
		write_mchbar32(0x278 + (channel << 10), 0xc362042);
		write_mchbar32(0x27c + (channel << 10), 0x8b000062);
		write_mchbar32(0x24c + (channel << 10),
			       ((! !info->
				 clock_speed_index) << 17) | (((2 +
							       info->
							       clock_speed_index
							       -
							       (! !info->
								clock_speed_index)))
							      << 12) | 0x10200);

		write_mchbar8(0x267 + (channel << 10), 0x4);
		write_mchbar16(0x272 + (channel << 10), 0x155);
		write_mchbar32(0x2bc + (channel << 10),
			       (read_mchbar32(0x2bc + (channel << 10)) &
				0xFF000000)
			       | 0x707070);

		/* One bit per unpopulated rank (inverted population map). */
		write_500(info, channel,
			  ((!info->populated_ranks[channel][1][1])
			   | (!info->populated_ranks[channel][1][0] << 1)
			   | (!info->populated_ranks[channel][0][1] << 2)
			   | (!info->populated_ranks[channel][0][0] << 3)),
			  0x4c9, 4, 1);
	}

	write_mchbar8(0x2c4, ((1 + (info->clock_speed_index != 0)) << 6) | 0xC);
	{
		/* Divisor choice depends on the FSB : memory clock ratio. */
		u8 freq_divisor = 2;
		if (info->fsb_frequency == frequency_11(info))
			freq_divisor = 3;
		else if (2 * info->fsb_frequency < 3 * (frequency_11(info) / 2))
			freq_divisor = 1;
		else
			freq_divisor = 2;
		write_mchbar32(0x2c0, (freq_divisor << 11) | 0x6009c400);
	}

	/* Clamp board lane 3 delay: <=8 used as-is, 9..10 -> 10, >10 -> 12. */
	if (info->board_lane_delay[3] <= 10) {
		if (info->board_lane_delay[3] <= 8)
			lane_3_delay = info->board_lane_delay[3];
		else
			lane_3_delay = 10;
	} else {
		lane_3_delay = 12;
	}
	cas_latency_derived = info->cas_latency - info->clock_speed_index + 2;
	if (info->clock_speed_index > 1)
		cas_latency_derived++;
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		write_mchbar32(0x240 + (channel << 10),
			       ((info->clock_speed_index ==
				 0) * 0x11000) | 0x1002100 | ((2 +
							       info->
							       clock_speed_index)
							      << 4) | (info->
								       cas_latency
								       - 3));
		write_500(info, channel, (info->clock_speed_index << 1) | 1,
			  0x609, 6, 1);
		write_500(info, channel,
			  info->clock_speed_index + 2 * info->cas_latency - 7,
			  0x601, 6, 1);

		/* Packed board_lane_delay fields; bit layouts from RE. */
		write_mchbar32(0x250 + (channel << 10),
			       ((lane_3_delay + info->clock_speed_index +
				 9) << 6)
			       | (info->board_lane_delay[7] << 2) | (info->
								     board_lane_delay
								     [4] << 16)
			       | (info->board_lane_delay[1] << 25) | (info->
								      board_lane_delay
								      [1] << 29)
			       | 1);
		write_mchbar32(0x254 + (channel << 10),
			       (info->
				board_lane_delay[1] >> 3) | ((info->
							      board_lane_delay
							      [8] +
							      4 *
							      info->
							      use_ecc) << 6) |
			       0x80 | (info->board_lane_delay[6] << 1) | (info->
									  board_lane_delay
									  [2] <<
									  28) |
			       (cas_latency_derived << 16) | 0x4700000);
		write_mchbar32(0x258 + (channel << 10),
			       ((info->board_lane_delay[5] +
				 info->clock_speed_index +
				 9) << 12) | ((info->clock_speed_index -
					       info->cas_latency + 12) << 8)
			       | (info->board_lane_delay[2] << 17) | (info->
								      board_lane_delay
								      [4] << 24)
			       | 0x47);
		write_mchbar32(0x25c + (channel << 10),
			       (info->board_lane_delay[1] << 1) | (info->
								   board_lane_delay
								   [0] << 8) |
			       0x1da50000);
		write_mchbar8(0x264 + (channel << 10), 0xff);
		write_mchbar8(0x5f8 + (channel << 10),
			      (cas_latency_shift << 3) | info->use_ecc);
	}

	/* Pre-JEDEC placeholder memory map (256MB per populated rank). */
	program_modules_memory_map(info, 1);

	write_mchbar16(0x610,
		       (min(ns_to_cycles(info, some_delay_ns) / 2, 127) << 9)
		       | (read_mchbar16(0x610) & 0x1C3) | 0x3C);
	write_mchbar16(0x612, read_mchbar16(0x612) | 0x100);
	write_mchbar16(0x214, read_mchbar16(0x214) | 0x3E00);
	/* Open the whole placeholder map through the QuickPath decoder. */
	for (i = 0; i < 8; i++) {
		pcie_write_config32(PCI_DEV (QUICKPATH_BUS, 0, 1), 0x80 + 4 * i,
				    (info->total_memory_mb - 64) | !i | 2);
		pcie_write_config32(PCI_DEV (QUICKPATH_BUS, 0, 1), 0xc0 + 4 * i, 0);
	}
}
+
+#define BETTER_MEMORY_MAP 0
+
+static void program_total_memory_map(struct raminfo *info)
+{
+ unsigned int TOM, TOLUD, TOUUD;
+ unsigned int quickpath_reserved;
+ unsigned int REMAPbase;
+ unsigned int uma_base_igd;
+ unsigned int uma_base_gtt;
+ int memory_remap;
+ unsigned int memory_map[8];
+ int i;
+ unsigned int current_limit;
+ unsigned int tseg_base;
+ int uma_size_igd = 0, uma_size_gtt = 0;
+
+ memset(memory_map, 0, sizeof(memory_map));
+
+#if REAL
+ if (info->uma_enabled) {
+ u16 t = pcie_read_config16(NORTHBRIDGE, D0F0_GGC);
+ gav(t);
+ const int uma_sizes_gtt[16] =
+ { 0, 1, 0, 2, 0, 0, 0, 0, 0, 2, 3, 4, 42, 42, 42, 42 };
+ /* Igd memory */
+ const int uma_sizes_igd[16] = {
+ 0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352,
+ 256, 512
+ };
+
+ uma_size_igd = uma_sizes_igd[(t >> 4) & 0xF];
+ uma_size_gtt = uma_sizes_gtt[(t >> 8) & 0xF];
+ }
+#endif
+
+ TOM = info->total_memory_mb;
+ if (TOM == 4096)
+ TOM = 4032;
+ TOUUD = ALIGN_DOWN(TOM - info->memory_reserved_for_heci_mb, 64);
+ TOLUD = ALIGN_DOWN(min(3072 + ALIGN_UP(uma_size_igd + uma_size_gtt, 64)
+ , TOUUD), 64);
+ memory_remap = 0;
+ if (TOUUD - TOLUD > 64) {
+ memory_remap = 1;
+ REMAPbase = max(4096, TOUUD);
+ TOUUD = TOUUD - TOLUD + 4096;
+ }
+ if (TOUUD > 4096)
+ memory_map[2] = TOUUD | 1;
+ quickpath_reserved = 0;
+
+ {
+ u32 t;
+
+ gav(t = pcie_read_config32(PCI_DEV(QUICKPATH_BUS, 0, 1), 0x68));
+ if (t & 0x800)
+ quickpath_reserved =
+ (1 << find_lowest_bit_set32(t >> 20));
+ }
+ if (memory_remap)
+ TOUUD -= quickpath_reserved;
+
+#if !REAL
+ if (info->uma_enabled) {
+ u16 t = pcie_read_config16(NORTHBRIDGE, D0F0_GGC);
+ gav(t);
+ const int uma_sizes_gtt[16] =
+ { 0, 1, 0, 2, 0, 0, 0, 0, 0, 2, 3, 4, 42, 42, 42, 42 };
+ /* Igd memory */
+ const int uma_sizes_igd[16] = {
+ 0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352,
+ 256, 512
+ };
+
+ uma_size_igd = uma_sizes_igd[(t >> 4) & 0xF];
+ uma_size_gtt = uma_sizes_gtt[(t >> 8) & 0xF];
+ }
+#endif
+
+ uma_base_igd = TOLUD - uma_size_igd;
+ uma_base_gtt = uma_base_igd - uma_size_gtt;
+ tseg_base = ALIGN_DOWN(uma_base_gtt, 64) - (CONFIG_SMM_TSEG_SIZE >> 20);
+ if (!memory_remap)
+ tseg_base -= quickpath_reserved;
+ tseg_base = ALIGN_DOWN(tseg_base, 8);
+
+ pcie_write_config16(NORTHBRIDGE, D0F0_TOLUD, TOLUD << 4);
+ pcie_write_config16(NORTHBRIDGE, D0F0_TOM, TOM >> 6);
+ if (memory_remap) {
+ pcie_write_config16(NORTHBRIDGE, D0F0_REMAPBASE, REMAPbase >> 6);
+ pcie_write_config16(NORTHBRIDGE, D0F0_REMAPLIMIT, (TOUUD - 64) >> 6);
+ }
+ pcie_write_config16(NORTHBRIDGE, D0F0_TOUUD, TOUUD);
+
+ if (info->uma_enabled) {
+ pcie_write_config32(NORTHBRIDGE, D0F0_IGD_BASE, uma_base_igd << 20);
+ pcie_write_config32(NORTHBRIDGE, D0F0_GTT_BASE, uma_base_gtt << 20);
+ }
+ pcie_write_config32(NORTHBRIDGE, TSEG, tseg_base << 20);
+
+ current_limit = 0;
+ memory_map[0] = ALIGN_DOWN(uma_base_gtt, 64) | 1;
+ memory_map[1] = 4096;
+ for (i = 0; i < ARRAY_SIZE(memory_map); i++) {
+ current_limit = max(current_limit, memory_map[i] & ~1);
+ pcie_write_config32(PCI_DEV(QUICKPATH_BUS, 0, 1), 4 * i + 0x80,
+ (memory_map[i] & 1) | ALIGN_DOWN(current_limit -
+ 1, 64) | 2);
+ pcie_write_config32(PCI_DEV(QUICKPATH_BUS, 0, 1), 4 * i + 0xc0, 0);
+ }
+}
+
/*
 * Gather chipset and ME information into *info: the HECI BAR and ME UMA
 * size, northbridge revision, maximum supported clock speed index,
 * whether graphics UMA is enabled, and a derived silicon_revision
 * classification based on CAPID0 fuse bits and the PCI device ID.
 */
static void collect_system_info(struct raminfo *info)
{
	u32 capid0[3];
	int i;
	unsigned channel;

	/* Wait for some bit, maybe TXT clear. */
	while (!(read8(0xfed40000) & (1 << 7))) ;

	if (!info->heci_bar)
		gav(info->heci_bar =
		    pcie_read_config32(HECIDEV, HECIBAR) & 0xFFFFFFF8);
	if (!info->memory_reserved_for_heci_mb) {
		/* Wait for ME to be ready */
		intel_early_me_init();
		info->memory_reserved_for_heci_mb = intel_early_me_uma_size();
	}

	for (i = 0; i < 3; i++)
		gav(capid0[i] =
		    pcie_read_config32(NORTHBRIDGE, D0F0_CAPID0 | (i << 2)));
	gav(info->revision = pcie_read_config8(NORTHBRIDGE, PCI_REVISION_ID));
	info->max_supported_clock_speed_index = (~capid0[1] & 7);

	/* CAPID0 bit 11 appears to be a "UMA disabled" fuse -- confirm. */
	if ((capid0[1] >> 11) & 1)
		info->uma_enabled = 0;
	else
		gav(info->uma_enabled =
		    pcie_read_config8(NORTHBRIDGE, D0F0_DEVEN) & 8);
	/* Unrecognised: [0000:fffd3d2d] 37f81.37f82 ! CPUID: eax: 00000001; ecx: 00000e00 => 00020655.00010800.029ae3ff.bfebfbff */
	info->silicon_revision = 0;

	if (capid0[2] & 2) {
		info->silicon_revision = 0;
		info->max_supported_clock_speed_index = 2;
		/* SPD module type 3 in slot 0 (presumably SO-DIMM -- confirm)
		   downgrades to silicon revision 2 and a lower max clock. */
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			if (info->populated_ranks[channel][0][0]
			    && (info->spd[channel][0][MODULE_TYPE] & 0xf) ==
			    3) {
				info->silicon_revision = 2;
				info->max_supported_clock_speed_index = 1;
			}
	} else {
		/* Classify by two fuse bits, then refine by device ID. */
		switch (((capid0[2] >> 18) & 1) + 2 * ((capid0[1] >> 3) & 1)) {
		case 1:
		case 2:
			info->silicon_revision = 3;
			break;
		case 3:
			info->silicon_revision = 0;
			break;
		case 0:
			info->silicon_revision = 2;
			break;
		}
		switch (pcie_read_config16(NORTHBRIDGE, PCI_DEVICE_ID)) {
		case 0x40:
			info->silicon_revision = 0;
			break;
		case 0x48:
			info->silicon_revision = 1;
			break;
		}
	}
}
+
/*
 * Replay previously-saved training results into the hardware: all four
 * timing sets for every channel/slot/rank/lane, plus registers 0x178
 * and 0x10b.  No-op on pre-revision-8 parts.  Caller must ensure
 * info->cached_training is valid (it is dereferenced unconditionally).
 */
static void write_training_data(struct raminfo *info)
{
	int tm, channel, slot, rank, lane;
	if (info->revision < 8)
		return;

	for (tm = 0; tm < 4; tm++)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++)
					for (lane = 0; lane < 9; lane++)
						write_500(info, channel,
							  info->
							  cached_training->
							  lane_timings[tm]
							  [channel][slot][rank]
							  [lane],
							  get_timing_register_addr
							  (lane, tm, slot,
							   rank), 9, 0);
	write_1d0(info->cached_training->reg_178, 0x178, 7, 1);
	write_1d0(info->cached_training->reg_10b, 0x10b, 6, 1);
}
+
/*
 * Debug aid: print, for every populated rank and lane, the timing value
 * currently in hardware alongside the trained value stored in
 * info->training, followed by registers 0x178 and 0x10b.
 * Compiled only for the REAL build.
 */
static void dump_timings(struct raminfo *info)
{
#if REAL
	int channel, slot, rank, lane, i;
	printk(BIOS_DEBUG, "Timings:\n");
	FOR_POPULATED_RANKS {
		printk(BIOS_DEBUG, "channel %d, slot %d, rank %d\n", channel,
		       slot, rank);
		for (lane = 0; lane < 9; lane++) {
			printk(BIOS_DEBUG, "lane %d: ", lane);
			for (i = 0; i < 4; i++) {
				/* hardware value (trained value) */
				printk(BIOS_DEBUG, "%x (%x) ",
				       read_500(info, channel,
						get_timing_register_addr
						(lane, i, slot, rank),
						9),
				       info->training.
				       lane_timings[i][channel][slot][rank]
				       [lane]);
			}
			printk(BIOS_DEBUG, "\n");
		}
	}
	printk(BIOS_DEBUG, "[178] = %x (%x)\n", read_1d0(0x178, 7),
	       info->training.reg_178);
	printk(BIOS_DEBUG, "[10b] = %x (%x)\n", read_1d0(0x10b, 6),
	       info->training.reg_10b);
#endif
}
+
/*
 * Snapshot the final training state (per-lane timings read back from
 * hardware, plus registers 0x178/0x10b) and store it in CBMEM as an MRC
 * cache entry for later boots (e.g. S3 resume).  Requires early CBMEM.
 */
static void save_timings(struct raminfo *info)
{
#if CONFIG_EARLY_CBMEM_INIT
	struct ram_training train;
	struct mrc_data_container *mrcdata;
	int output_len = ALIGN(sizeof(train), 16);
	int channel, slot, rank, lane, i;

	/* Start from the in-memory copy, then overwrite the lane timings
	   with the values actually programmed in the hardware. */
	train = info->training;
	FOR_POPULATED_RANKS for (lane = 0; lane < 9; lane++)
	    for (i = 0; i < 4; i++)
		train.lane_timings[i][channel][slot][rank][lane] =
		    read_500(info, channel,
			     get_timing_register_addr(lane, i, slot,
						      rank), 9);
	train.reg_178 = read_1d0(0x178, 7);
	train.reg_10b = read_1d0(0x10b, 6);

	/* Save the MRC S3 restore data to cbmem */
	cbmem_initialize();
	mrcdata = cbmem_add
	    (CBMEM_ID_MRCDATA, output_len + sizeof(struct mrc_data_container));

	printk(BIOS_DEBUG, "Relocate MRC DATA from %p to %p (%u bytes)\n",
	       &train, mrcdata, output_len);

	mrcdata->mrc_signature = MRC_DATA_SIGNATURE;
	mrcdata->mrc_data_size = output_len;
	mrcdata->reserved = 0;
	memcpy(mrcdata->mrc_data, &train, sizeof(train));

	/* Zero the unused space in aligned buffer. */
	if (output_len > sizeof(train))
		memset(mrcdata->mrc_data + sizeof(train), 0,
		       output_len - sizeof(train));

	/* Checksum covers the padded buffer so restore can validate it. */
	mrcdata->mrc_checksum = compute_ip_checksum(mrcdata->mrc_data,
						    mrcdata->mrc_data_size);
#endif
}
+
#if REAL
/* Return the MRC training data cached by a previous boot, or 0 when no
   valid cache entry is present. */
static const struct ram_training *get_cached_training(void)
{
	struct mrc_data_container *cont = find_current_mrc_cache();

	return cont ? (const struct ram_training *)cont->mrc_data : 0;
}
#endif
+
/* FIXME: add timeout. */
/* Spin until the ME side of the HECI interface reports ready (bit 3 of
   the CSR at HECIBAR+0xc), then update the host CSR at +0x4: clear bit
   4 and set bits 2-3.  Bit meanings are from reverse engineering --
   NOTE(review): confirm against MEI/HECI documentation. */
static void wait_heci_ready(void)
{
	while (!(read32(DEFAULT_HECIBAR | 0xc) & 8)) ;	// = 0x8000000c
	write32((DEFAULT_HECIBAR | 0x4),
		(read32(DEFAULT_HECIBAR | 0x4) & ~0x10) | 0xc);
}
+
/* FIXME: add timeout. */
/* Block until the host HECI circular buffer has room for "len" dwords:
   free space is buffer_depth minus the write/read pointer distance. */
static void wait_heci_cb_avail(int len)
{
	union {
		struct mei_csr csr;
		u32 raw;
	} csr;

	/* Wait for the ME to be ready first. */
	while (!(read32(DEFAULT_HECIBAR | 0xc) & 8)) ;

	do
		csr.raw = read32(DEFAULT_HECIBAR | 0x4);
	while (len >
	       csr.csr.buffer_depth - (csr.csr.buffer_write_ptr -
				       csr.csr.buffer_read_ptr));
}
+
/* Push one MEI packet (header dword + payload dwords) into the host
   circular buffer, then set bit 2 of the host CSR to notify the ME.
   NOTE(review): the final write masks with (1 << (8 * len)) - 1, where
   len is the *dword* count -- for len >= 4 that shift is undefined
   behavior in C and the mask was presumably meant to cover the leftover
   bytes of a non-multiple-of-4 length; this is what the FIXME below
   refers to.  Also, for len == 0 the last write reads payload[0] even
   though there is no payload -- callers appear to always send at least
   one payload dword. */
static void send_heci_packet(struct mei_header *head, u32 * payload)
{
	int len = (head->length + 3) / 4;
	int i;

	wait_heci_cb_avail(len + 1);

	/* FIXME: handle leftovers correctly. */
	write32(DEFAULT_HECIBAR | 0, *(u32 *) head);
	for (i = 0; i < len - 1; i++)
		write32(DEFAULT_HECIBAR | 0, payload[i]);

	write32(DEFAULT_HECIBAR | 0, payload[i] & ((1 << (8 * len)) - 1));
	write32(DEFAULT_HECIBAR | 0x4, read32(DEFAULT_HECIBAR | 0x4) | 0x4);
}
+
/*
 * Send an arbitrary-length HECI message from host address "hostaddress"
 * to ME client "clientaddress", fragmenting it into packets no larger
 * than the circular buffer allows.  Only the final fragment carries
 * is_complete = 1.
 */
static void
send_heci_message(u8 * msg, int len, u8 hostaddress, u8 clientaddress)
{
	struct mei_header head;
	int maxlen;

	wait_heci_ready();
	/* Buffer depth (dwords) is in CSR bits 31:24; reserve one slot
	   for the header, leaving maxlen payload bytes per packet. */
	maxlen = (read32(DEFAULT_HECIBAR | 0x4) >> 24) * 4 - 4;

	while (len) {
		int cur = len;
		if (cur > maxlen) {
			cur = maxlen;
			head.is_complete = 0;
		} else
			head.is_complete = 1;
		head.length = cur;
		head.reserved = 0;
		head.client_address = clientaddress;
		head.host_address = hostaddress;
		send_heci_packet(&head, (u32 *) msg);
		len -= cur;
		msg += cur;
	}
}
+
+/* FIXME: Add timeout. */
+static int
+recv_heci_packet(struct raminfo *info, struct mei_header *head, u32 * packet,
+ u32 * packet_size)
+{
+ union {
+ struct mei_csr csr;
+ u32 raw;
+ } csr;
+ int i = 0;
+
+ write32(DEFAULT_HECIBAR | 0x4, read32(DEFAULT_HECIBAR | 0x4) | 2);
+ do {
+ csr.raw = read32(DEFAULT_HECIBAR | 0xc);
+#if !REAL
+ if (i++ > 346)
+ return -1;
+#endif
+ }
+ while (csr.csr.buffer_write_ptr == csr.csr.buffer_read_ptr);
+ *(u32 *) head = read32(DEFAULT_HECIBAR | 0x8);
+ if (!head->length) {
+ write32(DEFAULT_HECIBAR | 0x4,
+ read32(DEFAULT_HECIBAR | 0x4) | 2);
+ *packet_size = 0;
+ return 0;
+ }
+ if (head->length + 4 > 4 * csr.csr.buffer_depth
+ || head->length > *packet_size) {
+ *packet_size = 0;
+ return -1;
+ }
+
+ do
+ csr.raw = read32(DEFAULT_HECIBAR | 0xc);
+ while ((head->length + 3) >> 2 >
+ csr.csr.buffer_write_ptr - csr.csr.buffer_read_ptr);
+
+ for (i = 0; i < (head->length + 3) >> 2; i++)
+ packet[i++] = read32(DEFAULT_HECIBAR | 0x8);
+ *packet_size = head->length;
+ if (!csr.csr.ready)
+ *packet_size = 0;
+ write32(DEFAULT_HECIBAR | 0x4, read32(DEFAULT_HECIBAR | 0x4) | 4);
+ return 0;
+}
+
/* FIXME: Add timeout. */
/*
 * Reassemble a complete HECI message from successive packets.
 * *message_size is in/out: buffer capacity in bytes on entry, actual
 * message length on exit (set to 0 on failure).  Returns 0 on success,
 * -1 if a packet fails, the stream stalls, or the buffer fills before
 * a packet with is_complete arrives.
 */
static int
recv_heci_message(struct raminfo *info, u32 * message, u32 * message_size)
{
	struct mei_header head;
	int current_position;

	current_position = 0;
	while (1) {
		u32 current_size;
		current_size = *message_size - current_position;
		if (recv_heci_packet
		    (info, &head, message + (current_position >> 2),
		     &current_size) == -1)
			break;
		if (!current_size)
			break;
		current_position += current_size;
		if (head.is_complete) {
			*message_size = current_position;
			return 0;
		}

		if (current_position >= *message_size)
			break;
	}
	*message_size = 0;
	return -1;
}
+
/*
 * Send the MKHI "set UMA" message to ME client 7, telling the ME the
 * base address and size of its reserved UMA region, then wait for the
 * reply.  Dies unless the reply echoes the command with bit 7 set
 * (the acknowledge convention used here).  Silently returns if no
 * reply can be received at all.
 */
static void send_heci_uma_message(struct raminfo *info)
{
	struct uma_reply {
		u8 group_id;
		u8 command;
		u8 reserved;
		u8 result;
		u8 field2;
		u8 unk3[0x48 - 4 - 1];
	} __attribute__ ((packed)) reply;
	struct uma_message {
		u8 group_id;
		u8 cmd;
		u8 reserved;
		u8 result;
		u32 c2;
		u64 heci_uma_addr;
		u32 memory_reserved_for_heci_mb;
		u16 c3;
	} __attribute__ ((packed)) msg = {
		0, MKHI_SET_UMA, 0, 0,
		    0x82,
		    info->heci_uma_addr, info->memory_reserved_for_heci_mb, 0};
	u32 reply_size;

	send_heci_message((u8 *) & msg, sizeof(msg), 0, 7);

	reply_size = sizeof(reply);
	if (recv_heci_message(info, (u32 *) & reply, &reply_size) == -1)
		return;

	if (reply.command != (MKHI_SET_UMA | (1 << 7)))
		die("HECI init failed\n");
}
+
/*
 * Discover the ME's UMA requirement from HECI config space and set it
 * up: record the HECI BAR, size and top-of-memory-relative address of
 * the UMA region, run a DMI/RCBA handshake (bit patterns from reverse
 * engineering), notify the ME via send_heci_uma_message(), and finally
 * hide the HECI device by clearing its BAR and command register.
 */
static void setup_heci_uma(struct raminfo *info)
{
	u32 reg44;

	reg44 = pcie_read_config32(HECIDEV, 0x44);	// = 0x80010020
	info->memory_reserved_for_heci_mb = 0;
	info->heci_uma_addr = 0;
	/* Bail out unless config 0x44 bit 16 is set and 0x40 bit 5 clear. */
	if (!((reg44 & 0x10000) && !(pcie_read_config32(HECIDEV, 0x40) & 0x20)))
		return;

	info->heci_bar = pcie_read_config32(HECIDEV, 0x10) & 0xFFFFFFF0;
	info->memory_reserved_for_heci_mb = reg44 & 0x3f;
	/* UMA sits at (TOM - reserved size), converted to bytes. */
	info->heci_uma_addr =
	    ((u64)
	     ((((u64) pcie_read_config16(NORTHBRIDGE, D0F0_TOM)) << 6) -
	      info->memory_reserved_for_heci_mb)) << 20;

	pcie_read_config32(NORTHBRIDGE, DMIBAR);
	if (info->memory_reserved_for_heci_mb) {
		/* Clear bit 7 in paired DMI/RCBA registers, then kick off
		   the handshake and wait for both busy bits to drop. */
		write32(DEFAULT_DMIBAR | 0x14,
			read32(DEFAULT_DMIBAR | 0x14) & ~0x80);
		write32(DEFAULT_RCBA | 0x14,
			read32(DEFAULT_RCBA | 0x14) & ~0x80);
		write32(DEFAULT_DMIBAR | 0x20,
			read32(DEFAULT_DMIBAR | 0x20) & ~0x80);
		write32(DEFAULT_RCBA | 0x20,
			read32(DEFAULT_RCBA | 0x20) & ~0x80);
		write32(DEFAULT_DMIBAR | 0x2c,
			read32(DEFAULT_DMIBAR | 0x2c) & ~0x80);
		write32(DEFAULT_RCBA | 0x30,
			read32(DEFAULT_RCBA | 0x30) & ~0x80);
		write32(DEFAULT_DMIBAR | 0x38,
			read32(DEFAULT_DMIBAR | 0x38) & ~0x80);
		write32(DEFAULT_RCBA | 0x40,
			read32(DEFAULT_RCBA | 0x40) & ~0x80);

		write32(DEFAULT_RCBA | 0x40, 0x87000080);	// OK
		write32(DEFAULT_DMIBAR | 0x38, 0x87000080);	// OK
		while (read16(DEFAULT_RCBA | 0x46) & 2
		       && read16(DEFAULT_DMIBAR | 0x3e) & 2) ;
	}

	write_mchbar32(0x24, 0x10000 + info->memory_reserved_for_heci_mb);

	send_heci_uma_message(info);

	/* Hide the HECI PCI function. */
	pcie_write_config32(HECIDEV, 0x10, 0x0);
	pcie_write_config8(HECIDEV, 0x4, 0x0);

}
+
+static int have_match_ranks(struct raminfo *info, int channel, int ranks)
+{
+ int ranks_in_channel;
+ ranks_in_channel = info->populated_ranks[channel][0][0]
+ + info->populated_ranks[channel][0][1]
+ + info->populated_ranks[channel][1][0]
+ + info->populated_ranks[channel][1][1];
+
+ /* empty channel */
+ if (ranks_in_channel == 0)
+ return 1;
+
+ if (ranks_in_channel != ranks)
+ return 0;
+ /* single slot */
+ if (info->populated_ranks[channel][0][0] !=
+ info->populated_ranks[channel][1][0])
+ return 1;
+ if (info->populated_ranks[channel][0][1] !=
+ info->populated_ranks[channel][1][1])
+ return 1;
+ if (info->is_x16_module[channel][0] != info->is_x16_module[channel][1])
+ return 0;
+ if (info->density[channel][0] != info->density[channel][1])
+ return 0;
+ return 1;
+}
+
+#define WTF1 1
+
/*
 * Seed info->training with current hardware state: timing set 0 is
 * initialized to the midpoint value 32 for every lane, while sets 1-3
 * are read back from the timing registers.  Set 1 gets a fixed +11
 * offset -- reason unknown, flagged by the original "!!!!" marker.
 */
static void read_4090(struct raminfo *info)
{
	int i, channel, slot, rank, lane;
	for (i = 0; i < 2; i++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_RANKS; rank++)
				for (lane = 0; lane < 9; lane++)
					info->training.
					    lane_timings[0][i][slot][rank][lane]
					    = 32;

	for (i = 1; i < 4; i++)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++)
					for (lane = 0; lane < 9; lane++) {
						info->training.
						    lane_timings[i][channel]
						    [slot][rank][lane] =
						    read_500(info, channel,
							     get_timing_register_addr
							     (lane, i, slot,
							      rank), 9)
						    + (i == 1) * 11;	// !!!!
					}

}
+
+static u32 get_etalon2(int flip, u32 addr)
+{
+ const u16 invmask[] = {
+ 0xaaaa, 0x6db6, 0x4924, 0xeeee, 0xcccc, 0x8888, 0x7bde, 0x739c,
+ 0x6318, 0x4210, 0xefbe, 0xcf3c, 0x8e38, 0x0c30, 0x0820
+ };
+ u32 ret;
+ u32 comp4 = addr / 480;
+ addr %= 480;
+ u32 comp1 = addr & 0xf;
+ u32 comp2 = (addr >> 4) & 1;
+ u32 comp3 = addr >> 5;
+
+ if (comp4)
+ ret = 0x1010101 << (comp4 - 1);
+ else
+ ret = 0;
+ if (flip ^ (((invmask[comp3] >> comp1) ^ comp2) & 1))
+ ret = ~ret;
+
+ return ret;
+}
+
/* Clear variable MTRR 3 (base and mask), tearing down the temporary
   cache window set up by enable_cache(). */
static void disable_cache(void)
{
	msr_t msr = {.lo = 0, .hi = 0 };

	wrmsr(MTRRphysBase_MSR(3), msr);
	wrmsr(MTRRphysMask_MSR(3), msr);
}
+
/* Point variable MTRR 3 at [base, base + size) as write-protect, making
   reads of the memory-test region cacheable.  The mask is derived from
   size rounded up to a 4K boundary; hi = 0xf presumably covers the
   supported physical address bits above 32 -- confirm against the CPU's
   MAXPHYADDR. */
static void enable_cache(unsigned int base, unsigned int size)
{
	msr_t msr;
	msr.lo = base | MTRR_TYPE_WRPROT;
	msr.hi = 0;
	wrmsr(MTRRphysBase_MSR(3), msr);
	msr.lo = ((~(ALIGN_DOWN(size + 4096, 4096) - 1) | MTRRdefTypeEn)
		  & 0xffffffff);
	msr.hi = 0x0000000f;
	wrmsr(MTRRphysMask_MSR(3), msr);
}
+
/* Flush every 64-byte cache line covering [start, start + size rounded
   up to 4K) back to memory via clflush. */
static void flush_cache(u32 start, u32 size)
{
	u32 end;
	u32 addr;

	end = start + (ALIGN_DOWN(size + 4096, 4096));
	for (addr = start; addr < end; addr += 64)
		clflush(addr);
}
+
/* Write 0x01 to northbridge config offset 0xc0 -- presumably a
   write-one-to-clear memory error status bit; confirm against the
   chipset datasheet. */
static void clear_errors(void)
{
	pcie_write_config8(NORTHBRIDGE, 0xc0, 0x01);
}
+
/*
 * Fill rank "totalrank"'s 256MB window with the etalon2 test pattern:
 * each 16-byte group carries two dword pairs from consecutive pattern
 * offsets.  Error status is cleared every 320 writes.
 */
static void write_testing(struct raminfo *info, int totalrank, int flip)
{
	int nwrites = 0;
	/* in 8-byte units. */
	u32 offset;
	u32 base;

	base = totalrank << 28;
	for (offset = 0; offset < 9 * 480; offset += 2) {
		write32(base + offset * 8, get_etalon2(flip, offset));
		write32(base + offset * 8 + 4, get_etalon2(flip, offset));
		write32(base + offset * 8 + 8, get_etalon2(flip, offset + 1));
		write32(base + offset * 8 + 12, get_etalon2(flip, offset + 1));
		nwrites += 4;
		if (nwrites >= 320) {
			clear_errors();
			nwrites = 0;
		}
	}
}
+
/*
 * Verify the pattern written by write_testing() through a temporary
 * write-protect MTRR window.  Mismatches are accumulated per byte lane
 * into failxor; the return value has one bit set for each of the 8
 * byte lanes that saw any error.  The scan short-circuits once every
 * lane has failed.
 */
static u8 check_testing(struct raminfo *info, u8 total_rank, int flip)
{
	u8 failmask = 0;
	int i;
	int comp1, comp2, comp3;
	u32 failxor[2] = { 0, 0 };

	enable_cache((total_rank << 28), 1728 * 5 * 4);

	for (comp3 = 0; comp3 < 9 && failmask != 0xff; comp3++) {
		for (comp1 = 0; comp1 < 4; comp1++)
			for (comp2 = 0; comp2 < 60; comp2++) {
				u32 re[4];
				u32 curroffset =
				    comp3 * 8 * 60 + 2 * comp1 + 8 * comp2;
				/* 16-byte read: two dword pairs per offset. */
				read128((total_rank << 28) | (curroffset << 3),
					(u64 *) re);
				failxor[0] |=
				    get_etalon2(flip, curroffset) ^ re[0];
				failxor[1] |=
				    get_etalon2(flip, curroffset) ^ re[1];
				failxor[0] |=
				    get_etalon2(flip, curroffset | 1) ^ re[2];
				failxor[1] |=
				    get_etalon2(flip, curroffset | 1) ^ re[3];
			}
		/* Fold the two accumulators into one bit per byte lane. */
		for (i = 0; i < 8; i++)
			if ((0xff << (8 * (i % 4))) & failxor[i / 4])
				failmask |= 1 << i;
	}
	disable_cache();
	flush_cache((total_rank << 28), 1728 * 5 * 4);
	return failmask;
}
+
/* Bit-source table for get_etalon() memory-test patterns; values appear
   to come from reverse engineering of the vendor training algorithm. */
const u32 seed1[0x18] = {
	0x3a9d5ab5, 0x576cb65b, 0x555773b6, 0x2ab772ee,
	0x555556ee, 0x3a9d5ab5, 0x576cb65b, 0x555773b6,
	0x2ab772ee, 0x555556ee, 0x5155a555, 0x5155a555,
	0x5155a555, 0x5155a555, 0x3a9d5ab5, 0x576cb65b,
	0x555773b6, 0x2ab772ee, 0x555556ee, 0x55d6b4a5,
	0x366d6b3a, 0x2ae5ddbb, 0x3b9ddbb7, 0x55d6b4a5,
};
+
+static u32 get_seed2(int a, int b)
+{
+ const u32 seed2[5] = {
+ 0x55555555, 0x33333333, 0x2e555a55, 0x55555555,
+ 0x5b6db6db,
+ };
+ u32 r;
+ r = seed2[(a + (a >= 10)) / 5];
+ return b ? ~r : r;
+}
+
/* Compute a 5-bit shift amount for the pattern generators: start from
   comp2 and subtract a single tap bit selected from the seed3 table by
   comp5 (row) and the low 3 bits of x (column), wrapping modulo 32. */
static int make_shift(int comp2, int comp5, int x)
{
	static const unsigned char seed3[32] = {
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x38, 0x1c, 0x3c, 0x18, 0x38, 0x38,
		0x38, 0x38, 0x38, 0x38, 0x0f, 0x0f, 0x0f, 0x0f,
		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
	};
	const int tap = (seed3[comp5] >> (x & 7)) & 1;

	return (comp2 - tap) & 0x1f;
}
+
/*
 * Generate the expected 32-bit test pattern for address "addr",
 * optionally inverted by "flip".  Bit-fields of the address index the
 * seed1/seed2 tables through make_shift(); comp3 selects which bit in
 * each 16-bit half carries the part1/part2 pattern bits, while the
 * remaining bits come from a per-byte mask.  Pure function of its
 * inputs (plus the constant seed tables).
 */
static u32 get_etalon(int flip, u32 addr)
{
	u32 mask_byte = 0;
	int comp1 = (addr >> 1) & 1;
	int comp2 = (addr >> 3) & 0x1f;
	int comp3 = (addr >> 8) & 0xf;
	int comp4 = (addr >> 12) & 0xf;
	int comp5 = (addr >> 16) & 0x1f;
	/* Punch out bit comp3 in both 16-bit halves for part1/part2. */
	u32 mask_bit = ~(0x10001 << comp3);
	u32 part1;
	u32 part2;
	int byte;

	part2 =
	    ((seed1[comp5] >>
	      make_shift(comp2, comp5,
			 (comp3 >> 3) | (comp1 << 2) | 2)) & 1) ^ flip;
	part1 =
	    ((seed1[comp5] >>
	      make_shift(comp2, comp5,
			 (comp3 >> 3) | (comp1 << 2) | 0)) & 1) ^ flip;

	/* Each byte is all-ones or all-zeroes per the seed2 stream. */
	for (byte = 0; byte < 4; byte++)
		if ((get_seed2(comp5, comp4) >>
		     make_shift(comp2, comp5, (byte | (comp1 << 2)))) & 1)
			mask_byte |= 0xff << (8 * byte);

	return (mask_bit & mask_byte) | (part1 << comp3) | (part2 <<
							    (comp3 + 16));
}
+
/* Fill one 8KB block (2048 dwords) of the given rank/region with the
   get_etalon() pattern. */
static void
write_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block,
		    char flip)
{
	int i;
	for (i = 0; i < 2048; i++)
		write32((totalrank << 28) | (region << 25) | (block << 16) |
			(i << 2), get_etalon(flip, (block << 16) | (i << 2)));
}
+
/*
 * Verify the pattern written by write_testing_type2() through a
 * temporary write-protect MTRR window over the whole 128MB rank view.
 * Mismatch bits are accumulated into failxor (alternating dwords go to
 * the two accumulators); the return value has one bit set per failing
 * byte lane, short-circuiting once all 8 lanes have failed.
 */
static u8
check_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block,
		    char flip)
{
	u8 failmask = 0;
	u32 failxor[2];
	int i;
	int comp1, comp2, comp3;

	failxor[0] = 0;
	failxor[1] = 0;

	enable_cache(totalrank << 28, 134217728);
	for (comp3 = 0; comp3 < 2 && failmask != 0xff; comp3++) {
		for (comp1 = 0; comp1 < 16; comp1++)
			for (comp2 = 0; comp2 < 64; comp2++) {
				u32 addr =
				    (totalrank << 28) | (region << 25) | (block
									  << 16)
				    | (comp3 << 12) | (comp2 << 6) | (comp1 <<
								      2);
				failxor[comp1 & 1] |=
				    read32(addr) ^ get_etalon(flip, addr);
			}
		/* Fold the accumulators into one bit per byte lane. */
		for (i = 0; i < 8; i++)
			if ((0xff << (8 * (i % 4))) & failxor[i / 4])
				failmask |= 1 << i;
	}
	disable_cache();
	flush_cache((totalrank << 28) | (region << 25) | (block << 16), 16384);
	return failmask;
}
+
+static int check_bounded(unsigned short *vals, u16 bound)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ if (vals[i] < bound)
+ return 0;
+ return 1;
+}
+
/* Per-lane states for the margin-search state machine in do_fsm():
   searching for the first pass, counting passes, inside the margin,
   and finished. */
enum state {
	BEFORE_USABLE = 0, AT_USABLE = 1, AT_MARGIN = 2, COMPLETE = 3
};
+
+static int validate_state(enum state *in)
+{
+ int i;
+ for (i = 0; i < 8; i++)
+ if (in[i] != COMPLETE)
+ return 0;
+ return 1;
+}
+
+static void
+do_fsm(enum state *state, u16 * counter,
+ u8 fail_mask, int margin, int uplimit,
+ u8 * res_low, u8 * res_high, u8 val)
+{
+ int lane;
+
+ for (lane = 0; lane < 8; lane++) {
+ int is_fail = (fail_mask >> lane) & 1;
+ switch (state[lane]) {
+ case BEFORE_USABLE:
+ if (!is_fail) {
+ counter[lane] = 1;
+ state[lane] = AT_USABLE;
+ break;
+ }
+ counter[lane] = 0;
+ state[lane] = BEFORE_USABLE;
+ break;
+ case AT_USABLE:
+ if (!is_fail) {
+ ++counter[lane];
+ if (counter[lane] >= margin) {
+ state[lane] = AT_MARGIN;
+ res_low[lane] = val - margin + 1;
+ break;
+ }
+ state[lane] = 1;
+ break;
+ }
+ counter[lane] = 0;
+ state[lane] = BEFORE_USABLE;
+ break;
+ case AT_MARGIN:
+ if (is_fail) {
+ state[lane] = COMPLETE;
+ res_high[lane] = val - 1;
+ } else {
+ counter[lane]++;
+ state[lane] = AT_MARGIN;
+ if (val == uplimit) {
+ state[lane] = COMPLETE;
+ res_high[lane] = uplimit;
+ }
+ }
+ break;
+ case COMPLETE:
+ break;
+ }
+ }
+}
+
+static void
+train_ram_at_178(struct raminfo *info, u8 channel, int slot, int rank,
+ u8 total_rank, u8 reg_178, int first_run, int niter,
+ timing_bounds_t * timings)
+{
+ int lane;
+ enum state state[8];
+ u16 count[8];
+ u8 lower_usable[8];
+ u8 upper_usable[8];
+ unsigned short num_sucessfully_checked[8];
+ u8 secondary_total_rank;
+ u8 reg1b3;
+
+ if (info->populated_ranks_mask[1]) {
+ if (channel == 1)
+ secondary_total_rank =
+ info->populated_ranks[1][0][0] +
+ info->populated_ranks[1][0][1]
+ + info->populated_ranks[1][1][0] +
+ info->populated_ranks[1][1][1];
+ else
+ secondary_total_rank = 0;
+ } else
+ secondary_total_rank = total_rank;
+
+ {
+ int i;
+ for (i = 0; i < 8; i++)
+ state[i] = BEFORE_USABLE;
+ }
+
+ if (!first_run) {
+ int is_all_ok = 1;
+ for (lane = 0; lane < 8; lane++)
+ if (timings[reg_178][channel][slot][rank][lane].
+ smallest ==
+ timings[reg_178][channel][slot][rank][lane].
+ largest) {
+ timings[reg_178][channel][slot][rank][lane].
+ smallest = 0;
+ timings[reg_178][channel][slot][rank][lane].
+ largest = 0;
+ is_all_ok = 0;
+ }
+ if (is_all_ok) {
+ int i;
+ for (i = 0; i < 8; i++)
+ state[i] = COMPLETE;
+ }
+ }
+
+ for (reg1b3 = 0; reg1b3 < 0x30 && !validate_state(state); reg1b3++) {
+ u8 failmask = 0;
+ write_1d0(reg1b3 ^ 32, 0x1b3, 6, 1);
+ write_1d0(reg1b3 ^ 32, 0x1a3, 6, 1);
+ failmask = check_testing(info, total_rank, 0);
+ write_mchbar32(0xfb0, read_mchbar32(0xfb0) | 0x00030000);
+ do_fsm(state, count, failmask, 5, 47, lower_usable,
+ upper_usable, reg1b3);
+ }
+
+ if (reg1b3) {
+ write_1d0(0, 0x1b3, 6, 1);
+ write_1d0(0, 0x1a3, 6, 1);
+ for (lane = 0; lane < 8; lane++) {
+ if (state[lane] == COMPLETE) {
+ timings[reg_178][channel][slot][rank][lane].
+ smallest =
+ lower_usable[lane] +
+ (info->training.
+ lane_timings[0][channel][slot][rank][lane]
+ & 0x3F) - 32;
+ timings[reg_178][channel][slot][rank][lane].
+ largest =
+ upper_usable[lane] +
+ (info->training.
+ lane_timings[0][channel][slot][rank][lane]
+ & 0x3F) - 32;
+ }
+ }
+ }
+
+ if (!first_run) {
+ for (lane = 0; lane < 8; lane++)
+ if (state[lane] == COMPLETE) {
+ write_500(info, channel,
+ timings[reg_178][channel][slot][rank]
+ [lane].smallest,
+ get_timing_register_addr(lane, 0,
+ slot, rank),
+ 9, 1);
+ write_500(info, channel,
+ timings[reg_178][channel][slot][rank]
+ [lane].smallest +
+ info->training.
+ lane_timings[1][channel][slot][rank]
+ [lane]
+ -
+ info->training.
+ lane_timings[0][channel][slot][rank]
+ [lane], get_timing_register_addr(lane,
+ 1,
+ slot,
+ rank),
+ 9, 1);
+ num_sucessfully_checked[lane] = 0;
+ } else
+ num_sucessfully_checked[lane] = -1;
+
+ do {
+ u8 failmask = 0;
+ int i;
+ for (i = 0; i < niter; i++) {
+ if (failmask == 0xFF)
+ break;
+ failmask |=
+ check_testing_type2(info, total_rank, 2, i,
+ 0);
+ failmask |=
+ check_testing_type2(info, total_rank, 3, i,
+ 1);
+ }
+ write_mchbar32(0xfb0,
+ read_mchbar32(0xfb0) | 0x00030000);
+ for (lane = 0; lane < 8; lane++)
+ if (num_sucessfully_checked[lane] != 0xffff) {
+ if ((1 << lane) & failmask) {
+ if (timings[reg_178][channel]
+ [slot][rank][lane].
+ largest <=
+ timings[reg_178][channel]
+ [slot][rank][lane].smallest)
+ num_sucessfully_checked
+ [lane] = -1;
+ else {
+ num_sucessfully_checked
+ [lane] = 0;
+ timings[reg_178]
+ [channel][slot]
+ [rank][lane].
+ smallest++;
+ write_500(info, channel,
+ timings
+ [reg_178]
+ [channel]
+ [slot][rank]
+ [lane].
+ smallest,
+ get_timing_register_addr
+ (lane, 0,
+ slot, rank),
+ 9, 1);
+ write_500(info, channel,
+ timings
+ [reg_178]
+ [channel]
+ [slot][rank]
+ [lane].
+ smallest +
+ info->
+ training.
+ lane_timings
+ [1][channel]
+ [slot][rank]
+ [lane]
+ -
+ info->
+ training.
+ lane_timings
+ [0][channel]
+ [slot][rank]
+ [lane],
+ get_timing_register_addr
+ (lane, 1,
+ slot, rank),
+ 9, 1);
+ }
+ } else
+ num_sucessfully_checked[lane]++;
+ }
+ }
+ while (!check_bounded(num_sucessfully_checked, 2));
+
+ for (lane = 0; lane < 8; lane++)
+ if (state[lane] == COMPLETE) {
+ write_500(info, channel,
+ timings[reg_178][channel][slot][rank]
+ [lane].largest,
+ get_timing_register_addr(lane, 0,
+ slot, rank),
+ 9, 1);
+ write_500(info, channel,
+ timings[reg_178][channel][slot][rank]
+ [lane].largest +
+ info->training.
+ lane_timings[1][channel][slot][rank]
+ [lane]
+ -
+ info->training.
+ lane_timings[0][channel][slot][rank]
+ [lane], get_timing_register_addr(lane,
+ 1,
+ slot,
+ rank),
+ 9, 1);
+ num_sucessfully_checked[lane] = 0;
+ } else
+ num_sucessfully_checked[lane] = -1;
+
+ do {
+ int failmask = 0;
+ int i;
+ for (i = 0; i < niter; i++) {
+ if (failmask == 0xFF)
+ break;
+ failmask |=
+ check_testing_type2(info, total_rank, 2, i,
+ 0);
+ failmask |=
+ check_testing_type2(info, total_rank, 3, i,
+ 1);
+ }
+
+ write_mchbar32(0xfb0,
+ read_mchbar32(0xfb0) | 0x00030000);
+ for (lane = 0; lane < 8; lane++) {
+ if (num_sucessfully_checked[lane] != 0xffff) {
+ if ((1 << lane) & failmask) {
+ if (timings[reg_178][channel]
+ [slot][rank][lane].
+ largest <=
+ timings[reg_178][channel]
+ [slot][rank][lane].
+ smallest) {
+ num_sucessfully_checked
+ [lane] = -1;
+ } else {
+ num_sucessfully_checked
+ [lane] = 0;
+ timings[reg_178]
+ [channel][slot]
+ [rank][lane].
+ largest--;
+ write_500(info, channel,
+ timings
+ [reg_178]
+ [channel]
+ [slot][rank]
+ [lane].
+ largest,
+ get_timing_register_addr
+ (lane, 0,
+ slot, rank),
+ 9, 1);
+ write_500(info, channel,
+ timings
+ [reg_178]
+ [channel]
+ [slot][rank]
+ [lane].
+ largest +
+ info->
+ training.
+ lane_timings
+ [1][channel]
+ [slot][rank]
+ [lane]
+ -
+ info->
+ training.
+ lane_timings
+ [0][channel]
+ [slot][rank]
+ [lane],
+ get_timing_register_addr
+ (lane, 1,
+ slot, rank),
+ 9, 1);
+ }
+ } else
+ num_sucessfully_checked[lane]++;
+ }
+ }
+ }
+ while (!check_bounded(num_sucessfully_checked, 3));
+
+ for (lane = 0; lane < 8; lane++) {
+ write_500(info, channel,
+ info->training.
+ lane_timings[0][channel][slot][rank][lane],
+ get_timing_register_addr(lane, 0, slot, rank),
+ 9, 1);
+ write_500(info, channel,
+ info->training.
+ lane_timings[1][channel][slot][rank][lane],
+ get_timing_register_addr(lane, 1, slot, rank),
+ 9, 1);
+ if (timings[reg_178][channel][slot][rank][lane].
+ largest <=
+ timings[reg_178][channel][slot][rank][lane].
+ smallest) {
+ timings[reg_178][channel][slot][rank][lane].
+ largest = 0;
+ timings[reg_178][channel][slot][rank][lane].
+ smallest = 0;
+ }
+ }
+ }
+}
+
/* Set the 6-bit field at 0x10b to VAL and compensate timing register 0 of
   every lane of every populated rank by the per-clock-speed constant
   lut16[info->clock_speed_index]: subtracted (clamped at 0) when the field
   becomes 1, added when it is cleared.  No-op if the field already holds
   VAL. */
static void set_10b(struct raminfo *info, u8 val)
{
	int channel;
	int slot, rank;
	int lane;

	/* Skip the expensive per-lane rewrite when nothing would change. */
	if (read_1d0(0x10b, 6) == val)
		return;

	write_1d0(val, 0x10b, 6, 1);

	FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 9; lane++) {
		u16 reg_500;
		reg_500 = read_500(info, channel,
				   get_timing_register_addr(lane, 0, slot,
							    rank), 9);
		if (val == 1) {
			if (lut16[info->clock_speed_index] <= reg_500)
				reg_500 -= lut16[info->clock_speed_index];
			else
				reg_500 = 0;	/* clamp: avoid u16 underflow */
		} else {
			reg_500 += lut16[info->clock_speed_index];
		}
		write_500(info, channel, reg_500,
			  get_timing_register_addr(lane, 0, slot, rank), 9, 1);
	}
}
+
+static void set_ecc(int onoff)
+{
+ int channel;
+ for (channel = 0; channel < NUM_CHANNELS; channel++) {
+ u8 t;
+ t = read_mchbar8((channel << 10) + 0x5f8);
+ if (onoff)
+ t |= 1;
+ else
+ t &= ~1;
+ write_mchbar8((channel << 10) + 0x5f8, t);
+ }
+}
+
+static void set_178(u8 val)
+{
+ if (val >= 31)
+ val = val - 31;
+ else
+ val = 63 - val;
+
+ write_1d0(2 * val, 0x178, 7, 1);
+}
+
+static void
+write_500_timings_type(struct raminfo *info, int channel, int slot, int rank,
+ int type)
+{
+ int lane;
+
+ for (lane = 0; lane < 8; lane++)
+ write_500(info, channel,
+ info->training.
+ lane_timings[type][channel][slot][rank][lane],
+ get_timing_register_addr(lane, type, slot, rank), 9,
+ 0);
+}
+
/* Sweep the 6-bit offset at 0x1bb over 0..63 (XORed with 32) while running
   the memory test, feeding pass/fail per lane into do_fsm() to find each
   lane's usable window [lower_usable, upper_usable].  Then centers timing
   register 3 of every lane inside that window (with a small bias on
   non-zero silicon revisions) and records the window bounds and base
   offset in info->training.timing2_*.  Dies if no window is found. */
static void
try_timing_offsets(struct raminfo *info, int channel,
		   int slot, int rank, int totalrank)
{
	u16 count[8];
	enum state state[8];
	u8 lower_usable[8], upper_usable[8];
	int lane;
	int i;
	int flip = 1;
	int timing_offset;

	for (i = 0; i < 8; i++)
		state[i] = BEFORE_USABLE;

	memset(count, 0, sizeof(count));

	/* Start from the saved type-2 timing plus 32 on timing register 3. */
	for (lane = 0; lane < 8; lane++)
		write_500(info, channel,
			  info->training.
			  lane_timings[2][channel][slot][rank][lane] + 32,
			  get_timing_register_addr(lane, 3, slot, rank), 9, 1);

	for (timing_offset = 0; !validate_state(state) && timing_offset < 64;
	     timing_offset++) {
		u8 failmask;
		write_1d0(timing_offset ^ 32, 0x1bb, 6, 1);
		failmask = 0;
		/* Two test passes with alternating data polarity; stop early
		   once every lane has already failed. */
		for (i = 0; i < 2 && failmask != 0xff; i++) {
			flip = !flip;
			write_testing(info, totalrank, flip);
			failmask |= check_testing(info, totalrank, flip);
		}
		do_fsm(state, count, failmask, 10, 63, lower_usable,
		       upper_usable, timing_offset);
	}
	write_1d0(0, 0x1bb, 6, 1);
	dump_timings(info);
	if (!validate_state(state))
		die("Couldn't discover DRAM timings (1)\n");

	for (lane = 0; lane < 8; lane++) {
		u8 bias = 0;

		if (info->silicon_revision) {
			int usable_length;

			usable_length = upper_usable[lane] - lower_usable[lane];
			if (usable_length >= 20) {
				/* Bias is capped at 2. */
				bias = usable_length / 2 - 10;
				if (bias >= 2)
					bias = 2;
			}
		}
		write_500(info, channel,
			  info->training.
			  lane_timings[2][channel][slot][rank][lane] +
			  (upper_usable[lane] + lower_usable[lane]) / 2 - bias,
			  get_timing_register_addr(lane, 3, slot, rank), 9, 1);
		info->training.timing2_bounds[channel][slot][rank][lane][0] =
		    info->training.lane_timings[2][channel][slot][rank][lane] +
		    lower_usable[lane];
		info->training.timing2_bounds[channel][slot][rank][lane][1] =
		    info->training.lane_timings[2][channel][slot][rank][lane] +
		    upper_usable[lane];
		info->training.timing2_offset[channel][slot][rank][lane] =
		    info->training.lane_timings[2][channel][slot][rank][lane];
	}
}
+
/* Pick the final timing value for one lane: a weighted average of the
   window centers measured at reg178 = center_178 - span, center_178 and
   center_178 + span.  Weights (and the span) are tuned per silicon
   revision / channel / lane / population pattern.  Windows narrower than
   5 are ignored; the result is then nudged so that both margins around it
   at center_178 are at least 10 where possible.  Dies if no window
   qualified. */
static u8
choose_training(struct raminfo *info, int channel, int slot, int rank,
		int lane, timing_bounds_t * timings, u8 center_178)
{
	u16 central_weight;
	u16 side_weight;
	unsigned int sum = 0, count = 0;
	u8 span;
	u8 lower_margin, upper_margin;
	u8 reg_178;
	u8 result;

	span = 12;
	central_weight = 20;
	side_weight = 20;
	/* Special-case weightings discovered for specific silicon
	   revisions / rank populations. */
	if (info->silicon_revision == 1 && channel == 1) {
		central_weight = 5;
		side_weight = 20;
		if ((info->
		     populated_ranks_mask[1] ^ (info->
						populated_ranks_mask[1] >> 2)) &
		    1)
			span = 18;
	}
	if ((info->populated_ranks_mask[0] & 5) == 5) {
		central_weight = 20;
		side_weight = 20;
	}
	if (info->clock_speed_index >= 2
	    && (info->populated_ranks_mask[0] & 5) == 5 && slot == 1) {
		if (info->silicon_revision == 1) {
			switch (channel) {
			case 0:
				if (lane == 1) {
					central_weight = 10;
					side_weight = 20;
				}
				break;
			case 1:
				if (lane == 6) {
					side_weight = 5;
					central_weight = 20;
				}
				break;
			}
		}
		if (info->silicon_revision == 0 && channel == 0 && lane == 0) {
			side_weight = 5;
			central_weight = 20;
		}
	}
	/* Three sample points: center - span, center, center + span. */
	for (reg_178 = center_178 - span; reg_178 <= center_178 + span;
	     reg_178 += span) {
		u8 smallest;
		u8 largest;
		largest = timings[reg_178][channel][slot][rank][lane].largest;
		smallest = timings[reg_178][channel][slot][rank][lane].smallest;
		if (largest - smallest + 1 >= 5) {
			unsigned int weight;
			if (reg_178 == center_178)
				weight = central_weight;
			else
				weight = side_weight;
			/* (largest + smallest) is twice the window center;
			   compensated by the 2 * count divisor below. */
			sum += weight * (largest + smallest);
			count += weight;
		}
	}
	dump_timings(info);
	if (count == 0)
		die("Couldn't discover DRAM timings (2)\n");
	result = sum / (2 * count);
	lower_margin =
	    result - timings[center_178][channel][slot][rank][lane].smallest;
	upper_margin =
	    timings[center_178][channel][slot][rank][lane].largest - result;
	/* Shift the result towards whichever bound leaves less than 10 of
	   margin, without dropping the other side below 10. */
	if (upper_margin < 10 && lower_margin > 10)
		result -= min(lower_margin - 10, 10 - upper_margin);
	if (upper_margin > 10 && lower_margin < 10)
		result += min(upper_margin - 10, 10 - lower_margin);
	return result;
}
+
+#define STANDARD_MIN_MARGIN 5
+
/* Choose the final reg178 value as the weighted average (weight = margin
   above STANDARD_MIN_MARGIN) of all sampled reg178 settings, where a
   setting's margin is the narrowest timing window over all populated
   ranks and lanes.  Also records reg178_smallest/reg178_largest as the
   edges (pulled inwards by one/two steps) of the widest contiguous run
   whose margin clears a threshold, relaxing the threshold from 30 down
   to 5 until the run is at least 0x21 wide.  Dies if no setting has any
   margin. */
static u8 choose_reg178(struct raminfo *info, timing_bounds_t * timings)
{
	u16 margin[64];
	int lane, rank, slot, channel;
	u8 reg178;
	int count = 0, sum = 0;

	for (reg178 = reg178_min[info->clock_speed_index];
	     reg178 < reg178_max[info->clock_speed_index];
	     reg178 += reg178_step[info->clock_speed_index]) {
		/* u16: -1 wraps to 0xffff, i.e. "no lane seen yet". */
		margin[reg178] = -1;
		FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) {
			int curmargin =
			    timings[reg178][channel][slot][rank][lane].largest -
			    timings[reg178][channel][slot][rank][lane].
			    smallest + 1;
			if (curmargin < margin[reg178])
				margin[reg178] = curmargin;
		}
		if (margin[reg178] >= STANDARD_MIN_MARGIN) {
			u16 weight;
			weight = margin[reg178] - STANDARD_MIN_MARGIN;
			sum += weight * reg178;
			count += weight;
		}
	}
	dump_timings(info);
	if (count == 0)
		die("Couldn't discover DRAM timings (3)\n");

	u8 threshold;

	for (threshold = 30; threshold >= 5; threshold--) {
		int usable_length = 0;
		int smallest_fount = 0;	/* sic: "smallest found" */
		for (reg178 = reg178_min[info->clock_speed_index];
		     reg178 < reg178_max[info->clock_speed_index];
		     reg178 += reg178_step[info->clock_speed_index])
			if (margin[reg178] >= threshold) {
				usable_length +=
				    reg178_step[info->clock_speed_index];
				info->training.reg178_largest =
				    reg178 -
				    2 * reg178_step[info->clock_speed_index];

				if (!smallest_fount) {
					smallest_fount = 1;
					info->training.reg178_smallest =
					    reg178 +
					    reg178_step[info->
							clock_speed_index];
				}
			}
		if (usable_length >= 0x21)
			break;
	}

	return sum / count;
}
+
+static int check_cached_sanity(struct raminfo *info)
+{
+ int lane;
+ int slot, rank;
+ int channel;
+
+ if (!info->cached_training)
+ return 0;
+
+ for (channel = 0; channel < NUM_CHANNELS; channel++)
+ for (slot = 0; slot < NUM_SLOTS; slot++)
+ for (rank = 0; rank < NUM_RANKS; rank++)
+ for (lane = 0; lane < 8 + info->use_ecc; lane++) {
+ u16 cached_value, estimation_value;
+ cached_value =
+ info->cached_training->
+ lane_timings[1][channel][slot][rank]
+ [lane];
+ if (cached_value >= 0x18
+ && cached_value <= 0x1E7) {
+ estimation_value =
+ info->training.
+ lane_timings[1][channel]
+ [slot][rank][lane];
+ if (estimation_value <
+ cached_value - 24)
+ return 0;
+ if (estimation_value >
+ cached_value + 24)
+ return 0;
+ }
+ }
+ return 1;
+}
+
/* Replay training results cached from a previous boot instead of running
   a full training pass.  After a plausibility check, the cached window
   bounds are verified on the live hardware: at both the smallest and
   largest cached reg178 setting, the memory test must pass exactly on
   each bound and fail exactly just outside it.  Returns 1 on success
   (cached data committed to info->training and written out), 0 on any
   mismatch (lane timings restored from info->training). */
static int try_cached_training(struct raminfo *info)
{
	u8 saved_243[2];
	u8 tm;

	int channel, slot, rank, lane;
	int flip = 1;
	int i, j;

	if (!check_cached_sanity(info))
		return 0;

	info->training.reg178_center = info->cached_training->reg178_center;
	info->training.reg178_smallest = info->cached_training->reg178_smallest;
	info->training.reg178_largest = info->cached_training->reg178_largest;
	memcpy(&info->training.timing_bounds,
	       &info->cached_training->timing_bounds,
	       sizeof(info->training.timing_bounds));
	memcpy(&info->training.timing_offset,
	       &info->cached_training->timing_offset,
	       sizeof(info->training.timing_offset));

	write_1d0(2, 0x142, 3, 1);
	/* Save 0x243/0x643 and force bit 1 for the duration of the test;
	   restored on both the success and failure paths below. */
	saved_243[0] = read_mchbar8(0x243);
	saved_243[1] = read_mchbar8(0x643);
	write_mchbar8(0x243, saved_243[0] | 2);
	write_mchbar8(0x643, saved_243[1] | 2);
	set_ecc(0);
	pcie_write_config16(NORTHBRIDGE, 0xc8, 3);
	if (read_1d0(0x10b, 6) & 1)
		set_10b(info, 0);
	/* tm == 0: verify at reg178_smallest; tm == 1: at reg178_largest. */
	for (tm = 0; tm < 2; tm++) {
		int totalrank;

		set_178(tm ? info->cached_training->reg178_largest : info->
			cached_training->reg178_smallest);

		totalrank = 0;
		/* Check timing ranges. With i == 0 we check smallest one and with
		   i == 1 the largest bound. With j == 0 we check that on the bound
		   it still works whereas with j == 1 we check that just outside of
		   bound we fail.
		 */
		FOR_POPULATED_RANKS_BACKWARDS {
			for (i = 0; i < 2; i++) {
				for (lane = 0; lane < 8; lane++) {
					write_500(info, channel,
						  info->cached_training->
						  timing2_bounds[channel][slot]
						  [rank][lane][i],
						  get_timing_register_addr(lane,
									   3,
									   slot,
									   rank),
						  9, 1);

					if (!i)
						write_500(info, channel,
							  info->
							  cached_training->
							  timing2_offset
							  [channel][slot][rank]
							  [lane],
							  get_timing_register_addr
							  (lane, 2, slot, rank),
							  9, 1);
					write_500(info, channel,
						  i ? info->cached_training->
						  timing_bounds[tm][channel]
						  [slot][rank][lane].
						  largest : info->
						  cached_training->
						  timing_bounds[tm][channel]
						  [slot][rank][lane].smallest,
						  get_timing_register_addr(lane,
									   0,
									   slot,
									   rank),
						  9, 1);
					write_500(info, channel,
						  info->cached_training->
						  timing_offset[channel][slot]
						  [rank][lane] +
						  (i ? info->cached_training->
						   timing_bounds[tm][channel]
						   [slot][rank][lane].
						   largest : info->
						   cached_training->
						   timing_bounds[tm][channel]
						   [slot][rank][lane].
						   smallest) - 64,
						  get_timing_register_addr(lane,
									   1,
									   slot,
									   rank),
						  9, 1);
				}
				for (j = 0; j < 2; j++) {
					u8 failmask;
					u8 expected_failmask;
					char reg1b3;

					/* Offset 4 or 5; negated (mod 64) when
					   probing the opposite side of the
					   bound being checked. */
					reg1b3 = (j == 1) + 4;
					reg1b3 =
					    j == i ? reg1b3 : (-reg1b3) & 0x3f;
					write_1d0(reg1b3, 0x1bb, 6, 1);
					write_1d0(reg1b3, 0x1b3, 6, 1);
					write_1d0(reg1b3, 0x1a3, 6, 1);

					flip = !flip;
					write_testing(info, totalrank, flip);
					failmask =
					    check_testing(info, totalrank,
							  flip);
					/* On-bound must fully pass; off-bound
					   must fully fail. */
					expected_failmask =
					    j == 0 ? 0x00 : 0xff;
					if (failmask != expected_failmask)
						goto fail;
				}
			}
			totalrank++;
		}
	}

	set_178(info->cached_training->reg178_center);
	if (info->use_ecc)
		set_ecc(1);
	write_training_data(info);
	write_1d0(0, 322, 3, 1);	/* 322 == 0x142: undo the write at entry */
	info->training = *info->cached_training;

	write_1d0(0, 0x1bb, 6, 1);
	write_1d0(0, 0x1b3, 6, 1);
	write_1d0(0, 0x1a3, 6, 1);
	write_mchbar8(0x243, saved_243[0]);
	write_mchbar8(0x643, saved_243[1]);

	return 1;

fail:
	/* Cached data did not verify: restore lane timings from the fresh
	   estimation before falling back to full training. */
	FOR_POPULATED_RANKS {
		write_500_timings_type(info, channel, slot, rank, 1);
		write_500_timings_type(info, channel, slot, rank, 2);
		write_500_timings_type(info, channel, slot, rank, 3);
	}

	write_1d0(0, 0x1bb, 6, 1);
	write_1d0(0, 0x1b3, 6, 1);
	write_1d0(0, 0x1a3, 6, 1);
	write_mchbar8(0x243, saved_243[0]);
	write_mchbar8(0x643, saved_243[1]);

	return 0;
}
+
/* Full DRAM training pass: sweeps reg178 over its per-clock-speed range,
   training every populated rank at each step (results collected in
   timings[]), picks the best reg178 via choose_reg178(), refines the
   timing windows around that center, then programs the final per-lane
   timings chosen by choose_training() and runs try_timing_offsets() on
   each rank.  The number of test iterations (niter) scales with the
   clock speed index. */
static void do_ram_training(struct raminfo *info)
{
	u8 saved_243[2];
	int totalrank = 0;
	u8 reg_178;
	int niter;

	timing_bounds_t timings[64];
	int lane, rank, slot, channel;
	u8 reg178_center;

	write_1d0(2, 0x142, 3, 1);
	/* Save 0x243/0x643 and force bit 1 for the duration of training. */
	saved_243[0] = read_mchbar8(0x243);
	saved_243[1] = read_mchbar8(0x643);
	write_mchbar8(0x243, saved_243[0] | 2);
	write_mchbar8(0x643, saved_243[1] | 2);
	switch (info->clock_speed_index) {
	case 0:
		niter = 5;
		break;
	case 1:
		niter = 10;
		break;
	default:
		niter = 19;
		break;
	}
	set_ecc(0);

	/* Preload type-0 timings and stage the test patterns per rank. */
	FOR_POPULATED_RANKS_BACKWARDS {
		int i;

		write_500_timings_type(info, channel, slot, rank, 0);

		write_testing(info, totalrank, 0);
		for (i = 0; i < niter; i++) {
			write_testing_type2(info, totalrank, 2, i, 0);
			write_testing_type2(info, totalrank, 3, i, 1);
		}
		pcie_write_config8(NORTHBRIDGE, 0xc0, 0x01);
		totalrank++;
	}

	if (reg178_min[info->clock_speed_index] <
	    reg178_max[info->clock_speed_index])
		memset(timings[reg178_min[info->clock_speed_index]], 0,
		       sizeof(timings[0]) *
		       (reg178_max[info->clock_speed_index] -
			reg178_min[info->clock_speed_index]));
	/* Coarse sweep of reg178 across its full range. */
	for (reg_178 = reg178_min[info->clock_speed_index];
	     reg_178 < reg178_max[info->clock_speed_index];
	     reg_178 += reg178_step[info->clock_speed_index]) {
		totalrank = 0;
		set_178(reg_178);
		for (channel = NUM_CHANNELS - 1; channel >= 0; channel--)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++) {
					memset(&timings[reg_178][channel][slot]
					       [rank][0].smallest, 0, 16);
					if (info->
					    populated_ranks[channel][slot]
					    [rank]) {
						train_ram_at_178(info, channel,
								 slot, rank,
								 totalrank,
								 reg_178, 1,
								 niter,
								 timings);
						totalrank++;
					}
				}
	}

	reg178_center = choose_reg178(info, timings);

	/* Record the window bounds at the smallest/largest usable reg178
	   and the per-lane offset between type-1 and type-0 timings. */
	FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) {
		info->training.timing_bounds[0][channel][slot][rank][lane].
		    smallest =
		    timings[info->training.
			    reg178_smallest][channel][slot][rank][lane].
		    smallest;
		info->training.timing_bounds[0][channel][slot][rank][lane].
		    largest =
		    timings[info->training.
			    reg178_smallest][channel][slot][rank][lane].largest;
		info->training.timing_bounds[1][channel][slot][rank][lane].
		    smallest =
		    timings[info->training.
			    reg178_largest][channel][slot][rank][lane].smallest;
		info->training.timing_bounds[1][channel][slot][rank][lane].
		    largest =
		    timings[info->training.
			    reg178_largest][channel][slot][rank][lane].largest;
		info->training.timing_offset[channel][slot][rank][lane] =
		    info->training.lane_timings[1][channel][slot][rank][lane]
		    -
		    info->training.lane_timings[0][channel][slot][rank][lane] +
		    64;
	}

	/* Fine sweep around the chosen center.  Silicon revision 1 with an
	   asymmetric channel-1 population retrains channel 1 with a wider
	   span (+-18) before channel 0 (+-12). */
	if (info->silicon_revision == 1
	    && (info->
		populated_ranks_mask[1] ^ (info->
					   populated_ranks_mask[1] >> 2)) & 1) {
		int ranks_after_channel1;

		totalrank = 0;
		for (reg_178 = reg178_center - 18;
		     reg_178 <= reg178_center + 18; reg_178 += 18) {
			totalrank = 0;
			set_178(reg_178);
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++) {
					if (info->
					    populated_ranks[1][slot][rank]) {
						train_ram_at_178(info, 1, slot,
								 rank,
								 totalrank,
								 reg_178, 0,
								 niter,
								 timings);
						totalrank++;
					}
				}
		}
		ranks_after_channel1 = totalrank;

		for (reg_178 = reg178_center - 12;
		     reg_178 <= reg178_center + 12; reg_178 += 12) {
			totalrank = ranks_after_channel1;
			set_178(reg_178);
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++)
					if (info->
					    populated_ranks[0][slot][rank]) {
						train_ram_at_178(info, 0, slot,
								 rank,
								 totalrank,
								 reg_178, 0,
								 niter,
								 timings);
						totalrank++;
					}

		}
	} else {
		for (reg_178 = reg178_center - 12;
		     reg_178 <= reg178_center + 12; reg_178 += 12) {
			totalrank = 0;
			set_178(reg_178);
			FOR_POPULATED_RANKS_BACKWARDS {
				train_ram_at_178(info, channel, slot, rank,
						 totalrank, reg_178, 0, niter,
						 timings);
				totalrank++;
			}
		}
	}

	/* Commit the chosen center and the per-lane final timings. */
	set_178(reg178_center);
	FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) {
		u16 tm0;

		tm0 =
		    choose_training(info, channel, slot, rank, lane, timings,
				    reg178_center);
		write_500(info, channel, tm0,
			  get_timing_register_addr(lane, 0, slot, rank), 9, 1);
		write_500(info, channel,
			  tm0 +
			  info->training.
			  lane_timings[1][channel][slot][rank][lane] -
			  info->training.
			  lane_timings[0][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 1, slot, rank), 9, 1);
	}

	totalrank = 0;
	FOR_POPULATED_RANKS_BACKWARDS {
		try_timing_offsets(info, channel, slot, rank, totalrank);
		totalrank++;
	}
	write_mchbar8(0x243, saved_243[0]);
	write_mchbar8(0x643, saved_243[1]);
	write_1d0(0, 0x142, 3, 1);
	info->training.reg178_center = reg178_center;
}
+
+static void ram_training(struct raminfo *info)
+{
+ u16 saved_fc4;
+
+ saved_fc4 = read_mchbar16(0xfc4);
+ write_mchbar16(0xfc4, 0xffff);
+
+ if (info->revision >= 8)
+ read_4090(info);
+
+ if (!try_cached_training(info))
+ do_ram_training(info);
+ if ((info->silicon_revision == 2 || info->silicon_revision == 3)
+ && info->clock_speed_index < 2)
+ set_10b(info, 1);
+ write_mchbar16(0xfc4, saved_fc4);
+}
+
/* Greatest common divisor via Euclid's algorithm.
   gcd(0, n) == gcd(n, 0) == n; gcd(0, 0) == 0. */
static unsigned gcd(unsigned a, unsigned b)
{
	/* The remainder loop is correct for either argument order: when
	   a > b, the first iteration effectively swaps the operands. */
	while (a != 0) {
		unsigned r = b % a;
		b = a;
		a = r;
	}
	return b;
}
+
/* Divide a by b, rounding up.  Assumes a >= 0 and b > 0; the bias
   formula is not a true ceiling for negative a.  (a + b - 1) may
   overflow for a near INT_MAX. */
static inline int div_roundup(int a, int b)
{
	return (a + b - 1) / b;
}
+
/* Least common multiple.  The intermediate product a * b may wrap for
   large arguments; the frequency values passed in this file are small. */
static unsigned lcm(unsigned a, unsigned b)
{
	return (a * b) / gcd(a, b);
}
+
/* Derived clock-ratio parameters shared by the 0x2dxx/0x6dx register
   setters.  "Reduced" values are the two input frequencies divided by
   their GCD; freq3/freq4 are internal cycle counts derived from the
   num_cycles arguments.  See compute_frequence_ratios() for how each
   field is filled in. */
struct stru1 {
	u8 freqs_reversed;	/* 1 if freq2_reduced < freq1_reduced */
	u8 freq_diff_reduced;	/* freq_max_reduced - freq_min_reduced */
	u8 freq_min_reduced;	/* min(freq1, freq2) / gcd(freq1, freq2) */
	u8 divisor_f4_to_fmax;	/* freq4 / freq_max_reduced (maybe rounded up) */
	u8 divisor_f3_to_fmax;	/* freq3 / freq_max_reduced (maybe rounded up) */
	u8 freq4_to_max_remainder;
	u8 freq3_to_2_remainder;
	u8 freq3_to_2_remaindera;
	u8 freq4_to_2_remainder;
	int divisor_f3_to_f1, divisor_f4_to_f2;
	int common_time_unit_ps;	/* div_roundup(900000, lcm(freq1, freq2)) */
	int freq_max_reduced;	/* max(freq1, freq2) / gcd(freq1, freq2) */
};
+
/* Fill RESULT with the divisors and (negative, stored as wrapped u8)
   remainders relating two clock frequencies and two delays given in
   cycle counts.  num_cycles_* are converted into units of the common
   time base (900000 ps scaled by lcm of the frequencies); with
   add_freqs the reduced frequencies are folded in.  With round_it the
   per-frequency remainders are zeroed and the fmax divisors are rounded
   up instead.  The three branches handle freq2 < freq1, freq2 > freq1
   and freq2 == freq1. */
static void
compute_frequence_ratios(struct raminfo *info, u16 freq1, u16 freq2,
			 int num_cycles_2, int num_cycles_1, int round_it,
			 int add_freqs, struct stru1 *result)
{
	int g;
	int common_time_unit_ps;
	int freq1_reduced, freq2_reduced;
	int freq_min_reduced;
	int freq_max_reduced;
	int freq3, freq4;

	g = gcd(freq1, freq2);
	freq1_reduced = freq1 / g;
	freq2_reduced = freq2 / g;
	freq_min_reduced = min(freq1_reduced, freq2_reduced);
	freq_max_reduced = max(freq1_reduced, freq2_reduced);

	common_time_unit_ps = div_roundup(900000, lcm(freq1, freq2));
	freq3 = div_roundup(num_cycles_2, common_time_unit_ps) - 1;
	freq4 = div_roundup(num_cycles_1, common_time_unit_ps) - 1;
	if (add_freqs) {
		freq3 += freq2_reduced;
		freq4 += freq1_reduced;
	}

	if (round_it) {
		result->freq3_to_2_remainder = 0;
		result->freq3_to_2_remaindera = 0;
		result->freq4_to_max_remainder = 0;
		result->divisor_f4_to_f2 = 0;
		result->divisor_f3_to_f1 = 0;
	} else {
		if (freq2_reduced < freq1_reduced) {
			result->freq3_to_2_remainder =
			    result->freq3_to_2_remaindera =
			    freq3 % freq1_reduced - freq1_reduced + 1;
			result->freq4_to_max_remainder =
			    -(freq4 % freq1_reduced);
			result->divisor_f3_to_f1 = freq3 / freq1_reduced;
			result->divisor_f4_to_f2 =
			    (freq4 -
			     (freq1_reduced - freq2_reduced)) / freq2_reduced;
			result->freq4_to_2_remainder =
			    -(char)((freq1_reduced - freq2_reduced) +
				    ((u8) freq4 -
				     (freq1_reduced -
				      freq2_reduced)) % (u8) freq2_reduced);
		} else {
			if (freq2_reduced > freq1_reduced) {
				result->freq4_to_max_remainder =
				    (freq4 % freq2_reduced) - freq2_reduced + 1;
				result->freq4_to_2_remainder =
				    freq4 % freq_max_reduced -
				    freq_max_reduced + 1;
			} else {
				/* freq1 == freq2. */
				result->freq4_to_max_remainder =
				    -(freq4 % freq2_reduced);
				result->freq4_to_2_remainder =
				    -(char)(freq4 % freq_max_reduced);
			}
			result->divisor_f4_to_f2 = freq4 / freq2_reduced;
			result->divisor_f3_to_f1 =
			    (freq3 -
			     (freq2_reduced - freq1_reduced)) / freq1_reduced;
			result->freq3_to_2_remainder = -(freq3 % freq2_reduced);
			result->freq3_to_2_remaindera =
			    -(char)((freq_max_reduced - freq_min_reduced) +
				    (freq3 -
				     (freq_max_reduced -
				      freq_min_reduced)) % freq1_reduced);
		}
	}
	result->divisor_f3_to_fmax = freq3 / freq_max_reduced;
	result->divisor_f4_to_fmax = freq4 / freq_max_reduced;
	if (round_it) {
		/* Round the divisor up on the side whose frequency is the
		   larger of the two. */
		if (freq2_reduced > freq1_reduced) {
			if (freq3 % freq_max_reduced)
				result->divisor_f3_to_fmax++;
		}
		if (freq2_reduced < freq1_reduced) {
			if (freq4 % freq_max_reduced)
				result->divisor_f4_to_fmax++;
		}
	}
	result->freqs_reversed = (freq2_reduced < freq1_reduced);
	result->freq_diff_reduced = freq_max_reduced - freq_min_reduced;
	result->freq_min_reduced = freq_min_reduced;
	result->common_time_unit_ps = common_time_unit_ps;
	result->freq_max_reduced = freq_max_reduced;
}
+
/* Program a 0x2d5x register pair (REG and REG + 4) from the frequency
   ratios of freq1/freq2 and four delay cycle counts.  A multiplier is
   derived from the larger of the two summed delays measured in common
   time units; REVERSE swaps which MCHBAR word receives the x and y
   halves. */
static void
set_2d5x_reg(struct raminfo *info, u16 reg, u16 freq1, u16 freq2,
	     int num_cycles_2, int num_cycles_1, int num_cycles_3,
	     int num_cycles_4, int reverse)
{
	struct stru1 vv;
	char multiplier;

	compute_frequence_ratios(info, freq1, freq2, num_cycles_2, num_cycles_1,
				 0, 1, &vv);

	multiplier =
	    div_roundup(max
			(div_roundup(num_cycles_2, vv.common_time_unit_ps) +
			 div_roundup(num_cycles_3, vv.common_time_unit_ps),
			 div_roundup(num_cycles_1,
				     vv.common_time_unit_ps) +
			 div_roundup(num_cycles_4, vv.common_time_unit_ps))
			+ vv.freq_min_reduced - 1, vv.freq_max_reduced) - 1;

	u32 y =
	    (u8) ((vv.freq_max_reduced - vv.freq_min_reduced) +
		  vv.freq_max_reduced * multiplier)
	    | (vv.
	       freqs_reversed << 8) | ((u8) (vv.freq_min_reduced *
					     multiplier) << 16) | ((u8) (vv.
									 freq_min_reduced
									 *
									 multiplier)
								   << 24);
	u32 x =
	    vv.freq3_to_2_remaindera | (vv.freq4_to_2_remainder << 8) | (vv.
									 divisor_f3_to_f1
									 << 16)
	    | (vv.divisor_f4_to_f2 << 20) | (vv.freq_min_reduced << 24);
	if (reverse) {
		write_mchbar32(reg, y);
		write_mchbar32(reg + 4, x);
	} else {
		write_mchbar32(reg + 4, y);
		write_mchbar32(reg, x);
	}
}
+
/* Program a single 0x6dx register by packing the freq4_to_max remainders
   and fmax divisors of two frequency-ratio computations: one over
   num_cycles_1/2, one over num_cycles_3/4. */
static void
set_6d_reg(struct raminfo *info, u16 reg, u16 freq1, u16 freq2,
	   int num_cycles_1, int num_cycles_2, int num_cycles_3,
	   int num_cycles_4)
{
	struct stru1 ratios1;
	struct stru1 ratios2;

	compute_frequence_ratios(info, freq1, freq2, num_cycles_1, num_cycles_2,
				 0, 1, &ratios2);
	compute_frequence_ratios(info, freq1, freq2, num_cycles_3, num_cycles_4,
				 0, 1, &ratios1);
	write_mchbar32(reg,
		       ratios1.freq4_to_max_remainder | (ratios2.
							 freq4_to_max_remainder
							 << 8)
		       | (ratios1.divisor_f4_to_fmax << 16) | (ratios2.
							       divisor_f4_to_fmax
							       << 20));
}
+
/* Program a 0x2dxx (or 0x6dx) register from the frequency ratios of
   freq1/freq2, with MODE selecting which fields are packed and whether
   REG + 4 is also written (mode 0 only).  Modes other than 0, 1, 2 and
   4 compute the ratios but write nothing (no default case). */
static void
set_2dx8_reg(struct raminfo *info, u16 reg, u8 mode, u16 freq1, u16 freq2,
	     int num_cycles_2, int num_cycles_1, int round_it, int add_freqs)
{
	struct stru1 ratios;

	compute_frequence_ratios(info, freq1, freq2, num_cycles_2, num_cycles_1,
				 round_it, add_freqs, &ratios);
	switch (mode) {
	case 0:
		write_mchbar32(reg + 4,
			       ratios.freq_diff_reduced | (ratios.
							   freqs_reversed <<
							   8));
		write_mchbar32(reg,
			       ratios.freq3_to_2_remainder | (ratios.
							      freq4_to_max_remainder
							      << 8)
			       | (ratios.divisor_f3_to_fmax << 16) | (ratios.
								      divisor_f4_to_fmax
								      << 20) |
			       (ratios.freq_min_reduced << 24));
		break;

	case 1:
		write_mchbar32(reg,
			       ratios.freq3_to_2_remainder | (ratios.
							      divisor_f3_to_fmax
							      << 16));
		break;

	case 2:
		write_mchbar32(reg,
			       ratios.freq3_to_2_remainder | (ratios.
							      freq4_to_max_remainder
							      << 8) | (ratios.
								       divisor_f3_to_fmax
								       << 16) |
			       (ratios.divisor_f4_to_fmax << 20));
		break;

	case 4:
		write_mchbar32(reg, (ratios.divisor_f3_to_fmax << 4)
			       | (ratios.divisor_f4_to_fmax << 8) | (ratios.
								     freqs_reversed
								     << 12) |
			       (ratios.freq_min_reduced << 16) | (ratios.
								  freq_diff_reduced
								  << 24));
		break;
	}
}
+
/* Program the whole series of 0x2dxx and 0x6dx clock-crossing registers
   with fixed frequency pairs and delay constants; the 0x6dc/0x6e8
   entries take the per-channel delay46_ps/delay54_ps values computed by
   set_274265(). */
static void set_2dxx_series(struct raminfo *info)
{
	set_2dx8_reg(info, 0x2d00, 0, 0x78, frequency_11(info) / 2, 1359, 1005,
		     0, 1);
	set_2dx8_reg(info, 0x2d08, 0, 0x78, 0x78, 3273, 5033, 1, 1);
	set_2dx8_reg(info, 0x2d10, 0, 0x78, info->fsb_frequency, 1475, 1131, 0,
		     1);
	set_2dx8_reg(info, 0x2d18, 0, 2 * info->fsb_frequency,
		     frequency_11(info), 1231, 1524, 0, 1);
	set_2dx8_reg(info, 0x2d20, 0, 2 * info->fsb_frequency,
		     frequency_11(info) / 2, 1278, 2008, 0, 1);
	set_2dx8_reg(info, 0x2d28, 0, info->fsb_frequency, frequency_11(info),
		     1167, 1539, 0, 1);
	set_2dx8_reg(info, 0x2d30, 0, info->fsb_frequency,
		     frequency_11(info) / 2, 1403, 1318, 0, 1);
	set_2dx8_reg(info, 0x2d38, 0, info->fsb_frequency, 0x78, 3460, 5363, 1,
		     1);
	set_2dx8_reg(info, 0x2d40, 0, info->fsb_frequency, 0x3c, 2792, 5178, 1,
		     1);
	set_2dx8_reg(info, 0x2d48, 0, 2 * info->fsb_frequency, 0x78, 2738, 4610,
		     1, 1);
	set_2dx8_reg(info, 0x2d50, 0, info->fsb_frequency, 0x78, 2819, 5932, 1,
		     1);
	set_2dx8_reg(info, 0x6d4, 1, info->fsb_frequency,
		     frequency_11(info) / 2, 4000, 0, 0, 0);
	set_2dx8_reg(info, 0x6d8, 2, info->fsb_frequency,
		     frequency_11(info) / 2, 4000, 4000, 0, 0);

	set_6d_reg(info, 0x6dc, 2 * info->fsb_frequency, frequency_11(info), 0,
		   info->delay46_ps[0], 0, info->delay54_ps[0]);
	set_2dx8_reg(info, 0x6e0, 1, 2 * info->fsb_frequency,
		     frequency_11(info), 2500, 0, 0, 0);
	set_2dx8_reg(info, 0x6e4, 1, 2 * info->fsb_frequency,
		     frequency_11(info) / 2, 3500, 0, 0, 0);
	set_6d_reg(info, 0x6e8, 2 * info->fsb_frequency, frequency_11(info), 0,
		   info->delay46_ps[1], 0, info->delay54_ps[1]);
	set_2d5x_reg(info, 0x2d58, 0x78, 0x78, 864, 1195, 762, 786, 0);
	set_2d5x_reg(info, 0x2d60, 0x195, info->fsb_frequency, 1352, 725, 455,
		     470, 0);
	set_2d5x_reg(info, 0x2d68, 0x195, 0x3c, 2707, 5632, 3277, 2207, 0);
	set_2d5x_reg(info, 0x2d70, 0x195, frequency_11(info) / 2, 1276, 758,
		     454, 459, 0);
	set_2d5x_reg(info, 0x2d78, 0x195, 0x78, 1021, 799, 510, 513, 0);
	set_2d5x_reg(info, 0x2d80, info->fsb_frequency, 0xe1, 0, 2862, 2579,
		     2588, 0);
	set_2d5x_reg(info, 0x2d88, info->fsb_frequency, 0xe1, 0, 2690, 2405,
		     2405, 0);
	set_2d5x_reg(info, 0x2da0, 0x78, 0xe1, 0, 2560, 2264, 2251, 0);
	set_2d5x_reg(info, 0x2da8, 0x195, frequency_11(info), 1060, 775, 484,
		     480, 0);
	set_2d5x_reg(info, 0x2db0, 0x195, 0x78, 4183, 6023, 2217, 2048, 0);
	write_mchbar32(0x2dbc, ((frequency_11(info) / 2) - 1) | 0xe00000);
	write_mchbar32(0x2db8, ((info->fsb_frequency - 1) << 16) | 0x77);
}
+
/* Return the largest timing-register-0 value across all lanes of all
   populated ranks on CHANNEL.  Returns the fixed value 384 when MCHBAR
   0x2ca8 bits 7:2 are zero, and 256 on pre-revision-8 parts. */
static u16 get_max_timing(struct raminfo *info, int channel)
{
	int slot, rank, lane;
	u16 ret = 0;

	if ((read_mchbar8(0x2ca8) >> 2) < 1)
		return 384;

	if (info->revision < 8)
		return 256;

	for (slot = 0; slot < NUM_SLOTS; slot++)
		for (rank = 0; rank < NUM_RANKS; rank++)
			if (info->populated_ranks[channel][slot][rank])
				for (lane = 0; lane < 8 + info->use_ecc; lane++)
					ret = max(ret, read_500(info, channel,
								get_timing_register_addr
								(lane, 0, slot,
								 rank), 9));
	return ret;
}
+
/* Compute the per-channel delay registers 0x274 and 0x265 from cycle-time
   arithmetic on the measured maximum timing, the revision-dependent
   fixed offsets and the half-/FSB-cycle periods.  Side effects: fills
   info->delay46_ps[] and info->delay54_ps[] (clamped to a 2500 ps
   minimum; both consumed later by set_2dxx_series()), caches the
   programmed values in info->reg274265[][], and mirrors the 2500 ps
   clamp into bit 0 of MCHBAR 0x2ca9 via info->reg2ca9_bit0. */
static void set_274265(struct raminfo *info)
{
	int delay_a_ps, delay_b_ps, delay_c_ps, delay_d_ps;
	int delay_e_ps, delay_e_cycles, delay_f_cycles;
	int delay_e_over_cycle_ps;
	int cycletime_ps;
	int channel;

	delay_a_ps = 4 * halfcycle_ps(info) + 6 * fsbcycle_ps(info);
	info->reg2ca9_bit0 = 0;
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		cycletime_ps =
		    900000 / lcm(2 * info->fsb_frequency, frequency_11(info));
		delay_d_ps =
		    (halfcycle_ps(info) * get_max_timing(info, channel) >> 6)
		    - info->some_delay_3_ps_rounded + 200;
		if (!
		    ((info->silicon_revision == 0
		      || info->silicon_revision == 1)
		     && (info->revision >= 8)))
			delay_d_ps += halfcycle_ps(info) * 2;
		delay_d_ps +=
		    halfcycle_ps(info) * (!info->revision_flag_1 +
					  info->some_delay_2_halfcycles_ceil +
					  2 * info->some_delay_1_cycle_floor +
					  info->clock_speed_index +
					  2 * info->cas_latency - 7 + 11);
		delay_d_ps += info->revision >= 8 ? 2758 : 4428;

		write_mchbar32(0x140,
			       (read_mchbar32(0x140) & 0xfaffffff) | 0x2000000);
		write_mchbar32(0x138,
			       (read_mchbar32(0x138) & 0xfaffffff) | 0x2000000);
		if ((read_mchbar8(0x144) & 0x1f) > 0x13)
			delay_d_ps += 650;
		delay_c_ps = delay_d_ps + 1800;
		if (delay_c_ps <= delay_a_ps)
			delay_e_ps = 0;
		else
			/* Round delay_c - delay_a up to a multiple of the
			   common cycle time. */
			delay_e_ps =
			    cycletime_ps * div_roundup(delay_c_ps - delay_a_ps,
						       cycletime_ps);

		delay_e_over_cycle_ps = delay_e_ps % (2 * halfcycle_ps(info));
		delay_e_cycles = delay_e_ps / (2 * halfcycle_ps(info));
		delay_f_cycles =
		    div_roundup(2500 - delay_e_over_cycle_ps,
				2 * halfcycle_ps(info));
		if (delay_f_cycles > delay_e_cycles) {
			info->delay46_ps[channel] = delay_e_ps;
			delay_e_cycles = 0;
		} else {
			info->delay46_ps[channel] =
			    delay_e_over_cycle_ps +
			    2 * halfcycle_ps(info) * delay_f_cycles;
			delay_e_cycles -= delay_f_cycles;
		}

		/* Enforce a 2500 ps floor and remember that the clamp hit. */
		if (info->delay46_ps[channel] < 2500) {
			info->delay46_ps[channel] = 2500;
			info->reg2ca9_bit0 = 1;
		}
		delay_b_ps = halfcycle_ps(info) + delay_c_ps;
		if (delay_b_ps <= delay_a_ps)
			delay_b_ps = 0;
		else
			delay_b_ps -= delay_a_ps;
		info->delay54_ps[channel] =
		    cycletime_ps * div_roundup(delay_b_ps,
					       cycletime_ps) -
		    2 * halfcycle_ps(info) * delay_e_cycles;
		if (info->delay54_ps[channel] < 2500)
			info->delay54_ps[channel] = 2500;
		info->reg274265[channel][0] = delay_e_cycles;
		if (delay_d_ps + 7 * halfcycle_ps(info) <=
		    24 * halfcycle_ps(info))
			info->reg274265[channel][1] = 0;
		else
			info->reg274265[channel][1] =
			    div_roundup(delay_d_ps + 7 * halfcycle_ps(info),
					4 * halfcycle_ps(info)) - 6;
		write_mchbar32((channel << 10) + 0x274,
			       info->reg274265[channel][1] | (info->
							      reg274265[channel]
							      [0] << 16));
		info->reg274265[channel][2] =
		    div_roundup(delay_c_ps + 3 * fsbcycle_ps(info),
				4 * halfcycle_ps(info)) + 1;
		write_mchbar16((channel << 10) + 0x265,
			       info->reg274265[channel][2] << 8);
	}
	if (info->reg2ca9_bit0)
		write_mchbar8(0x2ca9, read_mchbar8(0x2ca9) | 1);
	else
		write_mchbar8(0x2ca9, read_mchbar8(0x2ca9) & ~1);
}
+
+static void restore_274265(struct raminfo *info)
+{
+ int channel;
+
+ for (channel = 0; channel < NUM_CHANNELS; channel++) {
+ write_mchbar32((channel << 10) + 0x274,
+ (info->reg274265[channel][0] << 16) | info->
+ reg274265[channel][1]);
+ write_mchbar16((channel << 10) + 0x265,
+ info->reg274265[channel][2] << 8);
+ }
+ if (info->reg2ca9_bit0)
+ write_mchbar8(0x2ca9, read_mchbar8(0x2ca9) | 1);
+ else
+ write_mchbar8(0x2ca9, read_mchbar8(0x2ca9) & ~1);
+}
+
+#if REAL
/* One-shot DMI link setup: pokes fixed values into DMIBAR, MCHBAR 0x48
   and the GPIO block.  NOTE(review): gav() appears to log the read value
   as a reverse-engineering aid (see its definition earlier in the file);
   the trailing "// = ..." comments record values observed on real
   hardware. */
static void dmi_setup(void)
{
	gav(read8(DEFAULT_DMIBAR | 0x254));
	write8(DEFAULT_DMIBAR | 0x254, 0x1);
	write16(DEFAULT_DMIBAR | 0x1b8, 0x18f2);
	read_mchbar16(0x48);
	write_mchbar16(0x48, 0x2);

	write32(DEFAULT_DMIBAR | 0xd68, read32(DEFAULT_DMIBAR | 0xd68) | 0x08000000);

	/* Clear bits 18 and 20, set bit 22 of the GPIO register at 0x38. */
	outl((gav(inl(DEFAULT_GPIOBASE | 0x38)) & ~0x140000) | 0x400000,
	     DEFAULT_GPIOBASE | 0x38);
	gav(inb(DEFAULT_GPIOBASE | 0xe));	// = 0xfdcaff6e
}
+#endif
+
+static void
+set_fsb_frequency (void)
+{
+ u8 block[5];
+ u16 fsbfreq = 62879;
+ smbus_block_read(0x69, 0, 5, block);
+ block[0] = fsbfreq;
+ block[1] = fsbfreq >> 8;
+
+ smbus_block_write(0x69, 0, 5, block);
+}
+
+#if REAL
+void raminit(const int s3resume)
+#else
+void raminit(int s3resume)
+#endif
+{
+ unsigned channel, slot, lane, rank;
+ int i;
+ struct raminfo info;
+
+#if !REAL
+ pre_raminit1();
+#endif
+
+ if (s3resume) {
+ read_mchbar32(0x1e8);
+ write_mchbar32(0x1e8, 0x6);
+ read_mchbar32(0x1e8);
+ write_mchbar32(0x1e8, 0x4);
+ }
+
+#if !REAL
+ pre_raminit_2();
+#endif
+ u8 x2ca8;
+
+ gav(x2ca8 = read_mchbar8(0x2ca8));
+ if ((x2ca8 & 1) || (x2ca8 == 8 && !s3resume)) {
+ printk(BIOS_DEBUG, "soft reset detected, rebooting properly\n");
+ write_mchbar8(0x2ca8, 0);
+ outb(0xe, 0xcf9);
+#if REAL
+ while (1) {
+ asm volatile ("hlt");
+ }
+#else
+ printf("CP5\n");
+ exit(0);
+#endif
+ }
+#if !REAL
+ if (!s3resume) {
+ pre_raminit_3(x2ca8);
+ }
+#endif
+
+#if !REAL
+ pre_raminit_4a();
+#endif
+
+ dmi_setup();
+
+ write_mchbar16(0x1170, 0xa880);
+ write_mchbar8(0x11c1, 0x1);
+ write_mchbar16(0x1170, 0xb880);
+ read_mchbar8(0x1210);
+ write_mchbar8(0x1210, 0x84);
+ pcie_read_config8(NORTHBRIDGE, D0F0_GGC); // = 0x52
+ pcie_write_config8(NORTHBRIDGE, D0F0_GGC, 0x2);
+ pcie_read_config8(NORTHBRIDGE, D0F0_GGC); // = 0x2
+ pcie_write_config8(NORTHBRIDGE, D0F0_GGC, 0x52);
+ pcie_read_config16(NORTHBRIDGE, D0F0_GGC); // = 0xb52
+
+ pcie_write_config16(NORTHBRIDGE, D0F0_GGC, 0xb52);
+
+ u16 deven;
+ deven = pcie_read_config16(NORTHBRIDGE, D0F0_DEVEN); // = 0x3
+
+ if (deven & 8) {
+ write_mchbar8(0x2c30, 0x20);
+ pcie_read_config8(NORTHBRIDGE, 0x8); // = 0x18
+ write_mchbar16(0x2c30, read_mchbar16(0x2c30) | 0x200);
+ write_mchbar16(0x2c32, 0x434);
+ read_mchbar32(0x2c44);
+ write_mchbar32(0x2c44, 0x1053687);
+ pcie_read_config8(GMA, 0x62); // = 0x2
+ pcie_write_config8(GMA, 0x62, 0x2);
+ read8(DEFAULT_RCBA | 0x2318);
+ write8(DEFAULT_RCBA | 0x2318, 0x47);
+ read8(DEFAULT_RCBA | 0x2320);
+ write8(DEFAULT_RCBA | 0x2320, 0xfc);
+ }
+
+ read_mchbar32(0x30);
+ write_mchbar32(0x30, 0x40);
+
+ pcie_read_config8(SOUTHBRIDGE, 0x8); // = 0x6
+ pcie_read_config16(NORTHBRIDGE, D0F0_GGC); // = 0xb52
+ pcie_write_config16(NORTHBRIDGE, D0F0_GGC, 0xb50);
+ gav(read32(DEFAULT_RCBA | 0x3428));
+ write32(DEFAULT_RCBA | 0x3428, 0x1d);
+
+#if !REAL
+ pre_raminit_5(s3resume);
+#else
+ set_fsb_frequency();
+#endif
+
+ memset(&info, 0x5a, sizeof(info));
+
+ info.last_500_command[0] = 0;
+ info.last_500_command[1] = 0;
+
+ info.fsb_frequency = 135 * 2;
+ info.board_lane_delay[0] = 0x14;
+ info.board_lane_delay[1] = 0x07;
+ info.board_lane_delay[2] = 0x07;
+ info.board_lane_delay[3] = 0x08;
+ info.board_lane_delay[4] = 0x56;
+ info.board_lane_delay[5] = 0x04;
+ info.board_lane_delay[6] = 0x04;
+ info.board_lane_delay[7] = 0x05;
+ info.board_lane_delay[8] = 0x10;
+
+ info.training.reg_178 = 0;
+ info.training.reg_10b = 0;
+
+ info.heci_bar = 0;
+ info.memory_reserved_for_heci_mb = 0;
+
+ /* before SPD */
+ timestamp_add_now(101);
+
+ if (!s3resume || REAL) {
+ pcie_read_config8(SOUTHBRIDGE, GEN_PMCON_2); // = 0x80
+
+ collect_system_info(&info);
+
+#if REAL
+ /* Enable SMBUS. */
+ enable_smbus();
+#endif
+
+ memset(&info.populated_ranks, 0, sizeof(info.populated_ranks));
+
+ info.use_ecc = 1;
+ for (channel = 0; channel < NUM_CHANNELS; channel++)
+ for (slot = 0; slot < NUM_CHANNELS; slot++) {
+ int v;
+ int try;
+ int addr;
+ const u8 useful_addresses[] = {
+ DEVICE_TYPE,
+ MODULE_TYPE,
+ DENSITY,
+ RANKS_AND_DQ,
+ MEMORY_BUS_WIDTH,
+ TIMEBASE_DIVIDEND,
+ TIMEBASE_DIVISOR,
+ CYCLETIME,
+ CAS_LATENCIES_LSB,
+ CAS_LATENCIES_MSB,
+ CAS_LATENCY_TIME,
+ 0x11, 0x12, 0x13, 0x14, 0x15,
+ 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b,
+ 0x1c, 0x1d,
+ THERMAL_AND_REFRESH,
+ 0x20,
+ REFERENCE_RAW_CARD_USED,
+ RANK1_ADDRESS_MAPPING,
+ 0x75, 0x76, 0x77, 0x78,
+ 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e,
+ 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84,
+ 0x85, 0x86, 0x87, 0x88,
+ 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
+ 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94,
+ 0x95
+ };
+ if (slot)
+ continue;
+ for (try = 0; try < 5; try++) {
+ v = smbus_read_byte(0x50 + channel,
+ DEVICE_TYPE);
+ if (v >= 0)
+ break;
+ }
+ if (v < 0)
+ continue;
+ for (addr = 0;
+ addr <
+ sizeof(useful_addresses) /
+ sizeof(useful_addresses[0]); addr++)
+ gav(info.
+ spd[channel][0][useful_addresses
+ [addr]] =
+ smbus_read_byte(0x50 + channel,
+ useful_addresses
+ [addr]));
+ if (info.spd[channel][0][DEVICE_TYPE] != 11)
+ die("Only DDR3 is supported");
+
+ v = info.spd[channel][0][RANKS_AND_DQ];
+ info.populated_ranks[channel][0][0] = 1;
+ info.populated_ranks[channel][0][1] =
+ ((v >> 3) & 7);
+ if (((v >> 3) & 7) > 1)
+ die("At most 2 ranks are supported");
+ if ((v & 7) == 0 || (v & 7) > 2)
+ die("Only x8 and x16 modules are supported");
+ if ((info.
+ spd[channel][slot][MODULE_TYPE] & 0xF) != 2
+ && (info.
+ spd[channel][slot][MODULE_TYPE] & 0xF)
+ != 3)
+ die("Registered memory is not supported");
+ info.is_x16_module[channel][0] = (v & 7) - 1;
+ info.density[channel][slot] =
+ info.spd[channel][slot][DENSITY] & 0xF;
+ if (!
+ (info.
+ spd[channel][slot][MEMORY_BUS_WIDTH] &
+ 0x18))
+ info.use_ecc = 0;
+ }
+
+ gav(0x55);
+
+ for (channel = 0; channel < NUM_CHANNELS; channel++) {
+ int v = 0;
+ for (slot = 0; slot < NUM_SLOTS; slot++)
+ for (rank = 0; rank < NUM_RANKS; rank++)
+ v |= info.
+ populated_ranks[channel][slot][rank]
+ << (2 * slot + rank);
+ info.populated_ranks_mask[channel] = v;
+ }
+
+ gav(0x55);
+
+ gav(pcie_read_config32(NORTHBRIDGE, D0F0_CAPID0 + 4));
+ }
+
+ /* after SPD */
+ timestamp_add_now(102);
+
+ write_mchbar8(0x2ca8, read_mchbar8(0x2ca8) & 0xfc);
+#if !REAL
+ rdmsr (MTRRphysMask_MSR (3));
+#endif
+
+ collect_system_info(&info);
+ calculate_timings(&info);
+
+#if !REAL
+ pcie_write_config8(NORTHBRIDGE, 0xdf, 0x82);
+#endif
+
+ if (!s3resume) {
+ u8 reg8 = pcie_read_config8(SOUTHBRIDGE, GEN_PMCON_2);
+ if (x2ca8 == 0 && (reg8 & 0x80)) {
+ /* Don't enable S4-assertion stretch. Makes trouble on roda/rk9.
+ reg8 = pci_read_config8(PCI_DEV(0, 0x1f, 0), 0xa4);
+ pci_write_config8(PCI_DEV(0, 0x1f, 0), 0xa4, reg8 | 0x08);
+ */
+
+ /* Clear bit7. */
+
+ pci_write_config8(SOUTHBRIDGE, GEN_PMCON_2,
+ (reg8 & ~(1 << 7)));
+
+ printk(BIOS_INFO,
+ "Interrupted RAM init, reset required.\n");
+ outb(0x6, 0xcf9);
+#if REAL
+ while (1) {
+ asm volatile ("hlt");
+ }
+#endif
+ }
+ }
+#if !REAL
+ gav(read_mchbar8(0x2ca8)); ///!!!!
+#endif
+
+ if (!s3resume && x2ca8 == 0)
+ pcie_write_config8(SOUTHBRIDGE, GEN_PMCON_2,
+ pcie_read_config8(SOUTHBRIDGE, GEN_PMCON_2) | 0x80);
+
+ compute_derived_timings(&info);
+
+ if (x2ca8 == 0) {
+ gav(read_mchbar8(0x164));
+ write_mchbar8(0x164, 0x26);
+ write_mchbar16(0x2c20, 0x10);
+ }
+
+ write_mchbar32(0x18b4, read_mchbar32(0x18b4) | 0x210000); /* OK */
+ write_mchbar32(0x1890, read_mchbar32(0x1890) | 0x2000000); /* OK */
+ write_mchbar32(0x18b4, read_mchbar32(0x18b4) | 0x8000);
+
+ gav(pcie_read_config32(PCI_DEV(0xff, 2, 1), 0x50)); // !!!!
+ pcie_write_config8(PCI_DEV(0xff, 2, 1), 0x54, 0x12);
+
+ gav(read_mchbar16(0x2c10)); // !!!!
+ write_mchbar16(0x2c10, 0x412);
+ gav(read_mchbar16(0x2c10)); // !!!!
+ write_mchbar16(0x2c12, read_mchbar16(0x2c12) | 0x100); /* OK */
+
+ gav(read_mchbar8(0x2ca8)); // !!!!
+ write_mchbar32(0x1804,
+ (read_mchbar32(0x1804) & 0xfffffffc) | 0x8400080);
+
+ pcie_read_config32(PCI_DEV(0xff, 2, 1), 0x6c); // !!!!
+ pcie_write_config32(PCI_DEV(0xff, 2, 1), 0x6c, 0x40a0a0);
+ gav(read_mchbar32(0x1c04)); // !!!!
+ gav(read_mchbar32(0x1804)); // !!!!
+
+ if (x2ca8 == 0) {
+ write_mchbar8(0x2ca8, read_mchbar8(0x2ca8) | 1);
+ }
+
+ write_mchbar32(0x18d8, 0x120000);
+ write_mchbar32(0x18dc, 0x30a484a);
+ pcie_write_config32(PCI_DEV(0xff, 2, 1), 0xe0, 0x0);
+ pcie_write_config32(PCI_DEV(0xff, 2, 1), 0xf4, 0x9444a);
+ write_mchbar32(0x18d8, 0x40000);
+ write_mchbar32(0x18dc, 0xb000000);
+ pcie_write_config32(PCI_DEV(0xff, 2, 1), 0xe0, 0x60000);
+ pcie_write_config32(PCI_DEV(0xff, 2, 1), 0xf4, 0x0);
+ write_mchbar32(0x18d8, 0x180000);
+ write_mchbar32(0x18dc, 0xc0000142);
+ pcie_write_config32(PCI_DEV(0xff, 2, 1), 0xe0, 0x20000);
+ pcie_write_config32(PCI_DEV(0xff, 2, 1), 0xf4, 0x142);
+ write_mchbar32(0x18d8, 0x1e0000);
+
+ gav(read_mchbar32(0x18dc)); // !!!!
+ write_mchbar32(0x18dc, 0x3);
+ gav(read_mchbar32(0x18dc)); // !!!!
+
+ if (x2ca8 == 0) {
+ write_mchbar8(0x2ca8, read_mchbar8(0x2ca8) | 1); // guess
+ }
+
+ write_mchbar32(0x188c, 0x20bc09);
+ pcie_write_config32(PCI_DEV(0xff, 2, 1), 0xd0, 0x40b0c09);
+ write_mchbar32(0x1a10, 0x4200010e);
+ write_mchbar32(0x18b8, read_mchbar32(0x18b8) | 0x200);
+ gav(read_mchbar32(0x1918)); // !!!!
+ write_mchbar32(0x1918, 0x332);
+
+ gav(read_mchbar32(0x18b8)); // !!!!
+ write_mchbar32(0x18b8, 0xe00);
+ gav(read_mchbar32(0x182c)); // !!!!
+ write_mchbar32(0x182c, 0x10202);
+ gav(pcie_read_config32(PCI_DEV(0xff, 2, 1), 0x94)); // !!!!
+ pcie_write_config32(PCI_DEV(0xff, 2, 1), 0x94, 0x10202);
+ write_mchbar32(0x1a1c, read_mchbar32(0x1a1c) & 0x8fffffff);
+ write_mchbar32(0x1a70, read_mchbar32(0x1a70) | 0x100000);
+
+ write_mchbar32(0x18b4, read_mchbar32(0x18b4) & 0xffff7fff);
+ gav(read_mchbar32(0x1a68)); // !!!!
+ write_mchbar32(0x1a68, 0x343800);
+ gav(read_mchbar32(0x1e68)); // !!!!
+ gav(read_mchbar32(0x1a68)); // !!!!
+
+ if (x2ca8 == 0) {
+ write_mchbar8(0x2ca8, read_mchbar8(0x2ca8) | 1); // guess
+ }
+
+ pcie_read_config32(PCI_DEV(0xff, 2, 0), 0x048); // !!!!
+ pcie_write_config32(PCI_DEV(0xff, 2, 0), 0x048, 0x140000);
+ pcie_read_config32(PCI_DEV(0xff, 2, 0), 0x058); // !!!!
+ pcie_write_config32(PCI_DEV(0xff, 2, 0), 0x058, 0x64555);
+ pcie_read_config32(PCI_DEV(0xff, 2, 0), 0x058); // !!!!
+ pcie_read_config32(PCI_DEV (0xff, 0, 0), 0xd0); // !!!!
+ pcie_write_config32(PCI_DEV (0xff, 0, 0), 0xd0, 0x180);
+ gav(read_mchbar32(0x1af0)); // !!!!
+ gav(read_mchbar32(0x1af0)); // !!!!
+ write_mchbar32(0x1af0, 0x1f020003);
+ gav(read_mchbar32(0x1af0)); // !!!!
+
+ if (((x2ca8 == 0))) {
+ write_mchbar8(0x2ca8, read_mchbar8(0x2ca8) | 1); // guess
+ }
+
+ gav(read_mchbar32(0x1890)); // !!!!
+ write_mchbar32(0x1890, 0x80102);
+ gav(read_mchbar32(0x18b4)); // !!!!
+ write_mchbar32(0x18b4, 0x216000);
+ write_mchbar32(0x18a4, 0x22222222);
+ write_mchbar32(0x18a8, 0x22222222);
+ write_mchbar32(0x18ac, 0x22222);
+
+ udelay(1000);
+
+ if (x2ca8 == 0) {
+ if (s3resume) {
+#if REAL && 0
+ info.reg2ca9_bit0 = 0;
+ info.reg274265[0][0] = 5;
+ info.reg274265[0][1] = 5;
+ info.reg274265[0][2] = 0xe;
+ info.reg274265[1][0] = 5;
+ info.reg274265[1][1] = 5;
+ info.reg274265[1][2] = 0xe;
+ info.delay46_ps[0] = 0xa86;
+ info.delay46_ps[1] = 0xa86;
+ info.delay54_ps[0] = 0xdc6;
+ info.delay54_ps[1] = 0xdc6;
+#else
+ info.reg2ca9_bit0 = 0;
+ info.reg274265[0][0] = 3;
+ info.reg274265[0][1] = 5;
+ info.reg274265[0][2] = 0xd;
+ info.reg274265[1][0] = 4;
+ info.reg274265[1][1] = 5;
+ info.reg274265[1][2] = 0xd;
+ info.delay46_ps[0] = 0x110a;
+ info.delay46_ps[1] = 0xb58;
+ info.delay54_ps[0] = 0x144a;
+ info.delay54_ps[1] = 0xe98;
+#endif
+ restore_274265(&info);
+ } else
+ set_274265(&info);
+ int j;
+ printk(BIOS_DEBUG, "reg2ca9_bit0 = %x\n", info.reg2ca9_bit0);
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 3; j++)
+ printk(BIOS_DEBUG, "reg274265[%d][%d] = %x\n",
+ i, j, info.reg274265[i][j]);
+ for (i = 0; i < 2; i++)
+ printk(BIOS_DEBUG, "delay46_ps[%d] = %x\n", i,
+ info.delay46_ps[i]);
+ for (i = 0; i < 2; i++)
+ printk(BIOS_DEBUG, "delay54_ps[%d] = %x\n", i,
+ info.delay54_ps[i]);
+
+ set_2dxx_series(&info);
+
+ if (!(deven & 8)) {
+ read_mchbar32(0x2cb0);
+ write_mchbar32(0x2cb0, 0x40);
+ }
+
+ udelay(1000);
+
+ if (deven & 8) {
+ write_mchbar32(0xff8, 0x1800 | read_mchbar32(0xff8));
+ read_mchbar32(0x2cb0);
+ write_mchbar32(0x2cb0, 0x00);
+ pcie_read_config8(PCI_DEV (0, 0x2, 0x0), 0x4c);
+ pcie_read_config8(PCI_DEV (0, 0x2, 0x0), 0x4c);
+ pcie_read_config8(PCI_DEV (0, 0x2, 0x0), 0x4e);
+
+ read_mchbar8(0x1150);
+ read_mchbar8(0x1151);
+ read_mchbar8(0x1022);
+ read_mchbar8(0x16d0);
+ write_mchbar32(0x1300, 0x60606060);
+ write_mchbar32(0x1304, 0x60606060);
+ write_mchbar32(0x1308, 0x78797a7b);
+ write_mchbar32(0x130c, 0x7c7d7e7f);
+ write_mchbar32(0x1310, 0x60606060);
+ write_mchbar32(0x1314, 0x60606060);
+ write_mchbar32(0x1318, 0x60606060);
+ write_mchbar32(0x131c, 0x60606060);
+ write_mchbar32(0x1320, 0x50515253);
+ write_mchbar32(0x1324, 0x54555657);
+ write_mchbar32(0x1328, 0x58595a5b);
+ write_mchbar32(0x132c, 0x5c5d5e5f);
+ write_mchbar32(0x1330, 0x40414243);
+ write_mchbar32(0x1334, 0x44454647);
+ write_mchbar32(0x1338, 0x48494a4b);
+ write_mchbar32(0x133c, 0x4c4d4e4f);
+ write_mchbar32(0x1340, 0x30313233);
+ write_mchbar32(0x1344, 0x34353637);
+ write_mchbar32(0x1348, 0x38393a3b);
+ write_mchbar32(0x134c, 0x3c3d3e3f);
+ write_mchbar32(0x1350, 0x20212223);
+ write_mchbar32(0x1354, 0x24252627);
+ write_mchbar32(0x1358, 0x28292a2b);
+ write_mchbar32(0x135c, 0x2c2d2e2f);
+ write_mchbar32(0x1360, 0x10111213);
+ write_mchbar32(0x1364, 0x14151617);
+ write_mchbar32(0x1368, 0x18191a1b);
+ write_mchbar32(0x136c, 0x1c1d1e1f);
+ write_mchbar32(0x1370, 0x10203);
+ write_mchbar32(0x1374, 0x4050607);
+ write_mchbar32(0x1378, 0x8090a0b);
+ write_mchbar32(0x137c, 0xc0d0e0f);
+ write_mchbar8(0x11cc, 0x4e);
+ write_mchbar32(0x1110, 0x73970404);
+ write_mchbar32(0x1114, 0x72960404);
+ write_mchbar32(0x1118, 0x6f950404);
+ write_mchbar32(0x111c, 0x6d940404);
+ write_mchbar32(0x1120, 0x6a930404);
+ write_mchbar32(0x1124, 0x68a41404);
+ write_mchbar32(0x1128, 0x66a21404);
+ write_mchbar32(0x112c, 0x63a01404);
+ write_mchbar32(0x1130, 0x609e1404);
+ write_mchbar32(0x1134, 0x5f9c1404);
+ write_mchbar32(0x1138, 0x5c961404);
+ write_mchbar32(0x113c, 0x58a02404);
+ write_mchbar32(0x1140, 0x54942404);
+ write_mchbar32(0x1190, 0x900080a);
+ write_mchbar16(0x11c0, 0xc40b);
+ write_mchbar16(0x11c2, 0x303);
+ write_mchbar16(0x11c4, 0x301);
+ read_mchbar32(0x1190);
+ write_mchbar32(0x1190, 0x8900080a);
+ write_mchbar32(0x11b8, 0x70c3000);
+ write_mchbar8(0x11ec, 0xa);
+ write_mchbar16(0x1100, 0x800);
+ read_mchbar32(0x11bc);
+ write_mchbar32(0x11bc, 0x1e84800);
+ write_mchbar16(0x11ca, 0xfa);
+ write_mchbar32(0x11e4, 0x4e20);
+ write_mchbar8(0x11bc, 0xf);
+ write_mchbar16(0x11da, 0x19);
+ write_mchbar16(0x11ba, 0x470c);
+ write_mchbar32(0x1680, 0xe6ffe4ff);
+ write_mchbar32(0x1684, 0xdeffdaff);
+ write_mchbar32(0x1688, 0xd4ffd0ff);
+ write_mchbar32(0x168c, 0xccffc6ff);
+ write_mchbar32(0x1690, 0xc0ffbeff);
+ write_mchbar32(0x1694, 0xb8ffb0ff);
+ write_mchbar32(0x1698, 0xa8ff0000);
+ write_mchbar32(0x169c, 0xc00);
+ write_mchbar32(0x1290, 0x5000000);
+ }
+
+ write_mchbar32(0x124c, 0x15040d00);
+ write_mchbar32(0x1250, 0x7f0000);
+ write_mchbar32(0x1254, 0x1e220004);
+ write_mchbar32(0x1258, 0x4000004);
+ write_mchbar32(0x1278, 0x0);
+ write_mchbar32(0x125c, 0x0);
+ write_mchbar32(0x1260, 0x0);
+ write_mchbar32(0x1264, 0x0);
+ write_mchbar32(0x1268, 0x0);
+ write_mchbar32(0x126c, 0x0);
+ write_mchbar32(0x1270, 0x0);
+ write_mchbar32(0x1274, 0x0);
+ }
+
+ if ((deven & 8) && x2ca8 == 0) {
+ write_mchbar16(0x1214, 0x320);
+ write_mchbar32(0x1600, 0x40000000);
+ read_mchbar32(0x11f4);
+ write_mchbar32(0x11f4, 0x10000000);
+ read_mchbar16(0x1230);
+ write_mchbar16(0x1230, 0x8000);
+ write_mchbar32(0x1400, 0x13040020);
+ write_mchbar32(0x1404, 0xe090120);
+ write_mchbar32(0x1408, 0x5120220);
+ write_mchbar32(0x140c, 0x5120330);
+ write_mchbar32(0x1410, 0xe090220);
+ write_mchbar32(0x1414, 0x1010001);
+ write_mchbar32(0x1418, 0x1110000);
+ write_mchbar32(0x141c, 0x9020020);
+ write_mchbar32(0x1420, 0xd090220);
+ write_mchbar32(0x1424, 0x2090220);
+ write_mchbar32(0x1428, 0x2090330);
+ write_mchbar32(0x142c, 0xd090220);
+ write_mchbar32(0x1430, 0x1010001);
+ write_mchbar32(0x1434, 0x1110000);
+ write_mchbar32(0x1438, 0x11040020);
+ write_mchbar32(0x143c, 0x4030220);
+ write_mchbar32(0x1440, 0x1060220);
+ write_mchbar32(0x1444, 0x1060330);
+ write_mchbar32(0x1448, 0x4030220);
+ write_mchbar32(0x144c, 0x1010001);
+ write_mchbar32(0x1450, 0x1110000);
+ write_mchbar32(0x1454, 0x4010020);
+ write_mchbar32(0x1458, 0xb090220);
+ write_mchbar32(0x145c, 0x1090220);
+ write_mchbar32(0x1460, 0x1090330);
+ write_mchbar32(0x1464, 0xb090220);
+ write_mchbar32(0x1468, 0x1010001);
+ write_mchbar32(0x146c, 0x1110000);
+ write_mchbar32(0x1470, 0xf040020);
+ write_mchbar32(0x1474, 0xa090220);
+ write_mchbar32(0x1478, 0x1120220);
+ write_mchbar32(0x147c, 0x1120330);
+ write_mchbar32(0x1480, 0xa090220);
+ write_mchbar32(0x1484, 0x1010001);
+ write_mchbar32(0x1488, 0x1110000);
+ write_mchbar32(0x148c, 0x7020020);
+ write_mchbar32(0x1490, 0x1010220);
+ write_mchbar32(0x1494, 0x10210);
+ write_mchbar32(0x1498, 0x10320);
+ write_mchbar32(0x149c, 0x1010220);
+ write_mchbar32(0x14a0, 0x1010001);
+ write_mchbar32(0x14a4, 0x1110000);
+ write_mchbar32(0x14a8, 0xd040020);
+ write_mchbar32(0x14ac, 0x8090220);
+ write_mchbar32(0x14b0, 0x1111310);
+ write_mchbar32(0x14b4, 0x1111420);
+ write_mchbar32(0x14b8, 0x8090220);
+ write_mchbar32(0x14bc, 0x1010001);
+ write_mchbar32(0x14c0, 0x1110000);
+ write_mchbar32(0x14c4, 0x3010020);
+ write_mchbar32(0x14c8, 0x7090220);
+ write_mchbar32(0x14cc, 0x1081310);
+ write_mchbar32(0x14d0, 0x1081420);
+ write_mchbar32(0x14d4, 0x7090220);
+ write_mchbar32(0x14d8, 0x1010001);
+ write_mchbar32(0x14dc, 0x1110000);
+ write_mchbar32(0x14e0, 0xb040020);
+ write_mchbar32(0x14e4, 0x2030220);
+ write_mchbar32(0x14e8, 0x1051310);
+ write_mchbar32(0x14ec, 0x1051420);
+ write_mchbar32(0x14f0, 0x2030220);
+ write_mchbar32(0x14f4, 0x1010001);
+ write_mchbar32(0x14f8, 0x1110000);
+ write_mchbar32(0x14fc, 0x5020020);
+ write_mchbar32(0x1500, 0x5090220);
+ write_mchbar32(0x1504, 0x2071310);
+ write_mchbar32(0x1508, 0x2071420);
+ write_mchbar32(0x150c, 0x5090220);
+ write_mchbar32(0x1510, 0x1010001);
+ write_mchbar32(0x1514, 0x1110000);
+ write_mchbar32(0x1518, 0x7040120);
+ write_mchbar32(0x151c, 0x2090220);
+ write_mchbar32(0x1520, 0x70b1210);
+ write_mchbar32(0x1524, 0x70b1310);
+ write_mchbar32(0x1528, 0x2090220);
+ write_mchbar32(0x152c, 0x1010001);
+ write_mchbar32(0x1530, 0x1110000);
+ write_mchbar32(0x1534, 0x1010110);
+ write_mchbar32(0x1538, 0x1081310);
+ write_mchbar32(0x153c, 0x5041200);
+ write_mchbar32(0x1540, 0x5041310);
+ write_mchbar32(0x1544, 0x1081310);
+ write_mchbar32(0x1548, 0x1010001);
+ write_mchbar32(0x154c, 0x1110000);
+ write_mchbar32(0x1550, 0x1040120);
+ write_mchbar32(0x1554, 0x4051210);
+ write_mchbar32(0x1558, 0xd051200);
+ write_mchbar32(0x155c, 0xd051200);
+ write_mchbar32(0x1560, 0x4051210);
+ write_mchbar32(0x1564, 0x1010001);
+ write_mchbar32(0x1568, 0x1110000);
+ write_mchbar16(0x1222, 0x220a);
+ write_mchbar16(0x123c, 0x1fc0);
+ write_mchbar16(0x1220, 0x1388);
+ }
+
+ read_mchbar32(0x2c80); // !!!!
+ write_mchbar32(0x2c80, 0x1053688);
+ read_mchbar32(0x1c04); // !!!!
+ write_mchbar32(0x1804, 0x406080);
+
+ read_mchbar8(0x2ca8);
+
+ if (x2ca8 == 0) {
+ write_mchbar8(0x2ca8, read_mchbar8(0x2ca8) & ~3);
+ write_mchbar8(0x2ca8, read_mchbar8(0x2ca8) + 4);
+ write_mchbar32(0x1af0, read_mchbar32(0x1af0) | 0x10);
+#if REAL
+ while (1) {
+ asm volatile ("hlt");
+ }
+#else
+ printf("CP5\n");
+ exit(0);
+#endif
+ }
+
+ write_mchbar8(0x2ca8, read_mchbar8(0x2ca8));
+ read_mchbar32(0x2c80); // !!!!
+ write_mchbar32(0x2c80, 0x53688);
+ pcie_write_config32(PCI_DEV (0xff, 0, 0), 0x60, 0x20220);
+ read_mchbar16(0x2c20); // !!!!
+ read_mchbar16(0x2c10); // !!!!
+ read_mchbar16(0x2c00); // !!!!
+ write_mchbar16(0x2c00, 0x8c0);
+ udelay(1000);
+ write_1d0(0, 0x33d, 0, 0);
+ write_500(&info, 0, 0, 0xb61, 0, 0);
+ write_500(&info, 1, 0, 0xb61, 0, 0);
+ write_mchbar32(0x1a30, 0x0);
+ write_mchbar32(0x1a34, 0x0);
+ write_mchbar16(0x614,
+ 0xb5b | (info.populated_ranks[1][0][0] *
+ 0x404) | (info.populated_ranks[0][0][0] *
+ 0xa0));
+ write_mchbar16(0x616, 0x26a);
+ write_mchbar32(0x134, 0x856000);
+ write_mchbar32(0x160, 0x5ffffff);
+ read_mchbar32(0x114); // !!!!
+ write_mchbar32(0x114, 0xc2024440);
+ read_mchbar32(0x118); // !!!!
+ write_mchbar32(0x118, 0x4);
+ for (channel = 0; channel < NUM_CHANNELS; channel++)
+ write_mchbar32(0x260 + (channel << 10),
+ 0x30809ff |
+ ((info.
+ populated_ranks_mask[channel] & 3) << 20));
+ for (channel = 0; channel < NUM_CHANNELS; channel++) {
+ write_mchbar16(0x31c + (channel << 10), 0x101);
+ write_mchbar16(0x360 + (channel << 10), 0x909);
+ write_mchbar16(0x3a4 + (channel << 10), 0x101);
+ write_mchbar16(0x3e8 + (channel << 10), 0x101);
+ write_mchbar32(0x320 + (channel << 10), 0x29002900);
+ write_mchbar32(0x324 + (channel << 10), 0x0);
+ write_mchbar32(0x368 + (channel << 10), 0x32003200);
+ write_mchbar16(0x352 + (channel << 10), 0x505);
+ write_mchbar16(0x354 + (channel << 10), 0x3c3c);
+ write_mchbar16(0x356 + (channel << 10), 0x1040);
+ write_mchbar16(0x39a + (channel << 10), 0x73e4);
+ write_mchbar16(0x3de + (channel << 10), 0x77ed);
+ write_mchbar16(0x422 + (channel << 10), 0x1040);
+ }
+
+ write_1d0(0x4, 0x151, 4, 1);
+ write_1d0(0, 0x142, 3, 1);
+ rdmsr(0x1ac); // !!!!
+ write_500(&info, 1, 1, 0x6b3, 4, 1);
+ write_500(&info, 1, 1, 0x6cf, 4, 1);
+
+ rmw_1d0(0x21c, 0x38, 0, 6, 1);
+
+ write_1d0(((!info.populated_ranks[1][0][0]) << 1) | ((!info.
+ populated_ranks[0]
+ [0][0]) << 0),
+ 0x1d1, 3, 1);
+ for (channel = 0; channel < NUM_CHANNELS; channel++) {
+ write_mchbar16(0x38e + (channel << 10), 0x5f5f);
+ write_mchbar16(0x3d2 + (channel << 10), 0x5f5f);
+ }
+
+ set_334(0);
+
+ program_base_timings(&info);
+
+ write_mchbar8(0x5ff, read_mchbar8(0x5ff) | 0x80); /* OK */
+
+ write_1d0(0x2, 0x1d5, 2, 1);
+ write_1d0(0x20, 0x166, 7, 1);
+ write_1d0(0x0, 0xeb, 3, 1);
+ write_1d0(0x0, 0xf3, 6, 1);
+
+ for (channel = 0; channel < NUM_CHANNELS; channel++)
+ for (lane = 0; lane < 9; lane++) {
+ u16 addr = 0x125 + get_lane_offset(0, 0, lane);
+ u8 a;
+ a = read_500(&info, channel, addr, 6); // = 0x20040080 //!!!!
+ write_500(&info, channel, a, addr, 6, 1);
+ }
+
+ udelay(1000);
+
+ info.cached_training = get_cached_training();
+
+ if (s3resume) {
+ if (info.cached_training == NULL) {
+ u32 reg32;
+ printk(BIOS_ERR,
+ "Couldn't find training data. Rebooting\n");
+ reg32 = inl(DEFAULT_PMBASE + 0x04);
+ outl(reg32 & ~(7 << 10), DEFAULT_PMBASE + 0x04);
+ outb(0xe, 0xcf9);
+
+#if REAL
+ while (1) {
+ asm volatile ("hlt");
+ }
+#else
+ printf("CP5\n");
+ exit(0);
+#endif
+ }
+ int tm;
+ info.training = *info.cached_training;
+ for (tm = 0; tm < 4; tm++)
+ for (channel = 0; channel < NUM_CHANNELS; channel++)
+ for (slot = 0; slot < NUM_SLOTS; slot++)
+ for (rank = 0; rank < NUM_RANKS; rank++)
+ for (lane = 0; lane < 9; lane++)
+ write_500(&info,
+ channel,
+ info.training.
+ lane_timings
+ [tm][channel]
+ [slot][rank]
+ [lane],
+ get_timing_register_addr
+ (lane, tm,
+ slot, rank),
+ 9, 0);
+ write_1d0(info.cached_training->reg_178, 0x178, 7, 1);
+ write_1d0(info.cached_training->reg_10b, 0x10b, 6, 1);
+ }
+
+ read_mchbar32(0x1f4); // !!!!
+ write_mchbar32(0x1f4, 0x20000);
+ write_mchbar32(0x1f0, 0x1d000200);
+ read_mchbar8(0x1f0); // !!!!
+ write_mchbar8(0x1f0, 0x1);
+ read_mchbar8(0x1f0); // !!!!
+
+ program_board_delay(&info);
+
+ write_mchbar8(0x5ff, 0x0); /* OK */
+ write_mchbar8(0x5ff, 0x80); /* OK */
+ write_mchbar8(0x5f4, 0x1); /* OK */
+
+ write_mchbar32(0x130, read_mchbar32(0x130) & 0xfffffffd); // | 2 when ?
+ while (read_mchbar32(0x130) & 1) ;
+ gav(read_1d0(0x14b, 7)); // = 0x81023100
+ write_1d0(0x30, 0x14b, 7, 1);
+ read_1d0(0xd6, 6); // = 0xfa008080 // !!!!
+ write_1d0(7, 0xd6, 6, 1);
+ read_1d0(0x328, 6); // = 0xfa018080 // !!!!
+ write_1d0(7, 0x328, 6, 1);
+
+ for (channel = 0; channel < NUM_CHANNELS; channel++)
+ set_4cf(&info, channel,
+ info.populated_ranks[channel][0][0] ? 8 : 0);
+
+ read_1d0(0x116, 4); // = 0x4040432 // !!!!
+ write_1d0(2, 0x116, 4, 1);
+ read_1d0(0xae, 6); // = 0xe8088080 // !!!!
+ write_1d0(0, 0xae, 6, 1);
+ read_1d0(0x300, 4); // = 0x48088080 // !!!!
+ write_1d0(0, 0x300, 6, 1);
+ read_mchbar16(0x356); // !!!!
+ write_mchbar16(0x356, 0x1040);
+ read_mchbar16(0x756); // !!!!
+ write_mchbar16(0x756, 0x1040);
+ write_mchbar32(0x140, read_mchbar32(0x140) & ~0x07000000);
+ write_mchbar32(0x138, read_mchbar32(0x138) & ~0x07000000);
+ write_mchbar32(0x130, 0x31111301);
+ while (read_mchbar32(0x130) & 1) ;
+
+ {
+ u32 t;
+ u8 val_a1;
+ val_a1 = read_1d0(0xa1, 6); // = 0x1cf4040 // !!!!
+ t = read_1d0(0x2f3, 6); // = 0x10a4040 // !!!!
+ rmw_1d0(0x320, 0x07,
+ (t & 4) | ((t & 8) >> 2) | ((t & 0x10) >> 4), 6, 1);
+ rmw_1d0(0x14b, 0x78,
+ ((((val_a1 >> 2) & 4) | (val_a1 & 8)) >> 2) | (val_a1 &
+ 4), 7,
+ 1);
+ rmw_1d0(0xce, 0x38,
+ ((((val_a1 >> 2) & 4) | (val_a1 & 8)) >> 2) | (val_a1 &
+ 4), 6,
+ 1);
+ }
+
+ for (channel = 0; channel < NUM_CHANNELS; channel++)
+ set_4cf(&info, channel,
+ info.populated_ranks[channel][0][0] ? 9 : 1);
+
+ rmw_1d0(0x116, 0xe, 1, 4, 1); // = 0x4040432 // !!!!
+ read_mchbar32(0x144); // !!!!
+ write_1d0(2, 0xae, 6, 1);
+ write_1d0(2, 0x300, 6, 1);
+ write_1d0(2, 0x121, 3, 1);
+ read_1d0(0xd6, 6); // = 0xfa00c0c7 // !!!!
+ write_1d0(4, 0xd6, 6, 1);
+ read_1d0(0x328, 6); // = 0xfa00c0c7 // !!!!
+ write_1d0(4, 0x328, 6, 1);
+
+ for (channel = 0; channel < NUM_CHANNELS; channel++)
+ set_4cf(&info, channel,
+ info.populated_ranks[channel][0][0] ? 9 : 0);
+
+ write_mchbar32(0x130,
+ 0x11111301 | (info.
+ populated_ranks[1][0][0] << 30) | (info.
+ populated_ranks
+ [0][0]
+ [0] <<
+ 29));
+ while (read_mchbar8(0x130) & 1) ; // !!!!
+ read_1d0(0xa1, 6); // = 0x1cf4054 // !!!!
+ read_1d0(0x2f3, 6); // = 0x10a4054 // !!!!
+ read_1d0(0x21c, 6); // = 0xafa00c0 // !!!!
+ write_1d0(0, 0x21c, 6, 1);
+ read_1d0(0x14b, 7); // = 0x810231b0 // !!!!
+ write_1d0(0x35, 0x14b, 7, 1);
+
+ for (channel = 0; channel < NUM_CHANNELS; channel++)
+ set_4cf(&info, channel,
+ info.populated_ranks[channel][0][0] ? 0xb : 0x2);
+
+ set_334(1);
+
+ write_mchbar8(0x1e8, 0x4); /* OK */
+
+ for (channel = 0; channel < NUM_CHANNELS; channel++) {
+ write_500(&info, channel,
+ 0x3 & ~(info.populated_ranks_mask[channel]), 0x6b7, 2,
+ 1);
+ write_500(&info, channel, 0x3, 0x69b, 2, 1);
+ }
+ write_mchbar32(0x2d0, (read_mchbar32(0x2d0) & 0xff2c01ff) | 0x200000); /* OK */
+ write_mchbar16(0x6c0, 0x14a0); /* OK */
+ write_mchbar32(0x6d0, (read_mchbar32(0x6d0) & 0xff0080ff) | 0x8000); /* OK */
+ write_mchbar16(0x232, 0x8);
+ write_mchbar32(0x234, (read_mchbar32(0x234) & 0xfffbfffb) | 0x40004); /* 0x40004 or 0 depending on ? */
+ write_mchbar32(0x34, (read_mchbar32(0x34) & 0xfffffffd) | 5); /* OK */
+ write_mchbar32(0x128, 0x2150d05);
+ write_mchbar8(0x12c, 0x1f); /* OK */
+ write_mchbar8(0x12d, 0x56); /* OK */
+ write_mchbar8(0x12e, 0x31);
+ write_mchbar8(0x12f, 0x0); /* OK */
+ write_mchbar8(0x271, 0x2); /* OK */
+ write_mchbar8(0x671, 0x2); /* OK */
+ write_mchbar8(0x1e8, 0x4); /* OK */
+ for (channel = 0; channel < NUM_CHANNELS; channel++)
+ write_mchbar32(0x294 + (channel << 10),
+ (info.populated_ranks_mask[channel] & 3) << 16);
+ write_mchbar32(0x134, (read_mchbar32(0x134) & 0xfc01ffff) | 0x10000); /* OK */
+ write_mchbar32(0x134, (read_mchbar32(0x134) & 0xfc85ffff) | 0x850000); /* OK */
+ for (channel = 0; channel < NUM_CHANNELS; channel++)
+ write_mchbar32(0x260 + (channel << 10),
+ (read_mchbar32(0x260 + (channel << 10)) &
+ ~0xf00000) | 0x8000000 | ((info.
+ populated_ranks_mask
+ [channel] & 3) <<
+ 20));
+
+ if (!s3resume)
+ jedec_init(&info);
+
+ int totalrank = 0;
+ for (channel = 0; channel < NUM_CHANNELS; channel++)
+ for (slot = 0; slot < NUM_SLOTS; slot++)
+ for (rank = 0; rank < NUM_RANKS; rank++)
+ if (info.populated_ranks[channel][slot][rank]) {
+ jedec_read(&info, channel, slot, rank,
+ totalrank, 0xa, 0x400);
+ totalrank++;
+ }
+
+ write_mchbar8(0x12c, 0x9f);
+
+ read_mchbar8(0x271); // 2 // !!!!
+ write_mchbar8(0x271, 0xe);
+ read_mchbar8(0x671); // !!!!
+ write_mchbar8(0x671, 0xe);
+
+ if (!s3resume) {
+ for (channel = 0; channel < NUM_CHANNELS; channel++) {
+ write_mchbar32(0x294 + (channel << 10),
+ (info.
+ populated_ranks_mask[channel] & 3) <<
+ 16);
+ write_mchbar16(0x298 + (channel << 10),
+ (info.
+ populated_ranks[channel][0][0]) | (info.
+ populated_ranks
+ [channel]
+ [0]
+ [1]
+ <<
+ 5));
+ write_mchbar32(0x29c + (channel << 10), 0x77a);
+ }
+ read_mchbar32(0x2c0); /// !!!
+ write_mchbar32(0x2c0, 0x6009cc00);
+
+ {
+ u8 a, b;
+ a = read_mchbar8(0x243); // !!!!
+ b = read_mchbar8(0x643); // !!!!
+ write_mchbar8(0x243, a | 2);
+ write_mchbar8(0x643, b | 2);
+ }
+
+ write_1d0(7, 0x19b, 3, 1);
+ write_1d0(7, 0x1c0, 3, 1);
+ write_1d0(4, 0x1c6, 4, 1);
+ write_1d0(4, 0x1cc, 4, 1);
+ read_1d0(0x151, 4); // = 0x408c6d74 // !!!!
+ write_1d0(4, 0x151, 4, 1);
+ write_mchbar32(0x584, 0xfffff);
+ write_mchbar32(0x984, 0xfffff);
+
+ for (channel = 0; channel < NUM_CHANNELS; channel++)
+ for (slot = 0; slot < NUM_SLOTS; slot++)
+ for (rank = 0; rank < NUM_RANKS; rank++)
+ if (info.
+ populated_ranks[channel][slot]
+ [rank])
+ config_rank(&info, s3resume,
+ channel, slot,
+ rank);
+
+ write_mchbar8(0x243, 0x1);
+ write_mchbar8(0x643, 0x1);
+ }
+
+ /* was == 1 but is common */
+ pcie_write_config16(NORTHBRIDGE, 0xc8, 3);
+ write_26c(0, 0x820);
+ write_26c(1, 0x820);
+ write_mchbar32(0x130, read_mchbar32(0x130) | 2);
+ /* end */
+
+ if (s3resume) {
+ for (channel = 0; channel < NUM_CHANNELS; channel++) {
+ write_mchbar32(0x294 + (channel << 10),
+ (info.
+ populated_ranks_mask[channel] & 3) <<
+ 16);
+ write_mchbar16(0x298 + (channel << 10),
+ (info.
+ populated_ranks[channel][0][0]) | (info.
+ populated_ranks
+ [channel]
+ [0]
+ [1]
+ <<
+ 5));
+ write_mchbar32(0x29c + (channel << 10), 0x77a);
+ }
+ read_mchbar32(0x2c0); /// !!!
+ write_mchbar32(0x2c0, 0x6009cc00);
+ }
+
+ write_mchbar32(0xfa4, read_mchbar32(0xfa4) & ~0x01000002);
+ write_mchbar32(0xfb0, 0x2000e019);
+
+#if !REAL
+ printf("CP16\n");
+#endif
+
+ /* Before training. */
+ timestamp_add_now(103);
+
+ if (!s3resume)
+ ram_training(&info);
+
+ /* After training. */
+ timestamp_add_now (104);
+
+ dump_timings(&info);
+
+#if 0
+ ram_check(0x100000, 0x200000);
+#endif
+ program_modules_memory_map(&info, 0);
+ program_total_memory_map(&info);
+
+ if (info.non_interleaved_part_mb != 0 && info.interleaved_part_mb != 0)
+ write_mchbar8(0x111, 0x20 | (0 << 2) | (1 << 6) | (0 << 7));
+ else if (have_match_ranks(&info, 0, 4) && have_match_ranks(&info, 1, 4))
+ write_mchbar8(0x111, 0x20 | (3 << 2) | (0 << 6) | (1 << 7));
+ else if (have_match_ranks(&info, 0, 2) && have_match_ranks(&info, 1, 2))
+ write_mchbar8(0x111, 0x20 | (3 << 2) | (0 << 6) | (0 << 7));
+ else
+ write_mchbar8(0x111, 0x20 | (3 << 2) | (1 << 6) | (0 << 7));
+
+ write_mchbar32(0xfac, read_mchbar32(0xfac) & ~0x80000000); // OK
+ write_mchbar32(0xfb4, 0x4800); // OK
+ write_mchbar32(0xfb8, (info.revision < 8) ? 0x20 : 0x0); // OK
+ write_mchbar32(0xe94, 0x7ffff); // OK
+ write_mchbar32(0xfc0, 0x80002040); // OK
+ write_mchbar32(0xfc4, 0x701246); // OK
+ write_mchbar8(0xfc8, read_mchbar8(0xfc8) & ~0x70); // OK
+ write_mchbar32(0xe5c, 0x1000000 | read_mchbar32(0xe5c)); // OK
+ write_mchbar32(0x1a70, (read_mchbar32(0x1a70) | 0x00200000) & ~0x00100000); // OK
+ write_mchbar32(0x50, 0x700b0); // OK
+ write_mchbar32(0x3c, 0x10); // OK
+ write_mchbar8(0x1aa8, (read_mchbar8(0x1aa8) & ~0x35) | 0xa); // OK
+ write_mchbar8(0xff4, read_mchbar8(0xff4) | 0x2); // OK
+ write_mchbar32(0xff8, (read_mchbar32(0xff8) & ~0xe008) | 0x1020); // OK
+
+#if REAL
+ write_mchbar32(0xd00, IOMMU_BASE2 | 1);
+ write_mchbar32(0xd40, IOMMU_BASE1 | 1);
+ write_mchbar32(0xdc0, IOMMU_BASE4 | 1);
+
+ write32(IOMMU_BASE1 | 0xffc, 0x80000000);
+ write32(IOMMU_BASE2 | 0xffc, 0xc0000000);
+ write32(IOMMU_BASE4 | 0xffc, 0x80000000);
+
+#else
+ {
+ u32 eax;
+ eax = read32(0xffc + (read_mchbar32(0xd00) & ~1)) | 0x08000000; // = 0xe911714b// OK
+ write32(0xffc + (read_mchbar32(0xd00) & ~1), eax); // OK
+ eax = read32(0xffc + (read_mchbar32(0xdc0) & ~1)) | 0x40000000; // = 0xe911714b// OK
+ write32(0xffc + (read_mchbar32(0xdc0) & ~1), eax); // OK
+ }
+#endif
+
+ {
+ u32 eax;
+
+ eax = info.fsb_frequency / 9;
+ write_mchbar32(0xfcc, (read_mchbar32(0xfcc) & 0xfffc0000) | (eax * 0x280) | (eax * 0x5000) | eax | 0x40000); // OK
+ write_mchbar32(0x20, 0x33001); //OK
+ }
+
+ for (channel = 0; channel < NUM_CHANNELS; channel++) {
+ write_mchbar32(0x220 + (channel << 10), read_mchbar32(0x220 + (channel << 10)) & ~0x7770); //OK
+ if (info.max_slots_used_in_channel == 1)
+ write_mchbar16(0x237 + (channel << 10), (read_mchbar16(0x237 + (channel << 10)) | 0x0201)); //OK
+ else
+ write_mchbar16(0x237 + (channel << 10), (read_mchbar16(0x237 + (channel << 10)) & ~0x0201)); //OK
+
+ write_mchbar8(0x241 + (channel << 10), read_mchbar8(0x241 + (channel << 10)) | 1); // OK
+
+ if (info.clock_speed_index <= 1
+ && (info.silicon_revision == 2
+ || info.silicon_revision == 3))
+ write_mchbar32(0x248 + (channel << 10), (read_mchbar32(0x248 + (channel << 10)) | 0x00102000)); // OK
+ else
+ write_mchbar32(0x248 + (channel << 10), (read_mchbar32(0x248 + (channel << 10)) & ~0x00102000)); // OK
+ }
+
+ write_mchbar32(0x115, read_mchbar32(0x115) | 0x1000000); // OK
+
+ {
+ u8 al;
+ al = 0xd;
+ if (!(info.silicon_revision == 0 || info.silicon_revision == 1))
+ al += 2;
+ al |= ((1 << (info.max_slots_used_in_channel - 1)) - 1) << 4;
+ write_mchbar32(0x210, (al << 16) | 0x20); // OK
+ }
+
+ for (channel = 0; channel < NUM_CHANNELS; channel++) {
+ write_mchbar32(0x288 + (channel << 10), 0x70605040); // OK
+ write_mchbar32(0x28c + (channel << 10), 0xfffec080); // OK
+ write_mchbar32(0x290 + (channel << 10), 0x282091c | ((info.max_slots_used_in_channel - 1) << 0x16)); // OK
+ }
+ u32 reg1c;
+ pcie_read_config32(NORTHBRIDGE, 0x40); // = DEFAULT_EPBAR | 0x001 // OK
+ reg1c = read32(DEFAULT_EPBAR | 0x01c); // = 0x8001 // OK
+ pcie_read_config32(NORTHBRIDGE, 0x40); // = DEFAULT_EPBAR | 0x001 // OK
+ write32(DEFAULT_EPBAR | 0x01c, reg1c); // OK
+ read_mchbar8(0xe08); // = 0x0
+ pcie_read_config32(NORTHBRIDGE, 0xe4); // = 0x316126
+ write_mchbar8(0x1210, read_mchbar8(0x1210) | 2); // OK
+ write_mchbar32(0x1200, 0x8800440); // OK
+ write_mchbar32(0x1204, 0x53ff0453); // OK
+ write_mchbar32(0x1208, 0x19002043); // OK
+ write_mchbar16(0x1214, 0x320); // OK
+
+ if (info.revision == 0x10 || info.revision == 0x11) {
+ write_mchbar16(0x1214, 0x220); // OK
+ write_mchbar8(0x1210, read_mchbar8(0x1210) | 0x40); // OK
+ }
+
+ write_mchbar8(0x1214, read_mchbar8(0x1214) | 0x4); // OK
+ write_mchbar8(0x120c, 0x1); // OK
+ write_mchbar8(0x1218, 0x3); // OK
+ write_mchbar8(0x121a, 0x3); // OK
+ write_mchbar8(0x121c, 0x3); // OK
+ write_mchbar16(0xc14, 0x0); // OK
+ write_mchbar16(0xc20, 0x0); // OK
+ write_mchbar32(0x1c, 0x0); // OK
+
+ /* revision dependent here. */
+
+ write_mchbar16(0x1230, read_mchbar16(0x1230) | 0x1f07); // OK
+
+ if (info.uma_enabled)
+ write_mchbar32(0x11f4, read_mchbar32(0x11f4) | 0x10000000); // OK
+
+ write_mchbar16(0x1230, read_mchbar16(0x1230) | 0x8000); // OK
+ write_mchbar8(0x1214, read_mchbar8(0x1214) | 1); // OK
+
+ u8 bl, ebpb;
+ u16 reg_1020;
+
+ reg_1020 = read_mchbar32(0x1020); // = 0x6c733c // OK
+ write_mchbar8(0x1070, 0x1); // OK
+
+ write_mchbar32(0x1000, 0x100); // OK
+ write_mchbar8(0x1007, 0x0); // OK
+
+ if (reg_1020 != 0) {
+ write_mchbar16(0x1018, 0x0); // OK
+ bl = reg_1020 >> 8;
+ ebpb = reg_1020 & 0xff;
+ } else {
+ ebpb = 0;
+ bl = 8;
+ }
+
+ rdmsr(0x1a2);
+
+ write_mchbar32(0x1014, 0xffffffff); // OK
+
+ write_mchbar32(0x1010, ((((ebpb + 0x7d) << 7) / bl) & 0xff) * (! !reg_1020)); // OK
+
+ write_mchbar8(0x101c, 0xb8); // OK
+
+ write_mchbar8(0x123e, (read_mchbar8(0x123e) & 0xf) | 0x60); // OK
+ if (reg_1020 != 0) {
+ write_mchbar32(0x123c, (read_mchbar32(0x123c) & ~0x00900000) | 0x600000); // OK
+ write_mchbar8(0x101c, 0xb8); // OK
+ }
+
+ setup_heci_uma(&info);
+
+ if (info.uma_enabled) {
+ u16 ax;
+ write_mchbar32(0x11b0, read_mchbar32(0x11b0) | 0x4000); // OK
+ write_mchbar32(0x11b4, read_mchbar32(0x11b4) | 0x4000); // OK
+ write_mchbar16(0x1190, read_mchbar16(0x1190) | 0x4000); // OK
+
+ ax = read_mchbar16(0x1190) & 0xf00; // = 0x480a // OK
+ write_mchbar16(0x1170, ax | (read_mchbar16(0x1170) & 0x107f) | 0x4080); // OK
+ write_mchbar16(0x1170, read_mchbar16(0x1170) | 0x1000); // OK
+#if REAL
+ udelay(1000);
+#endif
+ u16 ecx;
+ for (ecx = 0xffff; ecx && (read_mchbar16(0x1170) & 0x1000); ecx--) ; // OK
+ write_mchbar16(0x1190, read_mchbar16(0x1190) & ~0x4000); // OK
+ }
+
+ pcie_write_config8(SOUTHBRIDGE, GEN_PMCON_2,
+ pcie_read_config8(SOUTHBRIDGE, GEN_PMCON_2) & ~0x80);
+ udelay(10000);
+ write_mchbar16(0x2ca8, 0x0);
+
+#if REAL
+ udelay(1000);
+ dump_timings(&info);
+ if (!s3resume)
+ save_timings(&info);
+#endif
+}
+
+#if REAL
+unsigned long get_top_of_ram(void)
+{
+ /* Base of TSEG is top of usable DRAM */
+ u32 tom = pci_read_config32(PCI_DEV(0, 0, 0), TSEG);
+ return (unsigned long)tom;
+}
+#endif
+
+#if !REAL
int main(void)
{
	/* Harness entry point for the offline (!REAL) build: run the
	   RAM init path once and exit.
	   NOTE(review): the argument is presumably the s3resume flag
	   (0 = cold boot) — confirm against raminit()'s prototype.  */
	const int s3resume = 0;

	raminit(s3resume);
	return 0;
}
+#endif
OpenPOWER on IntegriCloud