From d28f53509437b2bb242a41d6f572f69c04eebfdd Mon Sep 17 00:00:00 2001
From: Markus Pargmann
Date: Mon, 12 Jan 2015 10:48:44 +0100
Subject: ARM: socfpga: Import sequencer code from generated uboot

This patch imports the sequencer code from uboot using the new script
scripts/socfpga_get_sequencer.

Signed-off-by: Markus Pargmann
Signed-off-by: Steffen Trumtrar
Signed-off-by: Sascha Hauer
---
 arch/arm/mach-socfpga/include/mach/sdram_io.h      |   58 +
 arch/arm/mach-socfpga/include/mach/sequencer.c     | 6131 ++++++++++++--------
 arch/arm/mach-socfpga/include/mach/sequencer.h     |  397 +-
 .../mach-socfpga/include/mach/sequencer_defines.h  |    6 +
 arch/arm/mach-socfpga/include/mach/system.h        |   37 +
 arch/arm/mach-socfpga/include/mach/tclrpt.h        |   38 +
 6 files changed, 4015 insertions(+), 2652 deletions(-)
 create mode 100755 arch/arm/mach-socfpga/include/mach/sdram_io.h
 create mode 100644 arch/arm/mach-socfpga/include/mach/sequencer_defines.h
 create mode 100755 arch/arm/mach-socfpga/include/mach/system.h
 create mode 100755 arch/arm/mach-socfpga/include/mach/tclrpt.h

diff --git a/arch/arm/mach-socfpga/include/mach/sdram_io.h b/arch/arm/mach-socfpga/include/mach/sdram_io.h
new file mode 100755
index 0000000000..62698000f6
--- /dev/null
+++ b/arch/arm/mach-socfpga/include/mach/sdram_io.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright Altera Corporation (C) 2012-2014. All rights reserved
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Altera Corporation nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL ALTERA CORPORATION BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+
+#define MGR_SELECT_MASK 0xf8000
+
+#define APB_BASE_SCC_MGR SDR_PHYGRP_SCCGRP_ADDRESS
+#define APB_BASE_PHY_MGR SDR_PHYGRP_PHYMGRGRP_ADDRESS
+#define APB_BASE_RW_MGR SDR_PHYGRP_RWMGRGRP_ADDRESS
+#define APB_BASE_DATA_MGR SDR_PHYGRP_DATAMGRGRP_ADDRESS
+#define APB_BASE_REG_FILE SDR_PHYGRP_REGFILEGRP_ADDRESS
+#define APB_BASE_MMR SDR_CTRLGRP_ADDRESS
+
+#define __AVL_TO_APB(ADDR) \
+	((((ADDR) & MGR_SELECT_MASK) == (BASE_PHY_MGR)) ? (APB_BASE_PHY_MGR) | (((ADDR) >> (14-6)) & (0x1<<6)) | ((ADDR) & 0x3f) : \
+	(((ADDR) & MGR_SELECT_MASK) == (BASE_RW_MGR)) ? (APB_BASE_RW_MGR) | ((ADDR) & 0x1fff) : \
+	(((ADDR) & MGR_SELECT_MASK) == (BASE_DATA_MGR)) ? (APB_BASE_DATA_MGR) | ((ADDR) & 0x7ff) : \
+	(((ADDR) & MGR_SELECT_MASK) == (BASE_SCC_MGR)) ? (APB_BASE_SCC_MGR) | ((ADDR) & 0xfff) : \
+	(((ADDR) & MGR_SELECT_MASK) == (BASE_REG_FILE)) ? (APB_BASE_REG_FILE) | ((ADDR) & 0x7ff) : \
+	(((ADDR) & MGR_SELECT_MASK) == (BASE_MMR)) ? (APB_BASE_MMR) | ((ADDR) & 0xfff) : \
+	-1)
+
+#define IOWR_32DIRECT(BASE, OFFSET, DATA) \
+	write_register(HPS_SDR_BASE, __AVL_TO_APB((uint32_t)((BASE) + (OFFSET))), DATA)
+
+#define IORD_32DIRECT(BASE, OFFSET) \
+	read_register(HPS_SDR_BASE, __AVL_TO_APB((uint32_t)((BASE) + (OFFSET))))
+ #define write_register(BASE, OFFSET, DATA) \
+	writel(DATA, ((BASE) + (OFFSET)))
+ #define read_register(BASE, OFFSET) \
+	readl((BASE) + (OFFSET))
+ #define HPS_SDR_BASE 0xffc20000
diff --git a/arch/arm/mach-socfpga/include/mach/sequencer.c b/arch/arm/mach-socfpga/include/mach/sequencer.c
index 87dc6677ec..259a400db4 100644
--- a/arch/arm/mach-socfpga/include/mach/sequencer.c
+++ b/arch/arm/mach-socfpga/include/mach/sequencer.c
@@ -1,174 +1,337 @@
 /*
- * Copyright Altera Corporation (C) 2012-2014. All rights reserved
- *
- * SPDX-License-Identifier: BSD-3-Clause
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Altera Corporation nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL ALTERA CORPORATION BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include
-#include
-#include
-#include
-#include
+* Copyright Altera Corporation (C) 2012-2014. All rights reserved
+*
+* SPDX-License-Identifier: BSD-3-Clause
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are met:
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the distribution.
+* * Neither the name of Altera Corporation nor the +* names of its contributors may be used to endorse or promote products +* derived from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL ALTERA CORPORATION BE LIABLE FOR ANY +* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "sequencer_defines.h" + +#include "system.h" +#include "sdram_io.h" +#include "sequencer.h" +#include "tclrpt.h" + +/****************************************************************************** + ****************************************************************************** + ** NOTE: Special Rules for Globale Variables ** + ** ** + ** All global variables that are explicitly initialized (including ** + ** explicitly initialized to zero), are only initialized once, during ** + ** configuration time, and not again on reset. This means that they ** + ** preserve their current contents across resets, which is needed for some ** + ** special cases involving communication with external modules. In ** + ** addition, this avoids paying the price to have the memory initialized, ** + ** even for zeroed data, provided it is explicitly set to zero in the code, ** + ** and doesn't rely on implicit initialization. ** + ****************************************************************************** + ******************************************************************************/ + +// Temporary workaround to place the initial stack pointer at a safe offset from end +#define STRINGIFY(s) STRINGIFY_STR(s) +#define STRINGIFY_STR(s) #s +asm(".global __alt_stack_pointer"); +asm("__alt_stack_pointer = " STRINGIFY(STACK_POINTER)); -static void IOWR_32DIRECT(uint32_t base, uint32_t ofs, uint32_t val) -{ - writel(val, CYCLONE5_SDR_ADDRESS + base + ofs); -} +#include -static uint32_t IORD_32DIRECT(uint32_t base, uint32_t ofs) -{ - return readl(CYCLONE5_SDR_ADDRESS + base + ofs); -} +#define NEWVERSION_RDDESKEW 1 +#define NEWVERSION_WRDESKEW 1 +#define NEWVERSION_GW 1 +#define NEWVERSION_WL 1 +#define NEWVERSION_DQSEN 1 -/* Just to make the debugging code more uniform */ -#ifndef RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM -#define RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM 0 -#endif +// Just to make the debugging code more uniform -#if HALF_RATE -#define HALF_RATE_MODE 1 -#else #define HALF_RATE_MODE 0 -#endif -#if QUARTER_RATE -#define QUARTER_RATE_MODE 1 -#else #define QUARTER_RATE_MODE 0 -#endif #define DELTA_D 1 -#define BTFLD_FMT "%x" +// case:56390 +// VFIFO_CONTROL_WIDTH_PER_DQS is the number of VFIFOs actually instantiated per DQS. This is always one except: +// AV QDRII where it is 2 for x18 and x18w2, and 4 for x36 and x36w2 +// RLDRAMII x36 and x36w2 where it is 2. +// In 12.0sp1 we set this to 4 for all of the special cases above to keep it simple. 
+// In 12.0sp2 or 12.1 this should get moved to generation and unified with the same constant used in the phy mgr + +#define VFIFO_CONTROL_WIDTH_PER_DQS 1 + +// In order to reduce ROM size, most of the selectable calibration steps are +// decided at compile time based on the user's calibration mode selection, +// as captured by the STATIC_CALIB_STEPS selection below. +// +// However, to support simulation-time selection of fast simulation mode, where +// we skip everything except the bare minimum, we need a few of the steps to +// be dynamic. In those cases, we either use the DYNAMIC_CALIB_STEPS for the +// check, which is based on the rtl-supplied value, or we dynamically compute the +// value to use based on the dynamically-chosen calibration mode + +#define BTFLD_FMT "%lx" + +// For HPS running on actual hardware + +#define DLEVEL 0 +#ifdef HPS_HW_SERIAL_SUPPORT +// space around comma is required for varargs macro to remove comma if args is empty +#define DPRINT(level, fmt, args...) if (DLEVEL >= (level)) printf("SEQ.C: " fmt "\n" , ## args) +#define IPRINT(fmt, args...) printf("SEQ.C: " fmt "\n" , ## args) +#else +#define DPRINT(level, fmt, args...) +#define IPRINT(fmt, args...) +#endif +#define BFM_GBL_SET(field,value) +#define BFM_GBL_GET(field) ((long unsigned int)0) +#define BFM_STAGE(stage) +#define BFM_INC_VFIFO +#define COV(label) + +#define TRACE_FUNC(fmt, args...) DPRINT(1, "%s[%d]: " fmt, __func__, __LINE__ , ## args) -#define STATIC_CALIB_STEPS (CALIB_SKIP_FULL_TEST) +#define DYNAMIC_CALIB_STEPS (dyn_calib_steps) -/* calibration steps requested by the rtl */ -static uint16_t dyn_calib_steps; +#define STATIC_IN_RTL_SIM 0 -static uint32_t vfifo_idx; +#define STATIC_SKIP_DELAY_LOOPS 0 -/* - * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option - * instead of static, we use boolean logic to select between - * non-skip and skip values - * - * The mask is set to include all bits when not-skipping, but is - * zero when skipping - */ +#define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | STATIC_SKIP_DELAY_LOOPS) + +// calibration steps requested by the rtl +static uint16_t dyn_calib_steps = 0; -static uint16_t skip_delay_mask; /* mask off bits when skipping/not-skipping */ +// To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option +// instead of static, we use boolean logic to select between +// non-skip and skip values +// +// The mask is set to include all bits when not-skipping, but is +// zero when skipping + +static uint16_t skip_delay_mask = 0; // mask off bits when skipping/not-skipping #define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \ ((non_skip_value) & skip_delay_mask) -static gbl_t *gbl; -static param_t *param; +// TODO: The skip group strategy is completely missing -static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn, - uint32_t write_group, uint32_t use_dm, - uint32_t all_correct, t_btfld * bit_chk, uint32_t all_ranks); +static gbl_t *gbl = 0; +static param_t *param = 0; -/* - * This (TEST_SIZE) is used to test handling of large roms, to make - * sure we are sizing things correctly - * Note, the initialized data takes up twice the space in rom, since - * there needs to be a copy with the initial value and a copy that is - * written too, since on soft-reset, it needs to have the initial values - * without reloading the memory from external sources - */ - -static void reg_file_set_group(uint32_t set_group) +static uint32_t curr_shadow_reg = 0; + +static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn, uint32_t 
write_group, + uint32_t use_dm, uint32_t all_correct, + t_btfld * bit_chk, uint32_t all_ranks); + +// This (TEST_SIZE) is used to test handling of large roms, to make +// sure we are sizing things correctly +// Note, the initialized data takes up twice the space in rom, since +// there needs to be a copy with the initial value and a copy that is +// written too, since on soft-reset, it needs to have the initial values +// without reloading the memory from external sources + +// #define TEST_SIZE (6*1024) + +#ifdef TEST_SIZE + +#define PRE_POST_TEST_SIZE 3 + +static unsigned int pre_test_size_mem[PRE_POST_TEST_SIZE] = { 1, 2, 3 }; + +static unsigned int test_size_mem[TEST_SIZE / sizeof(unsigned int)] = { 100, 200, 300 }; + +static unsigned int post_test_size_mem[PRE_POST_TEST_SIZE] = { 10, 20, 30 }; + +static void write_test_mem(void) +{ + int i; + + for (i = 0; i < PRE_POST_TEST_SIZE; i++) { + pre_test_size_mem[i] = (i + 1) * 10; + post_test_size_mem[i] = (i + 1); + } + + for (i = 0; i < sizeof(test_size_mem) / sizeof(unsigned int); i++) { + test_size_mem[i] = i; + } + +} + +static int check_test_mem(int start) +{ + int i; + + for (i = 0; i < PRE_POST_TEST_SIZE; i++) { + if (start) { + if (pre_test_size_mem[i] != (i + 1)) { + return 0; + } + if (post_test_size_mem[i] != (i + 1) * 10) { + return 0; + } + } else { + if (pre_test_size_mem[i] != (i + 1) * 10) { + return 0; + } + if (post_test_size_mem[i] != (i + 1)) { + return 0; + } + } + } + + for (i = 0; i < sizeof(test_size_mem) / sizeof(unsigned int); i++) { + if (start) { + if (i < 3) { + if (test_size_mem[i] != (i + 1) * 100) { + return 0; + } + } else { + if (test_size_mem[i] != 0) { + return 0; + } + } + } else { + if (test_size_mem[i] != i) { + return 0; + } + } + } + + return 1; +} + +#endif // TEST_SIZE + +static void set_failing_group_stage(uint32_t group, uint32_t stage, uint32_t substage) +{ + ALTERA_ASSERT(group < RW_MGR_MEM_IF_WRITE_DQS_WIDTH); + + // Only set the global stage if there was not been any other failing group + if (gbl->error_stage == CAL_STAGE_NIL) { + gbl->error_substage = substage; + gbl->error_stage = stage; + gbl->error_group = group; + TCLRPT_SET(debug_summary_report->error_sub_stage, substage); + TCLRPT_SET(debug_summary_report->error_stage, stage); + TCLRPT_SET(debug_summary_report->error_group, group); + + } + // Always set the group specific errors + TCLRPT_SET(debug_cal_report->cal_status_per_group[curr_shadow_reg][group].error_stage, + stage); + TCLRPT_SET(debug_cal_report->cal_status_per_group[curr_shadow_reg][group].error_sub_stage, + substage); + +} + +static inline void reg_file_set_group(uint32_t set_group) { - /* Read the current group and stage */ + // Read the current group and stage uint32_t cur_stage_group = IORD_32DIRECT(REG_FILE_CUR_STAGE, 0); - /* Clear the group */ + // Clear the group cur_stage_group &= 0x0000FFFF; - /* Set the group */ + // Set the group cur_stage_group |= (set_group << 16); - /* Write the data back */ + // Write the data back IOWR_32DIRECT(REG_FILE_CUR_STAGE, 0, cur_stage_group); } -static void reg_file_set_stage(uint32_t set_stage) +static inline void reg_file_set_stage(uint32_t set_stage) { - /* Read the current group and stage */ + // Read the current group and stage uint32_t cur_stage_group = IORD_32DIRECT(REG_FILE_CUR_STAGE, 0); - /* Clear the stage and substage */ + // Clear the stage and substage cur_stage_group &= 0xFFFF0000; - /* Set the stage */ + // Set the stage cur_stage_group |= (set_stage & 0x000000FF); - /* Write the data back */ + // Write the data 
back IOWR_32DIRECT(REG_FILE_CUR_STAGE, 0, cur_stage_group); } -static void reg_file_set_sub_stage(uint32_t set_sub_stage) +static inline void reg_file_set_sub_stage(uint32_t set_sub_stage) { - /* Read the current group and stage */ + // Read the current group and stage uint32_t cur_stage_group = IORD_32DIRECT(REG_FILE_CUR_STAGE, 0); - /* Clear the substage */ + // Clear the substage cur_stage_group &= 0xFFFF00FF; - /* Set the sub stage */ + // Set the sub stage cur_stage_group |= ((set_sub_stage << 8) & 0x0000FF00); - /* Write the data back */ + // Write the data back IOWR_32DIRECT(REG_FILE_CUR_STAGE, 0, cur_stage_group); } +static inline uint32_t is_write_group_enabled_for_dm(uint32_t write_group) +{ + return 1; +} + +static inline void select_curr_shadow_reg_using_rank(uint32_t rank) +{ +} + static void initialize(void) { - /* - * In Hard PHY this is a 2-bit control: - * 0: AFI Mux Select - * 1: DDIO Mux Select - */ + TRACE_FUNC(); + + //USER calibration has control over path to memory + + // In Hard PHY this is a 2-bit control: + // 0: AFI Mux Select + // 1: DDIO Mux Select IOWR_32DIRECT(PHY_MGR_MUX_SEL, 0, 0x3); - /* USER memory clock is not stable we begin initialization */ + //USER memory clock is not stable we begin initialization IOWR_32DIRECT(PHY_MGR_RESET_MEM_STBL, 0, 0); - /* USER calibration status all set to zero */ + //USER calibration status all set to zero IOWR_32DIRECT(PHY_MGR_CAL_STATUS, 0, 0); IOWR_32DIRECT(PHY_MGR_CAL_DEBUG_INFO, 0, 0); - param->read_correct_mask_vg = ((t_btfld)1 << (RW_MGR_MEM_DQ_PER_READ_DQS / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1; - param->write_correct_mask_vg = ((t_btfld)1 << (RW_MGR_MEM_DQ_PER_READ_DQS / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1; - param->read_correct_mask = ((t_btfld)1 << RW_MGR_MEM_DQ_PER_READ_DQS) - 1; - param->write_correct_mask = ((t_btfld)1 << RW_MGR_MEM_DQ_PER_WRITE_DQS) - 1; + if (((DYNAMIC_CALIB_STEPS) & CALIB_SKIP_ALL) != CALIB_SKIP_ALL) { + param->read_correct_mask_vg = + ((t_btfld) 1 << + (RW_MGR_MEM_DQ_PER_READ_DQS / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1; + param->write_correct_mask_vg = + ((t_btfld) 1 << + (RW_MGR_MEM_DQ_PER_READ_DQS / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1; + param->read_correct_mask = ((t_btfld) 1 << RW_MGR_MEM_DQ_PER_READ_DQS) - 1; + param->write_correct_mask = ((t_btfld) 1 << RW_MGR_MEM_DQ_PER_WRITE_DQS) - 1; + param->dm_correct_mask = + ((t_btfld) 1 << (RW_MGR_MEM_DATA_WIDTH / RW_MGR_MEM_DATA_MASK_WIDTH)) - 1; + } } -#if DDR3 static void set_rank_and_odt_mask(uint32_t rank, uint32_t odt_mode) { uint32_t odt_mask_0 = 0; @@ -176,70 +339,93 @@ static void set_rank_and_odt_mask(uint32_t rank, uint32_t odt_mode) uint32_t cs_and_odt_mask; if (odt_mode == RW_MGR_ODT_MODE_READ_WRITE) { - if (RW_MGR_MEM_NUMBER_OF_RANKS == 1) { - /* - * 1 Rank - * Read: ODT = 0 - * Write: ODT = 1 - */ + + if (LRDIMM) { + // USER LRDIMMs have two cases to consider: single-slot and dual-slot. + // USER In single-slot, assert ODT for write only. + // USER In dual-slot, assert ODT for both slots for write, + // USER and on the opposite slot only for reads. + // USER + // USER Further complicating this is that both DIMMs have either 1 or 2 ODT + // USER inputs, which do the same thing (only one is actually required). 
+ if ((RW_MGR_MEM_CHIP_SELECT_WIDTH / RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM) == 1) { + // USER Single-slot case + if (RW_MGR_MEM_ODT_WIDTH == 1) { + // USER Read = 0, Write = 1 + odt_mask_0 = 0x0; + odt_mask_1 = 0x1; + } else if (RW_MGR_MEM_ODT_WIDTH == 2) { + // USER Read = 00, Write = 11 + odt_mask_0 = 0x0; + odt_mask_1 = 0x3; + } + } else if ((RW_MGR_MEM_CHIP_SELECT_WIDTH / RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM) + == 2) { + // USER Dual-slot case + if (RW_MGR_MEM_ODT_WIDTH == 2) { + // USER Read: asserted for opposite slot, Write: asserted for both + odt_mask_0 = (rank < 2) ? 0x2 : 0x1; + odt_mask_1 = 0x3; + } else if (RW_MGR_MEM_ODT_WIDTH == 4) { + // USER Read: asserted for opposite slot, Write: asserted for both + odt_mask_0 = (rank < 2) ? 0xC : 0x3; + odt_mask_1 = 0xF; + } + } + } else if (RW_MGR_MEM_NUMBER_OF_RANKS == 1) { + //USER 1 Rank + //USER Read: ODT = 0 + //USER Write: ODT = 1 odt_mask_0 = 0x0; odt_mask_1 = 0x1; } else if (RW_MGR_MEM_NUMBER_OF_RANKS == 2) { - /* 2 Ranks */ + //USER 2 Ranks if (RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM == 1 || - (RDIMM && RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM == 2 - && RW_MGR_MEM_CHIP_SELECT_WIDTH == 4)) { - /* - Dual-Slot , Single-Rank - * (1 chip-select per DIMM) - * OR - * - RDIMM, 4 total CS (2 CS per DIMM) - * means 2 DIMM - * Since MEM_NUMBER_OF_RANKS is 2 they are - * both single rank - * with 2 CS each (special for RDIMM) - * Read: Turn on ODT on the opposite rank - * Write: Turn on ODT on all ranks - */ + (RDIMM && RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM == 2 + && RW_MGR_MEM_CHIP_SELECT_WIDTH == 4)) { + //USER - Dual-Slot , Single-Rank (1 chip-select per DIMM) + //USER OR + //USER - RDIMM, 4 total CS (2 CS per DIMM) means 2 DIMM + //USER Since MEM_NUMBER_OF_RANKS is 2 they are both single rank + //USER with 2 CS each (special for RDIMM) + //USER Read: Turn on ODT on the opposite rank + //USER Write: Turn on ODT on all ranks odt_mask_0 = 0x3 & ~(1 << rank); odt_mask_1 = 0x3; } else { - /* - * USER - Single-Slot , Dual-rank DIMMs - * (2 chip-selects per DIMM) - * USER Read: Turn on ODT off on all ranks - * USER Write: Turn on ODT on active rank - */ + //USER - Single-Slot , Dual-rank DIMMs (2 chip-selects per DIMM) + //USER Read: Turn on ODT off on all ranks + //USER Write: Turn on ODT on active rank odt_mask_0 = 0x0; odt_mask_1 = 0x3 & (1 << rank); } - } else { - /* 4 Ranks - * Read: - * ----------+-----------------------+ - * | | - * | ODT | - * Read From +-----------------------+ - * Rank | 3 | 2 | 1 | 0 | - * ----------+-----+-----+-----+-----+ - * 0 | 0 | 1 | 0 | 0 | - * 1 | 1 | 0 | 0 | 0 | - * 2 | 0 | 0 | 0 | 1 | - * 3 | 0 | 0 | 1 | 0 | - * ----------+-----+-----+-----+-----+ - * - * Write: - * ----------+-----------------------+ - * | | - * | ODT | - * Write To +-----------------------+ - * Rank | 3 | 2 | 1 | 0 | - * ----------+-----+-----+-----+-----+ - * 0 | 0 | 1 | 0 | 1 | - * 1 | 1 | 0 | 1 | 0 | - * 2 | 0 | 1 | 0 | 1 | - * 3 | 1 | 0 | 1 | 0 | - * ----------+-----+-----+-----+-----+ - */ + } else { + //USER 4 Ranks + //USER Read: + //USER ----------+-----------------------+ + //USER | | + //USER | ODT | + //USER Read From +-----------------------+ + //USER Rank | 3 | 2 | 1 | 0 | + //USER ----------+-----+-----+-----+-----+ + //USER 0 | 0 | 1 | 0 | 0 | + //USER 1 | 1 | 0 | 0 | 0 | + //USER 2 | 0 | 0 | 0 | 1 | + //USER 3 | 0 | 0 | 1 | 0 | + //USER ----------+-----+-----+-----+-----+ + //USER + //USER Write: + //USER ----------+-----------------------+ + //USER | | + //USER | ODT | + //USER Write To +-----------------------+ + //USER Rank | 3 | 2 | 1 | 0 | + 
//USER ----------+-----+-----+-----+-----+ + //USER 0 | 0 | 1 | 0 | 1 | + //USER 1 | 1 | 0 | 1 | 0 | + //USER 2 | 0 | 1 | 0 | 1 | + //USER 3 | 1 | 0 | 1 | 0 | + //USER ----------+-----+-----+-----+-----+ switch (rank) { case 0: odt_mask_0 = 0x4; @@ -265,169 +451,93 @@ static void set_rank_and_odt_mask(uint32_t rank, uint32_t odt_mode) } if (RDIMM && RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM == 2 - && RW_MGR_MEM_CHIP_SELECT_WIDTH == 4 - && RW_MGR_MEM_NUMBER_OF_RANKS == 2) { - /* See RDIMM special case above */ + && RW_MGR_MEM_CHIP_SELECT_WIDTH == 4 && RW_MGR_MEM_NUMBER_OF_RANKS == 2) { + //USER See RDIMM special case above cs_and_odt_mask = - (0xFF & ~(1 << (2*rank))) | - ((0xFF & odt_mask_0) << 8) | - ((0xFF & odt_mask_1) << 16); + (0xFF & ~(1 << (2 * rank))) | + ((0xFF & odt_mask_0) << 8) | ((0xFF & odt_mask_1) << 16); + } else if (LRDIMM) { } else { cs_and_odt_mask = - (0xFF & ~(1 << rank)) | - ((0xFF & odt_mask_0) << 8) | - ((0xFF & odt_mask_1) << 16); + (0xFF & ~(1 << rank)) | + ((0xFF & odt_mask_0) << 8) | ((0xFF & odt_mask_1) << 16); } IOWR_32DIRECT(RW_MGR_SET_CS_AND_ODT_MASK, 0, cs_and_odt_mask); } -#else -static void set_rank_and_odt_mask(uint32_t rank, uint32_t odt_mode) -{ - uint32_t odt_mask_0 = 0; - uint32_t odt_mask_1 = 0; - uint32_t cs_and_odt_mask; - - if (odt_mode == RW_MGR_ODT_MODE_READ_WRITE) { - if (RW_MGR_MEM_NUMBER_OF_RANKS == 1) { - /* - * 1 Rank - * Read: ODT = 0 - * Write: ODT = 1 - */ - odt_mask_0 = 0x0; - odt_mask_1 = 0x1; - } else if (RW_MGR_MEM_NUMBER_OF_RANKS == 2) { - /* 2 Ranks */ - if (RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM == 1) { - /* USER - Dual-Slot , - * Single-Rank (1 chip-select per DIMM) - * OR - * - RDIMM, 4 total CS (2 CS per DIMM) means - * 2 DIMM - * Since MEM_NUMBER_OF_RANKS is 2 they are both - * single rank with 2 CS each (special for - * RDIMM) - * Read/Write: Turn on ODT on the opposite rank - */ - odt_mask_0 = 0x3 & ~(1 << rank); - odt_mask_1 = 0x3 & ~(1 << rank); - } else { - /* - * USER - Single-Slot , Dual-rank DIMMs - * (2 chip-selects per DIMM) - * Read: Turn on ODT off on all ranks - * Write: Turn on ODT on active rank - */ - odt_mask_0 = 0x0; - odt_mask_1 = 0x3 & (1 << rank); - } - } else { - /* - * 4 Ranks - * Read/Write: - * -----------+-----------------------+ - * | | - * | ODT | - * Read/Write | | - * From +-----------------------+ - * Rank | 3 | 2 | 1 | 0 | - * -----------+-----+-----+-----+-----+ - * 0 | 0 | 1 | 0 | 0 | - * 1 | 1 | 0 | 0 | 0 | - * 2 | 0 | 0 | 0 | 1 | - * 3 | 0 | 0 | 1 | 0 | - * -----------+-----+-----+-----+-----+ - */ - switch (rank) { - case 0: - odt_mask_0 = 0x4; - odt_mask_1 = 0x4; - break; - case 1: - odt_mask_0 = 0x8; - odt_mask_1 = 0x8; - break; - case 2: - odt_mask_0 = 0x1; - odt_mask_1 = 0x1; - break; - case 3: - odt_mask_0 = 0x2; - odt_mask_1 = 0x2; - break; - } - } - } else { - odt_mask_0 = 0x0; - odt_mask_1 = 0x0; - } - - cs_and_odt_mask = (0xFF & ~(1 << rank)) | - ((0xFF & odt_mask_0) << 8) | - ((0xFF & odt_mask_1) << 16); - IOWR_32DIRECT(RW_MGR_SET_CS_AND_ODT_MASK, 0, cs_and_odt_mask); +//USER Given a rank, select the set of shadow registers that is responsible for the +//USER delays of such rank, so that subsequent SCC updates will go to those shadow +//USER registers. 
+static void select_shadow_regs_for_update(uint32_t rank, uint32_t group, + uint32_t update_scan_chains) +{ } -#endif static void scc_mgr_initialize(void) { - /* - * Clear register file for HPS - * 16 (2^4) is the size of the full register file in the scc mgr: - * RFILE_DEPTH = log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS + - * MEM_IF_READ_DQS_WIDTH - 1) + 1; - */ + // Clear register file for HPS + // 16 (2^4) is the size of the full register file in the scc mgr: + // RFILE_DEPTH = log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS + MEM_IF_READ_DQS_WIDTH - 1) + 1; uint32_t i; for (i = 0; i < 16; i++) { - pr_debug("Clearing SCC RFILE index %u\n", i); + DPRINT(1, "Clearing SCC RFILE index %lu", i); IOWR_32DIRECT(SCC_MGR_HHP_RFILE, i << 2, 0); } } -static void scc_mgr_set_dqs_bus_in_delay(uint32_t read_group, uint32_t delay) +static inline void scc_mgr_set_dqs_bus_in_delay(uint32_t read_group, uint32_t delay) { ALTERA_ASSERT(read_group < RW_MGR_MEM_IF_READ_DQS_WIDTH); - /* Load the setting in the SCC manager */ + // Load the setting in the SCC manager WRITE_SCC_DQS_IN_DELAY(read_group, delay); + + // Make the setting in the TCL report + TCLRPT_SET(debug_cal_report->cal_dqs_in_settings[curr_shadow_reg][read_group]. + dqs_bus_in_delay, delay); + } -static void scc_mgr_set_dqs_io_in_delay(uint32_t write_group, - uint32_t delay) +static inline void scc_mgr_set_dqs_io_in_delay(uint32_t write_group, uint32_t delay) { ALTERA_ASSERT(write_group < RW_MGR_MEM_IF_WRITE_DQS_WIDTH); - /* Load the setting in the SCC manager */ + // Load the setting in the SCC manager WRITE_SCC_DQS_IO_IN_DELAY(delay); + + // Make the setting in the TCL report + TCLRPT_SET(debug_cal_report->cal_dqs_out_settings[curr_shadow_reg][write_group]. + dqs_io_in_delay, delay); + } -static void scc_mgr_set_dqs_en_phase(uint32_t read_group, uint32_t phase) +static inline void scc_mgr_set_dqs_en_phase(uint32_t read_group, uint32_t phase) { ALTERA_ASSERT(read_group < RW_MGR_MEM_IF_READ_DQS_WIDTH); - /* Load the setting in the SCC manager */ + // Load the setting in the SCC manager WRITE_SCC_DQS_EN_PHASE(read_group, phase); + + // Make the setting in the TCL report + TCLRPT_SET(debug_cal_report->cal_dqs_in_settings[curr_shadow_reg][read_group].dqs_en_phase, + phase); + } -static void scc_mgr_set_dqs_en_phase_all_ranks (uint32_t read_group, uint32_t phase) +static void scc_mgr_set_dqs_en_phase_all_ranks(uint32_t read_group, uint32_t phase) { uint32_t r; uint32_t update_scan_chains; - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; - r += NUM_RANKS_PER_SHADOW_REG) { - /* - * USER although the h/w doesn't support different phases per - * shadow register, for simplicity our scc manager modeling - * keeps different phase settings per shadow reg, and it's - * important for us to keep them in sync to match h/w. - * for efficiency, the scan chain update should occur only - * once to sr0. - */ + for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r += NUM_RANKS_PER_SHADOW_REG) { + //USER although the h/w doesn't support different phases per shadow register, + //USER for simplicity our scc manager modeling keeps different phase settings per + //USER shadow reg, and it's important for us to keep them in sync to match h/w. + //USER for efficiency, the scan chain update should occur only once to sr0. update_scan_chains = (r == 0) ? 
1 : 0; + select_shadow_regs_for_update(r, read_group, update_scan_chains); scc_mgr_set_dqs_en_phase(read_group, phase); if (update_scan_chains) { @@ -437,33 +547,32 @@ static void scc_mgr_set_dqs_en_phase_all_ranks (uint32_t read_group, uint32_t ph } } -static void scc_mgr_set_dqdqs_output_phase(uint32_t write_group, - uint32_t phase) +static inline void scc_mgr_set_dqdqs_output_phase(uint32_t write_group, uint32_t phase) { ALTERA_ASSERT(write_group < RW_MGR_MEM_IF_WRITE_DQS_WIDTH); - /* Load the setting in the SCC manager */ + // Load the setting in the SCC manager WRITE_SCC_DQDQS_OUT_PHASE(write_group, phase); + + // Make the setting in the TCL report + TCLRPT_SET(debug_cal_report->cal_dqs_out_settings[curr_shadow_reg][write_group]. + dqdqs_out_phase, phase); + } -static void scc_mgr_set_dqdqs_output_phase_all_ranks (uint32_t write_group, - uint32_t phase) +static void scc_mgr_set_dqdqs_output_phase_all_ranks(uint32_t write_group, uint32_t phase) { uint32_t r; uint32_t update_scan_chains; - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; - r += NUM_RANKS_PER_SHADOW_REG) { - /* - * USER although the h/w doesn't support different phases per - * shadow register, for simplicity our scc manager modeling - * keeps different phase settings per shadow reg, and it's - * important for us to keep them in sync to match h/w. - * for efficiency, the scan chain update should occur only - * once to sr0. - */ + for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r += NUM_RANKS_PER_SHADOW_REG) { + //USER although the h/w doesn't support different phases per shadow register, + //USER for simplicity our scc manager modeling keeps different phase settings per + //USER shadow reg, and it's important for us to keep them in sync to match h/w. + //USER for efficiency, the scan chain update should occur only once to sr0. update_scan_chains = (r == 0) ? 1 : 0; + select_shadow_regs_for_update(r, write_group, update_scan_chains); scc_mgr_set_dqdqs_output_phase(write_group, phase); if (update_scan_chains) { @@ -473,32 +582,36 @@ static void scc_mgr_set_dqdqs_output_phase_all_ranks (uint32_t write_group, } } -static void scc_mgr_set_dqs_en_delay(uint32_t read_group, uint32_t delay) +static inline void scc_mgr_set_dqs_en_delay(uint32_t read_group, uint32_t delay) { ALTERA_ASSERT(read_group < RW_MGR_MEM_IF_READ_DQS_WIDTH); - /* Load the setting in the SCC manager */ + // Load the setting in the SCC manager WRITE_SCC_DQS_EN_DELAY(read_group, delay); + + // Make the setting in the TCL report + TCLRPT_SET(debug_cal_report->cal_dqs_in_settings[curr_shadow_reg][read_group].dqs_en_delay, + delay); + } -static void scc_mgr_set_dqs_en_delay_all_ranks (uint32_t read_group, uint32_t delay) +static void scc_mgr_set_dqs_en_delay_all_ranks(uint32_t read_group, uint32_t delay) { uint32_t r; for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r += NUM_RANKS_PER_SHADOW_REG) { + select_shadow_regs_for_update(r, read_group, 0); + scc_mgr_set_dqs_en_delay(read_group, delay); IOWR_32DIRECT(SCC_MGR_DQS_ENA, 0, read_group); - /* - * In shadow register mode, the T11 settings are stored in - * registers in the core, which are updated by the DQS_ENA - * signals. Not issuing the SCC_MGR_UPD command allows us to - * save lots of rank switching overhead, by calling - * select_shadow_regs_for_update with update_scan_chains - * set to 0. - */ + // In shadow register mode, the T11 settings are stored in registers + // in the core, which are updated by the DQS_ENA signals. 
Not issuing + // the SCC_MGR_UPD command allows us to save lots of rank switching + // overhead, by calling select_shadow_regs_for_update with update_scan_chains + // set to 0. IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); } } @@ -509,17 +622,23 @@ static void scc_mgr_set_oct_out1_delay(uint32_t write_group, uint32_t delay) ALTERA_ASSERT(write_group < RW_MGR_MEM_IF_WRITE_DQS_WIDTH); - /* - * Load the setting in the SCC manager - * Although OCT affects only write data, the OCT delay is controlled - * by the DQS logic block which is instantiated once per read group. - * For protocols where a write group consists of multiple read groups, - * the setting must be set multiple times. - */ - for (read_group = write_group * RW_MGR_NUM_DQS_PER_WRITE_GROUP; - read_group < (write_group + 1) * RW_MGR_NUM_DQS_PER_WRITE_GROUP; - ++read_group) + // Load the setting in the SCC manager + // Although OCT affects only write data, the OCT delay is controlled by the DQS logic block + // which is instantiated once per read group. For protocols where a write group consists + // of multiple read groups, the setting must be set multiple times. + for (read_group = + write_group * RW_MGR_MEM_IF_READ_DQS_WIDTH / RW_MGR_MEM_IF_WRITE_DQS_WIDTH; + read_group < + (write_group + 1) * RW_MGR_MEM_IF_READ_DQS_WIDTH / RW_MGR_MEM_IF_WRITE_DQS_WIDTH; + ++read_group) { + WRITE_SCC_OCT_OUT1_DELAY(read_group, delay); + } + + // Make the setting in the TCL report + TCLRPT_SET(debug_cal_report->cal_dqs_out_settings[curr_shadow_reg][write_group]. + oct_out_delay1, delay); + } static void scc_mgr_set_oct_out2_delay(uint32_t write_group, uint32_t delay) @@ -528,147 +647,207 @@ static void scc_mgr_set_oct_out2_delay(uint32_t write_group, uint32_t delay) ALTERA_ASSERT(write_group < RW_MGR_MEM_IF_WRITE_DQS_WIDTH); - /* - * Load the setting in the SCC manager - * Although OCT affects only write data, the OCT delay is controlled - * by the DQS logic block which is instantiated once per read group. - * For protocols where a write group consists - * of multiple read groups, the setting must be set multiple times. - */ - for (read_group = write_group * RW_MGR_NUM_DQS_PER_WRITE_GROUP; - read_group < (write_group + 1) * RW_MGR_NUM_DQS_PER_WRITE_GROUP; - ++read_group) + // Load the setting in the SCC manager + // Although OCT affects only write data, the OCT delay is controlled by the DQS logic block + // which is instantiated once per read group. For protocols where a write group consists + // of multiple read groups, the setting must be set multiple times. + for (read_group = + write_group * RW_MGR_MEM_IF_READ_DQS_WIDTH / RW_MGR_MEM_IF_WRITE_DQS_WIDTH; + read_group < + (write_group + 1) * RW_MGR_MEM_IF_READ_DQS_WIDTH / RW_MGR_MEM_IF_WRITE_DQS_WIDTH; + ++read_group) { + WRITE_SCC_OCT_OUT2_DELAY(read_group, delay); + } + + // Make the setting in the TCL report + TCLRPT_SET(debug_cal_report->cal_dqs_out_settings[curr_shadow_reg][write_group]. 
+ oct_out_delay2, delay); + } -static void scc_mgr_set_dq_out1_delay(uint32_t write_group, - uint32_t dq_in_group, uint32_t delay) +static inline void scc_mgr_set_dqs_bypass(uint32_t write_group, uint32_t bypass) { + // Load the setting in the SCC manager + WRITE_SCC_DQS_BYPASS(write_group, bypass); +} + +static inline void scc_mgr_set_dq_out1_delay(uint32_t write_group, uint32_t dq_in_group, + uint32_t delay) +{ + ALTERA_ASSERT(write_group < RW_MGR_MEM_IF_WRITE_DQS_WIDTH); ALTERA_ASSERT(dq < RW_MGR_MEM_DATA_WIDTH); - /* Load the setting in the SCC manager */ + // Load the setting in the SCC manager WRITE_SCC_DQ_OUT1_DELAY(dq_in_group, delay); + + // Make the setting in the TCL report + TCLRPT_SET(debug_cal_report->cal_dq_settings[curr_shadow_reg][dq].dq_out_delay1, delay); + } -static void scc_mgr_set_dq_out2_delay(uint32_t write_group, - uint32_t dq_in_group, uint32_t delay) +static inline void scc_mgr_set_dq_out2_delay(uint32_t write_group, uint32_t dq_in_group, + uint32_t delay) { + ALTERA_ASSERT(write_group < RW_MGR_MEM_IF_WRITE_DQS_WIDTH); ALTERA_ASSERT(dq < RW_MGR_MEM_DATA_WIDTH); - /* Load the setting in the SCC manager */ + // Load the setting in the SCC manager WRITE_SCC_DQ_OUT2_DELAY(dq_in_group, delay); + + // Make the setting in the TCL report + TCLRPT_SET(debug_cal_report->cal_dq_settings[curr_shadow_reg][dq].dq_out_delay2, delay); + } -static void scc_mgr_set_dq_in_delay(uint32_t write_group, - uint32_t dq_in_group, uint32_t delay) +static inline void scc_mgr_set_dq_in_delay(uint32_t write_group, uint32_t dq_in_group, + uint32_t delay) { + ALTERA_ASSERT(write_group < RW_MGR_MEM_IF_WRITE_DQS_WIDTH); ALTERA_ASSERT(dq < RW_MGR_MEM_DATA_WIDTH); - /* Load the setting in the SCC manager */ + // Load the setting in the SCC manager WRITE_SCC_DQ_IN_DELAY(dq_in_group, delay); + + // Make the setting in the TCL report + TCLRPT_SET(debug_cal_report->cal_dq_settings[curr_shadow_reg][dq].dq_in_delay, delay); + +} + +static inline void scc_mgr_set_dq_bypass(uint32_t write_group, uint32_t dq_in_group, + uint32_t bypass) +{ + // Load the setting in the SCC manager + WRITE_SCC_DQ_BYPASS(dq_in_group, bypass); +} + +static inline void scc_mgr_set_rfifo_mode(uint32_t write_group, uint32_t dq_in_group, uint32_t mode) +{ + // Load the setting in the SCC manager + WRITE_SCC_RFIFO_MODE(dq_in_group, mode); } -static void scc_mgr_set_hhp_extras(void) +static inline void scc_mgr_set_hhp_extras(void) { - /* - * Load the fixed setting in the SCC manager - * bits: 0:0 = 1'b1 - dqs bypass - * bits: 1:1 = 1'b1 - dq bypass - * bits: 4:2 = 3'b001 - rfifo_mode - * bits: 6:5 = 2'b01 - rfifo clock_select - * bits: 7:7 = 1'b0 - separate gating from ungating setting - * bits: 8:8 = 1'b0 - separate OE from Output delay setting - */ - uint32_t value = (0<<8) | (0<<7) | (1<<5) | (1<<2) | (1<<1) | (1<<0); + // Load the fixed setting in the SCC manager + // bits: 0:0 = 1'b1 - dqs bypass + // bits: 1:1 = 1'b1 - dq bypass + // bits: 4:2 = 3'b001 - rfifo_mode + // bits: 6:5 = 2'b01 - rfifo clock_select + // bits: 7:7 = 1'b0 - separate gating from ungating setting + // bits: 8:8 = 1'b0 - separate OE from Output delay setting + uint32_t value = (0 << 8) | (0 << 7) | (1 << 5) | (1 << 2) | (1 << 1) | (1 << 0); WRITE_SCC_HHP_EXTRAS(value); } -static void scc_mgr_set_dqs_out1_delay(uint32_t write_group, - uint32_t delay) +static inline void scc_mgr_set_hhp_dqse_map(void) +{ + // Load the fixed setting in the SCC manager + WRITE_SCC_HHP_DQSE_MAP(0); +} + +static inline void scc_mgr_set_dqs_out1_delay(uint32_t write_group, 
uint32_t delay) { ALTERA_ASSERT(write_group < RW_MGR_MEM_IF_WRITE_DQS_WIDTH); - /* Load the setting in the SCC manager */ + // Load the setting in the SCC manager WRITE_SCC_DQS_IO_OUT1_DELAY(delay); + + // Make the setting in the TCL report + TCLRPT_SET(debug_cal_report->cal_dqs_out_settings[curr_shadow_reg][write_group]. + dqs_out_delay1, delay); + } -static void scc_mgr_set_dqs_out2_delay(uint32_t write_group, uint32_t delay) +static inline void scc_mgr_set_dqs_out2_delay(uint32_t write_group, uint32_t delay) { ALTERA_ASSERT(write_group < RW_MGR_MEM_IF_WRITE_DQS_WIDTH); - /* Load the setting in the SCC manager */ + // Load the setting in the SCC manager WRITE_SCC_DQS_IO_OUT2_DELAY(delay); + + // Make the setting in the TCL report + TCLRPT_SET(debug_cal_report->cal_dqs_out_settings[curr_shadow_reg][write_group]. + dqs_out_delay2, delay); + } -static void scc_mgr_set_dm_out1_delay(uint32_t write_group, - uint32_t dm, uint32_t delay) +static inline void scc_mgr_set_dm_out1_delay(uint32_t write_group, uint32_t dm, uint32_t delay) { ALTERA_ASSERT(write_group < RW_MGR_MEM_IF_WRITE_DQS_WIDTH); ALTERA_ASSERT(dm < RW_MGR_NUM_DM_PER_WRITE_GROUP); - /* Load the setting in the SCC manager */ + // Load the setting in the SCC manager WRITE_SCC_DM_IO_OUT1_DELAY(dm, delay); + + // Make the setting in the TCL report + + if (RW_MGR_NUM_TRUE_DM_PER_WRITE_GROUP > 0) { + TCLRPT_SET(debug_cal_report->cal_dm_settings[curr_shadow_reg][write_group][dm]. + dm_out_delay1, delay); + } } -static void scc_mgr_set_dm_out2_delay(uint32_t write_group, uint32_t dm, - uint32_t delay) +static inline void scc_mgr_set_dm_out2_delay(uint32_t write_group, uint32_t dm, uint32_t delay) { ALTERA_ASSERT(write_group < RW_MGR_MEM_IF_WRITE_DQS_WIDTH); ALTERA_ASSERT(dm < RW_MGR_NUM_DM_PER_WRITE_GROUP); - /* Load the setting in the SCC manager */ + // Load the setting in the SCC manager WRITE_SCC_DM_IO_OUT2_DELAY(dm, delay); + + // Make the setting in the TCL report + + if (RW_MGR_NUM_TRUE_DM_PER_WRITE_GROUP > 0) { + TCLRPT_SET(debug_cal_report->cal_dm_settings[curr_shadow_reg][write_group][dm]. + dm_out_delay2, delay); + } } -static void scc_mgr_set_dm_in_delay(uint32_t write_group, - uint32_t dm, uint32_t delay) +static inline void scc_mgr_set_dm_in_delay(uint32_t write_group, uint32_t dm, uint32_t delay) { ALTERA_ASSERT(write_group < RW_MGR_MEM_IF_WRITE_DQS_WIDTH); ALTERA_ASSERT(dm < RW_MGR_NUM_DM_PER_WRITE_GROUP); - /* Load the setting in the SCC manager */ + // Load the setting in the SCC manager WRITE_SCC_DM_IO_IN_DELAY(dm, delay); + + // Make the setting in the TCL report + + if (RW_MGR_NUM_TRUE_DM_PER_WRITE_GROUP > 0) { + TCLRPT_SET(debug_cal_report->cal_dm_settings[curr_shadow_reg][write_group][dm]. + dm_in_delay, delay); + } } -static void scc_mgr_load_dqs_for_write_group (uint32_t write_group) +static inline void scc_mgr_set_dm_bypass(uint32_t write_group, uint32_t dm, uint32_t bypass) { - uint32_t read_group; - - /* - * Although OCT affects only write data, the OCT delay is controlled - * by the DQS logic block which is instantiated once per read group. - * For protocols where a write group consists of multiple read groups, - * the setting must be scanned multiple times. 
- */ - for (read_group = write_group * RW_MGR_NUM_DQS_PER_WRITE_GROUP; - read_group < (write_group + 1) * RW_MGR_NUM_DQS_PER_WRITE_GROUP; - ++read_group) - IOWR_32DIRECT(SCC_MGR_DQS_ENA, 0, read_group); + // Load the setting in the SCC manager + WRITE_SCC_DM_BYPASS(dm, bypass); } -/* - * USER Zero all DQS config - * TODO: maybe rename to scc_mgr_zero_dqs_config (or something) - */ -static void scc_mgr_zero_all (void) +//USER Zero all DQS config +// TODO: maybe rename to scc_mgr_zero_dqs_config (or something) +static void scc_mgr_zero_all(void) { uint32_t i, r; - /* - * USER Zero all DQS config settings, across all groups and all - * shadow registers - */ + //USER Zero all DQS config settings, across all groups and all shadow registers for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r += NUM_RANKS_PER_SHADOW_REG) { + // Strictly speaking this should be called once per group to make + // sure each group's delay chain is refreshed from the SCC register file, + // but since we're resetting all delay chains anyway, we can save some + // runtime by calling select_shadow_regs_for_update just once to switch + // rank. + select_shadow_regs_for_update(r, 0, 1); + for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { - /* - * The phases actually don't exist on a per-rank basis, - * but there's no harm updating them several times, so - * let's keep the code simple. - */ + // The phases actually don't exist on a per-rank basis, but there's + // no harm updating them several times, so let's keep the code simple. scc_mgr_set_dqs_bus_in_delay(i, IO_DQS_IN_RESERVE); scc_mgr_set_dqs_en_phase(i, 0); scc_mgr_set_dqs_en_delay(i, 0); @@ -676,235 +855,302 @@ static void scc_mgr_zero_all (void) for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) { scc_mgr_set_dqdqs_output_phase(i, 0); -#if ARRIAV || CYCLONEV - /* av/cv don't have out2 */ + // av/cv don't have out2 scc_mgr_set_oct_out1_delay(i, IO_DQS_OUT_RESERVE); -#else - scc_mgr_set_oct_out1_delay(i, 0); - scc_mgr_set_oct_out2_delay(i, IO_DQS_OUT_RESERVE); -#endif } - /* multicast to all DQS group enables */ + //USER multicast to all DQS group enables IOWR_32DIRECT(SCC_MGR_DQS_ENA, 0, 0xff); IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); } } -static void scc_set_bypass_mode(uint32_t write_group) +static void scc_set_bypass_mode(uint32_t write_group, uint32_t mode) { - /* only need to set once for all groups, pins, dq, dqs, dm */ + // mode = 0 : Do NOT bypass - Half Rate Mode + // mode = 1 : Bypass - Full Rate Mode + + // only need to set once for all groups, pins, dq, dqs, dm if (write_group == 0) { - pr_debug("Setting HHP Extras\n"); + DPRINT(1, "Setting HHP Extras"); scc_mgr_set_hhp_extras(); - pr_debug("Done Setting HHP Extras\n"); + DPRINT(1, "Done Setting HHP Extras"); } - /* multicast to all DQ enables */ + //USER multicast to all DQ enables IOWR_32DIRECT(SCC_MGR_DQ_ENA, 0, 0xff); IOWR_32DIRECT(SCC_MGR_DM_ENA, 0, 0xff); - /* update current DQS IO enable */ + //USER update current DQS IO enable IOWR_32DIRECT(SCC_MGR_DQS_IO_ENA, 0, 0); - /* update the DQS logic */ + //USER update the DQS logic IOWR_32DIRECT(SCC_MGR_DQS_ENA, 0, write_group); - /* hit update */ + //USER hit update IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); } -static void scc_mgr_zero_group (uint32_t write_group, uint32_t test_begin, - int32_t out_only) +// Moving up to avoid warnings +static void scc_mgr_load_dqs_for_write_group(uint32_t write_group) +{ + uint32_t read_group; + + // Although OCT affects only write data, the OCT delay is controlled by the DQS logic block + // which is instantiated once per read group. 
For protocols where a write group consists + // of multiple read groups, the setting must be scanned multiple times. + for (read_group = + write_group * RW_MGR_MEM_IF_READ_DQS_WIDTH / RW_MGR_MEM_IF_WRITE_DQS_WIDTH; + read_group < + (write_group + 1) * RW_MGR_MEM_IF_READ_DQS_WIDTH / RW_MGR_MEM_IF_WRITE_DQS_WIDTH; + ++read_group) { + + IOWR_32DIRECT(SCC_MGR_DQS_ENA, 0, read_group); + } +} + +static void scc_mgr_zero_group(uint32_t write_group, uint32_t test_begin, int32_t out_only) { uint32_t i, r; - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r += - NUM_RANKS_PER_SHADOW_REG) { + for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r += NUM_RANKS_PER_SHADOW_REG) { + + select_shadow_regs_for_update(r, write_group, 1); - /* Zero all DQ config settings */ + //USER Zero all DQ config settings for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { scc_mgr_set_dq_out1_delay(write_group, i, 0); - scc_mgr_set_dq_out2_delay(write_group, i, - IO_DQ_OUT_RESERVE); + scc_mgr_set_dq_out2_delay(write_group, i, IO_DQ_OUT_RESERVE); if (!out_only) { scc_mgr_set_dq_in_delay(write_group, i, 0); } } - /* multicast to all DQ enables */ + //USER multicast to all DQ enables IOWR_32DIRECT(SCC_MGR_DQ_ENA, 0, 0xff); - /* Zero all DM config settings */ + //USER Zero all DM config settings for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) { if (!out_only) { - /* Do we really need this? */ + // Do we really need this? scc_mgr_set_dm_in_delay(write_group, i, 0); } scc_mgr_set_dm_out1_delay(write_group, i, 0); - scc_mgr_set_dm_out2_delay(write_group, i, - IO_DM_OUT_RESERVE); + scc_mgr_set_dm_out2_delay(write_group, i, IO_DM_OUT_RESERVE); } - /* multicast to all DM enables */ + //USER multicast to all DM enables IOWR_32DIRECT(SCC_MGR_DM_ENA, 0, 0xff); - /* zero all DQS io settings */ + //USER zero all DQS io settings if (!out_only) { scc_mgr_set_dqs_io_in_delay(write_group, 0); } -#if ARRIAV || CYCLONEV - /* av/cv don't have out2 */ + // av/cv don't have out2 scc_mgr_set_dqs_out1_delay(write_group, IO_DQS_OUT_RESERVE); scc_mgr_set_oct_out1_delay(write_group, IO_DQS_OUT_RESERVE); scc_mgr_load_dqs_for_write_group(write_group); -#else - scc_mgr_set_dqs_out1_delay(write_group, 0); - scc_mgr_set_dqs_out2_delay(write_group, IO_DQS_OUT_RESERVE); - scc_mgr_set_oct_out1_delay(write_group, 0); - scc_mgr_set_oct_out2_delay(write_group, IO_DQS_OUT_RESERVE); - scc_mgr_load_dqs_for_write_group(write_group); -#endif - /* multicast to all DQS IO enables (only 1) */ + //USER multicast to all DQS IO enables (only 1) IOWR_32DIRECT(SCC_MGR_DQS_IO_ENA, 0, 0); -#if USE_SHADOW_REGS - /* - * in shadow-register mode, SCC_UPDATE is done on a per-group basis - * unless we explicitly ask for a multicast via the group counter - */ - IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); -#endif - /* hit update to zero everything */ + //USER hit update to zero everything IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); } } -/* load up dqs config settings */ +//USER load up dqs config settings -static void scc_mgr_load_dqs (uint32_t dqs) +static void scc_mgr_load_dqs(uint32_t dqs) { IOWR_32DIRECT(SCC_MGR_DQS_ENA, 0, dqs); } -/* load up dqs io config settings */ +//USER load up dqs io config settings -static void scc_mgr_load_dqs_io (void) +static void scc_mgr_load_dqs_io(void) { IOWR_32DIRECT(SCC_MGR_DQS_IO_ENA, 0, 0); } -/* load up dq config settings */ +//USER load up dq config settings -static void scc_mgr_load_dq (uint32_t dq_in_group) +static void scc_mgr_load_dq(uint32_t dq_in_group) { IOWR_32DIRECT(SCC_MGR_DQ_ENA, 0, dq_in_group); } -/* load up dm config settings */ +//USER load up dm config 
settings -static void scc_mgr_load_dm (uint32_t dm) +static void scc_mgr_load_dm(uint32_t dm) { IOWR_32DIRECT(SCC_MGR_DM_ENA, 0, dm); } -/* apply and load a particular input delay for the DQ pins in a group */ -/* group_bgn is the index of the first dq pin (in the write group) */ +//USER apply and load a particular input delay for the DQ pins in a group +//USER group_bgn is the index of the first dq pin (in the write group) -static void scc_mgr_apply_group_dq_in_delay (uint32_t write_group, - uint32_t group_bgn, uint32_t delay) +static void scc_mgr_apply_group_dq_in_delay(uint32_t write_group, uint32_t group_bgn, + uint32_t delay) { uint32_t i, p; for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) { scc_mgr_set_dq_in_delay(write_group, p, delay); - scc_mgr_load_dq (p); + scc_mgr_load_dq(p); } } -/* apply and load a particular output delay for the DQ pins in a group */ +//USER apply and load a particular output delay for the DQ pins in a group -static void scc_mgr_apply_group_dq_out1_delay (uint32_t write_group, uint32_t group_bgn, - uint32_t delay1) +static void scc_mgr_apply_group_dq_out1_delay(uint32_t write_group, uint32_t group_bgn, + uint32_t delay1) { uint32_t i, p; for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) { scc_mgr_set_dq_out1_delay(write_group, i, delay1); - scc_mgr_load_dq (i); + scc_mgr_load_dq(i); + } +} + +static void scc_mgr_apply_group_dq_out2_delay(uint32_t write_group, uint32_t group_bgn, + uint32_t delay2) +{ + uint32_t i, p; + + for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) { + scc_mgr_set_dq_out2_delay(write_group, i, delay2); + scc_mgr_load_dq(i); } } -/* apply and load a particular output delay for the DM pins in a group */ +//USER apply and load a particular output delay for the DM pins in a group -static void scc_mgr_apply_group_dm_out1_delay (uint32_t write_group, uint32_t delay1) +static void scc_mgr_apply_group_dm_out1_delay(uint32_t write_group, uint32_t delay1) { uint32_t i; for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) { scc_mgr_set_dm_out1_delay(write_group, i, delay1); - scc_mgr_load_dm (i); + scc_mgr_load_dm(i); } } - -/* apply and load delay on both DQS and OCT out1 */ -static void scc_mgr_apply_group_dqs_io_and_oct_out1 (uint32_t write_group, uint32_t delay) +//USER apply and load delay on both DQS and OCT out1 +static void scc_mgr_apply_group_dqs_io_and_oct_out1(uint32_t write_group, uint32_t delay) { scc_mgr_set_dqs_out1_delay(write_group, delay); - scc_mgr_load_dqs_io (); + scc_mgr_load_dqs_io(); scc_mgr_set_oct_out1_delay(write_group, delay); - scc_mgr_load_dqs_for_write_group (write_group); + scc_mgr_load_dqs_for_write_group(write_group); +} + +//USER apply and load delay on both DQS and OCT out2 +static void scc_mgr_apply_group_dqs_io_and_oct_out2(uint32_t write_group, uint32_t delay) +{ + scc_mgr_set_dqs_out2_delay(write_group, delay); + scc_mgr_load_dqs_io(); + + scc_mgr_set_oct_out2_delay(write_group, delay); + scc_mgr_load_dqs_for_write_group(write_group); +} + +//USER set delay on both DQS and OCT out1 by incrementally changing +//USER the settings one dtap at a time towards the target value, to avoid +//USER breaking the lock of the DLL/PLL on the memory device. 
+static void scc_mgr_set_group_dqs_io_and_oct_out1_gradual(uint32_t write_group, uint32_t delay) +{ + uint32_t d = READ_SCC_DQS_IO_OUT1_DELAY(); + + while (d > delay) { + --d; + scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, d); + IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); + if (QDRII) { + rw_mgr_mem_dll_lock_wait(); + } + } + while (d < delay) { + ++d; + scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, d); + IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); + if (QDRII) { + rw_mgr_mem_dll_lock_wait(); + } + } +} + +//USER set delay on both DQS and OCT out2 by incrementally changing +//USER the settings one dtap at a time towards the target value, to avoid +//USER breaking the lock of the DLL/PLL on the memory device. +static void scc_mgr_set_group_dqs_io_and_oct_out2_gradual(uint32_t write_group, uint32_t delay) +{ + uint32_t d = READ_SCC_DQS_IO_OUT2_DELAY(); + + while (d > delay) { + --d; + scc_mgr_apply_group_dqs_io_and_oct_out2(write_group, d); + IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); + if (QDRII) { + rw_mgr_mem_dll_lock_wait(); + } + } + while (d < delay) { + ++d; + scc_mgr_apply_group_dqs_io_and_oct_out2(write_group, d); + IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); + if (QDRII) { + rw_mgr_mem_dll_lock_wait(); + } + } } -/* apply a delay to the entire output side: DQ, DM, DQS, OCT */ +//USER apply a delay to the entire output side: DQ, DM, DQS, OCT -static void scc_mgr_apply_group_all_out_delay (uint32_t write_group, - uint32_t group_bgn, uint32_t delay) +static void scc_mgr_apply_group_all_out_delay(uint32_t write_group, uint32_t group_bgn, + uint32_t delay) { - /* dq shift */ + //USER dq shift - scc_mgr_apply_group_dq_out1_delay (write_group, group_bgn, delay); + scc_mgr_apply_group_dq_out1_delay(write_group, group_bgn, delay); - /* dm shift */ + //USER dm shift - scc_mgr_apply_group_dm_out1_delay (write_group, delay); + scc_mgr_apply_group_dm_out1_delay(write_group, delay); - /* dqs and oct shift */ + //USER dqs and oct shift - scc_mgr_apply_group_dqs_io_and_oct_out1 (write_group, delay); + scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, delay); } -/* - * USER apply a delay to the entire output side (DQ, DM, DQS, OCT) - * and to all ranks - */ -static void scc_mgr_apply_group_all_out_delay_all_ranks (uint32_t write_group, - uint32_t group_bgn, uint32_t delay) +//USER apply a delay to the entire output side (DQ, DM, DQS, OCT) and to all ranks +static void scc_mgr_apply_group_all_out_delay_all_ranks(uint32_t write_group, uint32_t group_bgn, + uint32_t delay) { uint32_t r; - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; - r += NUM_RANKS_PER_SHADOW_REG) { + for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r += NUM_RANKS_PER_SHADOW_REG) { + + select_shadow_regs_for_update(r, write_group, 1); - scc_mgr_apply_group_all_out_delay (write_group, group_bgn, delay); + scc_mgr_apply_group_all_out_delay(write_group, group_bgn, delay); IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); } } -/* apply a delay to the entire output side: DQ, DM, DQS, OCT */ +//USER apply a delay to the entire output side: DQ, DM, DQS, OCT -static void scc_mgr_apply_group_all_out_delay_add (uint32_t write_group, - uint32_t group_bgn, uint32_t delay) +static void scc_mgr_apply_group_all_out_delay_add(uint32_t write_group, uint32_t group_bgn, + uint32_t delay) { uint32_t i, p, new_delay; - /* dq shift */ + //USER dq shift for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) { @@ -912,70 +1158,63 @@ static void scc_mgr_apply_group_all_out_delay_add (uint32_t write_group, new_delay += delay; if (new_delay > IO_IO_OUT2_DELAY_MAX) { - pr_debug("%s(%u, 
%u, %u) DQ[%u,%u]: %u >" - " %u => %u\n", __func__, write_group, - group_bgn, delay, i, p, - new_delay, - IO_IO_OUT2_DELAY_MAX, - IO_IO_OUT2_DELAY_MAX); + DPRINT(1, "%s(%lu, %lu, %lu) DQ[%lu,%lu]: %lu > %lu => %lu", + __func__, write_group, group_bgn, delay, i, p, + new_delay, (long unsigned int)IO_IO_OUT2_DELAY_MAX, + (long unsigned int)IO_IO_OUT2_DELAY_MAX); new_delay = IO_IO_OUT2_DELAY_MAX; } scc_mgr_set_dq_out2_delay(write_group, i, new_delay); - scc_mgr_load_dq (i); + scc_mgr_load_dq(i); } - /* dm shift */ + //USER dm shift for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) { new_delay = READ_SCC_DM_IO_OUT2_DELAY(i); new_delay += delay; if (new_delay > IO_IO_OUT2_DELAY_MAX) { - pr_debug("%s(%u, %u, %u) DM[%u]: %u > %u => %u\n", - __func__, write_group, group_bgn, delay, i, - new_delay, - IO_IO_OUT2_DELAY_MAX, - IO_IO_OUT2_DELAY_MAX); + DPRINT(1, "%s(%lu, %lu, %lu) DM[%lu]: %lu > %lu => %lu", + __func__, write_group, group_bgn, delay, i, + new_delay, (long unsigned int)IO_IO_OUT2_DELAY_MAX, + (long unsigned int)IO_IO_OUT2_DELAY_MAX); new_delay = IO_IO_OUT2_DELAY_MAX; } scc_mgr_set_dm_out2_delay(write_group, i, new_delay); - scc_mgr_load_dm (i); + scc_mgr_load_dm(i); } - /* dqs shift */ + //USER dqs shift new_delay = READ_SCC_DQS_IO_OUT2_DELAY(); new_delay += delay; if (new_delay > IO_IO_OUT2_DELAY_MAX) { - pr_debug("%s(%u, %u, %u) DQS: %u > %d => %d;" - " adding %u to OUT1\n", - __func__, write_group, group_bgn, delay, - new_delay, IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX, - new_delay - IO_IO_OUT2_DELAY_MAX); - scc_mgr_set_dqs_out1_delay(write_group, new_delay - - IO_IO_OUT2_DELAY_MAX); + DPRINT(1, "%s(%lu, %lu, %lu) DQS: %lu > %d => %d; adding %lu to OUT1", + __func__, write_group, group_bgn, delay, + new_delay, IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX, + new_delay - IO_IO_OUT2_DELAY_MAX); + scc_mgr_set_dqs_out1_delay(write_group, new_delay - IO_IO_OUT2_DELAY_MAX); new_delay = IO_IO_OUT2_DELAY_MAX; } scc_mgr_set_dqs_out2_delay(write_group, new_delay); - scc_mgr_load_dqs_io (); + scc_mgr_load_dqs_io(); - /* oct shift */ + //USER oct shift new_delay = READ_SCC_OCT_OUT2_DELAY(write_group); new_delay += delay; if (new_delay > IO_IO_OUT2_DELAY_MAX) { - pr_debug("%s(%u, %u, %u) DQS: %u > %d => %d;" - " adding %u to OUT1\n", - __func__, write_group, group_bgn, delay, - new_delay, IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX, - new_delay - IO_IO_OUT2_DELAY_MAX); - scc_mgr_set_oct_out1_delay(write_group, new_delay - - IO_IO_OUT2_DELAY_MAX); + DPRINT(1, "%s(%lu, %lu, %lu) DQS: %lu > %d => %d; adding %lu to OUT1", + __func__, write_group, group_bgn, delay, + new_delay, IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX, + new_delay - IO_IO_OUT2_DELAY_MAX); + scc_mgr_set_oct_out1_delay(write_group, new_delay - IO_IO_OUT2_DELAY_MAX); new_delay = IO_IO_OUT2_DELAY_MAX; } @@ -983,852 +1222,568 @@ static void scc_mgr_apply_group_all_out_delay_add (uint32_t write_group, scc_mgr_load_dqs_for_write_group(write_group); } -/* - * USER apply a delay to the entire output side (DQ, DM, DQS, OCT) - * and to all ranks - */ -static void scc_mgr_apply_group_all_out_delay_add_all_ranks (uint32_t write_group, - uint32_t group_bgn, uint32_t delay) +//USER apply a delay to the entire output side (DQ, DM, DQS, OCT) and to all ranks +static void scc_mgr_apply_group_all_out_delay_add_all_ranks(uint32_t write_group, + uint32_t group_bgn, uint32_t delay) { uint32_t r; for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r += NUM_RANKS_PER_SHADOW_REG) { - scc_mgr_apply_group_all_out_delay_add (write_group, - group_bgn, delay); + + 
select_shadow_regs_for_update(r, write_group, 1); + + scc_mgr_apply_group_all_out_delay_add(write_group, group_bgn, delay); IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); } } -static void scc_mgr_spread_out2_delay_all_ranks (uint32_t write_group, - uint32_t test_bgn) +static inline void scc_mgr_spread_out2_delay_all_ranks(uint32_t write_group, uint32_t test_bgn) { -#if STRATIXV || ARRIAVGZ - uint32_t found; - uint32_t i; - uint32_t p; - uint32_t d; - uint32_t r; - - const uint32_t delay_step = IO_IO_OUT2_DELAY_MAX / - (RW_MGR_MEM_DQ_PER_WRITE_DQS-1); - /* we start at zero, so have one less dq to devide among */ - - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; - r += NUM_RANKS_PER_SHADOW_REG) { - for (i = 0, p = test_bgn, d = 0; - i < RW_MGR_MEM_DQ_PER_WRITE_DQS; - i++, p++, d += delay_step) { - pr_debug("rw_mgr_mem_calibrate_vfifo_find" - "_dqs_en_phase_sweep_dq_in_delay: g=%u r=%u," - " i=%u p=%u d=%u\n", - write_group, r, i, p, d); - scc_mgr_set_dq_out2_delay(write_group, i, d); - scc_mgr_load_dq (i); - } - IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); - } -#endif } -#if DDR3 -/* optimization used to recover some slots in ddr3 inst_rom */ -/* could be applied to other protocols if we wanted to */ +// optimization used to recover some slots in ddr3 inst_rom +// could be applied to other protocols if we wanted to static void set_jump_as_return(void) { - /* - * to save space, we replace return with jump to special shared - * RETURN instruction so we set the counter to large value so that - * we always jump - */ + // to save space, we replace return with jump to special shared RETURN instruction + // so we set the counter to large value so that we always jump IOWR_32DIRECT(RW_MGR_LOAD_CNTR_0, 0, 0xFF); IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_0, 0, __RW_MGR_RETURN); } -#endif -/* - * should always use constants as argument to ensure all computations are - * performed at compile time - */ -static void delay_for_n_mem_clocks(const uint32_t clocks) +// should always use constants as argument to ensure all computations are performed at compile time +static inline void delay_for_n_mem_clocks(const uint32_t clocks) { uint32_t afi_clocks; uint8_t inner; uint8_t outer; uint16_t c_loop; - afi_clocks = (clocks + AFI_RATE_RATIO-1) / AFI_RATE_RATIO; - /* scale (rounding up) to get afi clocks */ + TRACE_FUNC("clocks=%lu ... 
start", clocks); + + afi_clocks = (clocks + AFI_RATE_RATIO - 1) / AFI_RATE_RATIO; /* scale (rounding up) to get afi clocks */ - /* - * Note, we don't bother accounting for being off a little bit - * because of a few extra instructions in outer loops - * Note, the loops have a test at the end, and do the test before - * the decrement, and so always perform the loop - * 1 time more than the counter value - */ + // Note, we don't bother accounting for being off a little bit because of a few extra instructions in outer loops + // Note, the loops have a test at the end, and do the test before the decrement, and so always perform the loop + // 1 time more than the counter value if (afi_clocks == 0) { inner = outer = c_loop = 0; } else if (afi_clocks <= 0x100) { - inner = afi_clocks-1; + inner = afi_clocks - 1; outer = 0; c_loop = 0; } else if (afi_clocks <= 0x10000) { inner = 0xff; - outer = (afi_clocks-1) >> 8; + outer = (afi_clocks - 1) >> 8; c_loop = 0; } else { inner = 0xff; outer = 0xff; - c_loop = (afi_clocks-1) >> 16; - } - - /* - * rom instructions are structured as follows: - * - * IDLE_LOOP2: jnz cntr0, TARGET_A - * IDLE_LOOP1: jnz cntr1, TARGET_B - * return - * - * so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and - * TARGET_B is set to IDLE_LOOP2 as well - * - * if we have no outer loop, though, then we can use IDLE_LOOP1 only, - * and set TARGET_B to IDLE_LOOP1 and we skip IDLE_LOOP2 entirely - * - * a little confusing, but it helps save precious space in the inst_rom - * and sequencer rom and keeps the delays more accurate and reduces - * overhead - */ + c_loop = (afi_clocks - 1) >> 16; + } + + // rom instructions are structured as follows: + // + // IDLE_LOOP2: jnz cntr0, TARGET_A + // IDLE_LOOP1: jnz cntr1, TARGET_B + // return + // + // so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and TARGET_B is + // set to IDLE_LOOP2 as well + // + // if we have no outer loop, though, then we can use IDLE_LOOP1 only, and set + // TARGET_B to IDLE_LOOP1 and we skip IDLE_LOOP2 entirely + // + // a little confusing, but it helps save precious space in the inst_rom and sequencer rom + // and keeps the delays more accurate and reduces overhead if (afi_clocks <= 0x100) { - IOWR_32DIRECT(RW_MGR_LOAD_CNTR_1, 0, - SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner)); + + IOWR_32DIRECT(RW_MGR_LOAD_CNTR_1, 0, SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner)); IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_1, 0, __RW_MGR_IDLE_LOOP1); IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_IDLE_LOOP1); + } else { - IOWR_32DIRECT(RW_MGR_LOAD_CNTR_0, 0, - SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner)); - IOWR_32DIRECT(RW_MGR_LOAD_CNTR_1, 0, - SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer)); + IOWR_32DIRECT(RW_MGR_LOAD_CNTR_0, 0, SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner)); + IOWR_32DIRECT(RW_MGR_LOAD_CNTR_1, 0, SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer)); IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_0, 0, __RW_MGR_IDLE_LOOP2); IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_1, 0, __RW_MGR_IDLE_LOOP2); - /* hack to get around compiler not being smart enough */ + // hack to get around compiler not being smart enough if (afi_clocks <= 0x10000) { - /* only need to run once */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_IDLE_LOOP2); + // only need to run once + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_IDLE_LOOP2); } else { do { - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_IDLE_LOOP2); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_IDLE_LOOP2); } while (c_loop-- != 0); } } + + TRACE_FUNC("clocks=%lu ... 
end", clocks); } -/* Special routine to recover memory device from illegal state after */ -/* ck/dqs relationship is violated. */ -static void recover_mem_device_after_ck_dqs_violation(void) +// should always use constants as argument to ensure all computations are performed at compile time +static inline void delay_for_n_ns(const uint32_t nanoseconds) { - /* Current protocol doesn't require any special recovery */ + TRACE_FUNC("nanoseconds=%lu ... end", nanoseconds); + delay_for_n_mem_clocks((1000 * nanoseconds) / (1000000 / AFI_CLK_FREQ) * AFI_RATE_RATIO); } -static void rw_mgr_rdimm_initialize(void) { } - -#if DDR3 +// Special routine to recover memory device from illegal state after +// ck/dqs relationship is violated. +static inline void recover_mem_device_after_ck_dqs_violation(void) +{ + // Current protocol doesn't require any special recovery +} +static void rw_mgr_rdimm_initialize(void) +{ +} -static void rw_mgr_mem_initialize (void) +static void rw_mgr_mem_initialize(void) { uint32_t r; + TRACE_FUNC(); - /* The reset / cke part of initialization is broadcasted to all ranks */ + //USER The reset / cke part of initialization is broadcasted to all ranks IOWR_32DIRECT(RW_MGR_SET_CS_AND_ODT_MASK, 0, RW_MGR_RANK_ALL); - /* - * Here's how you load register for a loop - * Counters are located @ 0x800 - * Jump address are located @ 0xC00 - * For both, registers 0 to 3 are selected using bits 3 and 2, like - * in 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C - * I know this ain't pretty, but Avalon bus throws away the 2 least - * significant bits - */ - - /* start with memory RESET activated */ - - /* tINIT is typically 200us (but can be adjusted in the GUI) - * The total number of cycles required for this nested counter structure to - * complete is defined by: - * num_cycles = (CTR2 + 1) * [(CTR1 + 1) * (2 * (CTR0 + 1) + 1) + 1] + 1 - */ - - /* Load counters */ - IOWR_32DIRECT(RW_MGR_LOAD_CNTR_0, 0, - SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR0_VAL)); - IOWR_32DIRECT(RW_MGR_LOAD_CNTR_1, 0, - SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR1_VAL)); - IOWR_32DIRECT(RW_MGR_LOAD_CNTR_2, 0, - SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR2_VAL)); - - /* Load jump address */ - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_0, 0, - __RW_MGR_INIT_RESET_0_CKE_0); - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_1, 0, - __RW_MGR_INIT_RESET_0_CKE_0); - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_2, 0, - __RW_MGR_INIT_RESET_0_CKE_0); - - /* Execute count instruction */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_INIT_RESET_0_CKE_0); + // Here's how you load register for a loop + //USER Counters are located @ 0x800 + //USER Jump address are located @ 0xC00 + //USER For both, registers 0 to 3 are selected using bits 3 and 2, like in + //USER 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C + // I know this ain't pretty, but Avalon bus throws away the 2 least significant bits - /* indicate that memory is stable */ - IOWR_32DIRECT(PHY_MGR_RESET_MEM_STBL, 0, 1); + //USER start with memory RESET activated + + //USER tINIT is typically 200us (but can be adjusted in the GUI) + //USER The total number of cycles required for this nested counter structure to + //USER complete is defined by: + //USER num_cycles = (CTR2 + 1) * [(CTR1 + 1) * (2 * (CTR0 + 1) + 1) + 1] + 1 + + //USER Load counters + IOWR_32DIRECT(RW_MGR_LOAD_CNTR_0, 0, SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR0_VAL)); + IOWR_32DIRECT(RW_MGR_LOAD_CNTR_1, 0, SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR1_VAL)); + IOWR_32DIRECT(RW_MGR_LOAD_CNTR_2, 0, 
SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR2_VAL)); - /* transition the RESET to high */ - /* Wait for 500us */ + //USER Load jump address + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_0, 0, __RW_MGR_INIT_RESET_0_CKE_0); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_1, 0, __RW_MGR_INIT_RESET_0_CKE_0); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_2, 0, __RW_MGR_INIT_RESET_0_CKE_0); - /* num_cycles = (CTR2 + 1) * [(CTR1 + 1) * (2 * (CTR0 + 1) + 1) + 1] + 1 */ + //USER Execute count instruction + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_INIT_RESET_0_CKE_0); + + //USER indicate that memory is stable + IOWR_32DIRECT(PHY_MGR_RESET_MEM_STBL, 0, 1); - /* Load counters */ - IOWR_32DIRECT(RW_MGR_LOAD_CNTR_0, 0, - SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR0_VAL)); - IOWR_32DIRECT(RW_MGR_LOAD_CNTR_1, 0, - SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR1_VAL)); - IOWR_32DIRECT(RW_MGR_LOAD_CNTR_2, 0, - SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR2_VAL)); + //USER transition the RESET to high + //USER Wait for 500us + //USER num_cycles = (CTR2 + 1) * [(CTR1 + 1) * (2 * (CTR0 + 1) + 1) + 1] + 1 + //USER Load counters + IOWR_32DIRECT(RW_MGR_LOAD_CNTR_0, 0, SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR0_VAL)); + IOWR_32DIRECT(RW_MGR_LOAD_CNTR_1, 0, SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR1_VAL)); + IOWR_32DIRECT(RW_MGR_LOAD_CNTR_2, 0, SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR2_VAL)); - /* Load jump address */ + //USER Load jump address IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_0, 0, __RW_MGR_INIT_RESET_1_CKE_0); IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_1, 0, __RW_MGR_INIT_RESET_1_CKE_0); IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_2, 0, __RW_MGR_INIT_RESET_1_CKE_0); - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_INIT_RESET_1_CKE_0); - /* bring up clock enable */ + //USER bring up clock enable - /* tXRP < 250 ck cycles */ + //USER tXRP < 250 ck cycles delay_for_n_mem_clocks(250); - /* - * USER initialize RDIMM buffer so MRS and RZQ Calibrate commands will - * USER be propagated to discrete memory devices - */ + // USER initialize RDIMM buffer so MRS and RZQ Calibrate commands will be + // USER propagated to discrete memory devices rw_mgr_rdimm_initialize(); - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { - /* set rank */ + if (param->skip_ranks[r]) { + //USER request to skip the rank + + continue; + } + //USER set rank set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); - /* - * USER Use Mirror-ed commands for odd ranks if address - * mirrorring is on - */ + //USER Use Mirror-ed commands for odd ranks if address mirrorring is on if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) { set_jump_as_return(); - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_MRS2_MIRR); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MRS2_MIRR); delay_for_n_mem_clocks(4); set_jump_as_return(); - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_MRS3_MIRR); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MRS3_MIRR); delay_for_n_mem_clocks(4); set_jump_as_return(); - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_MRS1_MIRR); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MRS1_MIRR); delay_for_n_mem_clocks(4); set_jump_as_return(); - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_MRS0_DLL_RESET_MIRR); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MRS0_DLL_RESET_MIRR); } else { set_jump_as_return(); - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_MRS2); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MRS2); delay_for_n_mem_clocks(4); set_jump_as_return(); - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - 
__RW_MGR_MRS3); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MRS3); delay_for_n_mem_clocks(4); set_jump_as_return(); - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_MRS1); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MRS1); set_jump_as_return(); - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_MRS0_DLL_RESET); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MRS0_DLL_RESET); } set_jump_as_return(); IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_ZQCL); - /* tZQinit = tDLLK = 512 ck cycles */ + //USER tZQinit = tDLLK = 512 ck cycles delay_for_n_mem_clocks(512); } } -#endif /* DDR3 */ - -#if DDR2 -static void rw_mgr_mem_initialize (void) -{ - uint32_t r; - - /* *** NOTE *** */ - /* The following STAGE (n) notation refers to the corresponding - stage in the Micron datasheet */ - - /* - *Here's how you load register for a loop - * Counters are located @ 0x800 - * Jump address are located @ 0xC00 - * For both, registers 0 to 3 are selected using bits 3 and 2, - like in - * 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C - * I know this ain't pretty, but Avalon bus throws away the 2 least - significant bits - */ - - /* *** STAGE (1, 2, 3) *** */ - - /* start with CKE low */ - - /* tINIT = 200us */ - - /* tINIT is typically 200us (but can be adjusted in the GUI) - * The total number of cycles required for this nested counter structure to - * complete is defined by: - * num_cycles = (CTR0 + 1) * [(CTR1 + 1) * (2 * (CTR2 + 1) + 1) + 1] + 1 - */ - - /*TODO: Need to manage multi-rank */ - - /* Load counters */ - IOWR_32DIRECT(RW_MGR_LOAD_CNTR_0, 0, - SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR0_VAL)); - IOWR_32DIRECT(RW_MGR_LOAD_CNTR_1, 0, - SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR1_VAL)); - IOWR_32DIRECT(RW_MGR_LOAD_CNTR_2, 0, - SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR2_VAL)); - - /* Load jump address */ - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_0, 0, __RW_MGR_INIT_CKE_0); - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_1, 0, __RW_MGR_INIT_CKE_0); - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_2, 0, __RW_MGR_INIT_CKE_0); - - /* Execute count instruction */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_INIT_CKE_0); - - /* indicate that memory is stable */ - IOWR_32DIRECT(PHY_MGR_RESET_MEM_STBL, 0, 1); - - /* Bring up CKE */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_NOP); - - /* *** STAGE (4) */ - - /* Wait for 400ns */ - delay_for_n_ns(400); - - /* Multi-rank section begins here */ - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { - /* set rank */ - set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); - - /* - * * **** * - * * NOTE * - * * **** * - * The following commands must be spaced by tMRD or tRPA - *which are in the order - * of 2 to 4 full rate cycles. 
This is peanuts in the - *NIOS domain, so for now - * we can avoid redundant wait loops - */ - - /* Possible FIXME BEN: for HHP, we need to add delay loops - * to be sure although, the sequencer write interface by itself - * likely has enough delay - */ - - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_PRECHARGE_ALL); - - /* *** STAGE (5) */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_EMR2); - - /* *** STAGE (6) */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_EMR3); - - /* *** STAGE (7) */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_EMR); - - /* *** STAGE (8) */ - /* DLL reset */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_MR_DLL_RESET); - - /* *** STAGE (9) */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_PRECHARGE_ALL); - - /* *** STAGE (10) */ - - /* Issue 2 refresh commands spaced by tREF */ - - /* First REFRESH */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_REFRESH); - - /* tREF = 200ns */ - delay_for_n_ns(200); - - /* Second REFRESH */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_REFRESH); - - /* Second idle loop */ - delay_for_n_ns(200); - - /* *** STAGE (11) */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_MR_CALIB); - - /* *** STAGE (12) */ - /* OCD defaults */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_EMR_OCD_ENABLE); - - /* *** STAGE (13) */ - /* OCD exit */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_EMR); - - /* *** STAGE (14) */ - - /* - * The memory is now initialized. Before being able to - *use it, we must still - * wait for the DLL to lock, 200 clock cycles after it - *was reset @ STAGE (8). - * Since we cannot keep track of time in any other way, - *let's start counting from now - */ - delay_for_n_mem_clocks(200); - } -} -#endif /* DDR2 */ -#if LPDDR2 -static void rw_mgr_mem_initialize (void) +static void rw_mgr_mem_dll_lock_wait(void) { - uint32_t r; - - /* *** NOTE *** */ - /* The following STAGE (n) notation refers to the corresponding - stage in the Micron datasheet */ - - /* - *Here's how you load register for a loop - * Counters are located @ 0x800 - * Jump address are located @ 0xC00 - * For both, registers 0 to 3 are selected using bits 3 and 2, - *like in - * 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C - *I know this ain't pretty, but Avalon bus throws away the 2 least - *significant bits - */ - - - /* *** STAGE (1, 2, 3) *** */ - - /* start with CKE low */ - - /* tINIT1 = 100ns */ - - /* - * 100ns @ 300MHz (3.333 ns) ~ 30 cycles - * If a is the number of iteration in a loop - * it takes the following number of cycles to complete the operation - * number_of_cycles = (2 + n) * a - * where n is the number of instruction in the inner loop - * One possible solution is n = 0 , a = 15 => a = 0x10 - */ - - /* Load counter */ - IOWR_32DIRECT(RW_MGR_LOAD_CNTR_0, 0, SKIP_DELAY_LOOP_VALUE_OR_ZERO(0x10)); - - /* Load jump address */ - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_0, 0, __RW_MGR_INIT_CKE_0); - - /* Execute count instruction */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_INIT_CKE_0); - - /* tINIT3 = 200us */ - delay_for_n_ns(200000); - - /* indicate that memory is stable */ - IOWR_32DIRECT(PHY_MGR_RESET_MEM_STBL, 0, 1); - - /* Multi-rank section begins here */ - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { - /* set rank */ - set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); - - /* MRW RESET */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MR63_RESET); - } - - /* tINIT5 = 10us */ - delay_for_n_ns(10000); - - /* Multi-rank section begins 
here */ - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { - /* set rank */ - set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); - - /* MRW ZQC */ - /* Note: We cannot calibrate other ranks when the current rank - is calibrating for tZQINIT */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MR10_ZQC); - - /* tZQINIT = 1us */ - delay_for_n_ns(1000); - - /* - * * **** * - * * NOTE * - * * **** * - * The following commands must be spaced by tMRW which is - *in the order - * of 3 to 5 full rate cycles. This is peanuts in the NIOS - *domain, so for now - * we can avoid redundant wait loops - */ - - /* MRW MR1 */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MR1_CALIB); - - /* MRW MR2 */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MR2); - - /* MRW MR3 */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MR3); - } } -#endif /* LPDDR2 */ -/* At the end of calibration we have to program the user settings in, and - USER hand off the memory to the user. */ +//USER At the end of calibration we have to program the user settings in, and +//USER hand off the memory to the user. -#if DDR3 -static void rw_mgr_mem_handoff (void) +static void rw_mgr_mem_handoff(void) { uint32_t r; + TRACE_FUNC(); for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { - /* set rank */ + if (param->skip_ranks[r]) { + //USER request to skip the rank + + continue; + } + //USER set rank set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); - /* precharge all banks ... */ + //USER precharge all banks ... IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_PRECHARGE_ALL); - /* load up MR settings specified by user */ + //USER load up MR settings specified by user - /* Use Mirror-ed commands for odd ranks if address - mirrorring is on */ + //USER Use Mirror-ed commands for odd ranks if address mirrorring is on if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) { set_jump_as_return(); - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_MRS2_MIRR); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MRS2_MIRR); delay_for_n_mem_clocks(4); set_jump_as_return(); - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_MRS3_MIRR); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MRS3_MIRR); delay_for_n_mem_clocks(4); set_jump_as_return(); - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_MRS1_MIRR); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MRS1_MIRR); delay_for_n_mem_clocks(4); set_jump_as_return(); - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_MRS0_USER_MIRR); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MRS0_USER_MIRR); } else { set_jump_as_return(); - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_MRS2); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MRS2); delay_for_n_mem_clocks(4); set_jump_as_return(); - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_MRS3); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MRS3); delay_for_n_mem_clocks(4); set_jump_as_return(); - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_MRS1); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MRS1); delay_for_n_mem_clocks(4); set_jump_as_return(); - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_MRS0_USER); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MRS0_USER); } - /* USER need to wait tMOD (12CK or 15ns) time before issuing - * other commands, but we will have plenty of NIOS cycles before - * actual handoff so its okay. 
- */ - } - -} -#endif /* DDR3 */ - -#if DDR2 -static void rw_mgr_mem_handoff (void) -{ - uint32_t r; - - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { - /* set rank */ - set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); - - /* precharge all banks ... */ - - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_PRECHARGE_ALL); - - /* load up MR settings specified by user */ - - /* - * FIXME BEN: for HHP, we need to add delay loops to be sure - * We can check this with BFM perhaps - * Likely enough delay in RW_MGR though - */ - - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_EMR2); - - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_EMR3); - - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_EMR); - - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MR_USER); - - /* - * USER need to wait tMOD (12CK or 15ns) time before issuing - * other commands, - * USER but we will have plenty of NIOS cycles before actual - * handoff so its okay. - */ + //USER need to wait tMOD (12CK or 15ns) time before issuing other commands, + //USER but we will have plenty of NIOS cycles before actual handoff so its okay. } -} -#endif /* DDR2 */ - -#if LPDDR2 -static void rw_mgr_mem_handoff (void) -{ - uint32_t r; - - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { - /* set rank */ - set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); - /* precharge all banks... */ - - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_PRECHARGE_ALL); - - /* load up MR settings specified by user */ - - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MR1_USER); - - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MR2); - - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_MR3); - } } -#endif /* LPDDR2 */ -/* - * performs a guaranteed read on the patterns we are going to use during a - * read test to ensure memory works - */ -static uint32_t rw_mgr_mem_calibrate_read_test_patterns (uint32_t rank_bgn, - uint32_t group, uint32_t num_tries, t_btfld *bit_chk, uint32_t all_ranks) +//USER performs a guaranteed read on the patterns we are going to use during a read test to ensure memory works +static uint32_t rw_mgr_mem_calibrate_read_test_patterns(uint32_t rank_bgn, uint32_t group, + uint32_t num_tries, t_btfld * bit_chk, + uint32_t all_ranks) { uint32_t r, vg; t_btfld correct_mask_vg; t_btfld tmp_bit_chk; - uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS : - (rank_bgn + NUM_RANKS_PER_SHADOW_REG); + uint32_t rank_end = + all_ranks ? 
RW_MGR_MEM_NUMBER_OF_RANKS : (rank_bgn + NUM_RANKS_PER_SHADOW_REG); *bit_chk = param->read_correct_mask; correct_mask_vg = param->read_correct_mask_vg; for (r = rank_bgn; r < rank_end; r++) { - /* set rank */ + if (param->skip_ranks[r]) { + //USER request to skip the rank + + continue; + } + //USER set rank set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); - /* Load up a constant bursts of read commands */ + //USER Load up a constant bursts of read commands IOWR_32DIRECT(RW_MGR_LOAD_CNTR_0, 0, 0x20); - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_0, 0, - __RW_MGR_GUARANTEED_READ); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_0, 0, __RW_MGR_GUARANTEED_READ); IOWR_32DIRECT(RW_MGR_LOAD_CNTR_1, 0, 0x20); - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_1, 0, - __RW_MGR_GUARANTEED_READ_CONT); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_1, 0, __RW_MGR_GUARANTEED_READ_CONT); tmp_bit_chk = 0; - for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS-1; ; vg--) { - /* reset the fifos to get pointers to known state */ + for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1;; vg--) { + //USER reset the fifos to get pointers to known state IOWR_32DIRECT(PHY_MGR_CMD_FIFO_RESET, 0, 0); IOWR_32DIRECT(RW_MGR_RESET_READ_DATAPATH, 0, 0); - tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS - / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS); + tmp_bit_chk = + tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS / + RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS); IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, - ((group*RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS + - vg) << 2), __RW_MGR_GUARANTEED_READ); - tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & - ~(IORD_32DIRECT(BASE_RW_MGR, 0))); + ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS + vg) << 2), + __RW_MGR_GUARANTEED_READ); + tmp_bit_chk = + tmp_bit_chk | (correct_mask_vg & ~(IORD_32DIRECT(BASE_RW_MGR, 0))); - if (vg == 0) + if (vg == 0) { break; + } } *bit_chk &= tmp_bit_chk; } - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, (group << 2), - __RW_MGR_CLEAR_DQS_ENABLE); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, (group << 2), __RW_MGR_CLEAR_DQS_ENABLE); set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); - pr_debug("test_load_patterns(%u,ALL) => (%u == %u) => %u\n", - group, *bit_chk, param->read_correct_mask, - (*bit_chk == param->read_correct_mask)); + DPRINT(2, "test_load_patterns(%lu,ALL) => (%lu == %lu) => %lu", group, *bit_chk, + param->read_correct_mask, (long unsigned int)(*bit_chk == param->read_correct_mask)); return (*bit_chk == param->read_correct_mask); } -static uint32_t rw_mgr_mem_calibrate_read_test_patterns_all_ranks - (uint32_t group, uint32_t num_tries, t_btfld *bit_chk) +static uint32_t rw_mgr_mem_calibrate_read_test_patterns_all_ranks(uint32_t group, + uint32_t num_tries, + t_btfld * bit_chk) { if (rw_mgr_mem_calibrate_read_test_patterns(0, group, num_tries, bit_chk, 1)) { return 1; } else { - /* case:139851 - if guaranteed read fails, we can retry using - * different dqs enable phases. It is possible that with the - * initial phase, dqs enable is asserted/deasserted too close - * to an dqs edge, truncating the read burst. - */ + // case:139851 - if guaranteed read fails, we can retry using different dqs enable phases. + // It is possible that with the initial phase, dqs enable is asserted/deasserted too close + // to an dqs edge, truncating the read burst. 
uint32_t p; for (p = 0; p <= IO_DQS_EN_PHASE_MAX; p++) { - scc_mgr_set_dqs_en_phase_all_ranks (group, p); - if (rw_mgr_mem_calibrate_read_test_patterns(0, - group, num_tries, bit_chk, 1)) { + scc_mgr_set_dqs_en_phase_all_ranks(group, p); + if (rw_mgr_mem_calibrate_read_test_patterns + (0, group, num_tries, bit_chk, 1)) { return 1; } } - return 0; } } -/* load up the patterns we are going to use during a read test */ -static void rw_mgr_mem_calibrate_read_load_patterns (uint32_t rank_bgn, - uint32_t all_ranks) +//USER load up the patterns we are going to use during a read test +static void rw_mgr_mem_calibrate_read_load_patterns(uint32_t rank_bgn, uint32_t all_ranks) { uint32_t r; - uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS : - (rank_bgn + NUM_RANKS_PER_SHADOW_REG); + uint32_t rank_end = + all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS : (rank_bgn + NUM_RANKS_PER_SHADOW_REG); + + TRACE_FUNC(); for (r = rank_bgn; r < rank_end; r++) { - /* set rank */ + if (param->skip_ranks[r]) { + //USER request to skip the rank + + continue; + } + //USER set rank set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); - /* Load up a constant bursts */ + //USER Load up a constant bursts IOWR_32DIRECT(RW_MGR_LOAD_CNTR_0, 0, 0x20); - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_0, 0, - __RW_MGR_GUARANTEED_WRITE_WAIT0); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_0, 0, __RW_MGR_GUARANTEED_WRITE_WAIT0); IOWR_32DIRECT(RW_MGR_LOAD_CNTR_1, 0, 0x20); - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_1, 0, - __RW_MGR_GUARANTEED_WRITE_WAIT1); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_1, 0, __RW_MGR_GUARANTEED_WRITE_WAIT1); -#if QUARTER_RATE - IOWR_32DIRECT(RW_MGR_LOAD_CNTR_2, 0, 0x01); -#endif -#if HALF_RATE - IOWR_32DIRECT(RW_MGR_LOAD_CNTR_2, 0, 0x02); -#endif -#if FULL_RATE IOWR_32DIRECT(RW_MGR_LOAD_CNTR_2, 0, 0x04); -#endif - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_2, 0, - __RW_MGR_GUARANTEED_WRITE_WAIT2); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_2, 0, __RW_MGR_GUARANTEED_WRITE_WAIT2); -#if QUARTER_RATE - IOWR_32DIRECT(RW_MGR_LOAD_CNTR_3, 0, 0x01); -#endif -#if HALF_RATE - IOWR_32DIRECT(RW_MGR_LOAD_CNTR_3, 0, 0x02); -#endif -#if FULL_RATE IOWR_32DIRECT(RW_MGR_LOAD_CNTR_3, 0, 0x04); -#endif - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_3, 0, - __RW_MGR_GUARANTEED_WRITE_WAIT3); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_3, 0, __RW_MGR_GUARANTEED_WRITE_WAIT3); - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_GUARANTEED_WRITE); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_GUARANTEED_WRITE); } set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); } -static void rw_mgr_mem_calibrate_read_load_patterns_all_ranks (void) +static inline void rw_mgr_mem_calibrate_read_load_patterns_all_ranks(void) { - rw_mgr_mem_calibrate_read_load_patterns (0, 1); + rw_mgr_mem_calibrate_read_load_patterns(0, 1); } -/* - * try a read and see if it returns correct data back. has dummy reads - * inserted into the mix used to align dqs enable. has more thorough checks - * than the regular read test. 
- */ - -static uint32_t rw_mgr_mem_calibrate_read_test (uint32_t rank_bgn, uint32_t group, - uint32_t num_tries, uint32_t all_correct, t_btfld *bit_chk, - uint32_t all_groups, uint32_t all_ranks) +// pe checkout pattern for harden managers +//void pe_checkout_pattern (void) +//{ +// // test RW manager +// +// // do some reads to check load buffer +// IOWR_32DIRECT (RW_MGR_LOAD_CNTR_1, 0, 0x0); +// IOWR_32DIRECT (RW_MGR_LOAD_JUMP_ADD_1, 0, __RW_MGR_READ_B2B_WAIT1); +// +// IOWR_32DIRECT (RW_MGR_LOAD_CNTR_2, 0, 0x0); +// IOWR_32DIRECT (RW_MGR_LOAD_JUMP_ADD_2, 0, __RW_MGR_READ_B2B_WAIT2); +// +// IOWR_32DIRECT (RW_MGR_LOAD_CNTR_0, 0, 0x0); +// IOWR_32DIRECT (RW_MGR_LOAD_JUMP_ADD_0, 0, __RW_MGR_READ_B2B); +// +// IOWR_32DIRECT (RW_MGR_LOAD_CNTR_3, 0, 0x0); +// IOWR_32DIRECT (RW_MGR_LOAD_JUMP_ADD_3, 0, __RW_MGR_READ_B2B); +// +// // clear error word +// IOWR_32DIRECT (RW_MGR_RESET_READ_DATAPATH, 0, 0); +// +// IOWR_32DIRECT (RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_READ_B2B); +// +// uint32_t readdata; +// +// // read error word +// readdata = IORD_32DIRECT(BASE_RW_MGR, 0); +// +// // read DI buffer +// readdata = IORD_32DIRECT(RW_MGR_DI_BASE + 0*4, 0); +// readdata = IORD_32DIRECT(RW_MGR_DI_BASE + 1*4, 0); +// readdata = IORD_32DIRECT(RW_MGR_DI_BASE + 2*4, 0); +// readdata = IORD_32DIRECT(RW_MGR_DI_BASE + 3*4, 0); +// +// IOWR_32DIRECT (RW_MGR_LOAD_CNTR_1, 0, 0x0); +// IOWR_32DIRECT (RW_MGR_LOAD_JUMP_ADD_1, 0, __RW_MGR_READ_B2B_WAIT1); +// +// IOWR_32DIRECT (RW_MGR_LOAD_CNTR_2, 0, 0x0); +// IOWR_32DIRECT (RW_MGR_LOAD_JUMP_ADD_2, 0, __RW_MGR_READ_B2B_WAIT2); +// +// IOWR_32DIRECT (RW_MGR_LOAD_CNTR_0, 0, 0x0); +// IOWR_32DIRECT (RW_MGR_LOAD_JUMP_ADD_0, 0, __RW_MGR_READ_B2B); +// +// IOWR_32DIRECT (RW_MGR_LOAD_CNTR_3, 0, 0x0); +// IOWR_32DIRECT (RW_MGR_LOAD_JUMP_ADD_3, 0, __RW_MGR_READ_B2B); +// +// // clear error word +// IOWR_32DIRECT (RW_MGR_RESET_READ_DATAPATH, 0, 0); +// +// // do read +// IOWR_32DIRECT (RW_MGR_LOOPBACK_MODE, 0, __RW_MGR_READ_B2B); +// +// // read error word +// readdata = IORD_32DIRECT(BASE_RW_MGR, 0); +// +// // error word should be 0x00 +// +// // read DI buffer +// readdata = IORD_32DIRECT(RW_MGR_DI_BASE + 0*4, 0); +// readdata = IORD_32DIRECT(RW_MGR_DI_BASE + 1*4, 0); +// readdata = IORD_32DIRECT(RW_MGR_DI_BASE + 2*4, 0); +// readdata = IORD_32DIRECT(RW_MGR_DI_BASE + 3*4, 0); +// +// // clear error word +// IOWR_32DIRECT (RW_MGR_RESET_READ_DATAPATH, 0, 0); +// +// // do dm read +// IOWR_32DIRECT (RW_MGR_LOOPBACK_MODE, 0, __RW_MGR_LFSR_WR_RD_DM_BANK_0_WL_1); +// +// // read error word +// readdata = IORD_32DIRECT(BASE_RW_MGR, 0); +// +// // error word should be ff +// +// // read DI buffer +// readdata = IORD_32DIRECT(RW_MGR_DI_BASE + 0*4, 0); +// readdata = IORD_32DIRECT(RW_MGR_DI_BASE + 1*4, 0); +// readdata = IORD_32DIRECT(RW_MGR_DI_BASE + 2*4, 0); +// readdata = IORD_32DIRECT(RW_MGR_DI_BASE + 3*4, 0); +// +// // exit loopback mode +// IOWR_32DIRECT (BASE_RW_MGR, 0, __RW_MGR_IDLE_LOOP2); +// +// // start of phy manager access +// +// readdata = IORD_32DIRECT (PHY_MGR_MAX_RLAT_WIDTH, 0); +// readdata = IORD_32DIRECT (PHY_MGR_MAX_AFI_WLAT_WIDTH, 0); +// readdata = IORD_32DIRECT (PHY_MGR_MAX_AFI_RLAT_WIDTH, 0); +// readdata = IORD_32DIRECT (PHY_MGR_CALIB_SKIP_STEPS, 0); +// readdata = IORD_32DIRECT (PHY_MGR_CALIB_VFIFO_OFFSET, 0); +// readdata = IORD_32DIRECT (PHY_MGR_CALIB_LFIFO_OFFSET, 0); +// +// // start of data manager test +// +// readdata = IORD_32DIRECT (DATA_MGR_DRAM_CFG , 0); +// readdata = IORD_32DIRECT (DATA_MGR_MEM_T_WL , 0); +// readdata = IORD_32DIRECT 
(DATA_MGR_MEM_T_ADD , 0); +// readdata = IORD_32DIRECT (DATA_MGR_MEM_T_RL , 0); +// readdata = IORD_32DIRECT (DATA_MGR_MEM_T_RFC , 0); +// readdata = IORD_32DIRECT (DATA_MGR_MEM_T_REFI , 0); +// readdata = IORD_32DIRECT (DATA_MGR_MEM_T_WR , 0); +// readdata = IORD_32DIRECT (DATA_MGR_MEM_T_MRD , 0); +// readdata = IORD_32DIRECT (DATA_MGR_COL_WIDTH , 0); +// readdata = IORD_32DIRECT (DATA_MGR_ROW_WIDTH , 0); +// readdata = IORD_32DIRECT (DATA_MGR_BANK_WIDTH , 0); +// readdata = IORD_32DIRECT (DATA_MGR_CS_WIDTH , 0); +// readdata = IORD_32DIRECT (DATA_MGR_ITF_WIDTH , 0); +// readdata = IORD_32DIRECT (DATA_MGR_DVC_WIDTH , 0); +// +//} + +//USER try a read and see if it returns correct data back. has dummy reads inserted into the mix +//USER used to align dqs enable. has more thorough checks than the regular read test. + +static uint32_t rw_mgr_mem_calibrate_read_test(uint32_t rank_bgn, uint32_t group, + uint32_t num_tries, uint32_t all_correct, + t_btfld * bit_chk, uint32_t all_groups, + uint32_t all_ranks) { uint32_t r, vg; - uint32_t quick_read_mode; t_btfld correct_mask_vg; t_btfld tmp_bit_chk; - uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS : - (rank_bgn + NUM_RANKS_PER_SHADOW_REG); - + uint32_t rank_end = + all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS : (rank_bgn + NUM_RANKS_PER_SHADOW_REG); *bit_chk = param->read_correct_mask; correct_mask_vg = param->read_correct_mask_vg; - quick_read_mode = (((STATIC_CALIB_STEPS) & - CALIB_SKIP_DELAY_SWEEPS) && ENABLE_SUPER_QUICK_CALIBRATION) || - BFM_MODE; + uint32_t quick_read_mode = (((STATIC_CALIB_STEPS) & CALIB_SKIP_DELAY_SWEEPS) + && ENABLE_SUPER_QUICK_CALIBRATION) || BFM_MODE; for (r = rank_bgn; r < rank_end; r++) { - /* set rank */ + if (param->skip_ranks[r]) { + //USER request to skip the rank + + continue; + } + //USER set rank set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); IOWR_32DIRECT(RW_MGR_LOAD_CNTR_1, 0, 0x10); - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_1, 0, - __RW_MGR_READ_B2B_WAIT1); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_1, 0, __RW_MGR_READ_B2B_WAIT1); IOWR_32DIRECT(RW_MGR_LOAD_CNTR_2, 0, 0x10); - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_2, 0, - __RW_MGR_READ_B2B_WAIT2); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_2, 0, __RW_MGR_READ_B2B_WAIT2); if (quick_read_mode) { - IOWR_32DIRECT(RW_MGR_LOAD_CNTR_0, 0, 0x1); - /* need at least two (1+1) reads to capture failures */ + IOWR_32DIRECT(RW_MGR_LOAD_CNTR_0, 0, 0x1); /* need at least two (1+1) reads to capture failures */ } else if (all_groups) { IOWR_32DIRECT(RW_MGR_LOAD_CNTR_0, 0, 0x06); } else { @@ -1837,29 +1792,29 @@ static uint32_t rw_mgr_mem_calibrate_read_test (uint32_t rank_bgn, uint32_t grou IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_0, 0, __RW_MGR_READ_B2B); if (all_groups) { IOWR_32DIRECT(RW_MGR_LOAD_CNTR_3, 0, - RW_MGR_MEM_IF_READ_DQS_WIDTH * - RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1); + RW_MGR_MEM_IF_READ_DQS_WIDTH * + RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1); } else { IOWR_32DIRECT(RW_MGR_LOAD_CNTR_3, 0, 0x0); } IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_3, 0, __RW_MGR_READ_B2B); tmp_bit_chk = 0; - for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS-1; ; vg--) { - /* reset the fifos to get pointers to known state */ + for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS - 1;; vg--) { + //USER reset the fifos to get pointers to known state IOWR_32DIRECT(PHY_MGR_CMD_FIFO_RESET, 0, 0); IOWR_32DIRECT(RW_MGR_RESET_READ_DATAPATH, 0, 0); - tmp_bit_chk = tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS - / RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS); + tmp_bit_chk = + tmp_bit_chk << (RW_MGR_MEM_DQ_PER_READ_DQS / 
+ RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS); - IOWR_32DIRECT(all_groups ? RW_MGR_RUN_ALL_GROUPS : - RW_MGR_RUN_SINGLE_GROUP, ((group * - RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS+vg) - << 2), __RW_MGR_READ_B2B); - tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & - ~(IORD_32DIRECT(BASE_RW_MGR, 0))); + IOWR_32DIRECT(all_groups ? RW_MGR_RUN_ALL_GROUPS : RW_MGR_RUN_SINGLE_GROUP, + ((group * RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS + vg) << 2), + __RW_MGR_READ_B2B); + tmp_bit_chk = + tmp_bit_chk | (correct_mask_vg & ~(IORD_32DIRECT(BASE_RW_MGR, 0))); if (vg == 0) { break; @@ -1868,36 +1823,34 @@ static uint32_t rw_mgr_mem_calibrate_read_test (uint32_t rank_bgn, uint32_t grou *bit_chk &= tmp_bit_chk; } - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, (group << 2), - __RW_MGR_CLEAR_DQS_ENABLE); + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, (group << 2), __RW_MGR_CLEAR_DQS_ENABLE); if (all_correct) { set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); - pr_debug("read_test(%u,ALL,%u) => (%u == %u) => %u\n", - group, all_groups, *bit_chk, param->read_correct_mask, - (*bit_chk == - param->read_correct_mask)); + DPRINT(2, "read_test(%lu,ALL,%lu) => (%lu == %lu) => %lu", group, all_groups, + *bit_chk, param->read_correct_mask, + (long unsigned int)(*bit_chk == param->read_correct_mask)); return (*bit_chk == param->read_correct_mask); - } else { + } else { set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); - pr_debug("read_test(%u,ONE,%u) => (%u != %u) => %u\n", - group, all_groups, *bit_chk, 0, - (*bit_chk != 0x00)); + DPRINT(2, "read_test(%lu,ONE,%lu) => (%lu != %lu) => %lu", group, all_groups, + *bit_chk, (long unsigned int)0, (long unsigned int)(*bit_chk != 0x00)); return (*bit_chk != 0x00); } } -static uint32_t rw_mgr_mem_calibrate_read_test_all_ranks (uint32_t group, - uint32_t num_tries, uint32_t all_correct, t_btfld *bit_chk, - uint32_t all_groups) +static inline uint32_t rw_mgr_mem_calibrate_read_test_all_ranks(uint32_t group, uint32_t num_tries, + uint32_t all_correct, + t_btfld * bit_chk, + uint32_t all_groups) { - return rw_mgr_mem_calibrate_read_test (0, group, num_tries, all_correct, - bit_chk, all_groups, 1); + return rw_mgr_mem_calibrate_read_test(0, group, num_tries, all_correct, bit_chk, all_groups, + 1); } -static void rw_mgr_incr_vfifo(uint32_t grp, uint32_t *v) +static void rw_mgr_incr_vfifo(uint32_t grp, uint32_t * v) { - /* fiddle with FIFO */ + //USER fiddle with FIFO if (HARD_PHY) { IOWR_32DIRECT(PHY_MGR_CMD_INC_VFIFO_HARD_PHY, 0, grp); } else if (QUARTER_RATE_MODE && !HARD_VFIFO) { @@ -1911,8 +1864,7 @@ static void rw_mgr_incr_vfifo(uint32_t grp, uint32_t *v) IOWR_32DIRECT(PHY_MGR_CMD_INC_VFIFO_FR, 0, grp); } } else if (HARD_VFIFO) { - /* Arria V & Cyclone V have a hard full-rate VFIFO that only - has a single incr signal */ + // Arria V & Cyclone V have a hard full-rate VFIFO that only has a single incr signal IOWR_32DIRECT(PHY_MGR_CMD_INC_VFIFO_FR, 0, grp); } else { if (!HALF_RATE_MODE || (*v & 1) == 1) { @@ -1923,22 +1875,44 @@ static void rw_mgr_incr_vfifo(uint32_t grp, uint32_t *v) } (*v)++; + BFM_INC_VFIFO; +} + +//Used in quick cal to properly loop through the duplicated VFIFOs in AV QDRII/RLDRAM +static inline void rw_mgr_incr_vfifo_all(uint32_t grp, uint32_t * v) +{ +#if VFIFO_CONTROL_WIDTH_PER_DQS == 1 + rw_mgr_incr_vfifo(grp, v); +#else + uint32_t i; + for (i = 0; i < VFIFO_CONTROL_WIDTH_PER_DQS; i++) { + rw_mgr_incr_vfifo(grp * VFIFO_CONTROL_WIDTH_PER_DQS + i, v); + if (i != 0) { + (*v)--; + } + } +#endif } -static void rw_mgr_decr_vfifo(uint32_t grp, uint32_t *v) +static void 
rw_mgr_decr_vfifo(uint32_t grp, uint32_t * v) { uint32_t i; - for (i = 0; i < VFIFO_SIZE-1; i++) { + for (i = 0; i < VFIFO_SIZE - 1; i++) { rw_mgr_incr_vfifo(grp, v); } } -/* find a good dqs enable to use */ -static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase (uint32_t grp) +//USER find a good dqs enable to use + +#if NEWVERSION_DQSEN + +// Navid's version + +static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(uint32_t grp) { - uint32_t i, d, v, p; + uint32_t i, d, v, p, sr; uint32_t max_working_cnt; uint32_t fail_cnt; t_btfld bit_chk; @@ -1948,6 +1922,8 @@ static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase (uint32_t grp) uint32_t test_status; uint32_t found_passing_read, found_failing_read, initial_failing_dtap; + TRACE_FUNC("%lu", grp); + BFM_STAGE("find_dqs_en_phase"); ALTERA_ASSERT(grp < RW_MGR_MEM_IF_READ_DQS_WIDTH); reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER); @@ -1957,8 +1933,8 @@ static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase (uint32_t grp) fail_cnt = 0; - /* ************************************************************** */ - /* * Step 0 : Determine number of delay taps for each phase tap * */ + //USER ************************************************************** + //USER * Step 0 : Determine number of delay taps for each phase tap * dtaps_per_ptap = 0; tmp_delay = 0; @@ -1969,51 +1945,52 @@ static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase (uint32_t grp) dtaps_per_ptap--; ALTERA_ASSERT(dtaps_per_ptap <= IO_DQS_EN_DELAY_MAX); tmp_delay = 0; + TCLRPT_SET(debug_summary_report->computed_dtap_per_ptap, dtaps_per_ptap); + + // VFIFO sweep - /* ********************************************************* */ - /* * Step 1 : First push vfifo until we get a failing read * */ - for (v = 0; v < VFIFO_SIZE; ) { - pr_debug("find_dqs_en_phase: vfifo %u\n", vfifo_idx); - test_status = rw_mgr_mem_calibrate_read_test_all_ranks - (grp, 1, PASS_ONE_BIT, &bit_chk, 0); + //USER ********************************************************* + //USER * Step 1 : First push vfifo until we get a failing read * + for (v = 0; v < VFIFO_SIZE;) { + DPRINT(2, "find_dqs_en_phase: vfifo %lu", BFM_GBL_GET(vfifo_idx)); + test_status = + rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, PASS_ONE_BIT, &bit_chk, 0); if (!test_status) { fail_cnt++; - if (fail_cnt == 2) + if (fail_cnt == 2) { break; + } } - - /* fiddle with FIFO */ + //USER fiddle with FIFO rw_mgr_incr_vfifo(grp, &v); } if (v >= VFIFO_SIZE) { - /* no failing read found!! Something must have gone wrong */ - pr_debug("find_dqs_en_phase: vfifo failed\n"); + //USER no failing read found!! 
Something must have gone wrong + DPRINT(2, "find_dqs_en_phase: vfifo failed"); return 0; } max_working_cnt = 0; - /* ******************************************************** */ - /* * step 2: find first working phase, increment in ptaps * */ + //USER ******************************************************** + //USER * step 2: find first working phase, increment in ptaps * found_begin = 0; work_bgn = 0; - for (d = 0; d <= dtaps_per_ptap; d++, tmp_delay += - IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { + for (d = 0; d <= dtaps_per_ptap; d++, tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { work_bgn = tmp_delay; scc_mgr_set_dqs_en_delay_all_ranks(grp, d); for (i = 0; i < VFIFO_SIZE; i++) { - for (p = 0; p <= IO_DQS_EN_PHASE_MAX; p++, work_bgn += - IO_DELAY_PER_OPA_TAP) { - pr_debug("find_dqs_en_phase: begin: vfifo=%u" - " ptap=%u dtap=%u\n", vfifo_idx, p, d); + for (p = 0; p <= IO_DQS_EN_PHASE_MAX; p++, work_bgn += IO_DELAY_PER_OPA_TAP) { + DPRINT(2, "find_dqs_en_phase: begin: vfifo=%lu ptap=%lu dtap=%lu", + BFM_GBL_GET(vfifo_idx), p, d); scc_mgr_set_dqs_en_phase_all_ranks(grp, p); test_status = - rw_mgr_mem_calibrate_read_test_all_ranks - (grp, 1, PASS_ONE_BIT, &bit_chk, 0); + rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, PASS_ONE_BIT, + &bit_chk, 0); if (test_status) { max_working_cnt = 1; @@ -2022,38 +1999,39 @@ static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase (uint32_t grp) } } - if (found_begin) + if (found_begin) { break; + } if (p > IO_DQS_EN_PHASE_MAX) { - /* fiddle with FIFO */ + //USER fiddle with FIFO rw_mgr_incr_vfifo(grp, &v); } } - if (found_begin) + if (found_begin) { break; + } } if (i >= VFIFO_SIZE) { - /* cannot find working solution */ - pr_debug("find_dqs_en_phase: no vfifo/ptap/dtap\n"); + //USER cannot find working solution + DPRINT(2, "find_dqs_en_phase: no vfifo/ptap/dtap"); return 0; } work_end = work_bgn; - /* If d is 0 then the working window covers a phase tap and - we can follow the old procedure otherwise, we've found the beginning, - and we need to increment the dtaps until we find the end */ + //USER If d is 0 then the working window covers a phase tap and we can follow the old procedure + //USER otherwise, we've found the beginning, and we need to increment the dtaps until we find the end if (d == 0) { - /* ********************************************************* */ - /* * step 3a: if we have room, back off by one and - increment in dtaps * */ + //USER ******************************************************************** + //USER * step 3a: if we have room, back off by one and increment in dtaps * + COV(EN_PHASE_PTAP_OVERLAP); - /* Special case code for backing up a phase */ + //USER Special case code for backing up a phase if (p == 0) { - p = IO_DQS_EN_PHASE_MAX ; + p = IO_DQS_EN_PHASE_MAX; rw_mgr_decr_vfifo(grp, &v); } else { p = p - 1; @@ -2063,28 +2041,55 @@ static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase (uint32_t grp) found_begin = 0; for (d = 0; d <= IO_DQS_EN_DELAY_MAX && tmp_delay < work_bgn; - d++, tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { + d++, tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { - pr_debug("find_dqs_en_phase: begin-2: vfifo=%u " - "ptap=%u dtap=%u\n", vfifo_idx, p, d); + DPRINT(2, "find_dqs_en_phase: begin-2: vfifo=%lu ptap=%lu dtap=%lu", + BFM_GBL_GET(vfifo_idx), p, d); scc_mgr_set_dqs_en_delay_all_ranks(grp, d); - if (rw_mgr_mem_calibrate_read_test_all_ranks (grp, 1, - PASS_ONE_BIT, &bit_chk, 0)) { + if (rw_mgr_mem_calibrate_read_test_all_ranks + (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { found_begin = 1; work_bgn = 
tmp_delay; break; } } - /* We have found a working dtap before the ptap found above */ + // Record the debug data + // Currently dqsen is the same for all ranks + for (sr = 0; sr < NUM_SHADOW_REGS; sr++) { + TCLRPT_SET(debug_cal_report->cal_dqsen_margins[sr][grp].work_begin, + work_bgn); + if (found_begin) { + TCLRPT_SET(debug_cal_report->cal_dqsen_margins[sr][grp].phase_begin, + p); + TCLRPT_SET(debug_cal_report->cal_dqsen_margins[sr][grp].delay_begin, + d); + TCLRPT_SET(debug_cal_report->cal_dqsen_margins[sr][grp].vfifo_begin, + v % VFIFO_SIZE); + } else if (p == IO_DQS_EN_PHASE_MAX) { + TCLRPT_SET(debug_cal_report->cal_dqsen_margins[sr][grp].phase_begin, + 0); + TCLRPT_SET(debug_cal_report->cal_dqsen_margins[sr][grp].delay_begin, + 0); + TCLRPT_SET(debug_cal_report->cal_dqsen_margins[sr][grp].vfifo_begin, + (v + 1) % VFIFO_SIZE); + } else { + TCLRPT_SET(debug_cal_report->cal_dqsen_margins[sr][grp].phase_begin, + p + 1); + TCLRPT_SET(debug_cal_report->cal_dqsen_margins[sr][grp].delay_begin, + 0); + TCLRPT_SET(debug_cal_report->cal_dqsen_margins[sr][grp].vfifo_begin, + v % VFIFO_SIZE); + } + } + + //USER We have found a working dtap before the ptap found above if (found_begin == 1) { max_working_cnt++; } - - /* Restore VFIFO to old state before we decremented it - (if needed) */ + //USER Restore VFIFO to old state before we decremented it (if needed) p = p + 1; if (p > IO_DQS_EN_PHASE_MAX) { p = 0; @@ -2093,54 +2098,52 @@ static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase (uint32_t grp) scc_mgr_set_dqs_en_delay_all_ranks(grp, 0); - /* ********************************************************* */ - /* * step 4a: go forward from working phase to non working - phase, increment in ptaps * */ + //USER *********************************************************************************** + //USER * step 4a: go forward from working phase to non working phase, increment in ptaps * p = p + 1; work_end += IO_DELAY_PER_OPA_TAP; if (p > IO_DQS_EN_PHASE_MAX) { - /* fiddle with FIFO */ + //USER fiddle with FIFO p = 0; rw_mgr_incr_vfifo(grp, &v); } found_end = 0; for (; i < VFIFO_SIZE + 1; i++) { - for (; p <= IO_DQS_EN_PHASE_MAX; p++, work_end - += IO_DELAY_PER_OPA_TAP) { - pr_debug("find_dqs_en_phase: end: vfifo=%u " - "ptap=%u dtap=%u\n", vfifo_idx, p, 0); + for (; p <= IO_DQS_EN_PHASE_MAX; p++, work_end += IO_DELAY_PER_OPA_TAP) { + DPRINT(2, "find_dqs_en_phase: end: vfifo=%lu ptap=%lu dtap=%lu", + BFM_GBL_GET(vfifo_idx), p, (long unsigned int)0); scc_mgr_set_dqs_en_phase_all_ranks(grp, p); if (!rw_mgr_mem_calibrate_read_test_all_ranks - (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { + (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { found_end = 1; break; + } else { + max_working_cnt++; } - - max_working_cnt++; } - if (found_end) + if (found_end) { break; + } if (p > IO_DQS_EN_PHASE_MAX) { - /* fiddle with FIFO */ + //USER fiddle with FIFO rw_mgr_incr_vfifo(grp, &v); p = 0; } } if (i >= VFIFO_SIZE + 1) { - /* cannot see edge of failing read */ - pr_debug("find_dqs_en_phase: end: failed\n"); + //USER cannot see edge of failing read + DPRINT(2, "find_dqs_en_phase: end: failed"); return 0; } + //USER ********************************************************* + //USER * step 5a: back off one from last, increment in dtaps * - /* ********************************************************* */ - /* * step 5a: back off one from last, increment in dtaps * */ - - /* Special case code for backing up a phase */ + //USER Special case code for backing up a phase if (p == 0) { p = IO_DQS_EN_PHASE_MAX; rw_mgr_decr_vfifo(grp, &v); 
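/*
 * Editor's sketch (not part of the imported sequencer code): the DQS-enable
 * search above expresses its working window by stringing together VFIFO
 * cycles, phase taps (IO_DELAY_PER_OPA_TAP each) and delay taps
 * (IO_DELAY_PER_DQS_EN_DCHAIN_TAP each). The two helpers below are a minimal
 * standalone illustration of that arithmetic and of the "back up one phase"
 * wrap-around used in steps 3a/5a. All names and constant values here are
 * hypothetical stand-ins for illustration only; the real per-tap delays are
 * generated per device and are not the numbers shown.
 */
#include <stdint.h>

#define SK_VFIFO_SIZE            8	/* assumed; device dependent */
#define SK_DQS_EN_PHASE_MAX      7	/* assumed; device dependent */
#define SK_DELAY_PER_OPA_TAP   312	/* delay per phase tap; placeholder */
#define SK_DELAY_PER_DTAP       25	/* delay per delay-chain tap; placeholder */

/* Total DQS-enable delay of a (vfifo, phase tap, delay tap) setting. */
static uint32_t sk_dqs_en_delay(uint32_t v, uint32_t p, uint32_t d)
{
	uint32_t ptaps_per_vfifo = SK_DQS_EN_PHASE_MAX + 1;

	return (v * ptaps_per_vfifo + p) * SK_DELAY_PER_OPA_TAP +
	       d * SK_DELAY_PER_DTAP;
}

/*
 * "Back up one phase": when the phase tap is already 0, wrap to the maximum
 * phase and step the VFIFO back one cycle. The hardware only provides an
 * increment, which is why rw_mgr_decr_vfifo() above issues VFIFO_SIZE - 1
 * increments; modular arithmetic models the same effect here.
 */
static void sk_back_up_one_phase(uint32_t *v, uint32_t *p)
{
	if (*p == 0) {
		*p = SK_DQS_EN_PHASE_MAX;
		*v = (*v + SK_VFIFO_SIZE - 1) % SK_VFIFO_SIZE;
	} else {
		(*p)--;
	}
}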
@@ -2151,134 +2154,134 @@ static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase (uint32_t grp) work_end -= IO_DELAY_PER_OPA_TAP; scc_mgr_set_dqs_en_phase_all_ranks(grp, p); - /* * The actual increment of dtaps is done outside of - the if/else loop to share code */ + //USER * The actual increment of dtaps is done outside of the if/else loop to share code d = 0; - pr_debug("find_dqs_en_phase: found end v/p: vfifo=%u ptap=%u\n", - vfifo_idx, p); + DPRINT(2, "find_dqs_en_phase: found end v/p: vfifo=%lu ptap=%lu", + BFM_GBL_GET(vfifo_idx), p); } else { - /* ******************************************************* */ - /* * step 3-5b: Find the right edge of the window using - delay taps * */ + //USER ******************************************************************** + //USER * step 3-5b: Find the right edge of the window using delay taps * + COV(EN_PHASE_PTAP_NO_OVERLAP); - pr_debug("find_dqs_en_phase: begin found: vfifo=%u ptap=%u " - "dtap=%u begin=%u\n", vfifo_idx, p, d, - work_bgn); + DPRINT(2, "find_dqs_en_phase: begin found: vfifo=%lu ptap=%lu dtap=%lu begin=%lu", + BFM_GBL_GET(vfifo_idx), p, d, work_bgn); + BFM_GBL_SET(dqs_enable_left_edge[grp].v, BFM_GBL_GET(vfifo_idx)); + BFM_GBL_SET(dqs_enable_left_edge[grp].p, p); + BFM_GBL_SET(dqs_enable_left_edge[grp].d, d); + BFM_GBL_SET(dqs_enable_left_edge[grp].ps, work_bgn); work_end = work_bgn; - /* * The actual increment of dtaps is done outside of the - if/else loop to share code */ + //USER * The actual increment of dtaps is done outside of the if/else loop to share code - /* Only here to counterbalance a subtract later on which is - not needed if this branch of the algorithm is taken */ + //USER Only here to counterbalance a subtract later on which is not needed if this branch + //USER of the algorithm is taken max_working_cnt++; } - /* The dtap increment to find the failing edge is done here */ - for (; d <= IO_DQS_EN_DELAY_MAX; d++, work_end += - IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { + //USER The dtap increment to find the failing edge is done here + for (; d <= IO_DQS_EN_DELAY_MAX; d++, work_end += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) { - pr_debug("find_dqs_en_phase: end-2: dtap=%u\n", d); - scc_mgr_set_dqs_en_delay_all_ranks(grp, d); + DPRINT(2, "find_dqs_en_phase: end-2: dtap=%lu", d); + scc_mgr_set_dqs_en_delay_all_ranks(grp, d); - if (!rw_mgr_mem_calibrate_read_test_all_ranks (grp, 1, - PASS_ONE_BIT, &bit_chk, 0)) { - break; - } + if (!rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { + break; } + } - /* Go back to working dtap */ + //USER Go back to working dtap if (d != 0) { work_end -= IO_DELAY_PER_DQS_EN_DCHAIN_TAP; } - pr_debug("find_dqs_en_phase: found end v/p/d: vfifo=%u ptap=%u " - "dtap=%u end=%u\n", vfifo_idx, p, d-1, work_end); + DPRINT(2, "find_dqs_en_phase: found end v/p/d: vfifo=%lu ptap=%lu dtap=%lu end=%lu", + BFM_GBL_GET(vfifo_idx), p, d - 1, work_end); + BFM_GBL_SET(dqs_enable_right_edge[grp].v, BFM_GBL_GET(vfifo_idx)); + BFM_GBL_SET(dqs_enable_right_edge[grp].p, p); + BFM_GBL_SET(dqs_enable_right_edge[grp].d, d - 1); + BFM_GBL_SET(dqs_enable_right_edge[grp].ps, work_end); + + // Record the debug data + for (sr = 0; sr < NUM_SHADOW_REGS; sr++) { + TCLRPT_SET(debug_cal_report->cal_dqsen_margins[sr][grp].work_end, work_end); + TCLRPT_SET(debug_cal_report->cal_dqsen_margins[sr][grp].phase_end, p); + TCLRPT_SET(debug_cal_report->cal_dqsen_margins[sr][grp].delay_end, d - 1); + TCLRPT_SET(debug_cal_report->cal_dqsen_margins[sr][grp].vfifo_end, v % VFIFO_SIZE); + } if (work_end >= work_bgn) { - 
/* we have a working range */ + //USER we have a working range } else { - /* nil range */ - pr_debug("find_dqs_en_phase: end-2: failed\n"); + //USER nil range + DPRINT(2, "find_dqs_en_phase: end-2: failed"); return 0; } - pr_debug("find_dqs_en_phase: found range [%u,%u]\n", - work_bgn, work_end); + DPRINT(2, "find_dqs_en_phase: found range [%lu,%lu]", work_bgn, work_end); -#if USE_DQS_TRACKING - /* *************************************************************** */ - /* - * * We need to calculate the number of dtaps that equal a ptap - * * To do that we'll back up a ptap and re-find the edge of the - * * window using dtaps - */ + // *************************************************************** + //USER * We need to calculate the number of dtaps that equal a ptap + //USER * To do that we'll back up a ptap and re-find the edge of the + //USER * window using dtaps - pr_debug("find_dqs_en_phase: calculate dtaps_per_ptap for tracking\n"); + DPRINT(2, "find_dqs_en_phase: calculate dtaps_per_ptap for tracking"); - /* Special case code for backing up a phase */ + //USER Special case code for backing up a phase if (p == 0) { p = IO_DQS_EN_PHASE_MAX; rw_mgr_decr_vfifo(grp, &v); - pr_debug("find_dqs_en_phase: backed up cycle/phase: " - "v=%u p=%u\n", vfifo_idx, p); + DPRINT(2, "find_dqs_en_phase: backed up cycle/phase: v=%lu p=%lu", + BFM_GBL_GET(vfifo_idx), p); } else { p = p - 1; - pr_debug("find_dqs_en_phase: backed up phase only: v=%u " - "p=%u\n", vfifo_idx, p); + DPRINT(2, "find_dqs_en_phase: backed up phase only: v=%lu p=%lu", + BFM_GBL_GET(vfifo_idx), p); } scc_mgr_set_dqs_en_phase_all_ranks(grp, p); - /* - * Increase dtap until we first see a passing read (in case the - * window is smaller than a ptap), - * and then a failing read to mark the edge of the window again - */ + //USER Increase dtap until we first see a passing read (in case the window is smaller than a ptap), + //USER and then a failing read to mark the edge of the window again - /* Find a passing read */ - pr_debug("find_dqs_en_phase: find passing read\n"); + //USER Find a passing read + DPRINT(2, "find_dqs_en_phase: find passing read"); found_passing_read = 0; found_failing_read = 0; initial_failing_dtap = d; for (; d <= IO_DQS_EN_DELAY_MAX; d++) { - pr_debug("find_dqs_en_phase: testing read d=%u\n", d); + DPRINT(2, "find_dqs_en_phase: testing read d=%lu", d); scc_mgr_set_dqs_en_delay_all_ranks(grp, d); - if (rw_mgr_mem_calibrate_read_test_all_ranks (grp, 1, - PASS_ONE_BIT, &bit_chk, 0)) { + if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { found_passing_read = 1; break; } } if (found_passing_read) { - /* Find a failing read */ - pr_debug("find_dqs_en_phase: find failing read\n"); + //USER Find a failing read + DPRINT(2, "find_dqs_en_phase: find failing read"); for (d = d + 1; d <= IO_DQS_EN_DELAY_MAX; d++) { - pr_debug("find_dqs_en_phase: testing read d=%u\n", d); + DPRINT(2, "find_dqs_en_phase: testing read d=%lu", d); scc_mgr_set_dqs_en_delay_all_ranks(grp, d); if (!rw_mgr_mem_calibrate_read_test_all_ranks - (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { + (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { found_failing_read = 1; break; } } } else { - pr_debug("find_dqs_en_phase: failed to calculate dtaps " - "per ptap. Fall back on static value\n"); + DPRINT(1, + "find_dqs_en_phase: failed to calculate dtaps per ptap. Fall back on static value"); } - /* - * The dynamically calculated dtaps_per_ptap is only valid if we - * found a passing/failing read. 
If we didn't, it means d hit the max - * (IO_DQS_EN_DELAY_MAX). Otherwise, dtaps_per_ptap retains its - * statically calculated value. - */ + //USER The dynamically calculated dtaps_per_ptap is only valid if we found a passing/failing read + //USER If we didn't, it means d hit the max (IO_DQS_EN_DELAY_MAX). + //USER Otherwise, dtaps_per_ptap retains its statically calculated value. if (found_passing_read && found_failing_read) { dtaps_per_ptap = d - initial_failing_dtap; } @@ -2286,233 +2289,657 @@ static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase (uint32_t grp) ALTERA_ASSERT(dtaps_per_ptap <= IO_DQS_EN_DELAY_MAX); IOWR_32DIRECT(REG_FILE_DTAPS_PER_PTAP, 0, dtaps_per_ptap); - pr_debug("find_dqs_en_phase: dtaps_per_ptap=%u - %u = %u\n", d, - initial_failing_dtap, dtaps_per_ptap); -#endif + DPRINT(2, "find_dqs_en_phase: dtaps_per_ptap=%lu - %lu = %lu", d, initial_failing_dtap, + dtaps_per_ptap); - /* ******************************************** */ - /* * step 6: Find the centre of the window * */ + //USER ******************************************** + //USER * step 6: Find the centre of the window * work_mid = (work_bgn + work_end) / 2; tmp_delay = 0; - pr_debug("work_bgn=%d work_end=%d work_mid=%d\n", work_bgn, - work_end, work_mid); - /* Get the middle delay to be less than a VFIFO delay */ - for (p = 0; p <= IO_DQS_EN_PHASE_MAX; - p++, tmp_delay += IO_DELAY_PER_OPA_TAP) - ; - pr_debug("vfifo ptap delay %d\n", tmp_delay); + DPRINT(2, "work_bgn=%ld work_end=%ld work_mid=%ld", work_bgn, work_end, work_mid); + //USER Get the middle delay to be less than a VFIFO delay + for (p = 0; p <= IO_DQS_EN_PHASE_MAX; p++, tmp_delay += IO_DELAY_PER_OPA_TAP) ; + DPRINT(2, "vfifo ptap delay %ld", tmp_delay); while (work_mid > tmp_delay) work_mid -= tmp_delay; - pr_debug("new work_mid %d\n", work_mid); + DPRINT(2, "new work_mid %ld", work_mid); tmp_delay = 0; for (p = 0; p <= IO_DQS_EN_PHASE_MAX && tmp_delay < work_mid; - p++, tmp_delay += IO_DELAY_PER_OPA_TAP) - ; + p++, tmp_delay += IO_DELAY_PER_OPA_TAP) ; tmp_delay -= IO_DELAY_PER_OPA_TAP; - pr_debug("new p %d, tmp_delay=%d\n", p-1, tmp_delay); - for (d = 0; d <= IO_DQS_EN_DELAY_MAX && tmp_delay < work_mid; d++, - tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) - ; - pr_debug("new d %d, tmp_delay=%d\n", d, tmp_delay); + DPRINT(2, "new p %ld, tmp_delay=%ld", p - 1, tmp_delay); + for (d = 0; d <= IO_DQS_EN_DELAY_MAX && tmp_delay < work_mid; + d++, tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) ; + DPRINT(2, "new d %ld, tmp_delay=%ld", d, tmp_delay); + + // DQSEN same for all shadow reg + for (sr = 0; sr < NUM_SHADOW_REGS; sr++) { + TCLRPT_SET(debug_cal_report->cal_dqs_in_margins[sr][grp].dqsen_margin, + max_working_cnt - 1); + } - scc_mgr_set_dqs_en_phase_all_ranks(grp, p-1); + scc_mgr_set_dqs_en_phase_all_ranks(grp, p - 1); scc_mgr_set_dqs_en_delay_all_ranks(grp, d); - /* push vfifo until we can successfully calibrate. We can do this - because the largest possible margin in 1 VFIFO cycle */ + //USER push vfifo until we can successfully calibrate. 
We can do this because + //USER the largest possible margin in 1 VFIFO cycle for (i = 0; i < VFIFO_SIZE; i++) { - pr_debug("find_dqs_en_phase: center: vfifo=%u\n", vfifo_idx); - if (rw_mgr_mem_calibrate_read_test_all_ranks (grp, 1, - PASS_ONE_BIT, &bit_chk, 0)) { + DPRINT(2, "find_dqs_en_phase: center: vfifo=%lu", BFM_GBL_GET(vfifo_idx)); + if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { break; } - - /* fiddle with FIFO */ + //USER fiddle with FIFO rw_mgr_incr_vfifo(grp, &v); } if (i >= VFIFO_SIZE) { - pr_debug("find_dqs_en_phase: center: failed\n"); + DPRINT(2, "find_dqs_en_phase: center: failed"); return 0; } - pr_debug("find_dqs_en_phase: center found: vfifo=%u ptap=%u " - "dtap=%u\n", vfifo_idx, p-1, d); + DPRINT(2, "find_dqs_en_phase: center found: vfifo=%li ptap=%lu dtap=%lu", + BFM_GBL_GET(vfifo_idx), p - 1, d); + BFM_GBL_SET(dqs_enable_mid[grp].v, BFM_GBL_GET(vfifo_idx)); + BFM_GBL_SET(dqs_enable_mid[grp].p, p - 1); + BFM_GBL_SET(dqs_enable_mid[grp].d, d); + BFM_GBL_SET(dqs_enable_mid[grp].ps, work_mid); return 1; } -/* Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different -dq_in_delay values */ -static uint32_t -rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay(uint32_t write_group, uint32_t read_group, uint32_t test_bgn) -{ -#if STRATIXV || ARRIAV || CYCLONEV || ARRIAVGZ - uint32_t found; - uint32_t i; - uint32_t p; - uint32_t d; - uint32_t r; - const uint32_t delay_step = IO_IO_IN_DELAY_MAX / (RW_MGR_MEM_DQ_PER_READ_DQS - 1); - /* we start at zero, so have one less dq to devide among */ +#if 0 +// Ryan's algorithm - /* try different dq_in_delays since the dq path is shorter than dqs */ +static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(uint32_t grp) +{ + uint32_t i, d, v, p; + uint32_t min_working_p, max_working_p, min_working_d, max_working_d, max_working_cnt; + uint32_t fail_cnt; + t_btfld bit_chk; + uint32_t dtaps_per_ptap; + uint32_t found_begin, found_end; + uint32_t tmp_delay; - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r += NUM_RANKS_PER_SHADOW_REG) { - for (i = 0, p = test_bgn, d = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; - i++, p++, d += delay_step) { - pr_debug("rw_mgr_mem_calibrate_vfifo_find_dqs_" - "en_phase_sweep_dq_in_delay: g=%u/%u " - "r=%u, i=%u p=%u d=%u\n", - write_group, read_group, r, i, p, d); - scc_mgr_set_dq_in_delay(write_group, p, d); - scc_mgr_load_dq (p); - } - IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); - } + TRACE_FUNC("%lu", grp); - found = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(read_group); + reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER); - pr_debug("rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq" - "_in_delay: g=%u/%u found=%u; Reseting delay chain to zero\n", - write_group, read_group, found); + scc_mgr_set_dqs_en_delay_all_ranks(grp, 0); + scc_mgr_set_dqs_en_phase_all_ranks(grp, 0); - for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; - r += NUM_RANKS_PER_SHADOW_REG) { - for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; - i++, p++) { - scc_mgr_set_dq_in_delay(write_group, p, 0); - scc_mgr_load_dq (p); - } - IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); - } + fail_cnt = 0; - return found; -#else - return rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(read_group); -#endif -} + //USER ************************************************************** + //USER * Step 0 : Determine number of delay taps for each phase tap * -/* per-bit deskew DQ and center */ -static uint32_t rw_mgr_mem_calibrate_vfifo_center (uint32_t rank_bgn, - uint32_t write_group, uint32_t read_group, uint32_t test_bgn, - 
uint32_t use_read_test, uint32_t update_fom) -{ - uint32_t i, p, d, min_index; - /* Store these as signed since there are comparisons with - signed numbers */ - t_btfld bit_chk; - t_btfld sticky_bit_chk; - int32_t left_edge[RW_MGR_MEM_DQ_PER_READ_DQS]; - int32_t right_edge[RW_MGR_MEM_DQ_PER_READ_DQS]; - int32_t final_dq[RW_MGR_MEM_DQ_PER_READ_DQS]; - int32_t mid; - int32_t orig_mid_min, mid_min; - int32_t new_dqs, start_dqs, start_dqs_en, shift_dq, final_dqs, - final_dqs_en; - int32_t dq_margin, dqs_margin; - uint32_t stop; + dtaps_per_ptap = 0; + tmp_delay = 0; + while (tmp_delay < IO_DELAY_PER_OPA_TAP) { + dtaps_per_ptap++; + tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP; + } + dtaps_per_ptap--; - ALTERA_ASSERT(read_group < RW_MGR_MEM_IF_READ_DQS_WIDTH); - ALTERA_ASSERT(write_group < RW_MGR_MEM_IF_WRITE_DQS_WIDTH); + //USER ********************************************************* + //USER * Step 1 : First push vfifo until we get a failing read * + for (v = 0; v < VFIFO_SIZE;) { + if (!rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { + fail_cnt++; - start_dqs = READ_SCC_DQS_IN_DELAY(read_group); - if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { - start_dqs_en = READ_SCC_DQS_EN_DELAY(read_group); + if (fail_cnt == 2) { + break; + } + } + //USER fiddle with FIFO + rw_mgr_incr_vfifo(grp, &v); } - /* per-bit deskew */ - - /* set the left and right edge of each bit to an illegal value */ - /* use (IO_IO_IN_DELAY_MAX + 1) as an illegal value */ - sticky_bit_chk = 0; - for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { - left_edge[i] = IO_IO_IN_DELAY_MAX + 1; - right_edge[i] = IO_IO_IN_DELAY_MAX + 1; + if (i >= VFIFO_SIZE) { + //USER no failing read found!! Something must have gone wrong + return 0; } - /* Search for the left edge of the window for each bit */ - for (d = 0; d <= IO_IO_IN_DELAY_MAX; d++) { - scc_mgr_apply_group_dq_in_delay (write_group, test_bgn, d); + max_working_cnt = 0; + min_working_p = 0; - IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); + //USER ******************************************************** + //USER * step 2: find first working phase, increment in ptaps * + found_begin = 0; + for (d = 0; d <= dtaps_per_ptap; d++) { + scc_mgr_set_dqs_en_delay_all_ranks(grp, d); + + for (i = 0; i < VFIFO_SIZE; i++) { + for (p = 0; p <= IO_DQS_EN_PHASE_MAX; p++) { + scc_mgr_set_dqs_en_phase_all_ranks(grp, p); + + if (rw_mgr_mem_calibrate_read_test_all_ranks + (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { + max_working_cnt = 1; + found_begin = 1; + break; + } + } + + if (found_begin) { + break; + } + + if (p > IO_DQS_EN_PHASE_MAX) { + //USER fiddle with FIFO + rw_mgr_incr_vfifo(grp, &v); + } + } + + if (found_begin) { + break; + } + } + + if (i >= VFIFO_SIZE) { + //USER cannot find working solution + return 0; + } + + min_working_p = p; + + //USER If d is 0 then the working window covers a phase tap and we can follow the old procedure + //USER otherwise, we've found the beginning, and we need to increment the dtaps until we find the end + if (d == 0) { + //USER ******************************************************************** + //USER * step 3a: if we have room, back off by one and increment in dtaps * + min_working_d = 0; + + //USER Special case code for backing up a phase + if (p == 0) { + p = IO_DQS_EN_PHASE_MAX; + rw_mgr_decr_vfifo(grp, &v); + } else { + p = p - 1; + } + scc_mgr_set_dqs_en_phase_all_ranks(grp, p); + + found_begin = 0; + for (d = 0; d <= dtaps_per_ptap; d++) { + scc_mgr_set_dqs_en_delay_all_ranks(grp, d); + + if (rw_mgr_mem_calibrate_read_test_all_ranks + (grp, 
1, PASS_ONE_BIT, &bit_chk, 0)) { + found_begin = 1; + min_working_d = d; + break; + } + } + + //USER We have found a working dtap before the ptap found above + if (found_begin == 1) { + min_working_p = p; + max_working_cnt++; + } + //USER Restore VFIFO to old state before we decremented it + p = p + 1; + if (p > IO_DQS_EN_PHASE_MAX) { + p = 0; + rw_mgr_incr_vfifo(grp, &v); + } + + scc_mgr_set_dqs_en_delay_all_ranks(grp, 0); + + //USER *********************************************************************************** + //USER * step 4a: go forward from working phase to non working phase, increment in ptaps * + p = p + 1; + if (p > IO_DQS_EN_PHASE_MAX) { + //USER fiddle with FIFO + p = 0; + rw_mgr_incr_vfifo(grp, &v); + } + + found_end = 0; + for (; i < VFIFO_SIZE + 1; i++) { + for (; p <= IO_DQS_EN_PHASE_MAX; p++) { + scc_mgr_set_dqs_en_phase_all_ranks(grp, p); + + if (!rw_mgr_mem_calibrate_read_test_all_ranks + (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { + found_end = 1; + break; + } else { + max_working_cnt++; + } + } + + if (found_end) { + break; + } + + if (p > IO_DQS_EN_PHASE_MAX) { + //USER fiddle with FIFO + rw_mgr_incr_vfifo(grp, &v); + p = 0; + } + } + + if (i >= VFIFO_SIZE + 1) { + //USER cannot see edge of failing read + return 0; + } + //USER ********************************************************* + //USER * step 5a: back off one from last, increment in dtaps * + max_working_d = 0; + + //USER Special case code for backing up a phase + if (p == 0) { + p = IO_DQS_EN_PHASE_MAX; + rw_mgr_decr_vfifo(grp, &v); + } else { + p = p - 1; + } + + max_working_p = p; + scc_mgr_set_dqs_en_phase_all_ranks(grp, p); + + for (d = 0; d <= IO_DQS_EN_DELAY_MAX; d++) { + scc_mgr_set_dqs_en_delay_all_ranks(grp, d); + + if (!rw_mgr_mem_calibrate_read_test_all_ranks + (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { + break; + } + } + + //USER Go back to working dtap + if (d != 0) { + max_working_d = d - 1; + } + + } else { + + //USER ******************************************************************** + //USER * step 3-5b: Find the right edge of the window using delay taps * + + max_working_p = min_working_p; + min_working_d = d; + + for (; d <= IO_DQS_EN_DELAY_MAX; d++) { + scc_mgr_set_dqs_en_delay_all_ranks(grp, d); + + if (!rw_mgr_mem_calibrate_read_test_all_ranks + (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { + break; + } + } + + //USER Go back to working dtap + if (d != 0) { + max_working_d = d - 1; + } + //USER Only here to counterbalance a subtract later on which is not needed if this branch + //USER of the algorithm is taken + max_working_cnt++; + } + + //USER ******************************************** + //USER * step 6: Find the centre of the window * + + //USER If the number of working phases is even we will step back a phase and find the + //USER edge with a larger delay chain tap + if ((max_working_cnt & 1) == 0) { + p = min_working_p + (max_working_cnt - 1) / 2; + + //USER Special case code for backing up a phase + if (max_working_p == 0) { + max_working_p = IO_DQS_EN_PHASE_MAX; + rw_mgr_decr_vfifo(grp, &v); + } else { + max_working_p = max_working_p - 1; + } + + scc_mgr_set_dqs_en_phase_all_ranks(grp, max_working_p); + + //USER Code to determine at which dtap we should start searching again for a failure + //USER If we've moved back such that the max and min p are the same, we should start searching + //USER from where the window actually exists + if (max_working_p == min_working_p) { + d = min_working_d; + } else { + d = max_working_d; + } + + for (; d <= IO_DQS_EN_DELAY_MAX; d++) { + 
scc_mgr_set_dqs_en_delay_all_ranks(grp, d); + + if (!rw_mgr_mem_calibrate_read_test_all_ranks + (grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { + break; + } + } + + //USER Go back to working dtap + if (d != 0) { + max_working_d = d - 1; + } + } else { + p = min_working_p + (max_working_cnt) / 2; + } + + while (p > IO_DQS_EN_PHASE_MAX) { + p -= (IO_DQS_EN_PHASE_MAX + 1); + } + + d = (min_working_d + max_working_d) / 2; + + scc_mgr_set_dqs_en_phase_all_ranks(grp, p); + scc_mgr_set_dqs_en_delay_all_ranks(grp, d); + + //USER push vfifo until we can successfully calibrate + + for (i = 0; i < VFIFO_SIZE; i++) { + if (rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { + break; + } + //USER fiddle with FIFO + rw_mgr_incr_vfifo(grp, &v); + } + + if (i >= VFIFO_SIZE) { + return 0; + } + + return 1; +} + +#endif + +#else +// Val's original version + +static uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(uint32_t grp) +{ + uint32_t i, j, v, d; + uint32_t min_working_d, max_working_cnt; + uint32_t fail_cnt; + t_btfld bit_chk; + uint32_t delay_per_ptap_mid; + + TRACE_FUNC("%lu", grp); + + reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER); + + scc_mgr_set_dqs_en_delay_all_ranks(grp, 0); + scc_mgr_set_dqs_en_phase_all_ranks(grp, 0); + + fail_cnt = 0; + + //USER first push vfifo until we get a failing read + v = 0; + for (i = 0; i < VFIFO_SIZE; i++) { + if (!rw_mgr_mem_calibrate_read_test_all_ranks(grp, 1, PASS_ONE_BIT, &bit_chk, 0)) { + fail_cnt++; + + if (fail_cnt == 2) { + break; + } + } + //USER fiddle with FIFO + rw_mgr_incr_vfifo(grp, &v); + } + + if (v >= VFIFO_SIZE) { + //USER no failing read found!! Something must have gone wrong + + return 0; + } + + max_working_cnt = 0; + min_working_d = 0; + + for (i = 0; i < VFIFO_SIZE + 1; i++) { + for (d = 0; d <= IO_DQS_EN_PHASE_MAX; d++) { + scc_mgr_set_dqs_en_phase_all_ranks(grp, d); + + rw_mgr_mem_calibrate_read_test_all_ranks(grp, NUM_READ_PB_TESTS, + PASS_ONE_BIT, &bit_chk, 0); + if (bit_chk) { + //USER passing read + + if (max_working_cnt == 0) { + min_working_d = d; + } + + max_working_cnt++; + } else { + if (max_working_cnt > 0) { + //USER already have one working value + break; + } + } + } + + if (d > IO_DQS_EN_PHASE_MAX) { + //USER fiddle with FIFO + rw_mgr_incr_vfifo(grp, &v); + } else { + //USER found working solution! 
+ + d = min_working_d + (max_working_cnt - 1) / 2; + + while (d > IO_DQS_EN_PHASE_MAX) { + d -= (IO_DQS_EN_PHASE_MAX + 1); + } + + break; + } + } + + if (i >= VFIFO_SIZE + 1) { + //USER cannot find working solution or cannot see edge of failing read + + return 0; + } + //USER in the case the number of working steps is even, use 50ps taps to further center the window + + if ((max_working_cnt & 1) == 0) { + delay_per_ptap_mid = IO_DELAY_PER_OPA_TAP / 2; + + //USER increment in 50ps taps until we reach the required amount + + for (i = 0, j = 0; i <= IO_DQS_EN_DELAY_MAX && j < delay_per_ptap_mid; + i++, j += IO_DELAY_PER_DQS_EN_DCHAIN_TAP) ; + + scc_mgr_set_dqs_en_delay_all_ranks(grp, i - 1); + } + + scc_mgr_set_dqs_en_phase_all_ranks(grp, d); + + //USER push vfifo until we can successfully calibrate + + for (i = 0; i < VFIFO_SIZE; i++) { + if (rw_mgr_mem_calibrate_read_test_all_ranks + (grp, NUM_READ_PB_TESTS, PASS_ONE_BIT, &bit_chk, 0)) { + break; + } + //USER fiddle with FIFO + rw_mgr_incr_vfifo(grp, &v); + } + + if (i >= VFIFO_SIZE) { + return 0; + } + + return 1; +} + +#endif + +// Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different dq_in_delay values +static inline uint32_t rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay(uint32_t + write_group, + uint32_t + read_group, + uint32_t + test_bgn) +{ + uint32_t found; + uint32_t i; + uint32_t p; + uint32_t d; + uint32_t r; + + const uint32_t delay_step = IO_IO_IN_DELAY_MAX / (RW_MGR_MEM_DQ_PER_READ_DQS - 1); /* we start at zero, so have one less dq to devide among */ + + TRACE_FUNC("(%lu,%lu,%lu)", write_group, read_group, test_bgn); + + // try different dq_in_delays since the dq path is shorter than dqs + + for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r += NUM_RANKS_PER_SHADOW_REG) { + select_shadow_regs_for_update(r, write_group, 1); + for (i = 0, p = test_bgn, d = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; + i++, p++, d += delay_step) { + DPRINT(1, + "rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay: g=%lu/%lu r=%lu, i=%lu p=%lu d=%lu", + write_group, read_group, r, i, p, d); + scc_mgr_set_dq_in_delay(write_group, p, d); + scc_mgr_load_dq(p); + } + IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); + } + + found = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(read_group); + + DPRINT(1, + "rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay: g=%lu/%lu found=%lu; Reseting delay chain to zero", + write_group, read_group, found); + + for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r += NUM_RANKS_PER_SHADOW_REG) { + select_shadow_regs_for_update(r, write_group, 1); + for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) { + scc_mgr_set_dq_in_delay(write_group, p, 0); + scc_mgr_load_dq(p); + } + IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); + } + + return found; +} + +//USER per-bit deskew DQ and center + +#if NEWVERSION_RDDESKEW + +static uint32_t rw_mgr_mem_calibrate_vfifo_center(uint32_t rank_bgn, uint32_t write_group, + uint32_t read_group, uint32_t test_bgn, + uint32_t use_read_test, uint32_t update_fom) +{ + uint32_t i, p, d, min_index; + //USER Store these as signed since there are comparisons with signed numbers + t_btfld bit_chk; + t_btfld sticky_bit_chk; + int32_t left_edge[RW_MGR_MEM_DQ_PER_READ_DQS]; + int32_t right_edge[RW_MGR_MEM_DQ_PER_READ_DQS]; + int32_t final_dq[RW_MGR_MEM_DQ_PER_READ_DQS]; + int32_t mid; + int32_t orig_mid_min, mid_min; + int32_t new_dqs, start_dqs, start_dqs_en, shift_dq, final_dqs, final_dqs_en; + int32_t dq_margin, dqs_margin; + uint32_t stop; + + TRACE_FUNC("%lu %lu", read_group, 
test_bgn); + + ALTERA_ASSERT(read_group < RW_MGR_MEM_IF_READ_DQS_WIDTH); + ALTERA_ASSERT(write_group < RW_MGR_MEM_IF_WRITE_DQS_WIDTH); + + start_dqs = READ_SCC_DQS_IN_DELAY(read_group); + if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { + start_dqs_en = READ_SCC_DQS_EN_DELAY(read_group); + } + + select_curr_shadow_reg_using_rank(rank_bgn); + + //USER per-bit deskew + + //USER set the left and right edge of each bit to an illegal value + //USER use (IO_IO_IN_DELAY_MAX + 1) as an illegal value + sticky_bit_chk = 0; + for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { + left_edge[i] = IO_IO_IN_DELAY_MAX + 1; + right_edge[i] = IO_IO_IN_DELAY_MAX + 1; + } + + //USER Search for the left edge of the window for each bit + for (d = 0; d <= IO_IO_IN_DELAY_MAX; d++) { + scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, d); - /* Stop searching when the read test doesn't pass AND when - we've seen a passing read on every bit */ + IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); + + //USER Stop searching when the read test doesn't pass AND when we've seen a passing read on every bit if (use_read_test) { - stop = !rw_mgr_mem_calibrate_read_test (rank_bgn, - read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT, - &bit_chk, 0, 0); + stop = + !rw_mgr_mem_calibrate_read_test(rank_bgn, read_group, NUM_READ_PB_TESTS, + PASS_ONE_BIT, &bit_chk, 0, 0); } else { - rw_mgr_mem_calibrate_write_test (rank_bgn, write_group, - 0, PASS_ONE_BIT, &bit_chk, 0); - bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS * - (read_group - (write_group * RW_MGR_NUM_DQS_PER_WRITE_GROUP))); + rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 0, PASS_ONE_BIT, + &bit_chk, 0); + bit_chk = + bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS * + (read_group - + (write_group * RW_MGR_MEM_IF_READ_DQS_WIDTH / + RW_MGR_MEM_IF_WRITE_DQS_WIDTH))); stop = (bit_chk == 0); } sticky_bit_chk = sticky_bit_chk | bit_chk; stop = stop && (sticky_bit_chk == param->read_correct_mask); - pr_debug("vfifo_center(left): dtap=%u => " BTFLD_FMT " == " - BTFLD_FMT " && %u\n", d, sticky_bit_chk, - param->read_correct_mask, stop); + DPRINT(2, "vfifo_center(left): dtap=%lu => " BTFLD_FMT " == " BTFLD_FMT " && %lu", + d, sticky_bit_chk, param->read_correct_mask, stop); - if (stop == 1) + if (stop == 1) { break; - for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { - if (bit_chk & 1) { - /* Remember a passing test as the - left_edge */ - left_edge[i] = d; - } else { - /* If a left edge has not been seen yet, - then a future passing test will mark - this edge as the right edge */ - if (left_edge[i] == - IO_IO_IN_DELAY_MAX + 1) { - right_edge[i] = -(d + 1); + } else { + for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { + if (bit_chk & 1) { + //USER Remember a passing test as the left_edge + left_edge[i] = d; + } else { + //USER If a left edge has not been seen yet, then a future passing test will mark this edge as the right edge + if (left_edge[i] == IO_IO_IN_DELAY_MAX + 1) { + right_edge[i] = -(d + 1); + } } + DPRINT(2, + "vfifo_center[l,d=%lu]: bit_chk_test=%d left_edge[%lu]: %ld right_edge[%lu]: %ld", + d, (int)(bit_chk & 1), i, left_edge[i], i, right_edge[i]); + bit_chk = bit_chk >> 1; } - pr_debug("vfifo_center[l,d=%u]: " - "bit_chk_test=%d left_edge[%u]: " - "%d right_edge[%u]: %d\n", - d, (int)(bit_chk & 1), i, left_edge[i], - i, right_edge[i]); - bit_chk = bit_chk >> 1; } } - /* Reset DQ delay chains to 0 */ - scc_mgr_apply_group_dq_in_delay (write_group, test_bgn, 0); + //USER Reset DQ delay chains to 0 + scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, 0); sticky_bit_chk = 0; for (i = 
RW_MGR_MEM_DQ_PER_READ_DQS - 1;; i--) { - pr_debug("vfifo_center: left_edge[%u]: %d right_edge[%u]: " - "%d\n", i, left_edge[i], i, right_edge[i]); + DPRINT(2, "vfifo_center: left_edge[%lu]: %ld right_edge[%lu]: %ld", i, left_edge[i], + i, right_edge[i]); - /* Check for cases where we haven't found the left edge, - which makes our assignment of the the right edge invalid. - Reset it to the illegal value. */ - if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) && ( - right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) { + //USER Check for cases where we haven't found the left edge, which makes our assignment of the the + //USER right edge invalid. Reset it to the illegal value. + if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) + && (right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) { right_edge[i] = IO_IO_IN_DELAY_MAX + 1; - pr_debug("vfifo_center: reset right_edge[%u]: %d\n", - i, right_edge[i]); + DPRINT(2, "vfifo_center: reset right_edge[%lu]: %ld", i, right_edge[i]); } - - /* Reset sticky bit (except for bits where we have seen - both the left and right edge) */ + //USER Reset sticky bit (except for bits where we have seen both the left and right edge) sticky_bit_chk = sticky_bit_chk << 1; - if ((left_edge[i] != IO_IO_IN_DELAY_MAX + 1) && - (right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) { + if ((left_edge[i] != IO_IO_IN_DELAY_MAX + 1) + && (right_edge[i] != IO_IO_IN_DELAY_MAX + 1)) { sticky_bit_chk = sticky_bit_chk | 1; } - if (i == 0) + if (i == 0) { break; + } } - /* Search for the right edge of the window for each bit */ + //USER Search for the right edge of the window for each bit for (d = 0; d <= IO_DQS_IN_DELAY_MAX - start_dqs; d++) { scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs); if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { @@ -2522,110 +2949,101 @@ static uint32_t rw_mgr_mem_calibrate_vfifo_center (uint32_t rank_bgn, } scc_mgr_set_dqs_en_delay(read_group, delay); } - scc_mgr_load_dqs (read_group); + scc_mgr_load_dqs(read_group); IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); - /* Stop searching when the read test doesn't pass AND when - we've seen a passing read on every bit */ + //USER Stop searching when the read test doesn't pass AND when we've seen a passing read on every bit if (use_read_test) { - stop = !rw_mgr_mem_calibrate_read_test (rank_bgn, - read_group, NUM_READ_PB_TESTS, PASS_ONE_BIT, - &bit_chk, 0, 0); + stop = + !rw_mgr_mem_calibrate_read_test(rank_bgn, read_group, NUM_READ_PB_TESTS, + PASS_ONE_BIT, &bit_chk, 0, 0); } else { - rw_mgr_mem_calibrate_write_test (rank_bgn, write_group, - 0, PASS_ONE_BIT, &bit_chk, 0); - bit_chk = bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS * - (read_group - (write_group * RW_MGR_NUM_DQS_PER_WRITE_GROUP))); + rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 0, PASS_ONE_BIT, + &bit_chk, 0); + bit_chk = + bit_chk >> (RW_MGR_MEM_DQ_PER_READ_DQS * + (read_group - + (write_group * RW_MGR_MEM_IF_READ_DQS_WIDTH / + RW_MGR_MEM_IF_WRITE_DQS_WIDTH))); stop = (bit_chk == 0); } sticky_bit_chk = sticky_bit_chk | bit_chk; stop = stop && (sticky_bit_chk == param->read_correct_mask); - pr_debug("vfifo_center(right): dtap=%u => " BTFLD_FMT " == " - BTFLD_FMT " && %u\n", d, sticky_bit_chk, - param->read_correct_mask, stop); + DPRINT(2, "vfifo_center(right): dtap=%lu => " BTFLD_FMT " == " BTFLD_FMT " && %lu", + d, sticky_bit_chk, param->read_correct_mask, stop); if (stop == 1) { break; } else { for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { if (bit_chk & 1) { - /* Remember a passing test as - the right_edge */ + //USER Remember a passing test as the right_edge right_edge[i] = d; } else { 
if (d != 0) { - /* If a right edge has not been - seen yet, then a future passing - test will mark this edge as the - left edge */ - if (right_edge[i] == - IO_IO_IN_DELAY_MAX + 1) { + //USER If a right edge has not been seen yet, then a future passing test will mark this edge as the left edge + if (right_edge[i] == IO_IO_IN_DELAY_MAX + 1) { left_edge[i] = -(d + 1); } } else { - /* d = 0 failed, but it passed - when testing the left edge, - so it must be marginal, - set it to -1 */ - if (right_edge[i] == - IO_IO_IN_DELAY_MAX + 1 - && left_edge[i] != - IO_IO_IN_DELAY_MAX - + 1) { + //USER d = 0 failed, but it passed when testing the left edge, so it must be marginal, set it to -1 + if (right_edge[i] == IO_IO_IN_DELAY_MAX + 1 + && left_edge[i] != IO_IO_IN_DELAY_MAX + 1) { right_edge[i] = -1; } - /* If a right edge has not been - seen yet, then a future passing - test will mark this edge as the - left edge */ - else if (right_edge[i] == - IO_IO_IN_DELAY_MAX + - 1) { + //USER If a right edge has not been seen yet, then a future passing test will mark this edge as the left edge + else if (right_edge[i] == IO_IO_IN_DELAY_MAX + 1) { left_edge[i] = -(d + 1); } } } - pr_debug("vfifo_center[r,d=%u]: " - "bit_chk_test=%d left_edge[%u]: %d " - "right_edge[%u]: %d\n", - d, (int)(bit_chk & 1), i, left_edge[i], - i, right_edge[i]); + DPRINT(2, + "vfifo_center[r,d=%lu]: bit_chk_test=%d left_edge[%lu]: %ld right_edge[%lu]: %ld", + d, (int)(bit_chk & 1), i, left_edge[i], i, right_edge[i]); bit_chk = bit_chk >> 1; } } } - /* Store all observed margins */ + // Store all observed margins - /* Check that all bits have a window */ + //USER Check that all bits have a window for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { - pr_debug("vfifo_center: left_edge[%u]: %d right_edge[%u]:" - " %d\n", i, left_edge[i], i, right_edge[i]); - if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) || (right_edge[i] - == IO_IO_IN_DELAY_MAX + 1)) { - - /* Restore delay chain settings before letting the loop - in rw_mgr_mem_calibrate_vfifo to retry different - dqs/ck relationships */ + DPRINT(2, "vfifo_center: left_edge[%lu]: %ld right_edge[%lu]: %ld", i, left_edge[i], + i, right_edge[i]); + BFM_GBL_SET(dq_read_left_edge[read_group][i], left_edge[i]); + BFM_GBL_SET(dq_read_right_edge[read_group][i], right_edge[i]); + if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) + || (right_edge[i] == IO_IO_IN_DELAY_MAX + 1)) { + + //USER Restore delay chain settings before letting the loop in + //USER rw_mgr_mem_calibrate_vfifo to retry different dqs/ck relationships scc_mgr_set_dqs_bus_in_delay(read_group, start_dqs); if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { - scc_mgr_set_dqs_en_delay(read_group, - start_dqs_en); + scc_mgr_set_dqs_en_delay(read_group, start_dqs_en); } - scc_mgr_load_dqs (read_group); + scc_mgr_load_dqs(read_group); IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); - pr_debug("vfifo_center: failed to find edge [%u]: " - "%d %d\n", i, left_edge[i], right_edge[i]); + DPRINT(1, "vfifo_center: failed to find edge [%lu]: %ld %ld", i, + left_edge[i], right_edge[i]); + if (use_read_test) { + set_failing_group_stage(read_group * RW_MGR_MEM_DQ_PER_READ_DQS + i, + CAL_STAGE_VFIFO, CAL_SUBSTAGE_VFIFO_CENTER); + } else { + set_failing_group_stage(read_group * RW_MGR_MEM_DQ_PER_READ_DQS + i, + CAL_STAGE_VFIFO_AFTER_WRITES, + CAL_SUBSTAGE_VFIFO_CENTER); + } return 0; } } - /* Find middle of window for each DQ bit */ + //USER Find middle of window for each DQ bit mid_min = left_edge[0] - right_edge[0]; min_index = 0; for (i = 1; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { @@ 
-2636,58 +3054,64 @@ static uint32_t rw_mgr_mem_calibrate_vfifo_center (uint32_t rank_bgn, } } - /* -mid_min/2 represents the amount that we need to move DQS. - If mid_min is odd and positive we'll need to add one to - make sure the rounding in further calculations is correct - (always bias to the right), so just add 1 for all positive values */ + //USER -mid_min/2 represents the amount that we need to move DQS. If mid_min is odd and positive we'll need to add one to + //USER make sure the rounding in further calculations is correct (always bias to the right), so just add 1 for all positive values if (mid_min > 0) { mid_min++; } mid_min = mid_min / 2; - pr_debug("vfifo_center: mid_min=%d (index=%u)\n", mid_min, min_index); + DPRINT(1, "vfifo_center: mid_min=%ld (index=%lu)", mid_min, min_index); - /* Determine the amount we can change DQS (which is -mid_min) */ + //USER Determine the amount we can change DQS (which is -mid_min) orig_mid_min = mid_min; - new_dqs = start_dqs; - mid_min = 0; + new_dqs = start_dqs - mid_min; + if (new_dqs > IO_DQS_IN_DELAY_MAX) { + new_dqs = IO_DQS_IN_DELAY_MAX; + } else if (new_dqs < 0) { + new_dqs = 0; + } + mid_min = start_dqs - new_dqs; + DPRINT(1, "vfifo_center: new mid_min=%ld new_dqs=%ld", mid_min, new_dqs); + + if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { + if (start_dqs_en - mid_min > IO_DQS_EN_DELAY_MAX) { + mid_min += start_dqs_en - mid_min - IO_DQS_EN_DELAY_MAX; + } else if (start_dqs_en - mid_min < 0) { + mid_min += start_dqs_en - mid_min; + } + } + new_dqs = start_dqs - mid_min; - pr_debug("vfifo_center: start_dqs=%d start_dqs_en=%d " - "new_dqs=%d mid_min=%d\n", - start_dqs, IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS ? start_dqs_en : -1, - new_dqs, mid_min); + DPRINT(1, "vfifo_center: start_dqs=%ld start_dqs_en=%ld new_dqs=%ld mid_min=%ld", + start_dqs, IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS ? 
start_dqs_en : -1, new_dqs, mid_min); - /* Initialize data for export structures */ + //USER Initialize data for export structures dqs_margin = IO_IO_IN_DELAY_MAX + 1; - dq_margin = IO_IO_IN_DELAY_MAX + 1; + dq_margin = IO_IO_IN_DELAY_MAX + 1; - /* add delay to bring centre of all DQ windows to the same "level" */ + //USER add delay to bring centre of all DQ windows to the same "level" for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) { - /* Use values before divide by 2 to reduce round off error */ - shift_dq = (left_edge[i] - right_edge[i] - - (left_edge[min_index] - right_edge[min_index]))/2 + - (orig_mid_min - mid_min); - - pr_debug("vfifo_center: before: shift_dq[%u]=%d\n", i, - shift_dq); - - if (shift_dq + (int32_t)READ_SCC_DQ_IN_DELAY(p) > - (int32_t)IO_IO_IN_DELAY_MAX) { - shift_dq = (int32_t)IO_IO_IN_DELAY_MAX - - READ_SCC_DQ_IN_DELAY(i); - } else if (shift_dq + (int32_t)READ_SCC_DQ_IN_DELAY(p) < 0) { - shift_dq = -(int32_t)READ_SCC_DQ_IN_DELAY(p); - } - pr_debug("vfifo_center: after: shift_dq[%u]=%d\n", i, - shift_dq); + //USER Use values before divide by 2 to reduce round off error + shift_dq = + (left_edge[i] - right_edge[i] - + (left_edge[min_index] - right_edge[min_index])) / 2 + (orig_mid_min - mid_min); + + DPRINT(2, "vfifo_center: before: shift_dq[%lu]=%ld", i, shift_dq); + + if (shift_dq + (int32_t) READ_SCC_DQ_IN_DELAY(p) > (int32_t) IO_IO_IN_DELAY_MAX) { + shift_dq = (int32_t) IO_IO_IN_DELAY_MAX - READ_SCC_DQ_IN_DELAY(i); + } else if (shift_dq + (int32_t) READ_SCC_DQ_IN_DELAY(p) < 0) { + shift_dq = -(int32_t) READ_SCC_DQ_IN_DELAY(p); + } + DPRINT(2, "vfifo_center: after: shift_dq[%lu]=%ld", i, shift_dq); final_dq[i] = READ_SCC_DQ_IN_DELAY(p) + shift_dq; scc_mgr_set_dq_in_delay(write_group, p, final_dq[i]); - scc_mgr_load_dq (p); + scc_mgr_load_dq(p); - pr_debug("vfifo_center: margin[%u]=[%d,%d]\n", i, - left_edge[i] - shift_dq + (-mid_min), - right_edge[i] + shift_dq - (-mid_min)); - /* To determine values for export structures */ + DPRINT(2, "vfifo_center: margin[%lu]=[%ld,%ld]", i, + left_edge[i] - shift_dq + (-mid_min), right_edge[i] + shift_dq - (-mid_min)); + //USER To determine values for export structures if (left_edge[i] - shift_dq + (-mid_min) < dq_margin) { dq_margin = left_edge[i] - shift_dq + (-mid_min); } @@ -2696,352 +3120,593 @@ static uint32_t rw_mgr_mem_calibrate_vfifo_center (uint32_t rank_bgn, } } -#if ENABLE_DQS_IN_CENTERING final_dqs = new_dqs; if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { final_dqs_en = start_dqs_en - mid_min; } -#else - final_dqs = start_dqs; - if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { - final_dqs_en = start_dqs_en; - } -#endif - - /* Move DQS-en */ + //USER Move DQS-en if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) { scc_mgr_set_dqs_en_delay(read_group, final_dqs_en); - scc_mgr_load_dqs (read_group); + scc_mgr_load_dqs(read_group); } - - /* Move DQS */ + //USER Move DQS scc_mgr_set_dqs_bus_in_delay(read_group, final_dqs); - scc_mgr_load_dqs (read_group); + scc_mgr_load_dqs(read_group); if (update_fom) { - /* Export values */ - gbl->fom_in += (dq_margin + dqs_margin) / RW_MGR_NUM_DQS_PER_WRITE_GROUP; + //USER Export values + gbl->fom_in += + (dq_margin + + dqs_margin) / (RW_MGR_MEM_IF_READ_DQS_WIDTH / RW_MGR_MEM_IF_WRITE_DQS_WIDTH); + TCLRPT_SET(debug_summary_report->fom_in, + debug_summary_report->fom_in + (dq_margin + + dqs_margin) / + (RW_MGR_MEM_IF_READ_DQS_WIDTH / RW_MGR_MEM_IF_WRITE_DQS_WIDTH)); + TCLRPT_SET(debug_cal_report->cal_status_per_group[curr_shadow_reg][write_group]. 
+ fom_in, + debug_cal_report->cal_status_per_group[curr_shadow_reg][write_group]. + fom_in + (dq_margin + + dqs_margin) / (RW_MGR_MEM_IF_READ_DQS_WIDTH / + RW_MGR_MEM_IF_WRITE_DQS_WIDTH)); } - pr_debug("vfifo_center: dq_margin=%d dqs_margin=%d\n", - dq_margin, dqs_margin); + TCLRPT_SET(debug_cal_report->cal_dqs_in_margins[curr_shadow_reg][read_group].dqs_margin, + dqs_margin); + TCLRPT_SET(debug_cal_report->cal_dqs_in_margins[curr_shadow_reg][read_group].dq_margin, + dq_margin); - /* Do not remove this line as it makes sure all of our decisions - have been applied */ + DPRINT(2, "vfifo_center: dq_margin=%ld dqs_margin=%ld", dq_margin, dqs_margin); + + //USER Do not remove this line as it makes sure all of our decisions have been applied IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); return (dq_margin >= 0) && (dqs_margin >= 0); } -/* - * calibrate the read valid prediction FIFO. - * - * - read valid prediction will consist of finding a good DQS enable phase, - * DQS enable delay, DQS input phase, and DQS input delay. - * - we also do a per-bit deskew on the DQ lines. - */ +#else + +static uint32_t rw_mgr_mem_calibrate_vfifo_center(uint32_t rank_bgn, uint32_t grp, + uint32_t test_bgn, uint32_t use_read_test) +{ + uint32_t i, p, d; + uint32_t mid; + t_btfld bit_chk; + uint32_t max_working_dq[RW_MGR_MEM_DQ_PER_READ_DQS]; + uint32_t dq_margin, dqs_margin; + uint32_t start_dqs; + + TRACE_FUNC("%lu %lu", grp, test_bgn); + + //USER per-bit deskew. + //USER start of the per-bit sweep with the minimum working delay setting for + //USER all bits. + + for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { + max_working_dq[i] = 0; + } + + for (d = 1; d <= IO_IO_IN_DELAY_MAX; d++) { + scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, d); + + IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); + + if (!rw_mgr_mem_calibrate_read_test + (rank_bgn, grp, NUM_READ_PB_TESTS, PASS_ONE_BIT, &bit_chk, 0, 0)) { + break; + } else { + for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { + if (bit_chk & 1) { + max_working_dq[i] = d; + } + bit_chk = bit_chk >> 1; + } + } + } + + //USER determine minimum working value for DQ + + dq_margin = IO_IO_IN_DELAY_MAX; + + for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) { + if (max_working_dq[i] < dq_margin) { + dq_margin = max_working_dq[i]; + } + } + + //USER add delay to bring all DQ windows to the same "level" + + for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) { + if (max_working_dq[i] > dq_margin) { + scc_mgr_set_dq_in_delay(write_group, i, max_working_dq[i] - dq_margin); + } else { + scc_mgr_set_dq_in_delay(write_group, i, 0); + } + + scc_mgr_load_dq(p, p); + } + + //USER sweep DQS window, may potentially have more window due to per-bit-deskew that was done + //USER in the previous step. 
+ + start_dqs = READ_SCC_DQS_IN_DELAY(grp); + + for (d = start_dqs + 1; d <= IO_DQS_IN_DELAY_MAX; d++) { + scc_mgr_set_dqs_bus_in_delay(grp, d); + scc_mgr_load_dqs(grp); + + IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); + + if (!rw_mgr_mem_calibrate_read_test + (rank_bgn, grp, NUM_READ_TESTS, PASS_ALL_BITS, &bit_chk, 0, 0)) { + break; + } + } + + scc_mgr_set_dqs_bus_in_delay(grp, start_dqs); + //USER margin on the DQS pin + dqs_margin = d - start_dqs - 1; -/* VFIFO Calibration -- Full Calibration */ -static uint32_t rw_mgr_mem_calibrate_vfifo (uint32_t read_group, uint32_t test_bgn) + //USER find mid point, +1 so that we don't go crazy pushing DQ + + mid = (dq_margin + dqs_margin + 1) / 2; + + gbl->fom_in += dq_margin + dqs_margin; +// TCLRPT_SET(debug_summary_report->fom_in, debug_summary_report->fom_in + (dq_margin + dqs_margin)); +// TCLRPT_SET(debug_cal_report->cal_status_per_group[grp].fom_in, (dq_margin + dqs_margin)); + + //USER center DQS ... if the headroom is setup properly we shouldn't need to + + if (dqs_margin > mid) { + scc_mgr_set_dqs_bus_in_delay(grp, READ_SCC_DQS_IN_DELAY(grp) + dqs_margin - mid); + + if (DDRX) { + uint32_t delay = READ_SCC_DQS_EN_DELAY(grp) + dqs_margin - mid; + + if (delay > IO_DQS_EN_DELAY_MAX) { + delay = IO_DQS_EN_DELAY_MAX; + } + + scc_mgr_set_dqs_en_delay(grp, delay); + } + } + + scc_mgr_load_dqs(grp); + + //USER center DQ + + if (dq_margin > mid) { + for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) { + scc_mgr_set_dq_in_delay(write_group, i, + READ_SCC_DQ_IN_DELAY(i) + dq_margin - mid); + scc_mgr_load_dq(p, p); + } + + dqs_margin += dq_margin - mid; + dq_margin -= dq_margin - mid; + } + + IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); + + return (dq_margin + dqs_margin) > 0; +} + +#endif + +//USER calibrate the read valid prediction FIFO. +//USER +//USER - read valid prediction will consist of finding a good DQS enable phase, DQS enable delay, DQS input phase, and DQS input delay. +//USER - we also do a per-bit deskew on the DQ lines. 
+ +#if NEWVERSION_GW + +//USER VFIFO Calibration -- Full Calibration +static uint32_t rw_mgr_mem_calibrate_vfifo(uint32_t read_group, uint32_t test_bgn) { - uint32_t p, d, rank_bgn; + uint32_t p, d, rank_bgn, sr; uint32_t dtaps_per_ptap; uint32_t tmp_delay; t_btfld bit_chk; uint32_t grp_calibrated; uint32_t write_group, write_test_bgn; + uint32_t failed_substage; + + TRACE_FUNC("%lu %lu", read_group, test_bgn); - /* update info for sims */ + //USER update info for sims reg_file_set_stage(CAL_STAGE_VFIFO); - write_group = read_group; - write_test_bgn = test_bgn; + if (DDRX) { + write_group = read_group; + write_test_bgn = test_bgn; + } else { + write_group = + read_group / (RW_MGR_MEM_IF_READ_DQS_WIDTH / RW_MGR_MEM_IF_WRITE_DQS_WIDTH); + write_test_bgn = read_group * RW_MGR_MEM_DQ_PER_READ_DQS; + } - /* USER Determine number of delay taps for each phase tap */ + // USER Determine number of delay taps for each phase tap dtaps_per_ptap = 0; tmp_delay = 0; - - while (tmp_delay < IO_DELAY_PER_OPA_TAP) { - dtaps_per_ptap++; - tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP; + if (!QDRII) { + while (tmp_delay < IO_DELAY_PER_OPA_TAP) { + dtaps_per_ptap++; + tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP; + } + dtaps_per_ptap--; + tmp_delay = 0; } - dtaps_per_ptap--; - tmp_delay = 0; - - /* update info for sims */ + //USER update info for sims reg_file_set_group(read_group); grp_calibrated = 0; reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ); + failed_substage = CAL_SUBSTAGE_GUARANTEED_READ; for (d = 0; d <= dtaps_per_ptap && grp_calibrated == 0; d += 2) { - /* In RLDRAMX we may be messing the delay of pins in - the same write group but outside of the current read - group, but that's ok because we haven't calibrated the - output side yet. */ - if (d > 0) { - scc_mgr_apply_group_all_out_delay_add_all_ranks - (write_group, write_test_bgn, d); + if (DDRX || RLDRAMX) { + // In RLDRAMX we may be messing the delay of pins in the same write group but outside of + // the current read group, but that's ok because we haven't calibrated the output side yet. + if (d > 0) { + scc_mgr_apply_group_all_out_delay_add_all_ranks(write_group, + write_test_bgn, d); + } } for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX && grp_calibrated == 0; p++) { - /* set a particular dqdqs phase */ - scc_mgr_set_dqdqs_output_phase_all_ranks( - read_group, p); - - /* Previous iteration may have failed as a result of - ck/dqs or ck/dk violation, in which case the device may - require special recovery. */ - if (d != 0 || p != 0) - recover_mem_device_after_ck_dqs_violation(); + //USER set a particular dqdqs phase + if (DDRX) { + scc_mgr_set_dqdqs_output_phase_all_ranks(read_group, p); + } + //USER Previous iteration may have failed as a result of ck/dqs or ck/dk violation, + //USER in which case the device may require special recovery. 
+ if (DDRX || RLDRAMX) { + if (d != 0 || p != 0) { + recover_mem_device_after_ck_dqs_violation(); + } + } - pr_debug("calibrate_vfifo: g=%u p=%u d=%u\n", - read_group, p, d); + DPRINT(1, "calibrate_vfifo: g=%lu p=%lu d=%lu", read_group, p, d); + BFM_GBL_SET(gwrite_pos[read_group].p, p); + BFM_GBL_SET(gwrite_pos[read_group].d, d); - /* Load up the patterns used by read calibration - using current DQDQS phase */ + //USER Load up the patterns used by read calibration using current DQDQS phase - rw_mgr_mem_calibrate_read_load_patterns_all_ranks (); + rw_mgr_mem_calibrate_read_load_patterns_all_ranks(); - if (!(gbl->phy_debug_mode_flags & - PHY_DEBUG_DISABLE_GUARANTEED_READ)) { - if (!rw_mgr_mem_calibrate_read_test_patterns_all_ranks - (read_group, 1, &bit_chk)) { - pr_debug("Guaranteed read test failed:" - " g=%u p=%u d=%u\n", - read_group, p, d); + if (!(gbl->phy_debug_mode_flags & PHY_DEBUG_DISABLE_GUARANTEED_READ)) { + if (!rw_mgr_mem_calibrate_read_test_patterns_all_ranks + (read_group, 1, &bit_chk)) { + DPRINT(1, "Guaranteed read test failed: g=%lu p=%lu d=%lu", + read_group, p, d); break; } } -/* case:56390 */ +// case:56390 grp_calibrated = 1; - - if (rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay(write_group, read_group, test_bgn)) { - /* USER Read per-bit deskew can be done on a - per shadow register basis */ - for (rank_bgn = 0; - rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; - rank_bgn += NUM_RANKS_PER_SHADOW_REG) { - /* Determine if this set of ranks - should be skipped entirely */ - /* If doing read after write - calibration, do not update FOM - now - do it then */ - if (!rw_mgr_mem_calibrate_vfifo_center(rank_bgn, write_group, read_group, test_bgn, 1, 0)) { - grp_calibrated = 0; + if (rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay + (write_group, read_group, test_bgn)) { + // USER Read per-bit deskew can be done on a per shadow register basis + for (rank_bgn = 0, sr = 0; rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; + rank_bgn += NUM_RANKS_PER_SHADOW_REG, ++sr) { + //USER Determine if this set of ranks should be skipped entirely + if (!param->skip_shadow_regs[sr]) { + + //USER Select shadow register set + select_shadow_regs_for_update(rank_bgn, read_group, + 1); + + // If doing read after write calibration, do not update FOM now - do it then + if (!rw_mgr_mem_calibrate_vfifo_center + (rank_bgn, write_group, read_group, test_bgn, 1, + 0)) { + grp_calibrated = 0; + failed_substage = CAL_SUBSTAGE_VFIFO_CENTER; + } } } } else { grp_calibrated = 0; + failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE; + } + } + } + + if (grp_calibrated == 0) { + set_failing_group_stage(write_group, CAL_STAGE_VFIFO, failed_substage); + + return 0; + } + //USER Reset the delay chains back to zero if they have moved > 1 (check for > 1 because loop will increase d even when pass in first case) + if (DDRX || RLDRAMII) { + if (d > 2) { + scc_mgr_zero_group(write_group, write_test_bgn, 1); + } + } + + return 1; +} + +#else + +//USER VFIFO Calibration -- Full Calibration +static uint32_t rw_mgr_mem_calibrate_vfifo(uint32_t g, uint32_t test_bgn) +{ + uint32_t p, rank_bgn, sr; + uint32_t grp_calibrated; + uint32_t failed_substage; + + TRACE_FUNC("%lu %lu", g, test_bgn); + + //USER update info for sims + + reg_file_set_stage(CAL_STAGE_VFIFO); + + reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ); + + failed_substage = CAL_SUBSTAGE_GUARANTEED_READ; + + //USER update info for sims + + reg_file_set_group(g); + + grp_calibrated = 0; + + for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX && grp_calibrated == 0; 
p++) { + //USER set a particular dqdqs phase + if (DDRX) { + scc_mgr_set_dqdqs_output_phase_all_ranks(g, p); + } + //USER Load up the patterns used by read calibration using current DQDQS phase + + rw_mgr_mem_calibrate_read_load_patterns_all_ranks(); + if (!(gbl->phy_debug_mode_flags & PHY_DEBUG_DISABLE_GUARANTEED_READ)) { + if (!rw_mgr_mem_calibrate_read_test_patterns_all_ranks + (read_group, 1, &bit_chk)) { + break; + } + } + + grp_calibrated = 1; + if (rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay(g, g, test_bgn)) { + // USER Read per-bit deskew can be done on a per shadow register basis + for (rank_bgn = 0, sr = 0; rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; + rank_bgn += NUM_RANKS_PER_SHADOW_REG, ++sr) { + + //USER Determine if this set of ranks should be skipped entirely + if (!param->skip_shadow_regs[sr]) { + + //USER Select shadow register set + select_shadow_regs_for_update(rank_bgn, read_group, 1); + + if (!rw_mgr_mem_calibrate_vfifo_center + (rank_bgn, g, test_bgn, 1)) { + grp_calibrated = 0; + failed_substage = CAL_SUBSTAGE_VFIFO_CENTER; + } + } } + } else { + grp_calibrated = 0; + failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE; } } - /* Reset the delay chains back to zero if they have moved > 1 - (check for > 1 because loop will increase d even when pass in - first case) */ - if (d > 2) - scc_mgr_zero_group(write_group, write_test_bgn, 1); + if (grp_calibrated == 0) { + set_failing_group_stage(g, CAL_STAGE_VFIFO, failed_substage); + return 0; + } return 1; } -/* VFIFO Calibration -- Read Deskew Calibration after write deskew */ -static uint32_t rw_mgr_mem_calibrate_vfifo_end (uint32_t read_group, uint32_t test_bgn) +#endif + +//USER VFIFO Calibration -- Read Deskew Calibration after write deskew +static uint32_t rw_mgr_mem_calibrate_vfifo_end(uint32_t read_group, uint32_t test_bgn) { - uint32_t rank_bgn; + uint32_t rank_bgn, sr; uint32_t grp_calibrated; uint32_t write_group; - /* update info for sims */ + TRACE_FUNC("%lu %lu", read_group, test_bgn); + + //USER update info for sims reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES); reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER); - write_group = read_group; + if (DDRX) { + write_group = read_group; + } else { + write_group = + read_group / (RW_MGR_MEM_IF_READ_DQS_WIDTH / RW_MGR_MEM_IF_WRITE_DQS_WIDTH); + } - /* update info for sims */ + //USER update info for sims reg_file_set_group(read_group); grp_calibrated = 1; + // USER Read per-bit deskew can be done on a per shadow register basis + for (rank_bgn = 0, sr = 0; rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; + rank_bgn += NUM_RANKS_PER_SHADOW_REG, ++sr) { - /* Read per-bit deskew can be done on a per shadow register basis */ - for (rank_bgn = 0; rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; - rank_bgn += NUM_RANKS_PER_SHADOW_REG) { + //USER Determine if this set of ranks should be skipped entirely + if (!param->skip_shadow_regs[sr]) { - /* This is the last calibration round, update FOM here */ - if (!rw_mgr_mem_calibrate_vfifo_center (rank_bgn, - write_group, read_group, test_bgn, 0, 1)) + //USER Select shadow register set + select_shadow_regs_for_update(rank_bgn, read_group, 1); + + // This is the last calibration round, update FOM here + if (!rw_mgr_mem_calibrate_vfifo_center + (rank_bgn, write_group, read_group, test_bgn, 0, 1)) { grp_calibrated = 0; + } + } } - if (grp_calibrated == 0) + if (grp_calibrated == 0) { + set_failing_group_stage(write_group, CAL_STAGE_VFIFO_AFTER_WRITES, + CAL_SUBSTAGE_VFIFO_CENTER); return 0; + } return 1; } +//USER Calibrate LFIFO to find 
smallest read latency -/* Calibrate LFIFO to find smallest read latency */ - -static uint32_t rw_mgr_mem_calibrate_lfifo (void) +static uint32_t rw_mgr_mem_calibrate_lfifo(void) { uint32_t found_one; t_btfld bit_chk; + uint32_t g; + + TRACE_FUNC(); + BFM_STAGE("lfifo"); - /* update info for sims */ + //USER update info for sims reg_file_set_stage(CAL_STAGE_LFIFO); reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY); - /* Load up the patterns used by read calibration for all ranks */ + //USER Load up the patterns used by read calibration for all ranks - rw_mgr_mem_calibrate_read_load_patterns_all_ranks (); + rw_mgr_mem_calibrate_read_load_patterns_all_ranks(); found_one = 0; do { IOWR_32DIRECT(PHY_MGR_PHY_RLAT, 0, gbl->curr_read_lat); - pr_debug("lfifo: read_lat=%u\n", gbl->curr_read_lat); + DPRINT(2, "lfifo: read_lat=%lu", gbl->curr_read_lat); - if (!rw_mgr_mem_calibrate_read_test_all_ranks (0, - NUM_READ_TESTS, PASS_ALL_BITS, &bit_chk, 1)) { + if (!rw_mgr_mem_calibrate_read_test_all_ranks + (0, NUM_READ_TESTS, PASS_ALL_BITS, &bit_chk, 1)) { break; } found_one = 1; - /* reduce read latency and see if things are working */ - /* correctly */ + //USER reduce read latency and see if things are working + //USER correctly gbl->curr_read_lat--; } while (gbl->curr_read_lat > 0); - /* reset the fifos to get pointers to known state */ + //USER reset the fifos to get pointers to known state IOWR_32DIRECT(PHY_MGR_CMD_FIFO_RESET, 0, 0); if (found_one) { - /* add a fudge factor to the read latency that was determined */ + //USER add a fudge factor to the read latency that was determined gbl->curr_read_lat += 2; IOWR_32DIRECT(PHY_MGR_PHY_RLAT, 0, gbl->curr_read_lat); - pr_debug("lfifo: success: using read_lat=%u\n", - gbl->curr_read_lat); + + DPRINT(2, "lfifo: success: using read_lat=%lu", gbl->curr_read_lat); return 1; } else { - pr_debug("lfifo: failed at initial read_lat=%u\n", - gbl->curr_read_lat); + set_failing_group_stage(0xff, CAL_STAGE_LFIFO, CAL_SUBSTAGE_READ_LATENCY); + + for (g = 0; g < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; g++) { + TCLRPT_SET(debug_cal_report->cal_status_per_group[curr_shadow_reg][g]. + error_stage, CAL_STAGE_LFIFO); + TCLRPT_SET(debug_cal_report->cal_status_per_group[curr_shadow_reg][g]. + error_sub_stage, CAL_SUBSTAGE_READ_LATENCY); + } + + DPRINT(2, "lfifo: failed at initial read_lat=%lu", gbl->curr_read_lat); return 0; } } -/* - * issue write test command. - * two variants are provided. one that just tests a write pattern and - * another that tests datamask functionality. - */ +//USER issue write test command. +//USER two variants are provided. one that just tests a write pattern and another that +//USER tests datamask functionality. -static void rw_mgr_mem_calibrate_write_test_issue (uint32_t group, uint32_t test_dm) +static void rw_mgr_mem_calibrate_write_test_issue(uint32_t group, uint32_t test_dm) { uint32_t mcc_instruction; uint32_t quick_write_mode = (((STATIC_CALIB_STEPS) & CALIB_SKIP_WRITES) - && ENABLE_SUPER_QUICK_CALIBRATION) || BFM_MODE; + && ENABLE_SUPER_QUICK_CALIBRATION) || BFM_MODE; uint32_t rw_wl_nop_cycles; - /* - * Set counter and jump addresses for the right - * number of NOP cycles. - * The number of supported NOP cycles can range from -1 to infinity - * Three different cases are handled: - * - * 1. For a number of NOP cycles greater than 0, the RW Mgr looping - * mechanism will be used to insert the right number of NOPs - * - * 2. 
For a number of NOP cycles equals to 0, the micro-instruction - * issuing the write command will jump straight to the - * micro-instruction that turns on DQS (for DDRx), or outputs write - * data (for RLD), skipping - * the NOP micro-instruction all together - * - * 3. A number of NOP cycles equal to -1 indicates that DQS must be - * turned on in the same micro-instruction that issues the write - * command. Then we need - * to directly jump to the micro-instruction that sends out the data - * - * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters - * (2 and 3). One jump-counter (0) is used to perform multiple - * write-read operations. - * one counter left to issue this command in "multiple-group" mode - */ - -#if MULTIPLE_AFI_WLAT - rw_wl_nop_cycles = gbl->rw_wl_nop_cycles_per_group[group]; -#else + //USER Set counter and jump addresses for the right + //USER number of NOP cycles. + //USER The number of supported NOP cycles can range from -1 to infinity + //USER Three different cases are handled: + //USER + //USER 1. For a number of NOP cycles greater than 0, the RW Mgr looping + //USER mechanism will be used to insert the right number of NOPs + //USER + //USER 2. For a number of NOP cycles equals to 0, the micro-instruction + //USER issuing the write command will jump straight to the micro-instruction + //USER that turns on DQS (for DDRx), or outputs write data (for RLD), skipping + //USER the NOP micro-instruction all together + //USER + //USER 3. A number of NOP cycles equal to -1 indicates that DQS must be turned + //USER on in the same micro-instruction that issues the write command. Then we need + //USER to directly jump to the micro-instruction that sends out the data + //USER + //USER NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters (2 and 3). One + //USER jump-counter (0) is used to perform multiple write-read operations. + //USER one counter left to issue this command in "multiple-group" mode. + rw_wl_nop_cycles = gbl->rw_wl_nop_cycles; -#endif if (rw_wl_nop_cycles == -1) { - /* CNTR 2 - We want to execute the special write operation that - turns on DQS right away and then skip directly to the - instruction that sends out the data. We set the counter to a - large number so that the jump is always taken */ + //USER CNTR 2 - We want to execute the special write operation that + //USER turns on DQS right away and then skip directly to the instruction that + //USER sends out the data. We set the counter to a large number so that the + //USER jump is always taken IOWR_32DIRECT(RW_MGR_LOAD_CNTR_2, 0, 0xFF); - /* CNTR 3 - Not used */ + //USER CNTR 3 - Not used if (test_dm) { mcc_instruction = __RW_MGR_LFSR_WR_RD_DM_BANK_0_WL_1; IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_2, 0, - __RW_MGR_LFSR_WR_RD_DM_BANK_0_DATA); - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_3, 0, - __RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP); + __RW_MGR_LFSR_WR_RD_DM_BANK_0_DATA); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_3, 0, __RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP); } else { mcc_instruction = __RW_MGR_LFSR_WR_RD_BANK_0_WL_1; - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_2, 0, - __RW_MGR_LFSR_WR_RD_BANK_0_DATA); - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_3, 0, - __RW_MGR_LFSR_WR_RD_BANK_0_NOP); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_2, 0, __RW_MGR_LFSR_WR_RD_BANK_0_DATA); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_3, 0, __RW_MGR_LFSR_WR_RD_BANK_0_NOP); } + } else if (rw_wl_nop_cycles == 0) { - /* CNTR 2 - We want to skip the NOP operation and go straight to - the DQS enable instruction. 
We set the counter to a large number - so that the jump is always taken */ + //USER CNTR 2 - We want to skip the NOP operation and go straight to + //USER the DQS enable instruction. We set the counter to a large number so that the + //USER jump is always taken IOWR_32DIRECT(RW_MGR_LOAD_CNTR_2, 0, 0xFF); - /* CNTR 3 - Not used */ + //USER CNTR 3 - Not used if (test_dm) { mcc_instruction = __RW_MGR_LFSR_WR_RD_DM_BANK_0; - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_2, 0, - __RW_MGR_LFSR_WR_RD_DM_BANK_0_DQS); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_2, 0, __RW_MGR_LFSR_WR_RD_DM_BANK_0_DQS); } else { mcc_instruction = __RW_MGR_LFSR_WR_RD_BANK_0; - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_2, 0, - __RW_MGR_LFSR_WR_RD_BANK_0_DQS); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_2, 0, __RW_MGR_LFSR_WR_RD_BANK_0_DQS); } + } else { - /* CNTR 2 - In this case we want to execute the next instruction - and NOT take the jump. So we set the counter to 0. The jump - address doesn't count */ + //USER CNTR 2 - In this case we want to execute the next instruction and NOT + //USER take the jump. So we set the counter to 0. The jump address doesn't count IOWR_32DIRECT(RW_MGR_LOAD_CNTR_2, 0, 0x0); IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_2, 0, 0x0); - /* CNTR 3 - Set the nop counter to the number of cycles we - need to loop for, minus 1 */ + //USER CNTR 3 - Set the nop counter to the number of cycles we need to loop for, minus 1 IOWR_32DIRECT(RW_MGR_LOAD_CNTR_3, 0, rw_wl_nop_cycles - 1); if (test_dm) { mcc_instruction = __RW_MGR_LFSR_WR_RD_DM_BANK_0; - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_3, 0, - __RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_3, 0, __RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP); } else { mcc_instruction = __RW_MGR_LFSR_WR_RD_BANK_0; - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_3, 0, - __RW_MGR_LFSR_WR_RD_BANK_0_NOP); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_3, 0, __RW_MGR_LFSR_WR_RD_BANK_0_NOP); } } @@ -3054,172 +3719,180 @@ static void rw_mgr_mem_calibrate_write_test_issue (uint32_t group, uint32_t test } IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_0, 0, mcc_instruction); - /* CNTR 1 - This is used to ensure enough time elapses - for read data to come back. */ + //USER CNTR 1 - This is used to ensure enough time elapses for read data to come back. IOWR_32DIRECT(RW_MGR_LOAD_CNTR_1, 0, 0x30); if (test_dm) { - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_1, 0, - __RW_MGR_LFSR_WR_RD_DM_BANK_0_WAIT); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_1, 0, __RW_MGR_LFSR_WR_RD_DM_BANK_0_WAIT); } else { - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_1, 0, - __RW_MGR_LFSR_WR_RD_BANK_0_WAIT); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_1, 0, __RW_MGR_LFSR_WR_RD_BANK_0_WAIT); } IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, (group << 2), mcc_instruction); } -/* Test writes, can check for a single bit pass or multiple bit pass */ +//USER Test writes, can check for a single bit pass or multiple bit pass -static uint32_t rw_mgr_mem_calibrate_write_test (uint32_t rank_bgn, - uint32_t write_group, uint32_t use_dm, uint32_t all_correct, - t_btfld *bit_chk, uint32_t all_ranks) +static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn, uint32_t write_group, + uint32_t use_dm, uint32_t all_correct, + t_btfld * bit_chk, uint32_t all_ranks) { uint32_t r; t_btfld correct_mask_vg; t_btfld tmp_bit_chk; uint32_t vg; - uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS : - (rank_bgn + NUM_RANKS_PER_SHADOW_REG); + uint32_t rank_end = + all_ranks ? 
RW_MGR_MEM_NUMBER_OF_RANKS : (rank_bgn + NUM_RANKS_PER_SHADOW_REG); *bit_chk = param->write_correct_mask; correct_mask_vg = param->write_correct_mask_vg; for (r = rank_bgn; r < rank_end; r++) { - /* set rank */ + if (param->skip_ranks[r]) { + //USER request to skip the rank + + continue; + } + //USER set rank set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE); tmp_bit_chk = 0; - for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS - 1; ; vg--) { + for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS - 1;; vg--) { - /* reset the fifos to get pointers to known state */ + //USER reset the fifos to get pointers to known state IOWR_32DIRECT(PHY_MGR_CMD_FIFO_RESET, 0, 0); - tmp_bit_chk = tmp_bit_chk << - (RW_MGR_MEM_DQ_PER_WRITE_DQS / - RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS); - rw_mgr_mem_calibrate_write_test_issue (write_group * - RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS + vg, - use_dm); - - tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & - ~(IORD_32DIRECT(BASE_RW_MGR, 0))); - pr_debug("write_test(%u,%u,%u) :[%u,%u] " - BTFLD_FMT " & ~%x => " BTFLD_FMT " => " - BTFLD_FMT, write_group, use_dm, all_correct, - r, vg, correct_mask_vg, - IORD_32DIRECT(BASE_RW_MGR, 0), correct_mask_vg - & ~IORD_32DIRECT(BASE_RW_MGR, 0), - tmp_bit_chk); - - if (vg == 0) + tmp_bit_chk = + tmp_bit_chk << (RW_MGR_MEM_DQ_PER_WRITE_DQS / + RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS); + rw_mgr_mem_calibrate_write_test_issue(write_group * + RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS + + vg, use_dm); + + tmp_bit_chk = + tmp_bit_chk | (correct_mask_vg & ~(IORD_32DIRECT(BASE_RW_MGR, 0))); + DPRINT(2, + "write_test(%lu,%lu,%lu) :[%lu,%lu] " BTFLD_FMT " & ~%x => " + BTFLD_FMT " => " BTFLD_FMT, write_group, use_dm, all_correct, r, vg, + correct_mask_vg, IORD_32DIRECT(BASE_RW_MGR, 0), + correct_mask_vg & ~IORD_32DIRECT(BASE_RW_MGR, 0), tmp_bit_chk); + + if (vg == 0) { break; + } } *bit_chk &= tmp_bit_chk; } - set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); - - if (all_correct) + if (all_correct) { + set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); + DPRINT(2, "write_test(%lu,%lu,ALL) : " BTFLD_FMT " == " BTFLD_FMT " => %lu", + write_group, use_dm, *bit_chk, param->write_correct_mask, + (long unsigned int)(*bit_chk == param->write_correct_mask)); return (*bit_chk == param->write_correct_mask); - else + } else { + set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF); + DPRINT(2, "write_test(%lu,%lu,ONE) : " BTFLD_FMT " != " BTFLD_FMT " => %lu", + write_group, use_dm, *bit_chk, (long unsigned int)0, + (long unsigned int)(*bit_chk != 0)); return (*bit_chk != 0x00); + } } -static uint32_t rw_mgr_mem_calibrate_write_test_all_ranks -(uint32_t write_group, uint32_t use_dm, uint32_t all_correct, t_btfld *bit_chk) +static inline uint32_t rw_mgr_mem_calibrate_write_test_all_ranks(uint32_t write_group, + uint32_t use_dm, + uint32_t all_correct, + t_btfld * bit_chk) { - return rw_mgr_mem_calibrate_write_test (0, write_group, - use_dm, all_correct, bit_chk, 1); + return rw_mgr_mem_calibrate_write_test(0, write_group, use_dm, all_correct, bit_chk, 1); } -/* level the write operations */ -/* Write Levelling -- Full Calibration */ -static uint32_t rw_mgr_mem_calibrate_wlevel (uint32_t g, uint32_t test_bgn) +//USER level the write operations + +#if NEWVERSION_WL + +//USER Write Levelling -- Full Calibration +static uint32_t rw_mgr_mem_calibrate_wlevel(uint32_t g, uint32_t test_bgn) { - uint32_t p, d; + uint32_t p, d, sr; + uint32_t num_additional_fr_cycles = 0; + t_btfld bit_chk; uint32_t work_bgn, work_end, work_mid; uint32_t tmp_delay; uint32_t found_begin; uint32_t 
dtaps_per_ptap; - /* update info for sims */ + TRACE_FUNC("%lu %lu", g, test_bgn); + BFM_STAGE("wlevel"); + + //USER update info for sims reg_file_set_stage(CAL_STAGE_WLEVEL); reg_file_set_sub_stage(CAL_SUBSTAGE_WORKING_DELAY); - /* maximum phases for the sweep */ + //USER maximum phases for the sweep -#if USE_DQS_TRACKING dtaps_per_ptap = IORD_32DIRECT(REG_FILE_DTAPS_PER_PTAP, 0); -#else - dtaps_per_ptap = 0; - tmp_delay = 0; - while (tmp_delay < IO_DELAY_PER_OPA_TAP) { - dtaps_per_ptap++; - tmp_delay += IO_DELAY_PER_DCHAIN_TAP; - } - dtaps_per_ptap--; -#endif - /* starting phases */ + //USER starting phases - /* update info for sims */ + //USER update info for sims reg_file_set_group(g); - /* starting and end range where writes work */ + //USER starting and end range where writes work - scc_mgr_spread_out2_delay_all_ranks (g, test_bgn); + scc_mgr_spread_out2_delay_all_ranks(g, test_bgn); work_bgn = 0; work_end = 0; - /* step 1: find first working phase, increment in ptaps, and then in - dtaps if ptaps doesn't find a working phase */ + //USER step 1: find first working phase, increment in ptaps, and then in dtaps if ptaps doesn't find a working phase found_begin = 0; tmp_delay = 0; - for (d = 0; d <= dtaps_per_ptap; d++, tmp_delay += - IO_DELAY_PER_DCHAIN_TAP) { - scc_mgr_apply_group_all_out_delay_all_ranks (g, test_bgn, d); + for (d = 0; d <= dtaps_per_ptap; d++, tmp_delay += IO_DELAY_PER_DCHAIN_TAP) { + scc_mgr_apply_group_all_out_delay_all_ranks(g, test_bgn, d); work_bgn = tmp_delay; - for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX + - num_additional_fr_cycles*IO_DLL_CHAIN_LENGTH; - p++, work_bgn += IO_DELAY_PER_OPA_TAP) { - pr_debug("wlevel: begin-1: p=%u d=%u\n", p, d); + for (p = 0; + p <= IO_DQDQS_OUT_PHASE_MAX + num_additional_fr_cycles * IO_DLL_CHAIN_LENGTH; + p++, work_bgn += IO_DELAY_PER_OPA_TAP) { + DPRINT(2, "wlevel: begin-1: p=%lu d=%lu", p, d); scc_mgr_set_dqdqs_output_phase_all_ranks(g, p); - if (rw_mgr_mem_calibrate_write_test_all_ranks (g, 0, - PASS_ONE_BIT, &bit_chk)) { + if (rw_mgr_mem_calibrate_write_test_all_ranks(g, 0, PASS_ONE_BIT, &bit_chk)) { found_begin = 1; break; } } - if (found_begin) + if (found_begin) { break; + } } - if (p > IO_DQDQS_OUT_PHASE_MAX + num_additional_fr_cycles * IO_DLL_CHAIN_LENGTH) - /* fail, cannot find first working phase */ + if (p > IO_DQDQS_OUT_PHASE_MAX + num_additional_fr_cycles * IO_DLL_CHAIN_LENGTH) { + //USER fail, cannot find first working phase + + set_failing_group_stage(g, CAL_STAGE_WLEVEL, CAL_SUBSTAGE_WORKING_DELAY); + return 0; + } - pr_debug("wlevel: first valid p=%u d=%u\n", p, d); + DPRINT(2, "wlevel: first valid p=%lu d=%lu", p, d); reg_file_set_sub_stage(CAL_SUBSTAGE_LAST_WORKING_DELAY); - /* If d is 0 then the working window covers a phase tap and we can - follow the old procedure otherwise, we've found the beginning, and we - need to increment the dtaps until we find the end */ + //USER If d is 0 then the working window covers a phase tap and we can follow the old procedure + //USER otherwise, we've found the beginning, and we need to increment the dtaps until we find the end if (d == 0) { + COV(WLEVEL_PHASE_PTAP_OVERLAP); work_end = work_bgn + IO_DELAY_PER_OPA_TAP; - /* step 2: if we have room, back off by one and increment - in dtaps */ + //USER step 2: if we have room, back off by one and increment in dtaps if (p > 0) { int found = 0; @@ -3227,46 +3900,63 @@ static uint32_t rw_mgr_mem_calibrate_wlevel (uint32_t g, uint32_t test_bgn) tmp_delay = work_bgn - IO_DELAY_PER_OPA_TAP; - for (d = 0; d <= IO_IO_OUT1_DELAY_MAX && - 
tmp_delay < work_bgn; d++, - tmp_delay += IO_DELAY_PER_DCHAIN_TAP) { - pr_debug("wlevel: begin-2: p=%u d=%u\n", - (p - 1), d); + for (d = 0; d <= IO_IO_OUT1_DELAY_MAX && tmp_delay < work_bgn; + d++, tmp_delay += IO_DELAY_PER_DCHAIN_TAP) { + DPRINT(2, "wlevel: begin-2: p=%lu d=%lu", (p - 1), d); scc_mgr_apply_group_all_out_delay_all_ranks(g, test_bgn, d); - if (rw_mgr_mem_calibrate_write_test_all_ranks(g, 0, PASS_ONE_BIT, &bit_chk)) { + if (rw_mgr_mem_calibrate_write_test_all_ranks + (g, 0, PASS_ONE_BIT, &bit_chk)) { found = 1; work_bgn = tmp_delay; break; } } - scc_mgr_apply_group_all_out_delay_all_ranks (g, - test_bgn, 0); + { + uint32_t d2; + uint32_t p2; + if (found) { + d2 = d; + p2 = p - 1; + } else { + d2 = 0; + p2 = p; + } + + DPRINT(2, "wlevel: found begin-A: p=%lu d=%lu ps=%lu", p2, d2, + work_bgn); + + BFM_GBL_SET(dqs_wlevel_left_edge[g].p, p2); + BFM_GBL_SET(dqs_wlevel_left_edge[g].d, d2); + BFM_GBL_SET(dqs_wlevel_left_edge[g].ps, work_bgn); + } + + scc_mgr_apply_group_all_out_delay_all_ranks(g, test_bgn, 0); } else { - pr_debug("wlevel: found begin-B: p=%u d=%u ps=%u\n", - p, d, work_bgn); + DPRINT(2, "wlevel: found begin-B: p=%lu d=%lu ps=%lu", p, d, work_bgn); + + BFM_GBL_SET(dqs_wlevel_left_edge[g].p, p); + BFM_GBL_SET(dqs_wlevel_left_edge[g].d, d); + BFM_GBL_SET(dqs_wlevel_left_edge[g].ps, work_bgn); } - /* step 3: go forward from working phase to non working phase, - increment in ptaps */ + //USER step 3: go forward from working phase to non working phase, increment in ptaps - for (p = p + 1; p <= IO_DQDQS_OUT_PHASE_MAX + - num_additional_fr_cycles * IO_DLL_CHAIN_LENGTH; p++, - work_end += IO_DELAY_PER_OPA_TAP) { - pr_debug("wlevel: end-0: p=%u d=%u\n", p, - 0); + for (p = p + 1; + p <= IO_DQDQS_OUT_PHASE_MAX + num_additional_fr_cycles * IO_DLL_CHAIN_LENGTH; + p++, work_end += IO_DELAY_PER_OPA_TAP) { + DPRINT(2, "wlevel: end-0: p=%lu d=%lu", p, (long unsigned int)0); scc_mgr_set_dqdqs_output_phase_all_ranks(g, p); - if (!rw_mgr_mem_calibrate_write_test_all_ranks (g, 0, - PASS_ONE_BIT, &bit_chk)) { + if (!rw_mgr_mem_calibrate_write_test_all_ranks + (g, 0, PASS_ONE_BIT, &bit_chk)) { break; } } - /* step 4: back off one from last, increment in dtaps */ - /* The actual increment is done outside the if/else statement - since it is shared with other code */ + //USER step 4: back off one from last, increment in dtaps + //USER The actual increment is done outside the if/else statement since it is shared with other code p = p - 1; @@ -3276,52 +3966,61 @@ static uint32_t rw_mgr_mem_calibrate_wlevel (uint32_t g, uint32_t test_bgn) d = 0; } else { - /* step 5: Window doesn't cover phase tap, just increment - dtaps until failure */ - /* The actual increment is done outside the if/else statement - since it is shared with other code */ + //USER step 5: Window doesn't cover phase tap, just increment dtaps until failure + //USER The actual increment is done outside the if/else statement since it is shared with other code + COV(WLEVEL_PHASE_PTAP_NO_OVERLAP); work_end = work_bgn; - pr_debug("wlevel: found begin-C: p=%u d=%u ps=%u\n", p, - d, work_bgn); + DPRINT(2, "wlevel: found begin-C: p=%lu d=%lu ps=%lu", p, d, work_bgn); + BFM_GBL_SET(dqs_wlevel_left_edge[g].p, p); + BFM_GBL_SET(dqs_wlevel_left_edge[g].d, d); + BFM_GBL_SET(dqs_wlevel_left_edge[g].ps, work_bgn); + } - /* The actual increment until failure */ - for (; d <= IO_IO_OUT1_DELAY_MAX; d++, work_end += - IO_DELAY_PER_DCHAIN_TAP) { - pr_debug("wlevel: end: p=%u d=%u\n", p, d); - scc_mgr_apply_group_all_out_delay_all_ranks (g, 
test_bgn, d); + //USER The actual increment until failure + for (; d <= IO_IO_OUT1_DELAY_MAX; d++, work_end += IO_DELAY_PER_DCHAIN_TAP) { + DPRINT(2, "wlevel: end: p=%lu d=%lu", p, d); + scc_mgr_apply_group_all_out_delay_all_ranks(g, test_bgn, d); - if (!rw_mgr_mem_calibrate_write_test_all_ranks (g, 0, - PASS_ONE_BIT, &bit_chk)) { + if (!rw_mgr_mem_calibrate_write_test_all_ranks(g, 0, PASS_ONE_BIT, &bit_chk)) { break; } } - scc_mgr_zero_group (g, test_bgn, 1); + scc_mgr_zero_group(g, test_bgn, 1); work_end -= IO_DELAY_PER_DCHAIN_TAP; if (work_end >= work_bgn) { - /* we have a working range */ + //USER we have a working range } else { - /* nil range */ + //USER nil range + + set_failing_group_stage(g, CAL_STAGE_WLEVEL, CAL_SUBSTAGE_LAST_WORKING_DELAY); + return 0; } - pr_debug("wlevel: found end: p=%u d=%u; range: [%u,%u]\n", p, - d-1, work_bgn, work_end); + DPRINT(2, "wlevel: found end: p=%lu d=%lu; range: [%lu,%lu]", p, d - 1, work_bgn, work_end); + BFM_GBL_SET(dqs_wlevel_right_edge[g].p, p); + BFM_GBL_SET(dqs_wlevel_right_edge[g].d, d - 1); + BFM_GBL_SET(dqs_wlevel_right_edge[g].ps, work_end); + + for (sr = 0; sr < NUM_SHADOW_REGS; sr++) { + TCLRPT_SET(debug_cal_report->cal_dqs_out_margins[sr][g].dqdqs_start, work_bgn); + TCLRPT_SET(debug_cal_report->cal_dqs_out_margins[sr][g].dqdqs_end, work_end); + } - /* center */ + //USER center work_mid = (work_bgn + work_end) / 2; - pr_debug("wlevel: work_mid=%d\n", work_mid); + DPRINT(2, "wlevel: work_mid=%ld", work_mid); tmp_delay = 0; - for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX + - num_additional_fr_cycles * IO_DLL_CHAIN_LENGTH && - tmp_delay < work_mid; p++, tmp_delay += IO_DELAY_PER_OPA_TAP) - ; + for (p = 0; + p <= IO_DQDQS_OUT_PHASE_MAX + num_additional_fr_cycles * IO_DLL_CHAIN_LENGTH + && tmp_delay < work_mid; p++, tmp_delay += IO_DELAY_PER_OPA_TAP) ; if (tmp_delay > work_mid) { tmp_delay -= IO_DELAY_PER_OPA_TAP; @@ -3335,38 +4034,169 @@ static uint32_t rw_mgr_mem_calibrate_wlevel (uint32_t g, uint32_t test_bgn) scc_mgr_set_dqdqs_output_phase_all_ranks(g, p); - pr_debug("wlevel: p=%u tmp_delay=%u left=%u\n", p, tmp_delay, - work_mid - tmp_delay); + DPRINT(2, "wlevel: p=%lu tmp_delay=%lu left=%lu", p, tmp_delay, work_mid - tmp_delay); - for (d = 0; d <= IO_IO_OUT1_DELAY_MAX && tmp_delay < work_mid; d++, - tmp_delay += IO_DELAY_PER_DCHAIN_TAP) - ; + for (d = 0; d <= IO_IO_OUT1_DELAY_MAX && tmp_delay < work_mid; + d++, tmp_delay += IO_DELAY_PER_DCHAIN_TAP) ; if (tmp_delay > work_mid) { tmp_delay -= IO_DELAY_PER_DCHAIN_TAP; d--; } - pr_debug("wlevel: p=%u d=%u tmp_delay=%u left=%u\n", p, d, - tmp_delay, work_mid - tmp_delay); + DPRINT(2, "wlevel: p=%lu d=%lu tmp_delay=%lu left=%lu", p, d, tmp_delay, + work_mid - tmp_delay); + + scc_mgr_apply_group_all_out_delay_add_all_ranks(g, test_bgn, d); + + DPRINT(2, "wlevel: found middle: p=%lu d=%lu", p, d); + BFM_GBL_SET(dqs_wlevel_mid[g].p, p); + BFM_GBL_SET(dqs_wlevel_mid[g].d, d); + BFM_GBL_SET(dqs_wlevel_mid[g].ps, work_mid); + + return 1; +} + +#else + +//USER Write Levelling -- Full Calibration +static uint32_t rw_mgr_mem_calibrate_wlevel(uint32_t g, uint32_t test_bgn) +{ + uint32_t p, d; + t_btfld bit_chk; + uint32_t work_bgn, work_end, work_mid; + uint32_t tmp_delay; + + TRACE_FUNC("%lu %lu", g, test_bgn); + + //USER update info for sims + + reg_file_set_stage(CAL_STAGE_WLEVEL); + reg_file_set_sub_stage(CAL_SUBSTAGE_WORKING_DELAY); + + //USER maximum phases for the sweep + + //USER starting phases + + //USER update info for sims + + reg_file_set_group(g); + + //USER starting and end range where 
writes work + + work_bgn = 0; + work_end = 0; + + //USER step 1: find first working phase, increment in ptaps + + for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX; p++, work_bgn += IO_DELAY_PER_OPA_TAP) { + scc_mgr_set_dqdqs_output_phase_all_ranks(g, p); + + if (rw_mgr_mem_calibrate_write_test_all_ranks(g, 0, PASS_ONE_BIT, &bit_chk)) { + break; + } + } + + if (p > IO_DQDQS_OUT_PHASE_MAX) { + //USER fail, cannot find first working phase + + set_failing_group_stage(g, CAL_STAGE_WLEVEL, CAL_SUBSTAGE_WORKING_DELAY); + + return 0; + } + + work_end = work_bgn + IO_DELAY_PER_OPA_TAP; + + reg_file_set_sub_stage(CAL_SUBSTAGE_LAST_WORKING_DELAY); + + //USER step 2: if we have room, back off by one and increment in dtaps + + if (p > 0) { + scc_mgr_set_dqdqs_output_phase_all_ranks(g, p - 1); + + tmp_delay = work_bgn - IO_DELAY_PER_OPA_TAP; + + for (d = 0; d <= IO_IO_OUT1_DELAY_MAX && tmp_delay < work_bgn; + d++, tmp_delay += IO_DELAY_PER_DCHAIN_TAP) { + scc_mgr_apply_group_all_out_delay_all_ranks(g, test_bgn, d); + + if (rw_mgr_mem_calibrate_write_test_all_ranks(g, 0, PASS_ONE_BIT, &bit_chk)) { + work_bgn = tmp_delay; + break; + } + } + + scc_mgr_apply_group_all_out_delay_all_ranks(g, test_bgn, 0); + } + //USER step 3: go forward from working phase to non working phase, increment in ptaps + + for (p = p + 1; p <= IO_DQDQS_OUT_PHASE_MAX; p++, work_end += IO_DELAY_PER_OPA_TAP) { + scc_mgr_set_dqdqs_output_phase_all_ranks(g, p); + + if (!rw_mgr_mem_calibrate_write_test_all_ranks(g, 0, PASS_ONE_BIT, &bit_chk)) { + break; + } + } + + //USER step 4: back off one from last, increment in dtaps + + scc_mgr_set_dqdqs_output_phase_all_ranks(g, p - 1); + + work_end -= IO_DELAY_PER_OPA_TAP; + + for (d = 0; d <= IO_IO_OUT1_DELAY_MAX; d++, work_end += IO_DELAY_PER_DCHAIN_TAP) { + scc_mgr_apply_group_all_out_delay_all_ranks(g, test_bgn, d); + + if (!rw_mgr_mem_calibrate_write_test_all_ranks(g, 0, PASS_ONE_BIT, &bit_chk)) { + break; + } + } + + scc_mgr_apply_group_all_out_delay_all_ranks(g, test_bgn, 0); + + if (work_end > work_bgn) { + //USER we have a working range + } else { + //USER nil range + + set_failing_group_stage(g, CAL_STAGE_WLEVEL, CAL_SUBSTAGE_LAST_WORKING_DELAY); + + return 0; + } + + //USER center - scc_mgr_apply_group_all_out_delay_add_all_ranks (g, test_bgn, d); + work_mid = (work_bgn + work_end) / 2; + + tmp_delay = 0; + + for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX && tmp_delay < work_mid; + p++, tmp_delay += IO_DELAY_PER_OPA_TAP) ; + + tmp_delay -= IO_DELAY_PER_OPA_TAP; + + scc_mgr_set_dqdqs_output_phase_all_ranks(g, p - 1); + + for (d = 0; d <= IO_IO_OUT1_DELAY_MAX && tmp_delay < work_mid; + d++, tmp_delay += IO_DELAY_PER_DCHAIN_TAP) ; - pr_debug("wlevel: found middle: p=%u d=%u\n", p, d); + scc_mgr_apply_group_all_out_delay_add_all_ranks(g, test_bgn, d - 1); return 1; } -/* center all windows. do per-bit-deskew to possibly increase size of -certain windows */ +#endif + +//USER center all windows. 
do per-bit-deskew to possibly increase size of certain windows -static uint32_t rw_mgr_mem_calibrate_writes_center (uint32_t rank_bgn, - uint32_t write_group, uint32_t test_bgn) +#if NEWVERSION_WRDESKEW + +static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn, uint32_t write_group, + uint32_t test_bgn) { uint32_t i, p, min_index; int32_t d; - /* Store these as signed since there are comparisons with - signed numbers */ + //USER Store these as signed since there are comparisons with signed numbers t_btfld bit_chk; t_btfld sticky_bit_chk; int32_t left_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS]; @@ -3376,11 +4206,9 @@ static uint32_t rw_mgr_mem_calibrate_writes_center (uint32_t rank_bgn, int32_t new_dqs, start_dqs, shift_dq; int32_t dq_margin, dqs_margin, dm_margin; uint32_t stop; - int32_t bgn_curr; - int32_t end_curr; - int32_t bgn_best; - int32_t end_best; - int32_t win_best; + + TRACE_FUNC("%lu %lu", write_group, test_bgn); + BFM_STAGE("writes_center"); ALTERA_ASSERT(write_group < RW_MGR_MEM_IF_WRITE_DQS_WIDTH); @@ -3388,120 +4216,109 @@ static uint32_t rw_mgr_mem_calibrate_writes_center (uint32_t rank_bgn, start_dqs = READ_SCC_DQS_IO_OUT1_DELAY(); - /* per-bit deskew */ + select_curr_shadow_reg_using_rank(rank_bgn); - /* set the left and right edge of each bit to an illegal value */ - /* use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value */ + //USER per-bit deskew + + //USER set the left and right edge of each bit to an illegal value + //USER use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value sticky_bit_chk = 0; for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { - left_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; + left_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; } - /* Search for the left edge of the window for each bit */ + //USER Search for the left edge of the window for each bit for (d = 0; d <= IO_IO_OUT1_DELAY_MAX; d++) { - scc_mgr_apply_group_dq_out1_delay (write_group, test_bgn, d); + scc_mgr_apply_group_dq_out1_delay(write_group, test_bgn, d); IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); - /* Stop searching when the read test doesn't pass AND when - we've seen a passing read on every bit */ - stop = !rw_mgr_mem_calibrate_write_test (rank_bgn, write_group, - 0, PASS_ONE_BIT, &bit_chk, 0); + //USER Stop searching when the read test doesn't pass AND when we've seen a passing read on every bit + stop = + !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 0, PASS_ONE_BIT, + &bit_chk, 0); sticky_bit_chk = sticky_bit_chk | bit_chk; stop = stop && (sticky_bit_chk == param->write_correct_mask); - pr_debug("write_center(left): dtap=%u => " BTFLD_FMT - " == " BTFLD_FMT " && %u [bit_chk=" BTFLD_FMT "]\n", - d, sticky_bit_chk, param->write_correct_mask, - stop, bit_chk); + DPRINT(2, + "write_center(left): dtap=%lu => " BTFLD_FMT " == " BTFLD_FMT + " && %lu [bit_chk=" BTFLD_FMT "]", d, sticky_bit_chk, + param->write_correct_mask, stop, bit_chk); if (stop == 1) { break; } else { for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { if (bit_chk & 1) { - /* Remember a passing test as the - left_edge */ + //USER Remember a passing test as the left_edge left_edge[i] = d; } else { - /* If a left edge has not been seen yet, - then a future passing test will mark - this edge as the right edge */ - if (left_edge[i] == - IO_IO_OUT1_DELAY_MAX + 1) { + //USER If a left edge has not been seen yet, then a future passing test will mark this edge as the right edge + if (left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) { right_edge[i] = -(d + 1); } } - pr_debug("write_center[l,d=%u): " - 
"bit_chk_test=%d left_edge[%u]: %d " - "right_edge[%u]: %d\n", - d, (int)(bit_chk & 1), i, left_edge[i], - i, right_edge[i]); + DPRINT(2, + "write_center[l,d=%lu): bit_chk_test=%d left_edge[%lu]: %ld right_edge[%lu]: %ld", + d, (int)(bit_chk & 1), i, left_edge[i], i, right_edge[i]); bit_chk = bit_chk >> 1; } } } - /* Reset DQ delay chains to 0 */ - scc_mgr_apply_group_dq_out1_delay (write_group, test_bgn, 0); + //USER Reset DQ delay chains to 0 + scc_mgr_apply_group_dq_out1_delay(write_group, test_bgn, 0); sticky_bit_chk = 0; for (i = RW_MGR_MEM_DQ_PER_WRITE_DQS - 1;; i--) { - pr_debug("write_center: left_edge[%u]: %d right_edge[%u]: " - "%d\n", i, left_edge[i], i, right_edge[i]); + DPRINT(2, "write_center: left_edge[%lu]: %ld right_edge[%lu]: %ld", i, left_edge[i], + i, right_edge[i]); - /* Check for cases where we haven't found the left edge, - which makes our assignment of the the right edge invalid. - Reset it to the illegal value. */ - if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) && - (right_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) { + //USER Check for cases where we haven't found the left edge, which makes our assignment of the the + //USER right edge invalid. Reset it to the illegal value. + if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) + && (right_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) { right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1; - pr_debug("write_center: reset right_edge[%u]: %d\n", - i, right_edge[i]); + DPRINT(2, "write_center: reset right_edge[%lu]: %ld", i, right_edge[i]); } - - /* Reset sticky bit (except for bits where we have - seen the left edge) */ + //USER Reset sticky bit (except for bits where we have seen the left edge) sticky_bit_chk = sticky_bit_chk << 1; - if ((left_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) + if ((left_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) { sticky_bit_chk = sticky_bit_chk | 1; + } - if (i == 0) + if (i == 0) { break; + } } - /* Search for the right edge of the window for each bit */ + //USER Search for the right edge of the window for each bit for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - start_dqs; d++) { - scc_mgr_apply_group_dqs_io_and_oct_out1 (write_group, - d + start_dqs); + scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, d + start_dqs); IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); - - /* Stop searching when the read test doesn't pass AND when - we've seen a passing read on every bit */ - stop = !rw_mgr_mem_calibrate_write_test (rank_bgn, write_group, - 0, PASS_ONE_BIT, &bit_chk, 0); + if (QDRII) { + rw_mgr_mem_dll_lock_wait(); + } + //USER Stop searching when the read test doesn't pass AND when we've seen a passing read on every bit + stop = + !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 0, PASS_ONE_BIT, + &bit_chk, 0); if (stop) { recover_mem_device_after_ck_dqs_violation(); } sticky_bit_chk = sticky_bit_chk | bit_chk; stop = stop && (sticky_bit_chk == param->write_correct_mask); - pr_debug("write_center (right): dtap=%u => " BTFLD_FMT " == " - BTFLD_FMT " && %u\n", d, sticky_bit_chk, - param->write_correct_mask, stop); + DPRINT(2, "write_center (right): dtap=%lu => " BTFLD_FMT " == " BTFLD_FMT " && %lu", + d, sticky_bit_chk, param->write_correct_mask, stop); if (stop == 1) { if (d == 0) { - for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; - i++) { - /* d = 0 failed, but it passed when - testing the left edge, so it must be - marginal, set it to -1 */ - if (right_edge[i] == - IO_IO_OUT1_DELAY_MAX + 1 && - left_edge[i] != - IO_IO_OUT1_DELAY_MAX + 1) { + for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { + //USER d = 0 failed, but it passed when 
testing the left edge, so it must be marginal, set it to -1 + if (right_edge[i] == IO_IO_OUT1_DELAY_MAX + 1 + && left_edge[i] != IO_IO_OUT1_DELAY_MAX + 1) { right_edge[i] = -1; } } @@ -3510,63 +4327,49 @@ static uint32_t rw_mgr_mem_calibrate_writes_center (uint32_t rank_bgn, } else { for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { if (bit_chk & 1) { - /* Remember a passing test as - the right_edge */ + //USER Remember a passing test as the right_edge right_edge[i] = d; } else { if (d != 0) { - /* If a right edge has not - been seen yet, then a future - passing test will mark this - edge as the left edge */ - if (right_edge[i] == - IO_IO_OUT1_DELAY_MAX - + 1) { + //USER If a right edge has not been seen yet, then a future passing test will mark this edge as the left edge + if (right_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) { left_edge[i] = -(d + 1); } } else { - /* d = 0 failed, but it passed - when testing the left edge, so - it must be marginal, set it - to -1 */ - if (right_edge[i] == - IO_IO_OUT1_DELAY_MAX + - 1 && left_edge[i] != - IO_IO_OUT1_DELAY_MAX + - 1) { + //USER d = 0 failed, but it passed when testing the left edge, so it must be marginal, set it to -1 + if (right_edge[i] == IO_IO_OUT1_DELAY_MAX + 1 + && left_edge[i] != IO_IO_OUT1_DELAY_MAX + 1) { right_edge[i] = -1; } - /* If a right edge has not been - seen yet, then a future passing - test will mark this edge as the - left edge */ - else if (right_edge[i] == - IO_IO_OUT1_DELAY_MAX + - 1) { + //USER If a right edge has not been seen yet, then a future passing test will mark this edge as the left edge + else if (right_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) { left_edge[i] = -(d + 1); } } } - pr_debug("write_center[r,d=%u): " - "bit_chk_test=%d left_edge[%u]: %d " - "right_edge[%u]: %d\n", - d, (int)(bit_chk & 1), i, left_edge[i], - i, right_edge[i]); + DPRINT(2, + "write_center[r,d=%lu): bit_chk_test=%d left_edge[%lu]: %ld right_edge[%lu]: %ld", + d, (int)(bit_chk & 1), i, left_edge[i], i, right_edge[i]); bit_chk = bit_chk >> 1; } } } - /* Check that all bits have a window */ + //USER Check that all bits have a window for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { - pr_debug("write_center: left_edge[%u]: %d right_edge[%u]: " - "%d\n", i, left_edge[i], i, right_edge[i]); - if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) || - (right_edge[i] == IO_IO_OUT1_DELAY_MAX + 1)) + DPRINT(2, "write_center: left_edge[%lu]: %ld right_edge[%lu]: %ld", i, left_edge[i], + i, right_edge[i]); + BFM_GBL_SET(dq_write_left_edge[write_group][i], left_edge[i]); + BFM_GBL_SET(dq_write_right_edge[write_group][i], right_edge[i]); + if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) + || (right_edge[i] == IO_IO_OUT1_DELAY_MAX + 1)) { + set_failing_group_stage(test_bgn + i, CAL_STAGE_WRITES, + CAL_SUBSTAGE_WRITES_CENTER); return 0; + } } - /* Find middle of window for each DQ bit */ + //USER Find middle of window for each DQ bit mid_min = left_edge[0] - right_edge[0]; min_index = 0; for (i = 1; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { @@ -3577,369 +4380,686 @@ static uint32_t rw_mgr_mem_calibrate_writes_center (uint32_t rank_bgn, } } - /* -mid_min/2 represents the amount that we need to move DQS. - If mid_min is odd and positive we'll need to add one to - make sure the rounding in further calculations is correct - (always bias to the right), so just add 1 for all positive values */ - if (mid_min > 0) - mid_min++; - - mid_min = mid_min / 2; + //USER -mid_min/2 represents the amount that we need to move DQS. 
If mid_min is odd and positive we'll need to add one to + //USER make sure the rounding in further calculations is correct (always bias to the right), so just add 1 for all positive values + if (mid_min > 0) { + mid_min++; + } + mid_min = mid_min / 2; + + DPRINT(1, "write_center: mid_min=%ld", mid_min); + + //USER Determine the amount we can change DQS (which is -mid_min) + orig_mid_min = mid_min; + new_dqs = start_dqs; + mid_min = 0; + + DPRINT(1, "write_center: start_dqs=%ld new_dqs=%ld mid_min=%ld", start_dqs, new_dqs, + mid_min); + + //USER Initialize data for export structures + dqs_margin = IO_IO_OUT1_DELAY_MAX + 1; + dq_margin = IO_IO_OUT1_DELAY_MAX + 1; + + //USER add delay to bring centre of all DQ windows to the same "level" + for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) { + //USER Use values before divide by 2 to reduce round off error + shift_dq = + (left_edge[i] - right_edge[i] - + (left_edge[min_index] - right_edge[min_index])) / 2 + (orig_mid_min - mid_min); + + DPRINT(2, "write_center: before: shift_dq[%lu]=%ld", i, shift_dq); + + if (shift_dq + (int32_t) READ_SCC_DQ_OUT1_DELAY(i) > (int32_t) IO_IO_OUT1_DELAY_MAX) { + shift_dq = (int32_t) IO_IO_OUT1_DELAY_MAX - READ_SCC_DQ_OUT1_DELAY(i); + } else if (shift_dq + (int32_t) READ_SCC_DQ_OUT1_DELAY(i) < 0) { + shift_dq = -(int32_t) READ_SCC_DQ_OUT1_DELAY(i); + } + DPRINT(2, "write_center: after: shift_dq[%lu]=%ld", i, shift_dq); + scc_mgr_set_dq_out1_delay(write_group, i, READ_SCC_DQ_OUT1_DELAY(i) + shift_dq); + scc_mgr_load_dq(i); + + DPRINT(2, "write_center: margin[%lu]=[%ld,%ld]", i, + left_edge[i] - shift_dq + (-mid_min), right_edge[i] + shift_dq - (-mid_min)); + //USER To determine values for export structures + if (left_edge[i] - shift_dq + (-mid_min) < dq_margin) { + dq_margin = left_edge[i] - shift_dq + (-mid_min); + } + if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin) { + dqs_margin = right_edge[i] + shift_dq - (-mid_min); + } + } + + //USER Move DQS + if (QDRII) { + scc_mgr_set_group_dqs_io_and_oct_out1_gradual(write_group, new_dqs); + } else { + scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs); + IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); + } + + ////////////////////// + ////////////////////// + //USER Centre DM + ////////////////////// + ////////////////////// + + BFM_STAGE("dm_center"); + + DPRINT(2, "write_center: DM"); + + //USER set the left and right edge of each bit to an illegal value + //USER use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value + left_edge[0] = IO_IO_OUT1_DELAY_MAX + 1; + right_edge[0] = IO_IO_OUT1_DELAY_MAX + 1; + int32_t bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; + int32_t end_curr = IO_IO_OUT1_DELAY_MAX + 1; + int32_t bgn_best = IO_IO_OUT1_DELAY_MAX + 1; + int32_t end_best = IO_IO_OUT1_DELAY_MAX + 1; + int32_t win_best = 0; + + //USER Search for the/part of the window with DM shift + for (d = IO_IO_OUT1_DELAY_MAX; d >= 0; d -= DELTA_D) { + scc_mgr_apply_group_dm_out1_delay(write_group, d); + IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); + + if (rw_mgr_mem_calibrate_write_test + (rank_bgn, write_group, 1, PASS_ALL_BITS, &bit_chk, 0)) { + + //USE Set current end of the window + end_curr = -d; + //USER If a starting edge of our window has not been seen this is our current start of the DM window + if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1) { + bgn_curr = -d; + } + //USER If current window is bigger than best seen. 
Set best seen to be current window + if ((end_curr - bgn_curr + 1) > win_best) { + win_best = end_curr - bgn_curr + 1; + bgn_best = bgn_curr; + end_best = end_curr; + } + } else { + //USER We just saw a failing test. Reset temp edge + bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; + end_curr = IO_IO_OUT1_DELAY_MAX + 1; + } + + } + + //USER Reset DM delay chains to 0 + scc_mgr_apply_group_dm_out1_delay(write_group, 0); + + //USER Check to see if the current window nudges up aganist 0 delay. If so we need to continue the search by shifting DQS otherwise DQS search begins as a new search + if (end_curr != 0) { + bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; + end_curr = IO_IO_OUT1_DELAY_MAX + 1; + } + //USER Search for the/part of the window with DQS shifts + for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - new_dqs; d += DELTA_D) { + // Note: This only shifts DQS, so are we limiting ourselve to + // width of DQ unnecessarily + scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, d + new_dqs); + + IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); + + if (rw_mgr_mem_calibrate_write_test + (rank_bgn, write_group, 1, PASS_ALL_BITS, &bit_chk, 0)) { + + //USE Set current end of the window + end_curr = d; + //USER If a beginning edge of our window has not been seen this is our current begin of the DM window + if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1) { + bgn_curr = d; + } + //USER If current window is bigger than best seen. Set best seen to be current window + if ((end_curr - bgn_curr + 1) > win_best) { + win_best = end_curr - bgn_curr + 1; + bgn_best = bgn_curr; + end_best = end_curr; + } + } else { + //USER We just saw a failing test. Reset temp edge + recover_mem_device_after_ck_dqs_violation(); + bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; + end_curr = IO_IO_OUT1_DELAY_MAX + 1; + + //USER Early exit optimization: if ther remaining delay chain space is less than already seen largest window we can exit + if ((win_best - 1) > (IO_IO_OUT1_DELAY_MAX - new_dqs - d)) { + break; + } + + } + } + + //USER assign left and right edge for cal and reporting; + left_edge[0] = -1 * bgn_best; + right_edge[0] = end_best; + + DPRINT(2, "dm_calib: left=%ld right=%ld", left_edge[0], right_edge[0]); + BFM_GBL_SET(dm_left_edge[write_group][0], left_edge[0]); + BFM_GBL_SET(dm_right_edge[write_group][0], right_edge[0]); + + //USER Move DQS (back to orig) + scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, new_dqs); + + //USER Move DM + + //USER Find middle of window for the DM bit + mid = (left_edge[0] - right_edge[0]) / 2; + + //USER only move right, since we are not moving DQS/DQ + if (mid < 0) { + mid = 0; + } + //dm_marign should fail if we never find a window + if (win_best == 0) { + dm_margin = -1; + } else { + dm_margin = left_edge[0] - mid; + } + + scc_mgr_apply_group_dm_out1_delay(write_group, mid); + IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); + + DPRINT(2, "dm_calib: left=%ld right=%ld mid=%ld dm_margin=%ld", + left_edge[0], right_edge[0], mid, dm_margin); + + // Store observed DM margins + for (i = 0; i < RW_MGR_NUM_TRUE_DM_PER_WRITE_GROUP; i++) { + TCLRPT_SET(debug_cal_report->cal_dm_margins[curr_shadow_reg][write_group][i]. + left_edge, left_edge[i]); + TCLRPT_SET(debug_cal_report->cal_dm_margins[curr_shadow_reg][write_group][i]. 
+ right_edge, right_edge[i]); + } + + //USER Export values + gbl->fom_out += dq_margin + dqs_margin; + + TCLRPT_SET(debug_cal_report->cal_dqs_out_margins[curr_shadow_reg][write_group].dqs_margin, + dqs_margin); + TCLRPT_SET(debug_cal_report->cal_dqs_out_margins[curr_shadow_reg][write_group].dq_margin, + dq_margin); + + TCLRPT_SET(debug_cal_report->cal_dqs_out_margins[curr_shadow_reg][write_group].dm_margin, + dm_margin); + TCLRPT_SET(debug_summary_report->fom_out, + debug_summary_report->fom_out + (dq_margin + dqs_margin)); + TCLRPT_SET(debug_cal_report->cal_status_per_group[curr_shadow_reg][write_group].fom_out, + (dq_margin + dqs_margin)); + + DPRINT(2, "write_center: dq_margin=%ld dqs_margin=%ld dm_margin=%ld", dq_margin, dqs_margin, + dm_margin); + + //USER Do not remove this line as it makes sure all of our decisions have been applied + IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); + return (dq_margin >= 0) && (dqs_margin >= 0) && (dm_margin >= 0); +} + +#else // !NEWVERSION_WRDESKEW + +static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn, uint32_t write_group, + uint32_t test_bgn) +{ + uint32_t i, p, d; + uint32_t mid; + t_btfld bit_chk, sticky_bit_chk; + uint32_t max_working_dq[RW_MGR_MEM_DQ_PER_WRITE_DQS]; + uint32_t max_working_dm[RW_MGR_MEM_DATA_MASK_WIDTH / RW_MGR_MEM_IF_WRITE_DQS_WIDTH]; + uint32_t dq_margin, dqs_margin, dm_margin; + uint32_t start_dqs; + uint32_t stop; + + TRACE_FUNC("%lu %lu", write_group, test_bgn); + + ALTERA_ASSERT(write_group < RW_MGR_MEM_IF_WRITE_DQS_WIDTH); + + //USER per-bit deskew + + for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { + max_working_dq[i] = 0; + } + + for (d = 1; d <= IO_IO_OUT1_DELAY_MAX; d++) { + scc_mgr_apply_group_dq_out1_delay(write_group, test_bgn, d); + + IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); + + if (!rw_mgr_mem_calibrate_write_test + (rank_bgn, write_group, 0, PASS_ONE_BIT, &bit_chk, 0)) { + break; + } else { + for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { + if (bit_chk & 1) { + max_working_dq[i] = d; + } + bit_chk = bit_chk >> 1; + } + } + } + + scc_mgr_apply_group_dq_out1_delay(write_group, test_bgn, 0); - pr_debug("write_center: mid_min=%d\n", mid_min); + //USER determine minimum of maximums - /* Determine the amount we can change DQS (which is -mid_min) */ - orig_mid_min = mid_min; - new_dqs = start_dqs; - mid_min = 0; + dq_margin = IO_IO_OUT1_DELAY_MAX; - pr_debug("write_center: start_dqs=%d new_dqs=%d mid_min=%d\n", start_dqs, new_dqs, mid_min); + for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) { + if (max_working_dq[i] < dq_margin) { + dq_margin = max_working_dq[i]; + } + } - /* Initialize data for export structures */ - dqs_margin = IO_IO_OUT1_DELAY_MAX + 1; - dq_margin = IO_IO_OUT1_DELAY_MAX + 1; + //USER add delay to center DQ windows - /* add delay to bring centre of all DQ windows to the same "level" */ for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) { - /* Use values before divide by 2 to reduce round off error */ - shift_dq = (left_edge[i] - right_edge[i] - - (left_edge[min_index] - right_edge[min_index]))/2 + - (orig_mid_min - mid_min); - - pr_debug("write_center: before: shift_dq[%u]=%d\n", i, - shift_dq); - - if (shift_dq + (int32_t)READ_SCC_DQ_OUT1_DELAY(i) > - (int32_t)IO_IO_OUT1_DELAY_MAX) { - shift_dq = (int32_t)IO_IO_OUT1_DELAY_MAX - - READ_SCC_DQ_OUT1_DELAY(i); - } else if (shift_dq + (int32_t)READ_SCC_DQ_OUT1_DELAY(i) < 0) { - shift_dq = -(int32_t)READ_SCC_DQ_OUT1_DELAY(i); - } - pr_debug("write_center: after: shift_dq[%u]=%d\n", - i, shift_dq); - 
scc_mgr_set_dq_out1_delay(write_group, i, - READ_SCC_DQ_OUT1_DELAY(i) + shift_dq); - scc_mgr_load_dq (i); - - pr_debug("write_center: margin[%u]=[%d,%d]\n", i, - left_edge[i] - shift_dq + (-mid_min), - right_edge[i] + shift_dq - (-mid_min)); - /* To determine values for export structures */ - if (left_edge[i] - shift_dq + (-mid_min) < dq_margin) - dq_margin = left_edge[i] - shift_dq + (-mid_min); - if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin) - dqs_margin = right_edge[i] + shift_dq - (-mid_min); - } + if (max_working_dq[i] > dq_margin) { + scc_mgr_set_dq_out1_delay(write_group, i, max_working_dq[i] - dq_margin); + } else { + scc_mgr_set_dq_out1_delay(write_group, i, 0); + } - /* Move DQS */ - scc_mgr_apply_group_dqs_io_and_oct_out1 (write_group, new_dqs); - IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); + scc_mgr_load_dq(p, i); + } - /* Centre DM */ + //USER sweep DQS window, may potentially have more window due to per-bit-deskew - pr_debug("write_center: DM\n"); + start_dqs = READ_SCC_DQS_IO_OUT1_DELAY(); - /* set the left and right edge of each bit to an illegal value */ - /* use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value */ - left_edge[0] = IO_IO_OUT1_DELAY_MAX + 1; - right_edge[0] = IO_IO_OUT1_DELAY_MAX + 1; - bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; - end_curr = IO_IO_OUT1_DELAY_MAX + 1; - bgn_best = IO_IO_OUT1_DELAY_MAX + 1; - end_best = IO_IO_OUT1_DELAY_MAX + 1; - win_best = 0; + for (d = start_dqs + 1; d <= IO_IO_OUT1_DELAY_MAX; d++) { + scc_mgr_apply_group_dqs_io_and_oct_out1(write_group, d); - /* Search for the/part of the window with DM shift */ - for (d = IO_IO_OUT1_DELAY_MAX; d >= 0; d -= DELTA_D) { - scc_mgr_apply_group_dm_out1_delay (write_group, d); IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); - if (rw_mgr_mem_calibrate_write_test (rank_bgn, write_group, 1, - PASS_ALL_BITS, &bit_chk, 0)) { - - /*USE Set current end of the window */ - end_curr = -d; - /* If a starting edge of our window has not been seen - this is our current start of the DM window */ - if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1) - bgn_curr = -d; + if (QDRII) { + rw_mgr_mem_dll_lock_wait(); + } - /* If current window is bigger than best seen. - Set best seen to be current window */ - if ((end_curr-bgn_curr+1) > win_best) { - win_best = end_curr-bgn_curr+1; - bgn_best = bgn_curr; - end_best = end_curr; - } - } else { - /* We just saw a failing test. Reset temp edge */ - bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; - end_curr = IO_IO_OUT1_DELAY_MAX + 1; + if (!rw_mgr_mem_calibrate_write_test + (rank_bgn, write_group, 0, PASS_ALL_BITS, &bit_chk, 0)) { + break; } } - /* Reset DM delay chains to 0 */ - scc_mgr_apply_group_dm_out1_delay (write_group, 0); + scc_mgr_set_dqs_out1_delay(write_group, start_dqs); + scc_mgr_set_oct_out1_delay(write_group, start_dqs); - /* Check to see if the current window nudges up aganist 0 delay. 
- If so we need to continue the search by shifting DQS otherwise DQS - search begins as a new search */ - if (end_curr != 0) { - bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; - end_curr = IO_IO_OUT1_DELAY_MAX + 1; - } + dqs_margin = d - start_dqs - 1; - /* Search for the/part of the window with DQS shifts */ - for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - new_dqs; d += DELTA_D) { - /* Note: This only shifts DQS, so are we limiting ourselve to */ - /* width of DQ unnecessarily */ - scc_mgr_apply_group_dqs_io_and_oct_out1 (write_group, - d + new_dqs); + //USER time to center, +1 so that we don't go crazy centering DQ - IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); + mid = (dq_margin + dqs_margin + 1) / 2; - if (rw_mgr_mem_calibrate_write_test (rank_bgn, write_group, 1, - PASS_ALL_BITS, &bit_chk, 0)) { + gbl->fom_out += dq_margin + dqs_margin; + TCLRPT_SET(debug_summary_report->fom_out, + debug_summary_report->fom_out + (dq_margin + dqs_margin)); + TCLRPT_SET(debug_cal_report->cal_status_per_group[curr_shadow_reg][grp].fom_out, + (dq_margin + dqs_margin)); - /*USE Set current end of the window */ - end_curr = d; - /* If a beginning edge of our window has not been seen - this is our current begin of the DM window */ - if (bgn_curr == IO_IO_OUT1_DELAY_MAX + 1) - bgn_curr = d; + scc_mgr_load_dqs_io(); + scc_mgr_load_dqs_for_write_group(write_group); - /* If current window is bigger than best seen. Set best - seen to be current window */ - if ((end_curr-bgn_curr+1) > win_best) { - win_best = end_curr-bgn_curr+1; - bgn_best = bgn_curr; - end_best = end_curr; - } - } else { - /* We just saw a failing test. Reset temp edge */ - recover_mem_device_after_ck_dqs_violation(); - bgn_curr = IO_IO_OUT1_DELAY_MAX + 1; - end_curr = IO_IO_OUT1_DELAY_MAX + 1; + //USER center dq - /* Early exit optimization: if ther remaining delay - chain space is less than already seen largest window - we can exit */ - if ((win_best - 1) > (IO_IO_OUT1_DELAY_MAX - new_dqs - d)) - break; + if (dq_margin > mid) { + for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) { + scc_mgr_set_dq_out1_delay(write_group, i, + READ_SCC_DQ_OUT1_DELAY(i) + dq_margin - mid); + scc_mgr_load_dq(p, i); } + dqs_margin += dq_margin - mid; + dq_margin -= dq_margin - mid; } + //USER do dm centering - /* assign left and right edge for cal and reporting; */ - left_edge[0] = -1*bgn_best; - right_edge[0] = end_best; + if (!RLDRAMX) { + dm_margin = IO_IO_OUT1_DELAY_MAX; + + if (QDRII) { + sticky_bit_chk = 0; + for (i = 0; i < RW_MGR_MEM_DATA_MASK_WIDTH / RW_MGR_MEM_IF_WRITE_DQS_WIDTH; + i++) { + max_working_dm[i] = 0; + } + } - pr_debug("dm_calib: left=%d right=%d\n", left_edge[0], right_edge[0]); + for (d = 1; d <= IO_IO_OUT1_DELAY_MAX; d++) { + scc_mgr_apply_group_dm_out1_delay(write_group, d); + IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); - /* Move DQS (back to orig) */ - scc_mgr_apply_group_dqs_io_and_oct_out1 (write_group, new_dqs); + if (DDRX) { + if (rw_mgr_mem_calibrate_write_test + (rank_bgn, write_group, 1, PASS_ALL_BITS, &bit_chk, 0)) { + max_working_dm[0] = d; + } else { + break; + } + } else { + stop = + !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1, + PASS_ALL_BITS, &bit_chk, 0); + sticky_bit_chk = sticky_bit_chk | bit_chk; + stop = stop && (sticky_bit_chk == param->read_correct_mask); - /* Move DM */ + if (stop == 1) { + break; + } else { + for (i = 0; + i < + RW_MGR_MEM_DATA_MASK_WIDTH / + RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) { + if ((bit_chk & param->dm_correct_mask) == + param->dm_correct_mask) { + max_working_dm[i] = d; + } + bit_chk = + bit_chk 
>> (RW_MGR_MEM_DATA_WIDTH / + RW_MGR_MEM_DATA_MASK_WIDTH); + } + } + } + } - /* Find middle of window for the DM bit */ - mid = (left_edge[0] - right_edge[0]) / 2; + i = 0; + for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) { + if (max_working_dm[i] > mid) { + scc_mgr_set_dm_out1_delay(write_group, i, max_working_dm[i] - mid); + } else { + scc_mgr_set_dm_out1_delay(write_group, i, 0); + } - /* only move right, since we are not moving DQS/DQ */ - if (mid < 0) - mid = 0; + scc_mgr_load_dm(i); - /*dm_marign should fail if we never find a window */ - if (win_best == 0) { - dm_margin = -1; + if (max_working_dm[i] < dm_margin) { + dm_margin = max_working_dm[i]; + } + } } else { - dm_margin = left_edge[0] - mid; + dm_margin = 0; } - scc_mgr_apply_group_dm_out1_delay(write_group, mid); IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); - pr_debug("dm_calib: left=%d right=%d mid=%d dm_margin=%d\n", - left_edge[0], right_edge[0], mid, dm_margin); + return (dq_margin + dqs_margin) > 0; +} - /* Export values */ - gbl->fom_out += dq_margin + dqs_margin; +#endif - pr_debug("write_center: dq_margin=%d dqs_margin=%d dm_margin=%d\n", - dq_margin, dqs_margin, dm_margin); +//USER calibrate the write operations - /* Do not remove this line as it makes sure all of our - decisions have been applied */ - IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); - return (dq_margin >= 0) && (dqs_margin >= 0) && (dm_margin >= 0); -} +static uint32_t rw_mgr_mem_calibrate_writes(uint32_t rank_bgn, uint32_t g, uint32_t test_bgn) +{ + //USER update info for sims -/* calibrate the write operations */ + TRACE_FUNC("%lu %lu", g, test_bgn); -static uint32_t rw_mgr_mem_calibrate_writes (uint32_t rank_bgn, uint32_t g, - uint32_t test_bgn) -{ reg_file_set_stage(CAL_STAGE_WRITES); reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER); + //USER starting phases + + //USER update info for sims + reg_file_set_group(g); - return rw_mgr_mem_calibrate_writes_center (rank_bgn, g, test_bgn); + if (!rw_mgr_mem_calibrate_writes_center(rank_bgn, g, test_bgn)) { + set_failing_group_stage(g, CAL_STAGE_WRITES, CAL_SUBSTAGE_WRITES_CENTER); + return 0; + } + + return 1; +} + +// helpful for creating eye diagrams +// TODO: This is for the TCL DBG... but obviously it serves no purpose... +// Decide what to do with it! + +static void rw_mgr_mem_calibrate_eye_diag_aid(void) +{ + // no longer exists +} + +// TODO: This needs to be update to properly handle the number of failures +// Right now it only checks if the write test was successful or not +static uint32_t rw_mgr_mem_calibrate_full_test(uint32_t min_correct, t_btfld * bit_chk, + uint32_t test_dm) +{ + uint32_t g; + uint32_t success = 0; + uint32_t run_groups = ~param->skip_groups; + + TRACE_FUNC("%lu %lu", min_correct, test_dm); + + for (g = 0; g < RW_MGR_MEM_IF_READ_DQS_WIDTH; g++) { + if (run_groups & ((1 << RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1)) { + success = + rw_mgr_mem_calibrate_write_test_all_ranks(g, test_dm, PASS_ALL_BITS, + bit_chk); + } + run_groups = run_groups >> RW_MGR_NUM_DQS_PER_WRITE_GROUP; + } + + return success; } -/* precharge all banks and activate row 0 in bank "000..." and bank "111..." */ -static void mem_precharge_and_activate (void) +//USER precharge all banks and activate row 0 in bank "000..." and bank "111..." 
+static void mem_precharge_and_activate(void) { uint32_t r; for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) { - /* set rank */ + if (param->skip_ranks[r]) { + //USER request to skip the rank + + continue; + } + //USER set rank set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF); - /* precharge all banks ... */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_PRECHARGE_ALL); + //USER precharge all banks ... + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_PRECHARGE_ALL); IOWR_32DIRECT(RW_MGR_LOAD_CNTR_0, 0, 0x0F); - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_0, 0, - __RW_MGR_ACTIVATE_0_AND_1_WAIT1); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_0, 0, __RW_MGR_ACTIVATE_0_AND_1_WAIT1); IOWR_32DIRECT(RW_MGR_LOAD_CNTR_1, 0, 0x0F); - IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_1, 0, - __RW_MGR_ACTIVATE_0_AND_1_WAIT2); + IOWR_32DIRECT(RW_MGR_LOAD_JUMP_ADD_1, 0, __RW_MGR_ACTIVATE_0_AND_1_WAIT2); - /* activate rows */ - IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, - __RW_MGR_ACTIVATE_0_AND_1); + //USER activate rows + IOWR_32DIRECT(RW_MGR_RUN_SINGLE_GROUP, 0, __RW_MGR_ACTIVATE_0_AND_1); } } -/* perform all refreshes necessary over all ranks */ +//USER perform all refreshes necessary over all ranks -/* Configure various memory related parameters. */ -static void mem_config (void) +//USER Configure various memory related parameters. + +static void mem_config(void) { uint32_t rlat, wlat; uint32_t rw_wl_nop_cycles; uint32_t max_latency; - /* read in write and read latency */ + TRACE_FUNC(); - wlat = IORD_32DIRECT (MEM_T_WL_ADD, 0); - wlat += IORD_32DIRECT (DATA_MGR_MEM_T_ADD, 0); - /* WL for hard phy does not include additive latency */ + //USER read in write and read latency - /* - * YYONG: add addtional write latency to offset the address/command extra clock cycle - * YYONG: We change the AC mux setting causing AC to be delayed by one mem clock cycle - * YYONG: only do this for DDR3 - */ -#if DDR3 || DDR2 - wlat += 1; -#endif - rlat = IORD_32DIRECT (MEM_T_RL_ADD, 0); + wlat = IORD_32DIRECT(MEM_T_WL_ADD, 0); + wlat += IORD_32DIRECT(DATA_MGR_MEM_T_ADD, 0); /* WL for hard phy does not include additive latency */ + + // YYONG: add addtional write latency to offset the address/command extra clock cycle + // YYONG: We change the AC mux setting causing AC to be delayed by one mem clock cycle + // YYONG: only do this for DDR3 + wlat = wlat + 1; + + rlat = IORD_32DIRECT(MEM_T_RL_ADD, 0); if (QUARTER_RATE_MODE) { - /* In Quarter-Rate the WL-to-nop-cycles works like this */ - /* 0,1 -> 0 */ - /* 2,3,4,5 -> 1 */ - /* 6,7,8,9 -> 2 */ - /* etc... */ + //USER In Quarter-Rate the WL-to-nop-cycles works like this + //USER 0,1 -> 0 + //USER 2,3,4,5 -> 1 + //USER 6,7,8,9 -> 2 + //USER etc... rw_wl_nop_cycles = (wlat + 6) / 4 - 1; - } else if (HALF_RATE_MODE) { - /* In Half-Rate the WL-to-nop-cycles works like this */ - /* 0,1 -> -1 */ - /* 2,3 -> 0 */ - /* 4,5 -> 1 */ - /* etc... */ - if (wlat % 2) + } else if (HALF_RATE_MODE) { + //USER In Half-Rate the WL-to-nop-cycles works like this + //USER 0,1 -> -1 + //USER 2,3 -> 0 + //USER 4,5 -> 1 + //USER etc... 
+ if (wlat % 2) { rw_wl_nop_cycles = ((wlat - 1) / 2) - 1; - else + } else { rw_wl_nop_cycles = (wlat / 2) - 1; + } } else { rw_wl_nop_cycles = wlat - 2; -#if LPDDR2 - rw_wl_nop_cycles = rw_wl_nop_cycles + 1; -#endif } -#if MULTIPLE_AFI_WLAT - for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) { - gbl->rw_wl_nop_cycles_per_group[i] = rw_wl_nop_cycles; - } -#endif gbl->rw_wl_nop_cycles = rw_wl_nop_cycles; -#if ARRIAV || CYCLONEV - /* For AV/CV, lfifo is hardened and always runs at full rate so - max latency in AFI clocks, used here, is correspondingly smaller */ + //USER For AV/CV, lfifo is hardened and always runs at full rate + //USER so max latency in AFI clocks, used here, is correspondingly smaller if (QUARTER_RATE_MODE) { - max_latency = (1<curr_read_lat = (rlat + 1) / 4 + 8; } else if (HALF_RATE_MODE) { - /* write latency */ + //USER write latency wlat = (wlat - 1) / 2 + 1; - /* set a pretty high read latency initially */ + //USER set a pretty high read latency initially gbl->curr_read_lat = (rlat + 1) / 2 + 8; } else { - /* write latency */ - /* Adjust Write Latency for Hard PHY */ + //USER write latency + // Adjust Write Latency for Hard PHY wlat = wlat + 1; -#if LPDDR2 - /* Add another one in hard for LPDDR2 since this value is raw - from controller assume tdqss is one */ - wlat = wlat + 1; -#endif - /* set a pretty high read latency initially */ + //USER set a pretty high read latency initially gbl->curr_read_lat = rlat + 16; } - if (gbl->curr_read_lat > max_latency) + if (gbl->curr_read_lat > max_latency) { gbl->curr_read_lat = max_latency; - + } IOWR_32DIRECT(PHY_MGR_PHY_RLAT, 0, gbl->curr_read_lat); - /* advertise write latency */ + //USER advertise write latency gbl->curr_write_lat = wlat; -#if MULTIPLE_AFI_WLAT - for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) { - IOWR_32DIRECT(PHY_MGR_AFI_WLAT, i*4, wlat - 2); - } -#else IOWR_32DIRECT(PHY_MGR_AFI_WLAT, 0, wlat - 2); -#endif - mem_precharge_and_activate (); + //USER initialize bit slips + + mem_precharge_and_activate(); +} + +//USER Set VFIFO and LFIFO to instant-on settings in skip calibration mode + +static void mem_skip_calibrate(void) +{ + uint32_t vfifo_offset; + uint32_t i, j, r; + + TRACE_FUNC(); + + // Need to update every shadow register set used by the interface + for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r += NUM_RANKS_PER_SHADOW_REG) { + + // Strictly speaking this should be called once per group to make + // sure each group's delay chains are refreshed from the SCC register file, + // but since we're resetting all delay chains anyway, we can save some + // runtime by calling select_shadow_regs_for_update just once to switch rank. + select_shadow_regs_for_update(r, 0, 1); + + //USER Set output phase alignment settings appropriate for skip calibration + for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { + + scc_mgr_set_dqs_en_phase(i, 0); + // Case:33398 + // + // Write data arrives to the I/O two cycles before write latency is reached (720 deg). + // -> due to bit-slip in a/c bus + // -> to allow board skew where dqs is longer than ck + // -> how often can this happen!? + // -> can claim back some ptaps for high freq support if we can relax this, but i digress... 
+			//
+			// The write_clk leads mem_ck by 90 deg
+			// The minimum ptap of the OPA is 180 deg
+			// Each ptap has (360 / IO_DLL_CHAIN_LENGTH) deg of delay
+			// The write_clk is always delayed by 2 ptaps
+			//
+			// Hence, to make DQS aligned to CK, we need to delay DQS by:
+			// (720 - 90 - 180 - 2 * (360 / IO_DLL_CHAIN_LENGTH))
+			//
+			// Dividing the above by (360 / IO_DLL_CHAIN_LENGTH) gives us the number of ptaps, which simplifies to:
+			//
+			// (1.25 * IO_DLL_CHAIN_LENGTH - 2)
+			scc_mgr_set_dqdqs_output_phase(i, (1.25 * IO_DLL_CHAIN_LENGTH - 2));
+		}
+
+		IOWR_32DIRECT(SCC_MGR_DQS_ENA, 0, 0xff);
+		IOWR_32DIRECT(SCC_MGR_DQS_IO_ENA, 0, 0xff);
+
+		for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) {
+			IOWR_32DIRECT(SCC_MGR_GROUP_COUNTER, 0, i);
+			IOWR_32DIRECT(SCC_MGR_DQ_ENA, 0, 0xff);
+			IOWR_32DIRECT(SCC_MGR_DM_ENA, 0, 0xff);
+		}
+
+		IOWR_32DIRECT(SCC_MGR_UPD, 0, 0);
+	}
+
+	// Compensate for simulation model behaviour
+	for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
+		scc_mgr_set_dqs_bus_in_delay(i, 10);
+		scc_mgr_load_dqs(i);
+	}
+	IOWR_32DIRECT(SCC_MGR_UPD, 0, 0);
+
+	//ArriaV has hard FIFOs that can only be initialized by incrementing in sequencer
+	vfifo_offset = CALIB_VFIFO_OFFSET;
+	for (j = 0; j < vfifo_offset; j++) {
+		if (HARD_PHY) {
+			IOWR_32DIRECT(PHY_MGR_CMD_INC_VFIFO_HARD_PHY, 0, 0xff);
+		} else {
+			IOWR_32DIRECT(PHY_MGR_CMD_INC_VFIFO_FR, 0, 0xff);
+		}
+	}
+
+	IOWR_32DIRECT(PHY_MGR_CMD_FIFO_RESET, 0, 0);
+
+	// For ACV with hard lfifo, we get the skip-cal setting from generation-time constant
+	gbl->curr_read_lat = CALIB_LFIFO_OFFSET;
+	IOWR_32DIRECT(PHY_MGR_PHY_RLAT, 0, gbl->curr_read_lat);
 }
 
-/* Memory calibration entry point */
+//USER Memory calibration entry point
 
-static uint32_t mem_calibrate (void)
+static uint32_t mem_calibrate(void)
 {
 	uint32_t i;
-	uint32_t rank_bgn;
+	uint32_t rank_bgn, sr;
 	uint32_t write_group, write_test_bgn;
 	uint32_t read_group, read_test_bgn;
 	uint32_t run_groups, current_run;
+	uint32_t failing_groups = 0;
+	uint32_t group_failed = 0;
+	uint32_t sr_failed = 0;
+
+	TRACE_FUNC();
 
-	/* Initialize the data settings */
-	pr_debug("Preparing to init data\n");
-	pr_debug("Init complete\n");
+	// Initialize the data settings
+	DPRINT(1, "Preparing to init data");
+	DPRINT(1, "Init complete");
 
 	gbl->error_substage = CAL_SUBSTAGE_NIL;
 	gbl->error_stage = CAL_STAGE_NIL;
@@ -3947,93 +5067,208 @@ static uint32_t mem_calibrate (void)
 	gbl->fom_in = 0;
 	gbl->fom_out = 0;
 
-	mem_config ();
+	TCLRPT_SET(debug_summary_report->cal_read_latency, 0);
+	TCLRPT_SET(debug_summary_report->cal_write_latency, 0);
+
+	mem_config();
 
 	if (ARRIAV || CYCLONEV) {
+		uint32_t bypass_mode = (HARD_PHY) ?
0x1 : 0x0; for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) { IOWR_32DIRECT(SCC_MGR_GROUP_COUNTER, 0, i); - scc_set_bypass_mode(i); + scc_set_bypass_mode(i, bypass_mode); } } - /* Zero all delay chain/phase settings for all - groups and all shadow register sets */ - scc_mgr_zero_all (); + if (((DYNAMIC_CALIB_STEPS) & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) { + //USER Set VFIFO and LFIFO to instant-on settings in skip calibration mode - run_groups = ~0; + mem_skip_calibrate(); + } else { + for (i = 0; i < NUM_CALIB_REPEAT; i++) { - for (write_group = 0, write_test_bgn = 0; write_group - < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; write_group++, - write_test_bgn += RW_MGR_MEM_DQ_PER_WRITE_DQS) { + //USER Zero all delay chain/phase settings for all groups and all shadow register sets + scc_mgr_zero_all(); - /* Mark the group as being attempted for calibration */ + run_groups = ~param->skip_groups; - current_run = run_groups & ((1 << RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1); - run_groups = run_groups >> RW_MGR_NUM_DQS_PER_WRITE_GROUP; + for (write_group = 0, write_test_bgn = 0; + write_group < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; + write_group++, write_test_bgn += RW_MGR_MEM_DQ_PER_WRITE_DQS) { + // Initialized the group failure + group_failed = 0; - if (current_run == 0) - continue; + // Mark the group as being attempted for calibration - IOWR_32DIRECT(SCC_MGR_GROUP_COUNTER, 0, write_group); - scc_mgr_zero_group (write_group, write_test_bgn, 0); + BFM_GBL_SET(vfifo_idx, 0); + current_run = + run_groups & ((1 << RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1); + run_groups = run_groups >> RW_MGR_NUM_DQS_PER_WRITE_GROUP; - for (read_group = write_group * RW_MGR_NUM_DQS_PER_WRITE_GROUP, - read_test_bgn = 0; - read_group < (write_group + 1) * RW_MGR_NUM_DQS_PER_WRITE_GROUP; - read_group++, read_test_bgn += RW_MGR_MEM_DQ_PER_READ_DQS) { + if (current_run == 0) { + continue; + } - /* Calibrate the VFIFO */ - if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_VFIFO)) { - if (!rw_mgr_mem_calibrate_vfifo(read_group, read_test_bgn)) - return 0; - } - } + IOWR_32DIRECT(SCC_MGR_GROUP_COUNTER, 0, write_group); + scc_mgr_zero_group(write_group, write_test_bgn, 0); + + for (read_group = + write_group * RW_MGR_MEM_IF_READ_DQS_WIDTH / + RW_MGR_MEM_IF_WRITE_DQS_WIDTH, read_test_bgn = 0; + read_group < + (write_group + + 1) * RW_MGR_MEM_IF_READ_DQS_WIDTH / + RW_MGR_MEM_IF_WRITE_DQS_WIDTH && group_failed == 0; + read_group++, read_test_bgn += RW_MGR_MEM_DQ_PER_READ_DQS) { + + //USER Calibrate the VFIFO + if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_VFIFO)) { + if (!rw_mgr_mem_calibrate_vfifo + (read_group, read_test_bgn)) { + group_failed = 1; + + if (! + (gbl-> + phy_debug_mode_flags & + PHY_DEBUG_SWEEP_ALL_GROUPS)) { + return 0; + } + } + } + } - /* level writes (or align DK with CK for RLDRAMX) */ - if (!(ARRIAV || CYCLONEV)) { - if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_WLEVEL)) { - if (!rw_mgr_mem_calibrate_wlevel(write_group, write_test_bgn)) - return 0; - } - } + //USER level writes (or align DK with CK for RLDRAMX) + if (group_failed == 0) { + if ((DDRX || RLDRAMII) && !(ARRIAV || CYCLONEV)) { + if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_WLEVEL)) { + if (!rw_mgr_mem_calibrate_wlevel + (write_group, write_test_bgn)) { + group_failed = 1; + + if (! 
+ (gbl-> + phy_debug_mode_flags & + PHY_DEBUG_SWEEP_ALL_GROUPS)) { + return 0; + } + } + } + } + } + //USER Calibrate the output side + if (group_failed == 0) { + for (rank_bgn = 0, sr = 0; + rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; + rank_bgn += NUM_RANKS_PER_SHADOW_REG, ++sr) { + sr_failed = 0; + if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_WRITES)) { + if ((STATIC_CALIB_STEPS) & + CALIB_SKIP_DELAY_SWEEPS) { + //USER not needed in quick mode! + } else { + //USER Determine if this set of ranks should be skipped entirely + if (!param->skip_shadow_regs[sr]) { + + //USER Select shadow register set + select_shadow_regs_for_update + (rank_bgn, write_group, + 1); + + if (!rw_mgr_mem_calibrate_writes(rank_bgn, write_group, write_test_bgn)) { + sr_failed = 1; + if (! + (gbl-> + phy_debug_mode_flags + & + PHY_DEBUG_SWEEP_ALL_GROUPS)) + { + return 0; + } + } + } + } + } + if (sr_failed == 0) { + TCLRPT_SET(debug_cal_report-> + cal_status_per_group[sr] + [write_group].error_stage, + CAL_STAGE_NIL); + } else { + group_failed = 1; + } + } + } - /* Calibrate the output side */ - for (rank_bgn = 0; rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS; - rank_bgn += NUM_RANKS_PER_SHADOW_REG) { - if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_WRITES)) { - if ((STATIC_CALIB_STEPS) & CALIB_SKIP_DELAY_SWEEPS) { - /* not needed in quick mode! */ - } else { - /* Determine if this set of - * ranks should be skipped - * entirely */ - if (!rw_mgr_mem_calibrate_writes(rank_bgn, write_group, write_test_bgn)) - return 0; + if (group_failed == 0) { + for (read_group = + write_group * RW_MGR_MEM_IF_READ_DQS_WIDTH / + RW_MGR_MEM_IF_WRITE_DQS_WIDTH, read_test_bgn = 0; + read_group < + (write_group + + 1) * RW_MGR_MEM_IF_READ_DQS_WIDTH / + RW_MGR_MEM_IF_WRITE_DQS_WIDTH && group_failed == 0; + read_group++, read_test_bgn += + RW_MGR_MEM_DQ_PER_READ_DQS) { + + if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_WRITES)) { + if (!rw_mgr_mem_calibrate_vfifo_end + (read_group, read_test_bgn)) { + group_failed = 1; + + if (! + (gbl-> + phy_debug_mode_flags & + PHY_DEBUG_SWEEP_ALL_GROUPS)) { + return 0; + } + } + } + } + } + + if (group_failed == 0) { + +#if STATIC_IN_RTL_SIM +#else +#endif } + + if (group_failed != 0) { + failing_groups++; + } + } - } - for (read_group = write_group * RW_MGR_NUM_DQS_PER_WRITE_GROUP, - read_test_bgn = 0; - read_group < (write_group + 1) * RW_MGR_NUM_DQS_PER_WRITE_GROUP; - read_group++, read_test_bgn += RW_MGR_MEM_DQ_PER_READ_DQS) { - if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_WRITES)) { - if (!rw_mgr_mem_calibrate_vfifo_end(read_group, read_test_bgn)) - return 0; + // USER If there are any failing groups then report the failure + if (failing_groups != 0) { + return 0; + } + //USER Calibrate the LFIFO + if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_LFIFO)) { + //USER If we're skipping groups as part of debug, don't calibrate LFIFO + if (param->skip_groups == 0) { + if (!rw_mgr_mem_calibrate_lfifo()) { + return 0; + } + } } } } - /* Calibrate the LFIFO */ - if (!((STATIC_CALIB_STEPS) & CALIB_SKIP_LFIFO)) { - /* If we're skipping groups as part of debug, - don't calibrate LFIFO */ - if (!rw_mgr_mem_calibrate_lfifo ()) - return 0; + TCLRPT_SET(debug_summary_report->cal_write_latency, IORD_32DIRECT(MEM_T_WL_ADD, 0)); + if (QUARTER_RATE == 1) { + // The read latency is in terms of AFI cycles so we multiply by 4 in quarter + // rate to get the memory cycles. 
+ TCLRPT_SET(debug_summary_report->cal_read_latency, gbl->curr_read_lat * 4); + } else if (HALF_RATE == 1) { + // The read latency is in terms of AFI cycles so we multiply by 2 in half + // rate to get the memory cycles. + TCLRPT_SET(debug_summary_report->cal_read_latency, gbl->curr_read_lat * 2); + } else { + TCLRPT_SET(debug_summary_report->cal_read_latency, gbl->curr_read_lat); } - /* Do not remove this line as it makes sure all of our decisions - have been applied */ + //USER Do not remove this line as it makes sure all of our decisions have been applied IOWR_32DIRECT(SCC_MGR_UPD, 0, 0); return 1; } @@ -4043,32 +5278,59 @@ static uint32_t run_mem_calibrate(void) uint32_t pass; uint32_t debug_info; - /* Initialize the debug status to show that calibration has started. */ - /* This should occur before anything else */ - /* Reset pass/fail status shown on afi_cal_success/fail */ + // Initialize the debug status to show that calibration has started. + // This should occur before anything else + // Reset pass/fail status shown on afi_cal_success/fail IOWR_32DIRECT(PHY_MGR_CAL_STATUS, 0, PHY_MGR_CAL_RESET); + TRACE_FUNC(); + + BFM_STAGE("calibrate"); + //stop tracking manger + uint32_t ctrlcfg = IORD_32DIRECT(CTRL_CONFIG_REG, 0); + + IOWR_32DIRECT(CTRL_CONFIG_REG, 0, ctrlcfg & 0xFFBFFFFF); + initialize(); - rw_mgr_mem_initialize (); - pass = mem_calibrate (); - mem_precharge_and_activate (); + + rw_mgr_mem_initialize(); + + pass = mem_calibrate(); + + mem_precharge_and_activate(); + + //pe_checkout_pattern(); IOWR_32DIRECT(PHY_MGR_CMD_FIFO_RESET, 0, 0); - /* Handoff */ + if (pass) { + TCLRPT_SET(debug_summary_report->error_stage, CAL_STAGE_NIL); + + BFM_STAGE("handoff"); + +#ifdef TEST_SIZE + if (!check_test_mem(0)) { + gbl->error_stage = 0x92; + gbl->error_group = 0x92; + } +#endif + } + + //USER Handoff - /* Don't return control of the PHY back to AFI when in debug mode */ + //USER Don't return control of the PHY back to AFI when in debug mode if ((gbl->phy_debug_mode_flags & PHY_DEBUG_IN_DEBUG_MODE) == 0) { - rw_mgr_mem_handoff (); + rw_mgr_mem_handoff(); - /* In Hard PHY this is a 2-bit control: */ - /* 0: AFI Mux Select */ - /* 1: DDIO Mux Select */ + // In Hard PHY this is a 2-bit control: + // 0: AFI Mux Select + // 1: DDIO Mux Select IOWR_32DIRECT(PHY_MGR_MUX_SEL, 0, 0x2); } + IOWR_32DIRECT(CTRL_CONFIG_REG, 0, ctrlcfg); if (pass) { - pr_debug("CALIBRATION PASSED\n"); + IPRINT("CALIBRATION PASSED"); gbl->fom_in /= 2; gbl->fom_out /= 2; @@ -4081,7 +5343,7 @@ static uint32_t run_mem_calibrate(void) gbl->fom_out = 0xff; } - /* Update the FOM in the register file */ + // Update the FOM in the register file debug_info = gbl->fom_in; debug_info |= gbl->fom_out << 8; IOWR_32DIRECT(REG_FILE_FOM, 0, debug_info); @@ -4090,32 +5352,40 @@ static uint32_t run_mem_calibrate(void) IOWR_32DIRECT(PHY_MGR_CAL_STATUS, 0, PHY_MGR_CAL_SUCCESS); } else { - pr_debug("CALIBRATION FAILED\n"); + + IPRINT("CALIBRATION FAILED"); debug_info = gbl->error_stage; debug_info |= gbl->error_substage << 8; debug_info |= gbl->error_group << 16; - IOWR_32DIRECT(REG_FILE_FAILING_STAGE, 0, debug_info); IOWR_32DIRECT(PHY_MGR_CAL_DEBUG_INFO, 0, debug_info); IOWR_32DIRECT(PHY_MGR_CAL_STATUS, 0, PHY_MGR_CAL_FAIL); - /* Update the failing group/stage in the register file */ + // Update the failing group/stage in the register file debug_info = gbl->error_stage; debug_info |= gbl->error_substage << 8; debug_info |= gbl->error_group << 16; IOWR_32DIRECT(REG_FILE_FAILING_STAGE, 0, debug_info); + } - /* Set the debug status to show 
that calibration has ended. */ - /* This should occur after everything else */ + // Mark the reports as being ready to read + TCLRPT_SET(debug_summary_report->report_flags, debug_summary_report->report_flags |= + DEBUG_REPORT_STATUS_REPORT_READY); + TCLRPT_SET(debug_cal_report->report_flags, debug_cal_report->report_flags |= + DEBUG_REPORT_STATUS_REPORT_READY); + TCLRPT_SET(debug_margin_report->report_flags, debug_margin_report->report_flags |= + DEBUG_REPORT_STATUS_REPORT_READY); + + // Set the debug status to show that calibration has ended. + // This should occur after everything else return pass; } -static void hc_initialize_rom_data(const uint32_t *inst_rom_init, uint32_t inst_rom_init_size, - const uint32_t *ac_rom_init, uint32_t ac_rom_init_size) +static void hc_initialize_rom_data(void) { uint32_t i; @@ -4132,7 +5402,7 @@ static void hc_initialize_rom_data(const uint32_t *inst_rom_init, uint32_t inst_ static void initialize_reg_file(void) { - /* Initialize the register file with the correct data */ + // Initialize the register file with the correct data IOWR_32DIRECT(REG_FILE_SIGNATURE, 0, REG_FILE_INIT_SEQ_SIGNATURE); IOWR_32DIRECT(REG_FILE_DEBUG_DATA_ADDR, 0, 0); IOWR_32DIRECT(REG_FILE_CUR_STAGE, 0, 0); @@ -4144,59 +5414,45 @@ static void initialize_reg_file(void) static void initialize_hps_phy(void) { - /* These may need to be included also: */ - /* wrap_back_en (false) */ - /* atpg_en (false) */ - /* pipelineglobalenable (true) */ + // These may need to be included also: + // wrap_back_en (false) + // atpg_en (false) + // pipelineglobalenable (true) uint32_t reg; - /* Tracking also gets configured here because it's in the - same register */ + // Tracking also gets configured here because it's in the same register uint32_t trk_sample_count = 7500; - uint32_t trk_long_idle_sample_count = (10 << 16) | 100; - /* Format is number of outer loops in the 16 MSB, sample - count in 16 LSB. */ + uint32_t trk_long_idle_sample_count = (10 << 16) | 100; // Format is number of outer loops in the 16 MSB, sample count in 16 LSB. reg = 0; -#if DDR3 || DDR2 reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(2); -#else - reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(1); -#endif reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQDELAYEN_SET(1); reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSDELAYEN_SET(1); reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSLOGICDELAYEN_SET(1); reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_RESETDELAYEN_SET(0); -#if LPDDR2 - reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(0); -#else reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(1); -#endif - /* Fix for long latency VFIFO */ - /* This field selects the intrinsic latency to RDATA_EN/FULL path. - 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles. */ + // Fix for long latency VFIFO + // This field selects the intrinsic latency to RDATA_EN/FULL path. 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles. 
reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ADDLATSEL_SET(0); - reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET( - trk_sample_count); + reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET(trk_sample_count); IOWR_32DIRECT(BASE_MMR, SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_OFFSET, reg); reg = 0; - reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET( - trk_sample_count >> - SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH); - reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET( - trk_long_idle_sample_count); + reg |= + SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET(trk_sample_count >> + SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH); + reg |= + SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET(trk_long_idle_sample_count); IOWR_32DIRECT(BASE_MMR, SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_OFFSET, reg); reg = 0; - reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET( - trk_long_idle_sample_count >> - SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH); + reg |= + SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET(trk_long_idle_sample_count + >> + SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH); IOWR_32DIRECT(BASE_MMR, SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_OFFSET, reg); } -#if USE_DQS_TRACKING - static void initialize_tracking(void) { uint32_t concatenated_longidle = 0x0; @@ -4206,8 +5462,7 @@ static void initialize_tracking(void) uint32_t dtaps_per_ptap; uint32_t tmp_delay; - /* compute usable version of value in case we skip full - computation later */ + // compute usable version of value in case we skip full computation later dtaps_per_ptap = 0; tmp_delay = 0; while (tmp_delay < IO_DELAY_PER_OPA_TAP) { @@ -4216,25 +5471,18 @@ static void initialize_tracking(void) } dtaps_per_ptap--; - concatenated_longidle = concatenated_longidle ^ 10; - /*longidle outer loop */ + concatenated_longidle = concatenated_longidle ^ 10; //longidle outer loop concatenated_longidle = concatenated_longidle << 16; - concatenated_longidle = concatenated_longidle ^ 100; - /*longidle sample count */ + concatenated_longidle = concatenated_longidle ^ 100; //longidle sample count - concatenated_delays = concatenated_delays ^ 243; - /* trfc, worst case of 933Mhz 4Gb */ + concatenated_delays = concatenated_delays ^ 243; // trfc, worst case of 933Mhz 4Gb concatenated_delays = concatenated_delays << 8; - concatenated_delays = concatenated_delays ^ 14; - /* trcd, worst case */ + concatenated_delays = concatenated_delays ^ 14; // trcd, worst case concatenated_delays = concatenated_delays << 8; - concatenated_delays = concatenated_delays ^ 10; - /* vfifo wait */ + concatenated_delays = concatenated_delays ^ 10; // vfifo wait concatenated_delays = concatenated_delays << 8; - concatenated_delays = concatenated_delays ^ 4; - /* mux delay */ + concatenated_delays = concatenated_delays ^ 4; // mux delay -#if DDR3 || LPDDR2 concatenated_rw_addr = concatenated_rw_addr ^ __RW_MGR_IDLE; concatenated_rw_addr = concatenated_rw_addr << 8; concatenated_rw_addr = concatenated_rw_addr ^ __RW_MGR_ACTIVATE_1; @@ -4242,31 +5490,39 @@ static void initialize_tracking(void) concatenated_rw_addr = concatenated_rw_addr ^ __RW_MGR_SGLE_READ; concatenated_rw_addr = concatenated_rw_addr << 8; concatenated_rw_addr = concatenated_rw_addr ^ __RW_MGR_PRECHARGE_ALL; -#endif -#if DDR3 || LPDDR2 concatenated_refresh = concatenated_refresh ^ __RW_MGR_REFRESH_ALL; -#else - concatenated_refresh = concatenated_refresh ^ 0; -#endif concatenated_refresh = concatenated_refresh << 24; - 
concatenated_refresh = concatenated_refresh ^ 1000; /* trefi */ + concatenated_refresh = concatenated_refresh ^ 1000; // trefi - /* Initialize the register file with the correct data */ + // Initialize the register file with the correct data IOWR_32DIRECT(REG_FILE_DTAPS_PER_PTAP, 0, dtaps_per_ptap); IOWR_32DIRECT(REG_FILE_TRK_SAMPLE_COUNT, 0, 7500); IOWR_32DIRECT(REG_FILE_TRK_LONGIDLE, 0, concatenated_longidle); IOWR_32DIRECT(REG_FILE_DELAYS, 0, concatenated_delays); IOWR_32DIRECT(REG_FILE_TRK_RW_MGR_ADDR, 0, concatenated_rw_addr); - IOWR_32DIRECT(REG_FILE_TRK_READ_DQS_WIDTH, 0, - RW_MGR_MEM_IF_READ_DQS_WIDTH); + IOWR_32DIRECT(REG_FILE_TRK_READ_DQS_WIDTH, 0, RW_MGR_MEM_IF_READ_DQS_WIDTH); IOWR_32DIRECT(REG_FILE_TRK_RFSH, 0, concatenated_refresh); } -#endif /* USE_DQS_TRACKING */ +static void user_init_cal_req(void) +{ + uint32_t scc_afi_reg; + + scc_afi_reg = IORD_32DIRECT(SCC_MGR_AFI_CAL_INIT, 0); + + if (scc_afi_reg == 1) { // 1 is initialization request + initialize(); + rw_mgr_mem_initialize(); + rw_mgr_mem_handoff(); + IOWR_32DIRECT(PHY_MGR_MUX_SEL, 0, 0); + IOWR_32DIRECT(PHY_MGR_CAL_STATUS, 0, PHY_MGR_CAL_SUCCESS); + } else if (scc_afi_reg == 2) { + run_mem_calibrate(); + } +} -static int socfpga_sdram_calibration(const uint32_t *inst_rom_init, uint32_t inst_rom_init_size, - const uint32_t *ac_rom_init, uint32_t ac_rom_init_size) +static int socfpga_mem_calibration(void) { param_t my_param; gbl_t my_gbl; @@ -4276,93 +5532,102 @@ static int socfpga_sdram_calibration(const uint32_t *inst_rom_init, uint32_t ins param = &my_param; gbl = &my_gbl; - /* Initialize the debug mode flags */ + // Initialize the debug mode flags gbl->phy_debug_mode_flags = 0; - /* Set the calibration enabled by default */ + // Set the calibration enabled by default gbl->phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT; + // Only enable margining by default if requested + // Only sweep all groups (regardless of fail state) by default if requested + //Set enabled read test by default - /* Initialize the register file */ + // Initialize the register file initialize_reg_file(); - /* Initialize any PHY CSR */ + // Initialize any PHY CSR initialize_hps_phy(); scc_mgr_initialize(); -#if USE_DQS_TRACKING initialize_tracking(); -#endif - /* Enable all ranks, groups */ - for (i = 0; i < RW_MGR_MEM_NUMBER_OF_RANKS; i++) - param->skip_ranks[i] = 0; + // Initialize the TCL report. This must occur before any printf + // but after the debug mode flags and register file - for (i = 0; i < NUM_SHADOW_REGS; ++i) + // USER Enable all ranks, groups + for (i = 0; i < RW_MGR_MEM_NUMBER_OF_RANKS; i++) { + param->skip_ranks[i] = 0; + } + for (i = 0; i < NUM_SHADOW_REGS; ++i) { param->skip_shadow_regs[i] = 0; - + } param->skip_groups = 0; - pr_debug("Preparing to start memory calibration\n"); - - pr_debug("%s%s %s ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u " - "dqs=%u,%u dq=%u dm=%u " - "ptap_delay=%u dtap_delay=%u dtap_dqsen_delay=%u, dll=%u\n", - RDIMM ? "r" : (LRDIMM ? "l" : ""), - DDR2 ? "DDR2" : (DDR3 ? "DDR3" : (QDRII ? "QDRII" : (RLDRAMII ? - "RLDRAMII" : (RLDRAM3 ? "RLDRAM3" : "??PROTO??")))), - FULL_RATE ? "FR" : (HALF_RATE ? "HR" : (QUARTER_RATE ? 
- "QR" : "??RATE??")), - RW_MGR_MEM_NUMBER_OF_RANKS, - RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM, - RW_MGR_MEM_DQ_PER_READ_DQS, - RW_MGR_MEM_DQ_PER_WRITE_DQS, - RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS, - RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS, - RW_MGR_MEM_IF_READ_DQS_WIDTH, - RW_MGR_MEM_IF_WRITE_DQS_WIDTH, - RW_MGR_MEM_DATA_WIDTH, - RW_MGR_MEM_DATA_MASK_WIDTH, - IO_DELAY_PER_OPA_TAP, - IO_DELAY_PER_DCHAIN_TAP, - IO_DELAY_PER_DQS_EN_DCHAIN_TAP, - IO_DLL_CHAIN_LENGTH); - pr_debug("max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u " - "io_in_d=%u io_out1_d=%u io_out2_d=%u" - "dqs_in_reserve=%u dqs_out_reserve=%u\n", - IO_DQS_EN_PHASE_MAX, - IO_DQDQS_OUT_PHASE_MAX, - IO_DQS_EN_DELAY_MAX, - IO_DQS_IN_DELAY_MAX, - IO_IO_IN_DELAY_MAX, - IO_IO_OUT1_DELAY_MAX, - IO_IO_OUT2_DELAY_MAX, - IO_DQS_IN_RESERVE, - IO_DQS_OUT_RESERVE); - - hc_initialize_rom_data(inst_rom_init, inst_rom_init_size, - ac_rom_init, ac_rom_init_size); - - /* update info for sims */ + IPRINT("Preparing to start memory calibration"); + + TRACE_FUNC(); + DPRINT(1, + "%s%s %s ranks=%lu cs/dimm=%lu dq/dqs=%lu,%lu vg/dqs=%lu,%lu dqs=%lu,%lu dq=%lu dm=%lu " + "ptap_delay=%lu dtap_delay=%lu dtap_dqsen_delay=%lu, dll=%lu", + RDIMM ? "r" : (LRDIMM ? "l" : ""), + DDR2 ? "DDR2" : (DDR3 ? "DDR3" + : (QDRII ? "QDRII" + : (RLDRAMII ? "RLDRAMII" + : (RLDRAM3 ? "RLDRAM3" : "??PROTO??")))), + FULL_RATE ? "FR" : (HALF_RATE ? "HR" : (QUARTER_RATE ? "QR" : "??RATE??")), + (long unsigned int)RW_MGR_MEM_NUMBER_OF_RANKS, + (long unsigned int)RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM, + (long unsigned int)RW_MGR_MEM_DQ_PER_READ_DQS, + (long unsigned int)RW_MGR_MEM_DQ_PER_WRITE_DQS, + (long unsigned int)RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS, + (long unsigned int)RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS, + (long unsigned int)RW_MGR_MEM_IF_READ_DQS_WIDTH, + (long unsigned int)RW_MGR_MEM_IF_WRITE_DQS_WIDTH, + (long unsigned int)RW_MGR_MEM_DATA_WIDTH, + (long unsigned int)RW_MGR_MEM_DATA_MASK_WIDTH, + (long unsigned int)IO_DELAY_PER_OPA_TAP, (long unsigned int)IO_DELAY_PER_DCHAIN_TAP, + (long unsigned int)IO_DELAY_PER_DQS_EN_DCHAIN_TAP, + (long unsigned int)IO_DLL_CHAIN_LENGTH); + DPRINT(1, + "max values: en_p=%lu dqdqs_p=%lu en_d=%lu dqs_in_d=%lu io_in_d=%lu io_out1_d=%lu io_out2_d=%lu" + "dqs_in_reserve=%lu dqs_out_reserve=%lu", (long unsigned int)IO_DQS_EN_PHASE_MAX, + (long unsigned int)IO_DQDQS_OUT_PHASE_MAX, (long unsigned int)IO_DQS_EN_DELAY_MAX, + (long unsigned int)IO_DQS_IN_DELAY_MAX, (long unsigned int)IO_IO_IN_DELAY_MAX, + (long unsigned int)IO_IO_OUT1_DELAY_MAX, (long unsigned int)IO_IO_OUT2_DELAY_MAX, + (long unsigned int)IO_DQS_IN_RESERVE, (long unsigned int)IO_DQS_OUT_RESERVE); + + hc_initialize_rom_data(); + + //USER update info for sims reg_file_set_stage(CAL_STAGE_NIL); reg_file_set_group(0); - /* Load global needed for those actions that require */ - /* some dynamic calibration support */ + // Load global needed for those actions that require + // some dynamic calibration support dyn_calib_steps = STATIC_CALIB_STEPS; - /* Load global to allow dynamic selection of delay loop settings */ - /* based on calibration mode */ - if (!(dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS)) { + // Load global to allow dynamic selection of delay loop settings + // based on calibration mode + if (!((DYNAMIC_CALIB_STEPS) & CALIB_SKIP_DELAY_LOOPS)) { skip_delay_mask = 0xff; } else { skip_delay_mask = 0x0; } - pass = run_mem_calibrate (); +#ifdef TEST_SIZE + if (!check_test_mem(1)) { + IOWR_32DIRECT(PHY_MGR_CAL_DEBUG_INFO, 0, 0x9090); + IOWR_32DIRECT(PHY_MGR_CAL_STATUS, 0, 
PHY_MGR_CAL_FAIL); + } + write_test_mem(); + if (!check_test_mem(0)) { + IOWR_32DIRECT(PHY_MGR_CAL_DEBUG_INFO, 0, 0x9191); + IOWR_32DIRECT(PHY_MGR_CAL_STATUS, 0, PHY_MGR_CAL_FAIL); + } +#endif + + pass = run_mem_calibrate(); - pr_debug("Calibration complete\n"); - /* Send the end of transmission character */ - pr_debug("%c\n", 0x4); + // EMPTY - return pass == 0 ? -EINVAL : 0; + return pass; } diff --git a/arch/arm/mach-socfpga/include/mach/sequencer.h b/arch/arm/mach-socfpga/include/mach/sequencer.h index 7c9157a257..8676b4efdd 100644 --- a/arch/arm/mach-socfpga/include/mach/sequencer.h +++ b/arch/arm/mach-socfpga/include/mach/sequencer.h @@ -2,70 +2,38 @@ #define _SEQUENCER_H_ /* - * Copyright Altera Corporation (C) 2012-2014. All rights reserved - * - * SPDX-License-Identifier: BSD-3-Clause - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Altera Corporation nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL ALTERA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#define MRS_MIRROR_PING_PONG_ATSO 0 -#define DYNAMIC_CALIBRATION_MODE 0 -#define STATIC_QUICK_CALIBRATION 0 -#define DISABLE_GUARANTEED_READ 0 -#define STATIC_SKIP_CALIBRATION 0 - -#if ENABLE_ASSERT -#define ERR_IE_TEXT "Internal Error: Sub-system: %s, File: %s, Line: %d\n%s%s" - -#define ALTERA_INTERNAL_ERROR(string) \ - {err_report_internal_error(string, "SEQ", __FILE__, __LINE__); \ - exit(-1); } - -#define ALTERA_ASSERT(condition) \ - if (!(condition)) {\ - ALTERA_INTERNAL_ERROR(#condition); } -#define ALTERA_INFO_ASSERT(condition, text) \ - if (!(condition)) {\ - ALTERA_INTERNAL_ERROR(text); } - -#else +* Copyright Altera Corporation (C) 2012-2014. All rights reserved +* +* SPDX-License-Identifier: BSD-3-Clause +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. 
+* * Neither the name of Altera Corporation nor the +* names of its contributors may be used to endorse or promote products +* derived from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL ALTERA CORPORATION BE LIABLE FOR ANY +* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ #define ALTERA_ASSERT(condition) -#define ALTERA_INFO_ASSERT(condition, text) - -#endif +#define ALTERA_INFO_ASSERT(condition,text) - -#if RLDRAMII -#define RW_MGR_NUM_DM_PER_WRITE_GROUP (1) -#define RW_MGR_NUM_TRUE_DM_PER_WRITE_GROUP (1) -#else -#define RW_MGR_NUM_DM_PER_WRITE_GROUP (RW_MGR_MEM_DATA_MASK_WIDTH \ - / RW_MGR_MEM_IF_WRITE_DQS_WIDTH) -#define RW_MGR_NUM_TRUE_DM_PER_WRITE_GROUP (RW_MGR_TRUE_MEM_DATA_MASK_WIDTH \ - / RW_MGR_MEM_IF_WRITE_DQS_WIDTH) -#endif +#define RW_MGR_NUM_DM_PER_WRITE_GROUP (RW_MGR_MEM_DATA_MASK_WIDTH / RW_MGR_MEM_IF_WRITE_DQS_WIDTH) +#define RW_MGR_NUM_TRUE_DM_PER_WRITE_GROUP (RW_MGR_TRUE_MEM_DATA_MASK_WIDTH / RW_MGR_MEM_IF_WRITE_DQS_WIDTH) #define RW_MGR_NUM_DQS_PER_WRITE_GROUP (RW_MGR_MEM_IF_READ_DQS_WIDTH / RW_MGR_MEM_IF_WRITE_DQS_WIDTH) #define NUM_RANKS_PER_SHADOW_REG (RW_MGR_MEM_NUMBER_OF_RANKS / NUM_SHADOW_REGS) @@ -75,11 +43,9 @@ #define RW_MGR_DI_BASE (BASE_RW_MGR + 0x0020) -#if DDR3 #define DDR3_MR1_ODT_MASK 0xFFFFFD99 #define DDR3_MR2_ODT_MASK 0xFFFFF9FF #define DDR3_AC_MIRR_MASK 0x020A8 -#endif /* DDR3 */ #define RW_MGR_LOAD_CNTR_0 BASE_RW_MGR + 0x0800 #define RW_MGR_LOAD_CNTR_1 BASE_RW_MGR + 0x0804 @@ -142,10 +108,7 @@ #define CAL_SUBSTAGE_REFRESH 1 #define MAX_RANKS (RW_MGR_MEM_NUMBER_OF_RANKS) -#define MAX_DQS (RW_MGR_MEM_IF_WRITE_DQS_WIDTH > \ - RW_MGR_MEM_IF_READ_DQS_WIDTH ? \ - RW_MGR_MEM_IF_WRITE_DQS_WIDTH : \ - RW_MGR_MEM_IF_READ_DQS_WIDTH) +#define MAX_DQS (RW_MGR_MEM_IF_WRITE_DQS_WIDTH > RW_MGR_MEM_IF_READ_DQS_WIDTH ? RW_MGR_MEM_IF_WRITE_DQS_WIDTH : RW_MGR_MEM_IF_READ_DQS_WIDTH) #define MAX_DQ (RW_MGR_MEM_DATA_WIDTH) #define MAX_DM (RW_MGR_MEM_DATA_MASK_WIDTH) @@ -158,19 +121,22 @@ * - The remaining words are part of the transfer. */ -#define BASE_PTR_MGR SEQUENCER_PTR_MGR_INST_BASE -#define BASE_PHY_MGR SDR_PHYGRP_PHYMGRGRP_ADDRESS -#define BASE_RW_MGR SDR_PHYGRP_RWMGRGRP_ADDRESS -#define BASE_DATA_MGR SDR_PHYGRP_DATAMGRGRP_ADDRESS -#define BASE_SCC_MGR SDR_PHYGRP_SCCGRP_ADDRESS -#define BASE_REG_FILE SDR_PHYGRP_REGFILEGRP_ADDRESS -#define BASE_TIMER SEQUENCER_TIMER_INST_BASE -#define BASE_MMR SDR_CTRLGRP_ADDRESS -#define BASE_TRK_MGR (0x000D0000) +/* Define the base address of each manager. */ + +/* MarkW: how should these base addresses be done for A-V? 
*/ +#define BASE_PTR_MGR SEQUENCER_PTR_MGR_INST_BASE +#define BASE_PHY_MGR (0x00088000) +#define BASE_RW_MGR (0x00090000) +#define BASE_DATA_MGR (0x00098000) +#define BASE_SCC_MGR SEQUENCER_SCC_MGR_INST_BASE +#define BASE_REG_FILE SEQUENCER_REG_FILE_INST_BASE +#define BASE_TIMER SEQUENCER_TIMER_INST_BASE +#define BASE_MMR (0x000C0000) +#define BASE_TRK_MGR (0x000D0000) /* Register file addresses. */ -#define REG_FILE_SIGNATURE (BASE_REG_FILE + 0x0000) -#define REG_FILE_DEBUG_DATA_ADDR (BASE_REG_FILE + 0x0004) +#define REG_FILE_SIGNATURE (BASE_REG_FILE + 0x0000) +#define REG_FILE_DEBUG_DATA_ADDR (BASE_REG_FILE + 0x0004) #define REG_FILE_CUR_STAGE (BASE_REG_FILE + 0x0008) #define REG_FILE_FOM (BASE_REG_FILE + 0x000C) #define REG_FILE_FAILING_STAGE (BASE_REG_FILE + 0x0010) @@ -184,37 +150,28 @@ #define REG_FILE_TRK_RW_MGR_ADDR (BASE_REG_FILE + 0x002C) #define REG_FILE_TRK_READ_DQS_WIDTH (BASE_REG_FILE + 0x0030) #define REG_FILE_TRK_RFSH (BASE_REG_FILE + 0x0034) +#define CTRL_CONFIG_REG (BASE_MMR + 0x0000) /* PHY manager configuration registers. */ -#define PHY_MGR_PHY_RLAT (BASE_PHY_MGR + 0x40 + 0x00) -#define PHY_MGR_RESET_MEM_STBL (BASE_PHY_MGR + 0x40 + 0x04) -#define PHY_MGR_MUX_SEL (BASE_PHY_MGR + 0x40 + 0x08) -#define PHY_MGR_CAL_STATUS (BASE_PHY_MGR + 0x40 + 0x0c) -#define PHY_MGR_CAL_DEBUG_INFO (BASE_PHY_MGR + 0x40 + 0x10) -#define PHY_MGR_VFIFO_RD_EN_OVRD (BASE_PHY_MGR + 0x40 + 0x14) -#if CALIBRATE_BIT_SLIPS -#define PHY_MGR_FR_SHIFT (BASE_PHY_MGR + 0x40 + 0x20) -#if MULTIPLE_AFI_WLAT -#define PHY_MGR_AFI_WLAT (BASE_PHY_MGR + 0x40 + 0x20 + 4 * \ - RW_MGR_MEM_IF_WRITE_DQS_WIDTH) -#else -#define PHY_MGR_AFI_WLAT (BASE_PHY_MGR + 0x40 + 0x18) -#endif -#else -#define PHY_MGR_AFI_WLAT (BASE_PHY_MGR + 0x40 + 0x18) -#endif -#define PHY_MGR_AFI_RLAT (BASE_PHY_MGR + 0x40 + 0x1c) +#define PHY_MGR_PHY_RLAT (BASE_PHY_MGR + 0x4000) +#define PHY_MGR_RESET_MEM_STBL (BASE_PHY_MGR + 0x4004) +#define PHY_MGR_MUX_SEL (BASE_PHY_MGR + 0x4008) +#define PHY_MGR_CAL_STATUS (BASE_PHY_MGR + 0x400c) +#define PHY_MGR_CAL_DEBUG_INFO (BASE_PHY_MGR + 0x4010) +#define PHY_MGR_VFIFO_RD_EN_OVRD (BASE_PHY_MGR + 0x4014) +#define PHY_MGR_AFI_WLAT (BASE_PHY_MGR + 0x4018) +#define PHY_MGR_AFI_RLAT (BASE_PHY_MGR + 0x401c) -#define PHY_MGR_CAL_RESET (0) +#define PHY_MGR_CAL_RESET (0) #define PHY_MGR_CAL_SUCCESS (1) -#define PHY_MGR_CAL_FAIL (2) +#define PHY_MGR_CAL_FAIL (2) /* PHY manager command addresses. 
*/ #define PHY_MGR_CMD_INC_VFIFO_FR (BASE_PHY_MGR + 0x0000) #define PHY_MGR_CMD_INC_VFIFO_HR (BASE_PHY_MGR + 0x0004) -#define PHY_MGR_CMD_INC_VFIFO_HARD_PHY (BASE_PHY_MGR + 0x0004) +#define PHY_MGR_CMD_INC_VFIFO_HARD_PHY (BASE_PHY_MGR + 0x0004) #define PHY_MGR_CMD_FIFO_RESET (BASE_PHY_MGR + 0x0008) #define PHY_MGR_CMD_INC_VFIFO_FR_HR (BASE_PHY_MGR + 0x000C) #define PHY_MGR_CMD_INC_VFIFO_QR (BASE_PHY_MGR + 0x0010) @@ -227,25 +184,25 @@ #define PHY_MGR_CALIB_SKIP_STEPS (BASE_PHY_MGR + 0x000c) #define PHY_MGR_CALIB_VFIFO_OFFSET (BASE_PHY_MGR + 0x0010) #define PHY_MGR_CALIB_LFIFO_OFFSET (BASE_PHY_MGR + 0x0014) -#define PHY_MGR_RDIMM (BASE_PHY_MGR + 0x0018) -#define PHY_MGR_MEM_T_WL (BASE_PHY_MGR + 0x001c) -#define PHY_MGR_MEM_T_RL (BASE_PHY_MGR + 0x0020) +#define PHY_MGR_RDIMM (BASE_PHY_MGR + 0x0018) +#define PHY_MGR_MEM_T_WL (BASE_PHY_MGR + 0x001c) +#define PHY_MGR_MEM_T_RL (BASE_PHY_MGR + 0x0020) /* Data Manager */ -#define DATA_MGR_DRAM_CFG (BASE_DATA_MGR + 0x0000) -#define DATA_MGR_MEM_T_WL (BASE_DATA_MGR + 0x0004) -#define DATA_MGR_MEM_T_ADD (BASE_DATA_MGR + 0x0008) -#define DATA_MGR_MEM_T_RL (BASE_DATA_MGR + 0x000C) -#define DATA_MGR_MEM_T_RFC (BASE_DATA_MGR + 0x0010) -#define DATA_MGR_MEM_T_REFI (BASE_DATA_MGR + 0x0014) -#define DATA_MGR_MEM_T_WR (BASE_DATA_MGR + 0x0018) -#define DATA_MGR_MEM_T_MRD (BASE_DATA_MGR + 0x001C) -#define DATA_MGR_COL_WIDTH (BASE_DATA_MGR + 0x0020) -#define DATA_MGR_ROW_WIDTH (BASE_DATA_MGR + 0x0024) -#define DATA_MGR_BANK_WIDTH (BASE_DATA_MGR + 0x0028) -#define DATA_MGR_CS_WIDTH (BASE_DATA_MGR + 0x002C) -#define DATA_MGR_ITF_WIDTH (BASE_DATA_MGR + 0x0030) -#define DATA_MGR_DVC_WIDTH (BASE_DATA_MGR + 0x0034) +#define DATA_MGR_DRAM_CFG (BASE_DATA_MGR + 0x0000) +#define DATA_MGR_MEM_T_WL (BASE_DATA_MGR + 0x0004) +#define DATA_MGR_MEM_T_ADD (BASE_DATA_MGR + 0x0008) +#define DATA_MGR_MEM_T_RL (BASE_DATA_MGR + 0x000C) +#define DATA_MGR_MEM_T_RFC (BASE_DATA_MGR + 0x0010) +#define DATA_MGR_MEM_T_REFI (BASE_DATA_MGR + 0x0014) +#define DATA_MGR_MEM_T_WR (BASE_DATA_MGR + 0x0018) +#define DATA_MGR_MEM_T_MRD (BASE_DATA_MGR + 0x001C) +#define DATA_MGR_COL_WIDTH (BASE_DATA_MGR + 0x0020) +#define DATA_MGR_ROW_WIDTH (BASE_DATA_MGR + 0x0024) +#define DATA_MGR_BANK_WIDTH (BASE_DATA_MGR + 0x0028) +#define DATA_MGR_CS_WIDTH (BASE_DATA_MGR + 0x002C) +#define DATA_MGR_ITF_WIDTH (BASE_DATA_MGR + 0x0030) +#define DATA_MGR_DVC_WIDTH (BASE_DATA_MGR + 0x0034) #define MEM_T_WL_ADD DATA_MGR_MEM_T_WL #define MEM_T_RL_ADD DATA_MGR_MEM_T_RL @@ -253,114 +210,75 @@ #define CALIB_SKIP_DELAY_LOOPS (1 << 0) #define CALIB_SKIP_ALL_BITS_CHK (1 << 1) #define CALIB_SKIP_DELAY_SWEEPS (1 << 2) -#define CALIB_SKIP_VFIFO (1 << 3) -#define CALIB_SKIP_LFIFO (1 << 4) -#define CALIB_SKIP_WLEVEL (1 << 5) -#define CALIB_SKIP_WRITES (1 << 6) +#define CALIB_SKIP_VFIFO (1 << 3) +#define CALIB_SKIP_LFIFO (1 << 4) +#define CALIB_SKIP_WLEVEL (1 << 5) +#define CALIB_SKIP_WRITES (1 << 6) #define CALIB_SKIP_FULL_TEST (1 << 7) -#define CALIB_SKIP_ALL (CALIB_SKIP_VFIFO | \ - CALIB_SKIP_LFIFO | CALIB_SKIP_WLEVEL | \ - CALIB_SKIP_WRITES | CALIB_SKIP_FULL_TEST) +#define CALIB_SKIP_ALL (CALIB_SKIP_VFIFO | CALIB_SKIP_LFIFO | CALIB_SKIP_WLEVEL | CALIB_SKIP_WRITES | CALIB_SKIP_FULL_TEST) #define CALIB_IN_RTL_SIM (1 << 8) /* Scan chain manager command addresses */ -#define WRITE_SCC_DQS_IN_DELAY(group, delay) \ - IOWR_32DIRECT(SCC_MGR_DQS_IN_DELAY, (group) << 2, delay) -#define WRITE_SCC_DQS_EN_DELAY(group, delay) \ - IOWR_32DIRECT(SCC_MGR_DQS_EN_DELAY, (group) << 2, (delay) \ - + IO_DQS_EN_DELAY_OFFSET) -#define 
WRITE_SCC_DQS_EN_PHASE(group, phase) \ - IOWR_32DIRECT(SCC_MGR_DQS_EN_PHASE, (group) << 2, phase) -#define WRITE_SCC_DQDQS_OUT_PHASE(group, phase) \ - IOWR_32DIRECT(SCC_MGR_DQDQS_OUT_PHASE, (group) << 2, phase) -#define WRITE_SCC_OCT_OUT1_DELAY(group, delay) \ - IOWR_32DIRECT(SCC_MGR_OCT_OUT1_DELAY, (group) << 2, delay) +#define WRITE_SCC_DQS_IN_DELAY(group, delay) IOWR_32DIRECT(SCC_MGR_DQS_IN_DELAY, (group) << 2, delay) +#define WRITE_SCC_DQS_EN_DELAY(group, delay) IOWR_32DIRECT(SCC_MGR_DQS_EN_DELAY, (group) << 2, (delay) + IO_DQS_EN_DELAY_OFFSET) +#define WRITE_SCC_DQS_EN_PHASE(group, phase) IOWR_32DIRECT(SCC_MGR_DQS_EN_PHASE, (group) << 2, phase) +#define WRITE_SCC_DQDQS_OUT_PHASE(group, phase) IOWR_32DIRECT(SCC_MGR_DQDQS_OUT_PHASE, (group) << 2, phase) +#define WRITE_SCC_OCT_OUT1_DELAY(group, delay) IOWR_32DIRECT(SCC_MGR_OCT_OUT1_DELAY, (group) << 2, delay) #define WRITE_SCC_OCT_OUT2_DELAY(group, delay) #define WRITE_SCC_DQS_BYPASS(group, bypass) -#define WRITE_SCC_DQ_OUT1_DELAY(pin, delay) \ - IOWR_32DIRECT(SCC_MGR_IO_OUT1_DELAY, (pin) << 2, delay) +#define WRITE_SCC_DQ_OUT1_DELAY(pin, delay) IOWR_32DIRECT(SCC_MGR_IO_OUT1_DELAY, (pin) << 2, delay) #define WRITE_SCC_DQ_OUT2_DELAY(pin, delay) -#define WRITE_SCC_DQ_IN_DELAY(pin, delay) \ - IOWR_32DIRECT(SCC_MGR_IO_IN_DELAY, (pin) << 2, delay) +#define WRITE_SCC_DQ_IN_DELAY(pin, delay) IOWR_32DIRECT(SCC_MGR_IO_IN_DELAY, (pin) << 2, delay) #define WRITE_SCC_DQ_BYPASS(pin, bypass) #define WRITE_SCC_RFIFO_MODE(pin, mode) -#define WRITE_SCC_HHP_EXTRAS(value) \ - IOWR_32DIRECT(SCC_MGR_HHP_GLOBALS, SCC_MGR_HHP_EXTRAS_OFFSET, value) -#define WRITE_SCC_HHP_DQSE_MAP(value) \ - IOWR_32DIRECT(SCC_MGR_HHP_GLOBALS, SCC_MGR_HHP_DQSE_MAP_OFFSET, value) +#define WRITE_SCC_HHP_EXTRAS(value) IOWR_32DIRECT(SCC_MGR_HHP_GLOBALS, SCC_MGR_HHP_EXTRAS_OFFSET, value) +#define WRITE_SCC_HHP_DQSE_MAP(value) IOWR_32DIRECT(SCC_MGR_HHP_GLOBALS, SCC_MGR_HHP_DQSE_MAP_OFFSET, value) -#define WRITE_SCC_DQS_IO_OUT1_DELAY(delay) \ - IOWR_32DIRECT(SCC_MGR_IO_OUT1_DELAY, \ - (RW_MGR_MEM_DQ_PER_WRITE_DQS) << 2, delay) +#define WRITE_SCC_DQS_IO_OUT1_DELAY(delay) IOWR_32DIRECT(SCC_MGR_IO_OUT1_DELAY, (RW_MGR_MEM_DQ_PER_WRITE_DQS) << 2, delay) #define WRITE_SCC_DQS_IO_OUT2_DELAY(delay) -#define WRITE_SCC_DQS_IO_IN_DELAY(delay) \ - IOWR_32DIRECT(SCC_MGR_IO_IN_DELAY, \ - (RW_MGR_MEM_DQ_PER_WRITE_DQS) << 2, delay) +#define WRITE_SCC_DQS_IO_IN_DELAY(delay) IOWR_32DIRECT(SCC_MGR_IO_IN_DELAY, (RW_MGR_MEM_DQ_PER_WRITE_DQS) << 2, delay) -#define WRITE_SCC_DM_IO_OUT1_DELAY(pin, delay) \ - IOWR_32DIRECT(SCC_MGR_IO_OUT1_DELAY, \ - (RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + pin) << 2, delay) +#define WRITE_SCC_DM_IO_OUT1_DELAY(pin, delay) IOWR_32DIRECT(SCC_MGR_IO_OUT1_DELAY, (RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + pin) << 2, delay) #define WRITE_SCC_DM_IO_OUT2_DELAY(pin, delay) -#define WRITE_SCC_DM_IO_IN_DELAY(pin, delay) \ - IOWR_32DIRECT(SCC_MGR_IO_IN_DELAY, \ - (RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + pin) << 2, delay) +#define WRITE_SCC_DM_IO_IN_DELAY(pin, delay) IOWR_32DIRECT(SCC_MGR_IO_IN_DELAY, (RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + pin) << 2, delay) #define WRITE_SCC_DM_BYPASS(pin, bypass) -#define READ_SCC_DQS_IN_DELAY(group) \ - IORD_32DIRECT(SCC_MGR_DQS_IN_DELAY, (group) << 2) -#define READ_SCC_DQS_EN_DELAY(group) \ - (IORD_32DIRECT(SCC_MGR_DQS_EN_DELAY, (group) << 2) \ - - IO_DQS_EN_DELAY_OFFSET) -#define READ_SCC_DQS_EN_PHASE(group) \ - IORD_32DIRECT(SCC_MGR_DQS_EN_PHASE, (group) << 2) -#define READ_SCC_DQDQS_OUT_PHASE(group) \ - IORD_32DIRECT(SCC_MGR_DQDQS_OUT_PHASE, (group) << 2) -#define 
READ_SCC_OCT_OUT1_DELAY(group) \ - IORD_32DIRECT(SCC_MGR_OCT_OUT1_DELAY, \ - (group * RW_MGR_MEM_IF_READ_DQS_WIDTH / \ - RW_MGR_MEM_IF_WRITE_DQS_WIDTH) << 2) +#define READ_SCC_DQS_IN_DELAY(group) IORD_32DIRECT(SCC_MGR_DQS_IN_DELAY, (group) << 2) +#define READ_SCC_DQS_EN_DELAY(group) (IORD_32DIRECT(SCC_MGR_DQS_EN_DELAY, (group) << 2) - IO_DQS_EN_DELAY_OFFSET) +#define READ_SCC_DQS_EN_PHASE(group) IORD_32DIRECT(SCC_MGR_DQS_EN_PHASE, (group) << 2) +#define READ_SCC_DQDQS_OUT_PHASE(group) IORD_32DIRECT(SCC_MGR_DQDQS_OUT_PHASE, (group) << 2) +#define READ_SCC_OCT_OUT1_DELAY(group) IORD_32DIRECT(SCC_MGR_OCT_OUT1_DELAY, (group * RW_MGR_MEM_IF_READ_DQS_WIDTH / RW_MGR_MEM_IF_WRITE_DQS_WIDTH) << 2) #define READ_SCC_OCT_OUT2_DELAY(group) 0 #define READ_SCC_DQS_BYPASS(group) 0 #define READ_SCC_DQS_BYPASS(group) 0 -#define READ_SCC_DQ_OUT1_DELAY(pin) \ - IORD_32DIRECT(SCC_MGR_IO_OUT1_DELAY, (pin) << 2) +#define READ_SCC_DQ_OUT1_DELAY(pin) IORD_32DIRECT(SCC_MGR_IO_OUT1_DELAY, (pin) << 2) #define READ_SCC_DQ_OUT2_DELAY(pin) 0 -#define READ_SCC_DQ_IN_DELAY(pin) \ - IORD_32DIRECT(SCC_MGR_IO_IN_DELAY, (pin) << 2) +#define READ_SCC_DQ_IN_DELAY(pin) IORD_32DIRECT(SCC_MGR_IO_IN_DELAY, (pin) << 2) #define READ_SCC_DQ_BYPASS(pin) 0 #define READ_SCC_RFIFO_MODE(pin) 0 -#define READ_SCC_DQS_IO_OUT1_DELAY() \ - IORD_32DIRECT(SCC_MGR_IO_OUT1_DELAY, \ - (RW_MGR_MEM_DQ_PER_WRITE_DQS) << 2) +#define READ_SCC_DQS_IO_OUT1_DELAY() IORD_32DIRECT(SCC_MGR_IO_OUT1_DELAY, (RW_MGR_MEM_DQ_PER_WRITE_DQS) << 2) #define READ_SCC_DQS_IO_OUT2_DELAY() 0 -#define READ_SCC_DQS_IO_IN_DELAY() \ - IORD_32DIRECT(SCC_MGR_IO_IN_DELAY, \ - (RW_MGR_MEM_DQ_PER_WRITE_DQS) << 2) +#define READ_SCC_DQS_IO_IN_DELAY() IORD_32DIRECT(SCC_MGR_IO_IN_DELAY, (RW_MGR_MEM_DQ_PER_WRITE_DQS) << 2) -#define READ_SCC_DM_IO_OUT1_DELAY(pin) \ - IORD_32DIRECT(SCC_MGR_IO_OUT1_DELAY, \ - (RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + pin) << 2) +#define READ_SCC_DM_IO_OUT1_DELAY(pin) IORD_32DIRECT(SCC_MGR_IO_OUT1_DELAY, (RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + pin) << 2) #define READ_SCC_DM_IO_OUT2_DELAY(pin) 0 -#define READ_SCC_DM_IO_IN_DELAY(pin) \ - IORD_32DIRECT(SCC_MGR_IO_IN_DELAY, \ - (RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + pin) << 2) +#define READ_SCC_DM_IO_IN_DELAY(pin) IORD_32DIRECT(SCC_MGR_IO_IN_DELAY, (RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + pin) << 2) #define READ_SCC_DM_BYPASS(pin) 0 - #define SCC_MGR_GROUP_COUNTER (BASE_SCC_MGR + 0x0000) #define SCC_MGR_DQS_IN_DELAY (BASE_SCC_MGR + 0x0100) #define SCC_MGR_DQS_EN_PHASE (BASE_SCC_MGR + 0x0200) @@ -368,28 +286,27 @@ #define SCC_MGR_DQDQS_OUT_PHASE (BASE_SCC_MGR + 0x0400) #define SCC_MGR_OCT_OUT1_DELAY (BASE_SCC_MGR + 0x0500) #define SCC_MGR_IO_OUT1_DELAY (BASE_SCC_MGR + 0x0700) -#define SCC_MGR_IO_IN_DELAY (BASE_SCC_MGR + 0x0900) - +#define SCC_MGR_IO_IN_DELAY (BASE_SCC_MGR + 0x0900) /* HHP-HPS-specific versions of some commands */ #define SCC_MGR_DQS_EN_DELAY_GATE (BASE_SCC_MGR + 0x0600) #define SCC_MGR_IO_OE_DELAY (BASE_SCC_MGR + 0x0800) -#define SCC_MGR_HHP_GLOBALS (BASE_SCC_MGR + 0x0A00) -#define SCC_MGR_HHP_RFILE (BASE_SCC_MGR + 0x0B00) +#define SCC_MGR_HHP_GLOBALS (BASE_SCC_MGR + 0x0A00) +#define SCC_MGR_HHP_RFILE (BASE_SCC_MGR + 0x0B00) /* HHP-HPS-specific values */ #define SCC_MGR_HHP_EXTRAS_OFFSET 0 #define SCC_MGR_HHP_DQSE_MAP_OFFSET 1 -#define SCC_MGR_DQS_ENA (BASE_SCC_MGR + 0x0E00) -#define SCC_MGR_DQS_IO_ENA (BASE_SCC_MGR + 0x0E04) -#define SCC_MGR_DQ_ENA (BASE_SCC_MGR + 0x0E08) -#define SCC_MGR_DM_ENA (BASE_SCC_MGR + 0x0E0C) -#define SCC_MGR_UPD (BASE_SCC_MGR + 0x0E20) -#define SCC_MGR_ACTIVE_RANK (BASE_SCC_MGR + 0x0E40) 
+#define SCC_MGR_DQS_ENA (BASE_SCC_MGR + 0x0E00) +#define SCC_MGR_DQS_IO_ENA (BASE_SCC_MGR + 0x0E04) +#define SCC_MGR_DQ_ENA (BASE_SCC_MGR + 0x0E08) +#define SCC_MGR_DM_ENA (BASE_SCC_MGR + 0x0E0C) +#define SCC_MGR_UPD (BASE_SCC_MGR + 0x0E20) +#define SCC_MGR_ACTIVE_RANK (BASE_SCC_MGR + 0x0E40) #define SCC_MGR_AFI_CAL_INIT (BASE_SCC_MGR + 0x0D00) -/* PHY Debug mode flag constants */ +// PHY Debug mode flag constants #define PHY_DEBUG_IN_DEBUG_MODE 0x00000001 #define PHY_DEBUG_ENABLE_CAL_RPT 0x00000002 #define PHY_DEBUG_ENABLE_MARGIN_RPT 0x00000004 @@ -397,46 +314,44 @@ #define PHY_DEBUG_DISABLE_GUARANTEED_READ 0x00000010 #define PHY_DEBUG_ENABLE_NON_DESTRUCTIVE_CALIBRATION 0x00000020 -/* Init and Reset delay constants - Only use if defined by sequencer_defines.h, - * otherwise, revert to defaults - * Default for Tinit = (0+1) * ((202+1) * (2 * 131 + 1) + 1) = 53532 = 200.75us @ 266MHz - */ +// Init and Reset delay constants - Only use if defined by sequencer_defines.h, +// otherwise, revert to defaults +// Default for Tinit = (0+1) * ((202+1) * (2 * 131 + 1) + 1) = 53532 = 200.75us @ 266MHz #ifdef TINIT_CNTR0_VAL - #define SEQ_TINIT_CNTR0_VAL TINIT_CNTR0_VAL +#define SEQ_TINIT_CNTR0_VAL TINIT_CNTR0_VAL #else - #define SEQ_TINIT_CNTR0_VAL 0 +#define SEQ_TINIT_CNTR0_VAL 0 #endif #ifdef TINIT_CNTR1_VAL - #define SEQ_TINIT_CNTR1_VAL TINIT_CNTR1_VAL +#define SEQ_TINIT_CNTR1_VAL TINIT_CNTR1_VAL #else - #define SEQ_TINIT_CNTR1_VAL 202 +#define SEQ_TINIT_CNTR1_VAL 202 #endif #ifdef TINIT_CNTR2_VAL - #define SEQ_TINIT_CNTR2_VAL TINIT_CNTR2_VAL +#define SEQ_TINIT_CNTR2_VAL TINIT_CNTR2_VAL #else - #define SEQ_TINIT_CNTR2_VAL 131 +#define SEQ_TINIT_CNTR2_VAL 131 #endif - -/* Default for Treset = (2+1) * ((252+1) * (2 * 131 + 1) + 1) = 133563 = 500.86us @ 266MHz */ +// Default for Treset = (2+1) * ((252+1) * (2 * 131 + 1) + 1) = 133563 = 500.86us @ 266MHz #ifdef TRESET_CNTR0_VAL - #define SEQ_TRESET_CNTR0_VAL TRESET_CNTR0_VAL +#define SEQ_TRESET_CNTR0_VAL TRESET_CNTR0_VAL #else - #define SEQ_TRESET_CNTR0_VAL 2 +#define SEQ_TRESET_CNTR0_VAL 2 #endif #ifdef TRESET_CNTR1_VAL - #define SEQ_TRESET_CNTR1_VAL TRESET_CNTR1_VAL +#define SEQ_TRESET_CNTR1_VAL TRESET_CNTR1_VAL #else - #define SEQ_TRESET_CNTR1_VAL 252 +#define SEQ_TRESET_CNTR1_VAL 252 #endif #ifdef TRESET_CNTR2_VAL - #define SEQ_TRESET_CNTR2_VAL TRESET_CNTR2_VAL +#define SEQ_TRESET_CNTR2_VAL TRESET_CNTR2_VAL #else - #define SEQ_TRESET_CNTR2_VAL 131 +#define SEQ_TRESET_CNTR2_VAL 131 #endif /* Bitfield type changes depending on protocol */ @@ -445,18 +360,32 @@ typedef uint32_t t_btfld; #define RW_MGR_INST_ROM_WRITE BASE_RW_MGR + 0x1800 #define RW_MGR_AC_ROM_WRITE BASE_RW_MGR + 0x1C00 +static const uint32_t inst_rom_init_size; +static const uint32_t inst_rom_init[]; +static const uint32_t ac_rom_init_size; +static const uint32_t ac_rom_init[]; + /* parameter variable holder */ typedef struct param_type { + t_btfld dm_correct_mask; t_btfld read_correct_mask; t_btfld read_correct_mask_vg; t_btfld write_correct_mask; t_btfld write_correct_mask_vg; + + /* set a particular entry to 1 if we need to skip a particular rank */ + uint32_t skip_ranks[MAX_RANKS]; + + /* set a particular entry to 1 if we need to skip a particular group */ + uint32_t skip_groups; + + /* set a particular entry to 1 if the shadow register (which represents a set of ranks) needs to be skipped */ + uint32_t skip_shadow_regs[NUM_SHADOW_REGS]; - /* set a particular entry to 1 if we need to skip a particular group */ } param_t; /* global variable holder */ @@ -484,11 +413,41 @@ typedef 
struct gbl_type { uint32_t fom_in; uint32_t fom_out; - /*USER Number of RW Mgr NOP cycles between - write command and write data */ -#if MULTIPLE_AFI_WLAT - uint32_t rw_wl_nop_cycles_per_group[RW_MGR_MEM_IF_WRITE_DQS_WIDTH]; -#endif + //USER Number of RW Mgr NOP cycles between write command and write data uint32_t rw_wl_nop_cycles; } gbl_t; + +// External global variables +static gbl_t *gbl; +static param_t *param; + +// External functions +static uint32_t rw_mgr_mem_calibrate_full_test(uint32_t min_correct, t_btfld * bit_chk, + uint32_t test_dm); +static uint32_t run_mem_calibrate(void); +static void rw_mgr_mem_calibrate_eye_diag_aid(void); +static void rw_mgr_load_mrs_calib(void); +static void rw_mgr_load_mrs_exec(void); +static void rw_mgr_mem_initialize(void); +static void rw_mgr_mem_dll_lock_wait(void); +static inline void scc_mgr_set_dq_in_delay(uint32_t write_group, uint32_t dq_in_group, + uint32_t delay); +static inline void scc_mgr_set_dq_out1_delay(uint32_t write_group, uint32_t dq_in_group, + uint32_t delay); +static inline void scc_mgr_set_dq_out2_delay(uint32_t write_group, uint32_t dq_in_group, + uint32_t delay); +static inline void scc_mgr_load_dq(uint32_t dq_in_group); +static inline void scc_mgr_set_dqs_bus_in_delay(uint32_t read_group, uint32_t delay); +static inline void scc_mgr_load_dqs(uint32_t dqs); +static void scc_mgr_set_group_dqs_io_and_oct_out1_gradual(uint32_t write_group, uint32_t delay); +static void scc_mgr_set_group_dqs_io_and_oct_out2_gradual(uint32_t write_group, uint32_t delay); +static void scc_mgr_set_dqs_en_delay_all_ranks(uint32_t read_group, uint32_t delay); +static void scc_mgr_set_dqs_en_phase_all_ranks(uint32_t read_group, uint32_t phase); +static void scc_mgr_set_dqdqs_output_phase_all_ranks(uint32_t write_group, uint32_t phase); +static inline void scc_mgr_set_dm_out1_delay(uint32_t write_group, uint32_t dm, uint32_t delay); +static inline void scc_mgr_set_dm_out2_delay(uint32_t write_group, uint32_t dm, uint32_t delay); +static inline void scc_mgr_load_dm(uint32_t dm); +static void rw_mgr_incr_vfifo_auto(uint32_t grp); +static void rw_mgr_decr_vfifo_auto(uint32_t grp); +static int sdram_calibration(void); #endif diff --git a/arch/arm/mach-socfpga/include/mach/sequencer_defines.h b/arch/arm/mach-socfpga/include/mach/sequencer_defines.h new file mode 100644 index 0000000000..5059844106 --- /dev/null +++ b/arch/arm/mach-socfpga/include/mach/sequencer_defines.h @@ -0,0 +1,6 @@ +#define TINIT_CNTR1_VAL 32 +#define TINIT_CNTR2_VAL 32 +#define TINIT_CNTR0_VAL 99 +#define TRESET_CNTR1_VAL 99 +#define TRESET_CNTR2_VAL 10 +#define TRESET_CNTR0_VAL 99 diff --git a/arch/arm/mach-socfpga/include/mach/system.h b/arch/arm/mach-socfpga/include/mach/system.h new file mode 100755 index 0000000000..89527b2c2b --- /dev/null +++ b/arch/arm/mach-socfpga/include/mach/system.h @@ -0,0 +1,37 @@ +/* +* Copyright Altera Corporation (C) 2012-2014. All rights reserved +* +* SPDX-License-Identifier: BSD-3-Clause +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. 
+* * Neither the name of Altera Corporation nor the +* names of its contributors may be used to endorse or promote products +* derived from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL ALTERA CORPORATION BE LIABLE FOR ANY +* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#define SEQUENCER_DATA_MGR_INST_BASE 0x60000 +#define SEQUENCER_PHY_MGR_INST_BASE 0x48000 +#define SEQUENCER_PTR_MGR_INST_BASE 0x40000 +#define SEQUENCER_RAM_BASE 0x20000 +#define SEQUENCER_ROM_BASE 0x10000 +#define SEQUENCER_RW_MGR_INST_BASE 0x50000 +#define SEQUENCER_SCC_MGR_INST_BASE 0x58000 +#define SEQUENCER_REG_FILE_INST_BASE 0x70000 +#define SEQUENCER_TIMER_INST_BASE 0x78000 diff --git a/arch/arm/mach-socfpga/include/mach/tclrpt.h b/arch/arm/mach-socfpga/include/mach/tclrpt.h new file mode 100755 index 0000000000..4345b23ba6 --- /dev/null +++ b/arch/arm/mach-socfpga/include/mach/tclrpt.h @@ -0,0 +1,38 @@ +#ifndef TCLRPT_H_ +#define TCLRPT_H_ +/* +* Copyright Altera Corporation (C) 2012-2014. All rights reserved +* +* SPDX-License-Identifier: BSD-3-Clause +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* * Neither the name of Altera Corporation nor the +* names of its contributors may be used to endorse or promote products +* derived from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL ALTERA CORPORATION BE LIABLE FOR ANY +* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "sequencer.h" + +#define TCLRPT_SET(item, value) + +// None of the rest of the file should be referenced if ENABLE_TCL_DEBUG is not +// set (although it's not a problem if it is, but this helps catch errors) + +#endif -- cgit v1.2.3