Monday, August 16, 2010

Write leveling - in more detail

Inconsistencies in DDR3 specifications (JEDEC & Micron):

Under the topic of 'Write Leveling Procedure':
After tWLMRD and a DQS LOW preamble (tWPRE) have been satisfied...

In the figure below there is no reference to tWPRE; the spot where tWPRE should appear is labelled tDQSL instead. tWPRE is defined as 0.9 clocks, whereas tDQSL is defined as 0.45 clocks. Elsewhere tWPRE is drawn as covering both the high and the low phase of DQS (making a whole clock), yet the write leveling text implies that it is the low phase only.

Also for tWLS and tWLH, look at the JEDEC waveform. It's obvious what they mean, but it's shown very badly in the Micron figure.

-----

It is very unclear what tWLOE is meant to do. According to the JEDEC specification a DDR3 chip may choose to drive write leveling on all DQs and not just the prime DQs. This doesn't explain why I care about early or late DQs... I don't know. Perhaps I'll realize this later.

-----

Here is a version that uses wl.v - a module that takes care of write leveling. I am now detecting the prime DQ of each DQS separately, and I am incrementing each DQS delay individually. Note that this is still very primitive! Note also that there are two clock domains here: the 200 MHz IODELAY reference clock and the 300 MHz DDR3 clock. Other than some sampling and delta compares, the code is very straightforward.

wl.v is new, and top.v is slightly modified. Of course the SW7 button is no longer used. It's pretty cool watching this in Chipscope.

wl.v:

//clock is currently 200 MHz (5ns period)
// wl - write-leveling sequencer.
//
// Two clock domains are involved:
//   * iodelay_clk runs the control state machine. It decides which lanes get
//     a delay-increment pulse (wl_ce) and when a DQS strobe is requested.
//   * ddr3_clk runs a two-state machine that turns every strobe request into
//     a one-cycle pulse on wl_dqs_out.
// A strobe request crosses from iodelay_clk to ddr3_clk as a toggle of
// wl_dqs_go[0]; wl_dqs_go[3:1] is the shift register on the ddr3_clk side and
// a difference between bits 3 and 2 marks a new request.
//
// Sequence (per run):
//   phase 0: while any sampled prime DQ is 1, pulse wl_ce on exactly those
//            lanes, wait, strobe, wait, re-check;
//   phase 1: once all are 0, pulse wl_ce on the lanes that still read 0,
//            wait, strobe, wait, re-check until all lanes read 1;
//   DONE:    toggle `done` and return to IDLE.
//
// Ports:
//   iodelay_clk / iodelay_rst - clock and async active-high reset of the control FSM
//   wl_ce           - per-lane increment pulses, one iodelay_clk cycle wide
//   start           - a run is launched on every edge (toggle) of this input
//   done            - toggles once each time a run finishes
//   ddr3_clk / ddr3_rst - clock and async active-high reset of the strobe FSM
//   wl_dqs_out      - all eight bits go high for one ddr3_clk cycle per strobe
//   wl_dq_primes_in - one feedback bit per lane, sampled on iodelay_clk
module wl(
    input iodelay_clk,
    input iodelay_rst,
    output reg [7:0] wl_ce,

    input start,
    output reg done,

    input ddr3_clk,
    input ddr3_rst,
    output reg [7:0] wl_dqs_out,

    input [7:0] wl_dq_primes_in
);

    // ------------------------------------------------------------------
    // ddr3_clk domain: strobe generator
    // ------------------------------------------------------------------
    reg [31:0] dqs_state;
    reg [31:0] dqs_next_state;
    localparam DQS_LOW = (0);
    localparam DQS_HIGH = (DQS_LOW + 1);

    // Bit 0 is written in the iodelay_clk domain (see below); bits 3:1 are the
    // ddr3_clk-side shift register used to synchronize and edge-detect it.
    reg [3:0] wl_dqs_go;
    always @ (posedge ddr3_clk) begin
        wl_dqs_go[3:1] <= wl_dqs_go[2:0];
    end

    always @ (posedge ddr3_clk or posedge ddr3_rst) begin
        if(ddr3_rst) begin
            dqs_state <= DQS_LOW;
            wl_dqs_out <= 0;
        end else begin
            dqs_state <= dqs_next_state;
            // Default low; overridden for the single cycle spent in DQS_HIGH.
            wl_dqs_out <= 0;
            case (dqs_state)
                DQS_HIGH: begin
                    wl_dqs_out <= {8{1'b1}};
                end
            endcase
        end
    end

    always @ (*) begin
        dqs_next_state = DQS_LOW;
        case (dqs_state)
            DQS_LOW: begin
                dqs_next_state = DQS_LOW;
                // A toggle has just passed through the shift register.
                if(wl_dqs_go[3] ^ wl_dqs_go[2])
                    dqs_next_state = DQS_HIGH;
            end
            DQS_HIGH: begin
                dqs_next_state = DQS_LOW;
            end
        endcase
    end

    // ------------------------------------------------------------------
    // iodelay_clk domain: leveling control
    // ------------------------------------------------------------------
    reg [31:0] state;
    reg [31:0] next_state;
    localparam IDLE = (0);
    localparam CHECK_0 = (IDLE + 1);
    localparam INCREMENT_0 = (CHECK_0 + 1);
    localparam WAIT_INCREMENT_0 = (INCREMENT_0 + 1);
    localparam GO_0 = (WAIT_INCREMENT_0 + 1);
    localparam WAIT_GO_0 = (GO_0 + 1);
    localparam CHECK_1 = (WAIT_GO_0 + 1);
    localparam INCREMENT_1 = (CHECK_1 + 1);
    localparam WAIT_INCREMENT_1 = (INCREMENT_1 + 1);
    localparam GO_1 = (WAIT_INCREMENT_1 + 1);
    localparam WAIT_GO_1 = (GO_1 + 1);
    localparam DONE = (WAIT_GO_1 + 1);

    reg [7:0] wl_dq_primes;

    // One-hot rotating timers: loading 1 puts the hot bit at position 0 and
    // bit 9 is reached nine rotations later, giving a ten-cycle WAIT state.
    reg [9:0] wait_0;
    reg [9:0] wait_1;

    reg [2:0] start_in;

    always @ (posedge iodelay_clk or posedge iodelay_rst) begin
        if(iodelay_rst) begin
            state <= IDLE;
            wl_dq_primes <= 0;
            wait_0 <= 10'b1;
            wait_1 <= 10'b1;
            wl_ce <= 0;
            wl_dqs_go[0] <= 0;
            done <= 0;
            start_in <= 0;
        end else begin
            state <= next_state;
            //sampled externally, but reading only after 10
            //clocks, is stable by then.
            wl_dq_primes <= wl_dq_primes_in;
            wait_0 <= {wait_0[8:0], wait_0[9]};
            wait_1 <= {wait_1[8:0], wait_1[9]};
            wl_ce <= 0;
            start_in <= {start_in[1:0], start};
            case (state)
                INCREMENT_0: begin
                    // Phase 0: bump only the lanes that still read 1.
                    wl_ce <= wl_dq_primes;
                    wait_0 <= 1;
                end
                GO_0: begin
                    wl_dqs_go[0] <= ~wl_dqs_go[0];
                    wait_0 <= 1;
                end
                INCREMENT_1: begin
                    // Phase 1: bump only the lanes that still read 0.
                    wl_ce <= ~wl_dq_primes;
                    wait_1 <= 1;
                end
                GO_1: begin
                    wl_dqs_go[0] <= ~wl_dqs_go[0];
                    wait_1 <= 1;
                end
                DONE: begin
                    done <= ~done;
                end
            endcase
        end
    end

    always @ (*) begin
        next_state = IDLE;
        case (state)
            IDLE: begin
                next_state = IDLE;
                if(start_in[2] ^ start_in[1])
                    next_state = CHECK_0;
            end
            CHECK_0: begin
                // All lanes read 0: phase 0 is finished, start phase 1.
                if(wl_dq_primes == {8{1'b0}})
                    next_state = INCREMENT_1;
                else
                    next_state = INCREMENT_0;
            end
            INCREMENT_0: begin
                next_state = WAIT_INCREMENT_0;
            end
            WAIT_INCREMENT_0: begin
                next_state = WAIT_INCREMENT_0;
                if(wait_0[9])
                    next_state = GO_0;
            end
            GO_0: begin
                next_state = WAIT_GO_0;
            end
            WAIT_GO_0: begin
                next_state = WAIT_GO_0;
                if(wait_0[9])
                    next_state = CHECK_0;
            end
            CHECK_1: begin
                if(wl_dq_primes == {8{1'b1}})
                    next_state = DONE;
                else
                    next_state = INCREMENT_1;
            end
            INCREMENT_1: begin
                next_state = WAIT_INCREMENT_1;
            end
            WAIT_INCREMENT_1: begin
                next_state = WAIT_INCREMENT_1;
                if(wait_1[9])
                    next_state = GO_1;
            end
            GO_1: begin
                next_state = WAIT_GO_1;
            end
            WAIT_GO_1: begin
                next_state = WAIT_GO_1;
                // Must watch wait_1, the timer GO_1 just re-armed. wait_0 is
                // free-running in phase 1, so testing it could end this wait
                // after as little as one cycle, before the strobe response
                // has been sampled.
                if(wait_1[9])
                    next_state = CHECK_1;
            end
            DONE: begin
                next_state = IDLE;
            end
        endcase
    end

endmodule

top.v:
`timescale 1ns / 1ps
// my_debug - 1024-bit pass-through.
// Copies d to q unchanged; presumably exists only to give debug signals a
// named sink (the instance in top leaves q unconnected).
module my_debug(
    input [1023:0] d,
    output [1023:0] q
);
    // Straight wire-through of the whole bus.
    assign q[1023:0] = d[1023:0];
endmodule
module top(
input SYSCLK_P, //200 MHz
input SYSCLK_N,
input CPU_RESET,

input SW7,

//DDR3 in use: SODIMM: MT4JSF6464HY-1G1B1, 4 Components: MT41J64M16BLA-187E
//Datasheets: jsf4c64x64hy.pdf (SODIMM), 1Gb_DDR3_SDRAM.pdf (Components)
output [1:0] DDR3_CLK_P,
output [1:0] DDR3_CLK_N,

output reg [1:0] DDR3_CKE,

output reg [1:0] DDR3_ODT,

output reg [1:0] DDR3_CS_N,

output reg DDR3_RESET_N,
input DDR3_TEMP_EVENT_N,

output reg DDR3_WE_N,
output reg DDR3_CAS_N,
output reg DDR3_RAS_N,

inout [7:0] DDR3_DQS_N,
inout [7:0] DDR3_DQS_P,

output [7:0] DDR3_DM,

inout [63:0] DDR3_DQ,

output reg [2:0] DDR3_BA,

output reg [15:0] DDR3_A //SODIMM in general has 16 address bits, this SODIMM only has 13... top 3 unused
);

//Current design:
//tCK = 3.3 ns
//CL = 5, CWL = 5

wire sysclk;
wire [7:0] dqs_out;
wire [7:0] dqs_in_p;
wire [7:0] dqs_in_n;
reg [7:0] dqs_tri_p;
reg [7:0] dqs_tri_n;
reg [63:0] dq_out;
(* S = "TRUE" *)
wire [63:0] dq_in;
reg [63:0] dq_tri;

wire clkout0;
wire clkout0_bufg_in;
BUFG clkout0_bufg_inst(
.I(clkout0_bufg_in),
.O(clkout0)
);

wire clkout1;
wire clkout1_bufg_in;
BUFG clkout1_bufg_inst(
.I(clkout1_bufg_in),
.O(clkout1)
);

wire reset;
assign reset = CPU_RESET | ~DDR3_TEMP_EVENT_N | ~sysclk_mmcm_inst_locked;

// Per-clock-domain reset synchronizers.
// `reset` asserts each two-bit shift register asynchronously (both bits set
// to 1); after `reset` drops, zeros are shifted in on the domain's own clock,
// so bit 1 - the *_reset_sync output - goes low two clock edges later.

// clkout0 domain (the clock that drives the DDR3 state machine).
reg [1:0] clkout0_reset_delay;
wire clkout0_reset_sync;
assign clkout0_reset_sync = clkout0_reset_delay[1];

always @ (posedge clkout0 or posedge reset) begin
if(reset) begin
clkout0_reset_delay <= {2{1'b1}};
end else begin
// `reset` is low whenever this branch runs, so this shifts in a 0.
clkout0_reset_delay <= {clkout0_reset_delay[0], reset};
end
end

// sysclk domain (the 200 MHz clock used by the IODELAY logic and wl).
reg [1:0] sysclk_reset_delay;
wire sysclk_reset_sync;
assign sysclk_reset_sync = sysclk_reset_delay[1];

always @ (posedge sysclk or posedge reset) begin
if(reset) begin
sysclk_reset_delay <= {2{1'b1}};
end else begin
// `reset` is low whenever this branch runs, so this shifts in a 0.
sysclk_reset_delay <= {sysclk_reset_delay[0], reset};
end
end

wire sw7_pushed;

sw_push_detect sw_push_detect_inst(
.clock(sysclk),
.reset(sysclk_reset_sync),

.SW(SW7),
.sw_pushed(sw7_pushed)
);

reg [31:0] state;
localparam WAIT_200US = (0);
localparam WAIT_500US = (WAIT_200US + 1);
localparam WAIT_110NS = (WAIT_500US + 1);
localparam ISSUE_MR2 = (WAIT_110NS + 1);
localparam WAIT_MR2 = (ISSUE_MR2 + 1);
localparam ISSUE_MR3 = (WAIT_MR2 + 1);
localparam WAIT_MR3 = (ISSUE_MR3 + 1);
localparam ISSUE_MR1 = (WAIT_MR3 + 1);
localparam WAIT_MR1 = (ISSUE_MR1 + 1);
localparam ISSUE_MR0 = (WAIT_MR1 + 1);
localparam WAIT_MR0 = (ISSUE_MR0 + 1);
localparam ISSUE_ZQCL = (WAIT_MR0 + 1);
localparam WAIT_ZQCL = (ISSUE_ZQCL + 1);
localparam ISSUE_MR1_WRITE_LEVELIZATION = (WAIT_ZQCL + 1);
localparam WAIT_MR1_WRITE_LEVELIZATION = (ISSUE_MR1_WRITE_LEVELIZATION + 1);
localparam TURN_ON_ODT = (WAIT_MR1_WRITE_LEVELIZATION + 1);
localparam WAIT_ODTL = (TURN_ON_ODT + 1);
localparam WAIT_TWLDQSEN = (WAIT_ODTL + 1);
localparam WAIT_TWLMRD = (WAIT_TWLDQSEN + 1);
localparam IDLE = (WAIT_TWLMRD + 1);

reg [18:0] count;

(* KEEP = "TRUE" *)
wire [4:0] cntvalueout [7:0];

wire [7:0] dqs_out_delay_ce;
reg start;

(* KEEP = "TRUE" *)
wire done;

wl wl_inst(
.iodelay_clk(sysclk),
.iodelay_rst(sysclk_reset_sync),
.wl_ce(dqs_out_delay_ce),

.start(start),
.done(done),

.ddr3_clk(clkout0),
.ddr3_rst(clkout0_reset_sync),
.wl_dqs_out(dqs_out),

.wl_dq_primes_in({dq_in[57], dq_in[50], dq_in[41], dq_in[38], dq_in[26], dq_in[16], dq_in[10], dq_in[4]})
);

// DDR3 bring-up sequencer (clkout0 domain, tCK = 3.3 ns).
//
// Walks through reset / CKE timing, the four mode-register writes, ZQCL, and
// then switches the DRAM into write-leveling mode. Once in IDLE it releases
// the DQS tri-states and toggles `start` every 256 clocks to launch wl runs.
//
// Every wait compares a single bit of `count`, so each delay is rounded up to
// the next power of two; the comments give the real number of clocks.
// Command pins default to NOP (CS_N high) every cycle and are overridden only
// in the ISSUE_* states.
always @ (posedge clkout0 or posedge clkout0_reset_sync) begin
    if(clkout0_reset_sync) begin
        DDR3_RESET_N <= 0;
        DDR3_CKE <= 2'h0;
        DDR3_ODT <= 2'h0;
        DDR3_CS_N <= 2'h3;
        DDR3_RAS_N <= 1;
        DDR3_CAS_N <= 1;
        DDR3_WE_N <= 1;
        DDR3_BA <= 3'h0;
        DDR3_A <= 0;
        count <= 0;
        state <= WAIT_200US;
        dqs_tri_p <= {8{1'b1}};
        dqs_tri_n <= {8{1'b1}};
        dq_out <= 0;
        dq_tri <= {64{1'b1}};
        start <= 0;
    end else begin
        // Per-cycle defaults; the case below overrides what it needs.
        count <= count + 1;
        DDR3_RESET_N <= 1;
        DDR3_CKE <= 2'h3;
        DDR3_CS_N <= 2'h3;
        DDR3_RAS_N <= 1;
        DDR3_CAS_N <= 1;
        DDR3_WE_N <= 1;
        DDR3_BA <= 3'h0;
        DDR3_A <= 0;
        DDR3_ODT <= 2'h0;
        dqs_tri_p <= {8{1'b1}};
        dqs_tri_n <= {8{1'b1}};
        case (state)
            WAIT_200US: begin //initialization step 2, 3 (see micron ddr3 datasheet)
                DDR3_RESET_N <= 0;
                DDR3_CKE <= 2'h0;
                if(count[16]) begin //needs 60000 clocks for 200us; waits 65536 (~218us)
                    count <= 0;
                    state <= WAIT_500US;
                end
            end
            WAIT_500US: begin //initialization step 4, 5 (see micron ddr3 datasheet)
                DDR3_CKE <= 2'h0;
                if(count[18]) begin //needs 150000 clocks for 500us; waits 262144 (~874us)
                    count <= 0;
                    state <= WAIT_110NS;
                end
            end
            WAIT_110NS: begin //initialization step 5, 6 (see micron ddr3 datasheet)
                if(count[6]) //needs 33 clocks for 110ns; waits 64
                    state <= ISSUE_MR2;
            end
            ISSUE_MR2: begin //initialization step 6 (see micron ddr3 datasheet)
                //The MIG disables dynamic ODT
                DDR3_CS_N <= 2'h0;
                DDR3_RAS_N <= 0;
                DDR3_CAS_N <= 0;
                DDR3_WE_N <= 0;
                DDR3_BA <= 3'h2;
                count <= 0;
                state <= WAIT_MR2;
            end
            WAIT_MR2: begin //tMRD time between Mode Register updates
                if(count[2]) //tMRD = 4 clocks
                    state <= ISSUE_MR3;
            end
            ISSUE_MR3: begin //initialization step 7 (see micron ddr3 datasheet)
                DDR3_CS_N <= 2'h0;
                DDR3_RAS_N <= 0;
                DDR3_CAS_N <= 0;
                DDR3_WE_N <= 0;
                DDR3_BA <= 3'h3;
                count <= 0;
                state <= WAIT_MR3;
            end
            WAIT_MR3: begin //tMRD time between Mode Register updates
                if(count[2]) //tMRD = 4 clocks
                    state <= ISSUE_MR1;
            end
            ISSUE_MR1: begin //initialization step 8 (see micron ddr3 datasheet)
                DDR3_CS_N <= 2'h0;
                DDR3_RAS_N <= 0;
                DDR3_CAS_N <= 0;
                DDR3_WE_N <= 0;
                DDR3_BA <= 3'h1;
                //DLL enabled
                DDR3_A[0] <= 0;
                //According to ML605 example design - HIGH
                {DDR3_A[5], DDR3_A[1]} <= 2'h1; //Output Drive Strength
                //According to ML605 example design - 40 ohm
                {DDR3_A[9], DDR3_A[6], DDR3_A[2]} <= 3'h3; //RTT Nom
                //Output enabled (1 == TRISTATED)
                DDR3_A[12] <= 0;
                count <= 0;
                state <= WAIT_MR1;
            end
            WAIT_MR1: begin //tMRD time between Mode Register updates
                if(count[2]) //tMRD = 4 clocks
                    state <= ISSUE_MR0;
            end
            ISSUE_MR0: begin //initialization step 9 (see micron ddr3 datasheet)
                DDR3_CS_N <= 2'h0;
                DDR3_RAS_N <= 0;
                DDR3_CAS_N <= 0;
                DDR3_WE_N <= 0;
                DDR3_BA <= 3'h0;
                //CL = 5
                DDR3_A[6:4] <= 1;
                //DLL Reset
                DDR3_A[8] <= 1;
                //WR = 5 = 15ns / 3.3ns
                DDR3_A[11:9] <= 1;
                count <= 0;
                state <= WAIT_MR0;
            end
            WAIT_MR0: begin //DLL lock time after the DLL reset in MR0
                if(count[9]) //tDLLK = 512 clocks
                    state <= ISSUE_ZQCL;
            end
            ISSUE_ZQCL: begin
                DDR3_CS_N <= 2'h0;
                DDR3_RAS_N <= 1;
                DDR3_CAS_N <= 1;
                DDR3_WE_N <= 0;
                DDR3_A[10] <= 1;
                // Restart the timer. count[9] is still set from the tDLLK
                // wait, so without this WAIT_ZQCL would exit on its first
                // cycle and tZQinit would not be honoured.
                count <= 0;
                state <= WAIT_ZQCL;
            end
            WAIT_ZQCL: begin //tZQinit
                if(count[9]) //tZQinit = 512 clocks
                    state <= ISSUE_MR1_WRITE_LEVELIZATION;
            end
            ISSUE_MR1_WRITE_LEVELIZATION: begin
                DDR3_CS_N <= 2'h0;
                DDR3_RAS_N <= 0;
                DDR3_CAS_N <= 0;
                DDR3_WE_N <= 0;
                DDR3_BA <= 3'h1;
                //DLL enabled
                DDR3_A[0] <= 0;
                //According to ML605 example design - HIGH
                {DDR3_A[5], DDR3_A[1]} <= 2'h1; //Output Drive Strength
                //According to ML605 example design - 40 ohm
                {DDR3_A[9], DDR3_A[6], DDR3_A[2]} <= 3'h3; //RTT Nom
                //Output enabled (1 == TRISTATED)
                DDR3_A[12] <= 0;
                //Write levelization enabled
                DDR3_A[7] <= 1;
                count <= 0;
                state <= WAIT_MR1_WRITE_LEVELIZATION;
            end
            WAIT_MR1_WRITE_LEVELIZATION: begin //tMOD before the next non-MRS action
                if(count[4]) //tMOD = 12 clocks; waits 16
                    state <= TURN_ON_ODT;
            end
            TURN_ON_ODT: begin
                DDR3_ODT <= {2{1'b1}};
                count <= 0;
                state <= WAIT_ODTL;
            end
            WAIT_ODTL: begin
                DDR3_ODT <= {2{1'b1}};
                if(count[2]) begin //ODTL = WL - 2 = 3; waits 4
                    count <= 0;
                    state <= WAIT_TWLDQSEN;
                end
            end
            WAIT_TWLDQSEN: begin
                DDR3_ODT <= {2{1'b1}};
                if(count[5]) begin //tWLDQSEN = 25; waits 32
                    count <= 0;
                    state <= WAIT_TWLMRD;
                end
            end
            WAIT_TWLMRD: begin
                DDR3_ODT <= {2{1'b1}};
                if(count[6]) begin //tWLMRD = 40; waits 64
                    count <= 0;
                    state <= IDLE;
                end
            end
            IDLE: begin
                // Write-leveling mode is active: keep ODT on, drive DQS, and
                // toggle `start` every 256 clocks.
                DDR3_ODT <= {2{1'b1}};
                dqs_tri_p <= 0;
                dqs_tri_n <= 0;
                if(count[8]) begin
                    start <= ~start;
                    count <= 0;
                end
            end
        endcase
    end
end

assign DDR3_DM = dqs_in_p | dqs_in_n;

IBUFGDS sysclk_ibufgds_inst(
.I(SYSCLK_P),
.IB(SYSCLK_N),
.O(sysclk)
);

//CLKOUT[n] = CLKIN * CLKFBOUT_MULT_F / CLKOUT[n]_DIVIDE_F
//CLKIN = 200 MHz
//Currently configuring CLKOUT0 as (200 * 6) / 4 = 300
//CLKOUT0 = 300 MHz
//CLKOUT1 = 300 MHz @ -90 degrees
wire sysclk_mmcm_inst_locked;
MMCM_ADV # (
.BANDWIDTH("OPTIMIZED"),
.CLKFBOUT_MULT_F(6),
.CLKFBOUT_PHASE(0.000),
.CLKIN1_PERIOD(5.000),
.CLKIN2_PERIOD(0),
.CLKOUT0_DIVIDE_F(4.000),
.CLKOUT1_DIVIDE(4),
.CLKOUT2_DIVIDE(1),
.CLKOUT3_DIVIDE(1),
.CLKOUT4_DIVIDE(1),
.CLKOUT5_DIVIDE(1),
.CLKOUT6_DIVIDE(1),
.CLKOUT0_DUTY_CYCLE(0.5),
.CLKOUT1_DUTY_CYCLE(0.5),
.CLKOUT2_DUTY_CYCLE(0.5),
.CLKOUT3_DUTY_CYCLE(0.5),
.CLKOUT4_DUTY_CYCLE(0.5),
.CLKOUT5_DUTY_CYCLE(0.5),
.CLKOUT6_DUTY_CYCLE(0.5),
.CLKOUT0_PHASE(0),
.CLKOUT1_PHASE(-90.0),
.CLKOUT2_PHASE(0),
.CLKOUT3_PHASE(0),
.CLKOUT4_PHASE(0),
.CLKOUT5_PHASE(0),
.CLKOUT6_PHASE(0),
.CLKOUT4_CASCADE("FALSE"),
.CLOCK_HOLD("FALSE"),
.COMPENSATION("INTERNAL"),
.DIVCLK_DIVIDE(1),
.REF_JITTER1(0.005),
.REF_JITTER2(0.005),
.STARTUP_WAIT("FALSE"),
.CLKFBOUT_USE_FINE_PS("TRUE"),
.CLKOUT0_USE_FINE_PS("TRUE"),
.CLKOUT1_USE_FINE_PS("TRUE"),
.CLKOUT2_USE_FINE_PS("FALSE"),
.CLKOUT3_USE_FINE_PS("FALSE"),
.CLKOUT4_USE_FINE_PS("FALSE"),
.CLKOUT5_USE_FINE_PS("FALSE"),
.CLKOUT6_USE_FINE_PS("FALSE")
) sysclk_mmcm_inst(
.CLKFBIN(sysclk_fbout),
.CLKFBOUT(sysclk_fbout),
.CLKFBOUTB(),
.CLKFBSTOPPED(),
.CLKINSEL(1'b1),
.CLKINSTOPPED(),
.CLKIN1(sysclk),
.CLKIN2(1'b0),
.CLKOUT0(clkout0_bufg_in),
.CLKOUT1(clkout1_bufg_in),
.CLKOUT2(),
.CLKOUT3(),
.CLKOUT4(),
.CLKOUT5(),
.CLKOUT6(),
.CLKOUT0B(),
.CLKOUT1B(),
.CLKOUT2B(),
.CLKOUT3B(),
.DADDR(7'h0),
.DCLK(1'b0),
.DEN(1'b0),
.DI(16'h0),
.DO(),
.DRDY(),
.DWE(1'b0),
.LOCKED(sysclk_mmcm_inst_locked),
.PSCLK(1'b0),
.PSDONE(),
.PSEN(1'b0),
.PSINCDEC(1'b0),
.PWRDWN(1'b0),
.RST(1'b0)
);

(* IODELAY_GROUP = "IODELAY_GROUP_INST" *)
IDELAYCTRL idelayctrl_inst(
.RDY(),
.REFCLK(sysclk),
.RST(sysclk_reset_sync)
);

// Per-lane DQS output path: ODDR -> IODELAYE1 (variable output delay) ->
// differential IO buffer. One copy is generated for each of the 8 DQS pairs.
genvar dqs_i;
generate
for(dqs_i = 0; dqs_i < 8; dqs_i = dqs_i + 1) begin : dqs_gen
wire dqs_out_oddr;
wire dqs_out_delay;
// D1 carries the strobe request and D2 is tied low, so a 1 on dqs_out should
// appear as a half-clock-wide high pulse (assuming standard ODDR SAME_EDGE
// behaviour: D1 on the rising edge, D2 on the falling edge).
ODDR # (
.DDR_CLK_EDGE("SAME_EDGE")
) dqs_oddr_inst(
.Q(dqs_out_oddr),
.C(clkout0),
.CE(1'b1),
.D1(dqs_out[dqs_i]),
.D2(1'b0),
.R(1'b0),
.S(1'b0)
);
// Output-side delay line clocked by sysclk. INC is tied high, so each CE pulse
// from wl steps this lane's tap count up by one; the count is visible on
// cntvalueout. IDELAY_TYPE is "VARIABLE" only to work around the ISE issue
// described in the Initialization post (INC/CE reported unused when "FIXED").
(* IODELAY_GROUP = "IODELAY_GROUP_INST" *)
IODELAYE1 # (
.DELAY_SRC("O"),
.IDELAY_TYPE("VARIABLE"),
.ODELAY_TYPE("VARIABLE"),
.ODELAY_VALUE(0),
.REFCLK_FREQUENCY(200.0),
.SIGNAL_PATTERN("DATA")
) dqs_iodelaye1_inst(
.C(sysclk),
.CE(dqs_out_delay_ce[dqs_i]),
.CINVCTRL(1'b0),
.CLKIN(1'b0),
.CNTVALUEIN(5'h0),
.CNTVALUEOUT(cntvalueout[dqs_i]),
.DATAIN(1'b0),
.DATAOUT(dqs_out_delay),
.IDATAIN(1'b0),
.INC(1'b1),
.ODATAIN(dqs_out_oddr),
.RST(sysclk_reset_sync),
.T()
);
// Differential bidirectional buffer; TM/TS take the P and N tri-state
// controls from the state machine (NOTE(review): confirm TM/TS polarity
// against the primitive documentation).
IOBUFDS_DIFF_OUT dqs_iobufds_diff_out_inst(
.O(dqs_in_p[dqs_i]),
.OB(dqs_in_n[dqs_i]),
.IO(DDR3_DQS_P[dqs_i]),
.IOB(DDR3_DQS_N[dqs_i]),
.I(dqs_out_delay),
.TM(dqs_tri_p[dqs_i]),
.TS(dqs_tri_n[dqs_i])
);
end
endgenerate

genvar dq_i;
generate
for(dq_i = 0; dq_i < 64; dq_i = dq_i + 1) begin : dq_gen
IOBUF dq_iobuf_inst(
.O(dq_in[dq_i]),
.IO(DDR3_DQ[dq_i]),
.I(dq_out[dq_i]),
.T(dq_tri[dq_i])
);
end
endgenerate

genvar clk_i;
generate
for(clk_i = 0; clk_i < 2; clk_i = clk_i + 1) begin : clk_gen
OBUFDS clk_obufds_inst(
.O(DDR3_CLK_P[clk_i]),
.OB(DDR3_CLK_N[clk_i]),
.I(clkout1)
);
end
endgenerate

my_debug debug_inst(
.d({cntvalueout[0], dqs_in_p, done}),
.q()
);

endmodule

Tuesday, August 10, 2010

Aligning DQSs - Add ODDR at IOs

I want to start seeing more appropriate write leveling data. Currently the write leveling is giving coherent results, but they are unaligned. I see that this is because each DQS is routed a different distance from its flip-flop to its pin. I've now added an ODDR so that every DQS is registered at its own IO. Xilinx's tools require that the ODDR sit before the IODELAY. Here is the relevant code in top.v:


// Per-lane DQS output path with an ODDR added ahead of the IODELAYE1 so that
// every DQS is registered at its own IO. One copy per DQS pair (8 total).
genvar dqs_i;
generate
for(dqs_i = 0; dqs_i < 8; dqs_i = dqs_i + 1) begin : dqs_gen
wire dqs_out_oddr;
wire dqs_out_delay;
// D1 and D2 carry the same value, so the ODDR acts as a plain output register.
ODDR dqs_oddr_inst(
.Q(dqs_out_oddr),
.C(clkout0),
.CE(1'b1),
.D1(dqs_out[dqs_i]),
.D2(dqs_out[dqs_i]),
.R(1'b0),
.S(1'b0)
);
// Variable output delay. In this revision all lanes share a single CE
// (dqs_out_delay_ce), and INC is tied high so each CE pulse adds one tap.
(* IODELAY_GROUP = "IODELAY_GROUP_INST" *)
IODELAYE1 # (
.DELAY_SRC("O"),
.IDELAY_TYPE("VARIABLE"),
.ODELAY_TYPE("VARIABLE"),
.ODELAY_VALUE(0),
.REFCLK_FREQUENCY(200.0),
.SIGNAL_PATTERN("DATA")
) dqs_iodelaye1_inst(
.C(sysclk),
.CE(dqs_out_delay_ce),
.CINVCTRL(1'b0),
.CLKIN(1'b0),
.CNTVALUEIN(5'h0),
.CNTVALUEOUT(cntvalueout[dqs_i]),
.DATAIN(1'b0),
.DATAOUT(dqs_out_delay),
.IDATAIN(1'b0),
.INC(1'b1),
.ODATAIN(dqs_out_oddr),
.RST(reset_sync),
.T()
);
// Differential bidirectional buffer driving the DQS pair from the delayed strobe.
IOBUFDS_DIFF_OUT dqs_iobufds_diff_out_inst(
.O(dqs_in_p[dqs_i]),
.OB(dqs_in_n[dqs_i]),
.IO(DDR3_DQS_P[dqs_i]),
.IOB(DDR3_DQS_N[dqs_i]),
.I(dqs_out_delay),
.TM(dqs_tri_p[dqs_i]),
.TS(dqs_tri_n[dqs_i])
);
end
endgenerate

I now see in chipscope that the DQSs are moving in a more uniform manner.

Friday, August 6, 2010

Initialization

Following Micron's initialization procedure is fairly simple. Of course you do have to know whether you are doing it right. When I play with a new device, I like to see it respond as soon as possible. I created the initialization state machine, and put it into write leveling mode.

Instantiate an MMCM_ADV to create a 300 MHz phase 0, and a 300 MHz phase -90 from the 200 MHz system clock.
Instantiate one IDELAYCTRL with a 200 MHz clock.
Instantiate an IODELAYE1 for each DQS. Set ODELAY_TYPE to "VARIABLE" (and also IDELAY_TYPE to "VARIABLE" since there is an ISE bug which reports INC and CE as unused and ignored if IDELAY_TYPE is "FIXED"). Input the non-delayed DQS on ODATAIN, and the delayed version comes out on DATAOUT. Connect DATAOUT directly to the I port of the IOBUFDS_DIFF_OUT. You can then use the CE signal to slowly increment the count and you should see the prime DQs outputting the CLK value at the moment the strobe occurs.

In order to see this, I connected chipscope and used a pushbutton to increment all the delays. Through chipscope I can see the differences on what is latched on the DQ lines dependent on the delay used. On my chip, each delay is about 78 ps, which makes approximately 20-21 delays cover half of the clock period. This is verified using the delays and looking at the DQ outputs.

Note: The Micron SODIMM DQ mapping to component DQ mapping is shown in the SODIMM datasheet. On the ML605, the prime DQs are: 4, 10, 16, 26, 38, 41, 50, and 57.

This was very cool. Here's the code so far to make this work: (This is NOT a controller, this is just my sandbox for learning, there aren't even timing or pin constraints yet!)


//top.v



`timescale 1ns / 1ps
module top(
input SYSCLK_P, //200 MHz
input SYSCLK_N,
input CPU_RESET,

input SW7,

//DDR3 in use: SODIMM: MT4JSF6464HY-1G1B1, 4 Components: MT41J64M16BLA-187E
//Datasheets: jsf4c64x64hy.pdf (SODIMM), 1Gb_DDR3_SDRAM.pdf (Components)
output [1:0] DDR3_CLK_P,
output [1:0] DDR3_CLK_N,

output reg [1:0] DDR3_CKE,

output reg [1:0] DDR3_ODT,

output reg [1:0] DDR3_CS_N,

output reg DDR3_RESET_N,
input DDR3_TEMP_EVENT_N,

output reg DDR3_WE_N,
output reg DDR3_CAS_N,
output reg DDR3_RAS_N,

inout [7:0] DDR3_DQS_N,
inout [7:0] DDR3_DQS_P,

output [7:0] DDR3_DM,

inout [63:0] DDR3_DQ,

output reg [2:0] DDR3_BA,

output reg [15:0] DDR3_A //SODIMM in general has 16 address bits, this SODIMM only has 13... top 3 unused
);

//Current design:
//tCK = 3.3 ns
//CL = 5, CWL = 5

wire sysclk;
reg [7:0] dqs_out;
wire [7:0] dqs_in_p;
wire [7:0] dqs_in_n;
reg [7:0] dqs_tri_p;
reg [7:0] dqs_tri_n;
reg [63:0] dq_out;
wire [63:0] dq_in;
reg [63:0] dq_tri;

wire clkout0;
wire clkout0_bufg_in;
BUFG clkout0_bufg_inst(
.I(clkout0_bufg_in),
.O(clkout0)
);

wire clkout1;
wire clkout1_bufg_in;
BUFG clkout1_bufg_inst(
.I(clkout1_bufg_in),
.O(clkout1)
);

wire reset;
assign reset = CPU_RESET | ~DDR3_TEMP_EVENT_N | ~sysclk_mmcm_inst_locked;
reg [1:0] reset_delay;
wire reset_sync;
assign reset_sync = reset_delay[1];

always @ (posedge clkout0 or posedge reset) begin
if(reset) begin
reset_delay <= {2{1'b1}};
end else begin
reset_delay <= {reset_delay[0], reset};
end
end

wire sw7_pushed;

sw_push_detect sw_push_detect_inst(
.clock(sysclk),
.reset(reset_sync),

.SW(SW7),
.sw_pushed(sw7_pushed)
);

reg [31:0] state;
localparam WAIT_200US = (0);
localparam WAIT_500US = (WAIT_200US + 1);
localparam WAIT_110NS = (WAIT_500US + 1);
localparam ISSUE_MR2 = (WAIT_110NS + 1);
localparam WAIT_MR2 = (ISSUE_MR2 + 1);
localparam ISSUE_MR3 = (WAIT_MR2 + 1);
localparam WAIT_MR3 = (ISSUE_MR3 + 1);
localparam ISSUE_MR1 = (WAIT_MR3 + 1);
localparam WAIT_MR1 = (ISSUE_MR1 + 1);
localparam ISSUE_MR0 = (WAIT_MR1 + 1);
localparam WAIT_MR0 = (ISSUE_MR0 + 1);
localparam ISSUE_ZQCL = (WAIT_MR0 + 1);
localparam WAIT_ZQCL = (ISSUE_ZQCL + 1);
localparam ISSUE_MR1_WRITE_LEVELIZATION = (WAIT_ZQCL + 1);
localparam WAIT_MR1_WRITE_LEVELIZATION = (ISSUE_MR1_WRITE_LEVELIZATION + 1);
localparam TURN_ON_ODT = (WAIT_MR1_WRITE_LEVELIZATION + 1);
localparam WAIT_ODTL = (TURN_ON_ODT + 1);
localparam WAIT_TWLDQSEN = (WAIT_ODTL + 1);
localparam WAIT_TWLMRD = (WAIT_TWLDQSEN + 1);
localparam IDLE = (WAIT_TWLMRD + 1);

reg [18:0] count;

(* KEEP = "TRUE" *)
wire [4:0] cntvalueout [7:0];

reg dqs_out_delay_ce;

always @ (posedge sysclk) begin
dqs_out_delay_ce <= 0;
if(sw7_pushed)
dqs_out_delay_ce <= 1;
end

// DDR3 bring-up sequencer (clkout0 domain, tCK = 3.3 ns), first revision.
//
// Walks through reset / CKE timing, the four mode-register writes, ZQCL, and
// then switches the DRAM into write-leveling mode. After that it loops
// IDLE -> WAIT_TWLDQSEN -> WAIT_TWLMRD -> IDLE, raising dqs_out for one clock
// on each pass through IDLE.
//
// Every wait compares a single bit of `count`, so each delay is rounded up to
// the next power of two; the comments give the real number of clocks.
// Command pins default to NOP (CS_N high) every cycle and are overridden only
// in the ISSUE_* states.
always @ (posedge clkout0 or posedge reset_sync) begin
    if(reset_sync) begin
        DDR3_RESET_N <= 0;
        DDR3_CKE <= 2'h0;
        DDR3_ODT <= 2'h0;
        DDR3_CS_N <= 2'h3;
        DDR3_RAS_N <= 1;
        DDR3_CAS_N <= 1;
        DDR3_WE_N <= 1;
        DDR3_BA <= 3'h0;
        DDR3_A <= 0;
        count <= 0;
        state <= WAIT_200US;
        dqs_out <= 0;
        dqs_tri_p <= {8{1'b1}};
        dqs_tri_n <= {8{1'b1}};
        dq_out <= 0;
        dq_tri <= {64{1'b1}};
    end else begin
        // Per-cycle defaults; the case below overrides what it needs.
        count <= count + 1;
        DDR3_RESET_N <= 1;
        DDR3_CKE <= 2'h3;
        DDR3_CS_N <= 2'h3;
        DDR3_RAS_N <= 1;
        DDR3_CAS_N <= 1;
        DDR3_WE_N <= 1;
        DDR3_BA <= 3'h0;
        DDR3_A <= 0;
        DDR3_ODT <= 2'h0;
        dqs_out <= 0;
        case (state)
            WAIT_200US: begin //initialization step 2, 3 (see micron ddr3 datasheet)
                DDR3_RESET_N <= 0;
                DDR3_CKE <= 2'h0;
                if(count[16]) begin //needs 60000 clocks for 200us; waits 65536 (~218us)
                    count <= 0;
                    state <= WAIT_500US;
                end
            end
            WAIT_500US: begin //initialization step 4, 5 (see micron ddr3 datasheet)
                DDR3_CKE <= 2'h0;
                if(count[18]) begin //needs 150000 clocks for 500us; waits 262144 (~874us)
                    count <= 0;
                    state <= WAIT_110NS;
                end
            end
            WAIT_110NS: begin //initialization step 5, 6 (see micron ddr3 datasheet)
                if(count[6]) //needs 33 clocks for 110ns; waits 64
                    state <= ISSUE_MR2;
            end
            ISSUE_MR2: begin //initialization step 6 (see micron ddr3 datasheet)
                //The MIG disables dynamic ODT
                DDR3_CS_N <= 2'h0;
                DDR3_RAS_N <= 0;
                DDR3_CAS_N <= 0;
                DDR3_WE_N <= 0;
                DDR3_BA <= 3'h2;
                count <= 0;
                state <= WAIT_MR2;
            end
            WAIT_MR2: begin //tMRD time between Mode Register updates
                if(count[2]) //tMRD = 4 clocks
                    state <= ISSUE_MR3;
            end
            ISSUE_MR3: begin //initialization step 7 (see micron ddr3 datasheet)
                DDR3_CS_N <= 2'h0;
                DDR3_RAS_N <= 0;
                DDR3_CAS_N <= 0;
                DDR3_WE_N <= 0;
                DDR3_BA <= 3'h3;
                count <= 0;
                state <= WAIT_MR3;
            end
            WAIT_MR3: begin //tMRD time between Mode Register updates
                if(count[2]) //tMRD = 4 clocks
                    state <= ISSUE_MR1;
            end
            ISSUE_MR1: begin //initialization step 8 (see micron ddr3 datasheet)
                DDR3_CS_N <= 2'h0;
                DDR3_RAS_N <= 0;
                DDR3_CAS_N <= 0;
                DDR3_WE_N <= 0;
                DDR3_BA <= 3'h1;
                //DLL enabled
                DDR3_A[0] <= 0;
                //According to ML605 example design - HIGH
                {DDR3_A[5], DDR3_A[1]} <= 2'h1; //Output Drive Strength
                //According to ML605 example design - 40 ohm
                {DDR3_A[9], DDR3_A[6], DDR3_A[2]} <= 3'h3; //RTT Nom
                //Output enabled (1 == TRISTATED)
                DDR3_A[12] <= 0;
                count <= 0;
                state <= WAIT_MR1;
            end
            WAIT_MR1: begin //tMRD time between Mode Register updates
                if(count[2]) //tMRD = 4 clocks
                    state <= ISSUE_MR0;
            end
            ISSUE_MR0: begin //initialization step 9 (see micron ddr3 datasheet)
                DDR3_CS_N <= 2'h0;
                DDR3_RAS_N <= 0;
                DDR3_CAS_N <= 0;
                DDR3_WE_N <= 0;
                DDR3_BA <= 3'h0;
                //CL = 5
                DDR3_A[6:4] <= 1;
                //DLL Reset
                DDR3_A[8] <= 1;
                //WR = 5 = 15ns / 3.3ns
                DDR3_A[11:9] <= 1;
                count <= 0;
                state <= WAIT_MR0;
            end
            WAIT_MR0: begin //DLL lock time after the DLL reset in MR0
                if(count[9]) //tDLLK = 512 clocks
                    state <= ISSUE_ZQCL;
            end
            ISSUE_ZQCL: begin
                DDR3_CS_N <= 2'h0;
                DDR3_RAS_N <= 1;
                DDR3_CAS_N <= 1;
                DDR3_WE_N <= 0;
                DDR3_A[10] <= 1;
                // Restart the timer. count[9] is still set from the tDLLK
                // wait, so without this WAIT_ZQCL would exit on its first
                // cycle and tZQinit would not be honoured.
                count <= 0;
                state <= WAIT_ZQCL;
            end
            WAIT_ZQCL: begin //tZQinit
                if(count[9]) //tZQinit = 512 clocks
                    state <= ISSUE_MR1_WRITE_LEVELIZATION;
            end
            ISSUE_MR1_WRITE_LEVELIZATION: begin
                DDR3_CS_N <= 2'h0;
                DDR3_RAS_N <= 0;
                DDR3_CAS_N <= 0;
                DDR3_WE_N <= 0;
                DDR3_BA <= 3'h1;
                //DLL enabled
                DDR3_A[0] <= 0;
                //According to ML605 example design - HIGH
                {DDR3_A[5], DDR3_A[1]} <= 2'h1; //Output Drive Strength
                //According to ML605 example design - 40 ohm
                {DDR3_A[9], DDR3_A[6], DDR3_A[2]} <= 3'h3; //RTT Nom
                //Output enabled (1 == TRISTATED)
                DDR3_A[12] <= 0;
                //Write levelization enabled
                DDR3_A[7] <= 1;
                count <= 0;
                state <= WAIT_MR1_WRITE_LEVELIZATION;
            end
            WAIT_MR1_WRITE_LEVELIZATION: begin //tMOD before the next non-MRS action
                if(count[4]) //tMOD = 12 clocks; waits 16
                    state <= TURN_ON_ODT;
            end
            TURN_ON_ODT: begin
                DDR3_ODT <= {2{1'b1}};
                count <= 0;
                state <= WAIT_ODTL;
            end
            WAIT_ODTL: begin
                DDR3_ODT <= {2{1'b1}};
                if(count[2]) begin //ODTL = WL - 2 = 3; waits 4
                    count <= 0;
                    state <= WAIT_TWLDQSEN;
                end
            end
            WAIT_TWLDQSEN: begin
                DDR3_ODT <= {2{1'b1}};
                if(count[5]) begin //tWLDQSEN = 25; waits 32
                    count <= 0;
                    state <= WAIT_TWLMRD;
                end
            end
            WAIT_TWLMRD: begin
                DDR3_ODT <= {2{1'b1}};
                if(count[6]) begin //tWLMRD = 40; waits 64
                    count <= 0;
                    state <= IDLE;
                end
            end
            IDLE: begin
                // Fire one strobe, then go around the wait states again so
                // the strobe repeats with ODT held on.
                DDR3_ODT <= {2{1'b1}};
                dqs_out <= {8{1'b1}};
                count <= 0;
                state <= WAIT_TWLDQSEN;
            end
        endcase
        // Applied every non-reset cycle regardless of state: loop the sampled
        // DQ inputs back to dq_out and keep the DQS buffers enabled.
        dq_out <= dq_in;
        dqs_tri_p <= 0;
        dqs_tri_n <= 0;
    end
end

assign DDR3_DM = dqs_in_p | dqs_in_n;

IBUFGDS sysclk_ibufgds_inst(
.I(SYSCLK_P),
.IB(SYSCLK_N),
.O(sysclk)
);

//CLKOUT[n] = CLKIN * CLKFBOUT_MULT_F / CLKOUT[n]_DIVIDE_F
//CLKIN = 200 MHz
//Currently configuring CLKOUT0 as (200 * 6) / 4 = 300
//CLKOUT0 = 300 MHz
wire sysclk_mmcm_inst_locked;
MMCM_ADV # (
.BANDWIDTH("OPTIMIZED"),
.CLKFBOUT_MULT_F(6),
.CLKFBOUT_PHASE(0.000),
.CLKIN1_PERIOD(5.000),
.CLKIN2_PERIOD(0),
.CLKOUT0_DIVIDE_F(4.000),
.CLKOUT1_DIVIDE(4),
.CLKOUT2_DIVIDE(1),
.CLKOUT3_DIVIDE(1),
.CLKOUT4_DIVIDE(1),
.CLKOUT5_DIVIDE(1),
.CLKOUT6_DIVIDE(1),
.CLKOUT0_DUTY_CYCLE(0.5),
.CLKOUT1_DUTY_CYCLE(0.5),
.CLKOUT2_DUTY_CYCLE(0.5),
.CLKOUT3_DUTY_CYCLE(0.5),
.CLKOUT4_DUTY_CYCLE(0.5),
.CLKOUT5_DUTY_CYCLE(0.5),
.CLKOUT6_DUTY_CYCLE(0.5),
.CLKOUT0_PHASE(0),
.CLKOUT1_PHASE(-90.0),
.CLKOUT2_PHASE(0),
.CLKOUT3_PHASE(0),
.CLKOUT4_PHASE(0),
.CLKOUT5_PHASE(0),
.CLKOUT6_PHASE(0),
.CLKOUT4_CASCADE("FALSE"),
.CLOCK_HOLD("FALSE"),
.COMPENSATION("INTERNAL"),
.DIVCLK_DIVIDE(1),
.REF_JITTER1(0.005),
.REF_JITTER2(0.005),
.STARTUP_WAIT("FALSE"),
.CLKFBOUT_USE_FINE_PS("TRUE"),
.CLKOUT0_USE_FINE_PS("TRUE"),
.CLKOUT1_USE_FINE_PS("TRUE"),
.CLKOUT2_USE_FINE_PS("FALSE"),
.CLKOUT3_USE_FINE_PS("FALSE"),
.CLKOUT4_USE_FINE_PS("FALSE"),
.CLKOUT5_USE_FINE_PS("FALSE"),
.CLKOUT6_USE_FINE_PS("FALSE")
) sysclk_mmcm_inst(
.CLKFBIN(sysclk_fbout),
.CLKFBOUT(sysclk_fbout),
.CLKFBOUTB(),
.CLKFBSTOPPED(),
.CLKINSEL(1'b1),
.CLKINSTOPPED(),
.CLKIN1(sysclk),
.CLKIN2(1'b0),
.CLKOUT0(clkout0_bufg_in),
.CLKOUT1(clkout1_bufg_in),
.CLKOUT2(),
.CLKOUT3(),
.CLKOUT4(),
.CLKOUT5(),
.CLKOUT6(),
.CLKOUT0B(),
.CLKOUT1B(),
.CLKOUT2B(),
.CLKOUT3B(),
.DADDR(7'h0),
.DCLK(1'b0),
.DEN(1'b0),
.DI(16'h0),
.DO(),
.DRDY(),
.DWE(1'b0),
.LOCKED(sysclk_mmcm_inst_locked),
.PSCLK(1'b0),
.PSDONE(),
.PSEN(1'b0),
.PSINCDEC(1'b0),
.PWRDWN(1'b0),
.RST(1'b0)
);

(* IODELAY_GROUP = "IODELAY_GROUP_INST" *)
IDELAYCTRL idelayctrl_inst(
.RDY(),
.REFCLK(sysclk),
.RST(reset_sync)
);

genvar dqs_i;
generate
for(dqs_i = 0; dqs_i < 8; dqs_i = dqs_i + 1) begin : dqs_gen
wire dqs_out_delay;
(* IODELAY_GROUP = "IODELAY_GROUP_INST" *)
IODELAYE1 # (
.DELAY_SRC("O"),
.IDELAY_TYPE("VARIABLE"),
.ODELAY_TYPE("VARIABLE"),
.ODELAY_VALUE(0),
.REFCLK_FREQUENCY(200.0)
) dqs_iodelaye1_inst(
.C(sysclk),
.CE(dqs_out_delay_ce),
.CINVCTRL(1'b0),
.CLKIN(1'b0),
.CNTVALUEIN(5'h0),
.CNTVALUEOUT(cntvalueout[dqs_i]),
.DATAIN(1'b0),
.DATAOUT(dqs_out_delay),
.IDATAIN(1'b0),
.INC(1'b1),
.ODATAIN(dqs_out[dqs_i]),
.RST(reset_sync),
.T()
);
IOBUFDS_DIFF_OUT dqs_iobufds_diff_out_inst(
.O(dqs_in_p[dqs_i]),
.OB(dqs_in_n[dqs_i]),
.IO(DDR3_DQS_P[dqs_i]),
.IOB(DDR3_DQS_N[dqs_i]),
.I(dqs_out_delay),
.TM(dqs_tri_p[dqs_i]),
.TS(dqs_tri_n[dqs_i])
);
end
endgenerate

genvar dq_i;
generate
for(dq_i = 0; dq_i < 64; dq_i = dq_i + 1) begin : dq_gen
IOBUF dq_iobuf_inst(
.O(dq_in[dq_i]),
.IO(DDR3_DQ[dq_i]),
.I(dq_out[dq_i]),
.T(dq_tri[dq_i])
);
end
endgenerate

genvar clk_i;
generate
for(clk_i = 0; clk_i < 2; clk_i = clk_i + 1) begin : clk_gen
OBUFDS clk_obufds_inst(
.O(DDR3_CLK_P[clk_i]),
.OB(DDR3_CLK_N[clk_i]),
.I(clkout1)
);
end
endgenerate

endmodule


//sw_push_detect.v


// sw_push_detect - debounced push-button event detector.
//
// SW is passed through a two-flop sampler and then drives a saturating
// up/down counter (`pushed`): it counts up while the sampled switch is high
// and down while it is low. When the counter saturates at all ones the press
// is remembered in `sw_high`; when the counter has drained back to zero,
// sw_pushed is pulsed for one clock and `sw_high` is cleared. The pulse
// therefore appears after the button has been released.
//
// Ports:
//   clock     - sampling clock
//   reset     - asynchronous, active-high reset
//   SW        - raw switch input
//   sw_pushed - one-clock pulse per debounced press/release
module sw_push_detect(
    input clock,
    input reset,

    input SW,
    output reg sw_pushed
);

    reg [1:0] sw_sample;
    localparam PUSHED_LENGTH = (11);
    reg [PUSHED_LENGTH - 1:0] pushed;
    reg sw_high;

    always @ (posedge clock or posedge reset) begin
        if(reset) begin
            sw_sample <= 0;
            pushed <= 0;
            sw_pushed <= 0;
            // Must be reset too: pushed is 0 right after reset, so sw_pushed
            // takes the value of sw_high on the first clock and would
            // otherwise be X in simulation.
            sw_high <= 0;
        end else begin
            sw_pushed <= 0;
            sw_sample <= {sw_sample[0], SW};
            // Count up while pressed; latch the press once saturated.
            if(pushed != {PUSHED_LENGTH{1'b1}}) begin
                if(sw_sample[1])
                    pushed <= pushed + 1;
            end else
                sw_high <= 1;
            // Count down while released; report the press once drained.
            if(pushed > 0) begin
                if(~sw_sample[1])
                    pushed <= pushed - 1;
            end else begin
                sw_pushed <= sw_high;
                sw_high <= 0;
            end
        end
    end

endmodule


//top.ucf


NET "SYSCLK_N" LOC = "H9";
NET "SYSCLK_P" LOC = "J9";
NET "CPU_RESET" LOC = "H10";
NET "SW7" LOC = "G17";

NET "SYSCLK_P" IOSTANDARD = LVDS_25;
NET "SYSCLK_N" IOSTANDARD = LVDS_25;
NET "CPU_RESET" IOSTANDARD = SSTL15; 
NET "SW7" IOSTANDARD = SSTL15; 


NET "DDR3_A[0]" LOC = "L14";
NET "DDR3_A[1]" LOC = "A16";
NET "DDR3_A[2]" LOC = "B16";
NET "DDR3_A[3]" LOC = "E16";
NET "DDR3_A[4]" LOC = "D16";
NET "DDR3_A[5]" LOC = "J17";
NET "DDR3_A[6]" LOC = "A15";
NET "DDR3_A[7]" LOC = "B15";
NET "DDR3_A[8]" LOC = "G15";
NET "DDR3_A[9]" LOC = "F15";
NET "DDR3_A[10]" LOC = "M16";
NET "DDR3_A[11]" LOC = "M15";
NET "DDR3_A[12]" LOC = "H15";
NET "DDR3_A[13]" LOC = "J15";
NET "DDR3_A[14]" LOC = "D15";
NET "DDR3_A[15]" LOC = "C15";
NET "DDR3_BA[0]" LOC = "K19";
NET "DDR3_BA[1]" LOC = "J19";
NET "DDR3_BA[2]" LOC = "L15";
NET "DDR3_CAS_N" LOC = "C17";
NET "DDR3_CKE[0]" LOC = "M18";
NET "DDR3_CKE[1]" LOC = "M17";
NET "DDR3_CLK_N[0]" LOC = "H18";
NET "DDR3_CLK_P[0]" LOC = "G18";
NET "DDR3_CLK_N[1]" LOC = "L16";
NET "DDR3_CLK_P[1]" LOC = "K16";
NET "DDR3_DQ[0]" LOC = "J11";
NET "DDR3_DQ[1]" LOC = "E13";
NET "DDR3_DQ[2]" LOC = "F13";
NET "DDR3_DQ[3]" LOC = "K11";
NET "DDR3_DQ[4]" LOC = "L11";
NET "DDR3_DQ[5]" LOC = "K13";
NET "DDR3_DQ[6]" LOC = "K12";
NET "DDR3_DQ[7]" LOC = "D11";
NET "DDR3_DQ[8]" LOC = "M13";
NET "DDR3_DQ[9]" LOC = "J14";
NET "DDR3_DQ[10]" LOC = "B13";
NET "DDR3_DQ[11]" LOC = "B12";
NET "DDR3_DQ[12]" LOC = "G10";
NET "DDR3_DQ[13]" LOC = "M11";
NET "DDR3_DQ[14]" LOC = "C12";
NET "DDR3_DQ[15]" LOC = "A11";
NET "DDR3_DQ[16]" LOC = "G11";
NET "DDR3_DQ[17]" LOC = "F11";
NET "DDR3_DQ[18]" LOC = "D14";
NET "DDR3_DQ[19]" LOC = "C14";
NET "DDR3_DQ[20]" LOC = "G12";
NET "DDR3_DQ[21]" LOC = "G13";
NET "DDR3_DQ[22]" LOC = "F14";
NET "DDR3_DQ[23]" LOC = "H14";
NET "DDR3_DQ[24]" LOC = "C19";
NET "DDR3_DQ[25]" LOC = "G20";
NET "DDR3_DQ[26]" LOC = "E19";
NET "DDR3_DQ[27]" LOC = "F20";
NET "DDR3_DQ[28]" LOC = "A20";
NET "DDR3_DQ[29]" LOC = "A21";
NET "DDR3_DQ[30]" LOC = "E22";
NET "DDR3_DQ[31]" LOC = "E23";
NET "DDR3_DQ[32]" LOC = "G21";
NET "DDR3_DQ[33]" LOC = "B21";
NET "DDR3_DQ[34]" LOC = "A23";
NET "DDR3_DQ[35]" LOC = "A24";
NET "DDR3_DQ[36]" LOC = "C20";
NET "DDR3_DQ[37]" LOC = "D20";
NET "DDR3_DQ[38]" LOC = "J20";
NET "DDR3_DQ[39]" LOC = "G22";
NET "DDR3_DQ[40]" LOC = "D26";
NET "DDR3_DQ[41]" LOC = "F26";
NET "DDR3_DQ[42]" LOC = "B26";
NET "DDR3_DQ[43]" LOC = "E26";
NET "DDR3_DQ[44]" LOC = "C24";
NET "DDR3_DQ[45]" LOC = "D25";
NET "DDR3_DQ[46]" LOC = "D27";
NET "DDR3_DQ[47]" LOC = "C25";
NET "DDR3_DQ[48]" LOC = "C27";
NET "DDR3_DQ[49]" LOC = "B28";
NET "DDR3_DQ[50]" LOC = "D29";
NET "DDR3_DQ[51]" LOC = "B27";
NET "DDR3_DQ[52]" LOC = "G27";
NET "DDR3_DQ[53]" LOC = "A28";
NET "DDR3_DQ[54]" LOC = "E24";
NET "DDR3_DQ[55]" LOC = "G25";
NET "DDR3_DQ[56]" LOC = "F28";
NET "DDR3_DQ[57]" LOC = "B31";
NET "DDR3_DQ[58]" LOC = "H29";
NET "DDR3_DQ[59]" LOC = "H28";
NET "DDR3_DQ[60]" LOC = "B30";
NET "DDR3_DQ[61]" LOC = "A30";
NET "DDR3_DQ[62]" LOC = "E29";
NET "DDR3_DQ[63]" LOC = "F29";
NET "DDR3_DM[0]" LOC = "E11";
NET "DDR3_DM[1]" LOC = "B11";
NET "DDR3_DM[2]" LOC = "E14";
NET "DDR3_DM[3]" LOC = "D19";
NET "DDR3_DM[4]" LOC = "B22";
NET "DDR3_DM[5]" LOC = "A26";
NET "DDR3_DM[6]" LOC = "A29";
NET "DDR3_DM[7]" LOC = "A31";
NET "DDR3_DQS_N[0]" LOC = "E12";
NET "DDR3_DQS_P[0]" LOC = "D12";
NET "DDR3_DQS_N[1]" LOC = "J12";
NET "DDR3_DQS_P[1]" LOC = "H12";
NET "DDR3_DQS_N[2]" LOC = "A14";
NET "DDR3_DQS_P[2]" LOC = "A13";
NET "DDR3_DQS_N[3]" LOC = "H20";
NET "DDR3_DQS_P[3]" LOC = "H19";
NET "DDR3_DQS_N[4]" LOC = "C23";
NET "DDR3_DQS_P[4]" LOC = "B23";
NET "DDR3_DQS_N[5]" LOC = "A25";
NET "DDR3_DQS_P[5]" LOC = "B25";
NET "DDR3_DQS_N[6]" LOC = "G28";
NET "DDR3_DQS_P[6]" LOC = "H27";
NET "DDR3_DQS_N[7]" LOC = "D30";
NET "DDR3_DQS_P[7]" LOC = "C30";
NET "DDR3_ODT[0]" LOC = "F18";
NET "DDR3_ODT[1]" LOC = "E17";
NET "DDR3_RAS_N" LOC = "L19";
NET "DDR3_RESET_N" LOC = "E18";
NET "DDR3_CS_N[0]" LOC = "K18";
NET "DDR3_CS_N[1]" LOC = "K17";
NET "DDR3_TEMP_EVENT_N" LOC = "D17";
NET "DDR3_WE_N" LOC = "B17";

NET "DDR3_DQS_P[*]" IOSTANDARD = DIFF_SSTL15_T_DCI;
NET "DDR3_DQS_N[*]" IOSTANDARD = DIFF_SSTL15_T_DCI;
#NET "DDR3_CLK_P[*]" IOSTANDARD = DIFF_SSTL15_T_DCI;
#NET "DDR3_CLK_N[*]" IOSTANDARD = DIFF_SSTL15_T_DCI;
NET "DDR3_CLK_P[*]" IOSTANDARD = DIFF_SSTL15_DCI;
NET "DDR3_CLK_N[*]" IOSTANDARD = DIFF_SSTL15_DCI;

NET "DDR3_DQ[*]" IOSTANDARD = SSTL15_T_DCI;

NET "DDR3_A[*]" IOSTANDARD = SSTL15;
NET "DDR3_BA[*]" IOSTANDARD = SSTL15;
NET "DDR3_RAS_N" IOSTANDARD = SSTL15;
NET "DDR3_CAS_N" IOSTANDARD = SSTL15;
NET "DDR3_WE_N" IOSTANDARD = SSTL15;
NET "DDR3_RESET_N" IOSTANDARD = LVCMOS15;
NET "DDR3_CS_N[*]" IOSTANDARD = SSTL15;
NET "DDR3_ODT[*]" IOSTANDARD = SSTL15;
NET "DDR3_CKE[*]" IOSTANDARD = SSTL15;
NET "DDR3_DM[*]" IOSTANDARD = SSTL15;

NET "DDR3_TEMP_EVENT_N" IOSTANDARD = LVCMOS15;

CONFIG DCI_CASCADE = "36 35";
CONFIG DCI_CASCADE = "26 25";