Under the topic of 'Write Leveling Procedure':
After tWLMRD and a DQS LOW preamble (tWPRE) have been satisfied...
In the figure below that text there is no reference to tWPRE; the spot where tWPRE should appear is labelled tDQSL instead. tWPRE is defined as 0.9 clocks, while tDQSL is defined as 0.45 clocks. Elsewhere tWPRE is drawn as covering both the high and the low phase of DQS (making a whole clock), yet the write-leveling text implies that it is the low phase only.
Also for tWLS and tWLH, look at the JEDEC waveform. It's obvious what they mean, but it's shown very badly in the Micron figure.
-----
It is very unclear what tWLOE is meant to do. According to the JEDEC specification a DDR3 chip may choose to drive write leveling on all DQs and not just the prime DQs. This doesn't explain why I care about early or late DQs... I don't know. Perhaps I'll realize this later.
-----
Here is a version that uses wl.v, a module that takes care of write leveling. I am now detecting the prime DQ for each DQS separately, and I am incrementing each DQS delay individually. Note that this is still very primitive! Also note that there are 2 clock domains here: the 200 MHz iodelay reference clock and the 300 MHz DDR3 clock. Other than some sampling and edge (toggle) detection across those domains, the code is very straightforward.
wl.v is new, and top.v is slightly modified. Of course the SW7 button is no longer used. It's pretty cool watching this in Chipscope.
wl.v:
//iodelay_clk is currently 200 MHz (5ns period); ddr3_clk is 300 MHz (see top.v)
// Write-leveling sequencer.
//
// Two clock domains are used:
//   * iodelay_clk - runs the main state machine, samples the prime DQ bits and
//                   produces the one-cycle delay-increment strobes (wl_ce).
//   * ddr3_clk    - produces a one-cycle DQS pulse (wl_dqs_out) each time the
//                   main state machine toggles wl_dqs_go[0].
//
// Requests cross domains as toggles: `start` (into iodelay_clk) and
// wl_dqs_go[0] (into ddr3_clk) are shifted through a few flops and an XOR of
// two adjacent stages detects each edge. `done` is likewise toggled, not
// pulsed, when a run completes.
//
// Algorithm (one bit per DQS lane, 8 lanes):
//   Phase 0: pulse wl_ce for every lane whose prime DQ reads 1, issue a DQS
//            pulse, and repeat until all lanes read 0.
//   Phase 1: pulse wl_ce for every lane whose prime DQ reads 0, issue a DQS
//            pulse, and repeat until all lanes read 1.
module wl(
    input            iodelay_clk,
    input            iodelay_rst,
    output reg [7:0] wl_ce,            // per-lane increment strobe, one iodelay_clk wide
    input            start,            // toggle to request a leveling run
    output reg       done,             // toggles when a leveling run finishes
    input            ddr3_clk,
    input            ddr3_rst,
    output reg [7:0] wl_dqs_out,       // all-ones for one ddr3_clk per DQS pulse request
    input      [7:0] wl_dq_primes_in   // prime DQ feedback, one bit per lane
);

    // ------------------------------------------------------------------
    // ddr3_clk domain: DQS pulse generator
    // ------------------------------------------------------------------
    reg [31:0] dqs_state;
    reg [31:0] dqs_next_state;

    localparam DQS_LOW  = (0);
    localparam DQS_HIGH = (DQS_LOW + 1);

    // Bit 0 is written in the iodelay_clk domain (toggle request);
    // bits 3:1 are the ddr3_clk synchronizer/edge-detect chain.
    reg [3:0] wl_dqs_go;

    always @ (posedge ddr3_clk) begin
        wl_dqs_go[3:1] <= wl_dqs_go[2:0];
    end

    always @ (posedge ddr3_clk or posedge ddr3_rst) begin
        if (ddr3_rst) begin
            dqs_state  <= DQS_LOW;
            wl_dqs_out <= 0;
        end else begin
            dqs_state  <= dqs_next_state;
            wl_dqs_out <= 0;               // default: DQS data low
            case (dqs_state)
                DQS_HIGH: begin
                    wl_dqs_out <= {8{1'b1}};
                end
            endcase
        end
    end

    always @ (*) begin
        dqs_next_state = DQS_LOW;
        case (dqs_state)
            DQS_LOW: begin
                dqs_next_state = DQS_LOW;
                // any edge on the synchronized toggle requests one pulse
                if (wl_dqs_go[3] ^ wl_dqs_go[2])
                    dqs_next_state = DQS_HIGH;
            end
            DQS_HIGH: begin
                dqs_next_state = DQS_LOW;
            end
        endcase
    end

    // ------------------------------------------------------------------
    // iodelay_clk domain: leveling state machine
    // ------------------------------------------------------------------
    reg [31:0] state;
    reg [31:0] next_state;

    localparam IDLE             = (0);
    localparam CHECK_0          = (IDLE + 1);
    localparam INCREMENT_0      = (CHECK_0 + 1);
    localparam WAIT_INCREMENT_0 = (INCREMENT_0 + 1);
    localparam GO_0             = (WAIT_INCREMENT_0 + 1);
    localparam WAIT_GO_0        = (GO_0 + 1);
    localparam CHECK_1          = (WAIT_GO_0 + 1);
    localparam INCREMENT_1      = (CHECK_1 + 1);
    localparam WAIT_INCREMENT_1 = (INCREMENT_1 + 1);
    localparam GO_1             = (WAIT_INCREMENT_1 + 1);
    localparam WAIT_GO_1        = (GO_1 + 1);
    localparam DONE             = (WAIT_GO_1 + 1);

    reg [7:0] wl_dq_primes;
    // One-hot rotating timers: loading 1 puts the hot bit at position 0 and
    // bit 9 becomes set nine shifts later.
    reg [9:0] wait_0;
    reg [9:0] wait_1;
    reg [2:0] start_in;

    always @ (posedge iodelay_clk or posedge iodelay_rst) begin
        if (iodelay_rst) begin
            state        <= IDLE;
            wl_dq_primes <= 0;
            wait_0       <= 10'b1;
            wait_1       <= 10'b1;
            wl_ce        <= 0;
            wl_dqs_go[0] <= 0;
            done         <= 0;
            start_in     <= 0;
        end else begin
            state <= next_state;
            //sampled externally, but reading only after 10
            //clocks, is stable by then.
            wl_dq_primes <= wl_dq_primes_in;
            wait_0   <= {wait_0[8:0], wait_0[9]};
            wait_1   <= {wait_1[8:0], wait_1[9]};
            wl_ce    <= 0;
            start_in <= {start_in[1:0], start};
            case (state)
                INCREMENT_0: begin
                    // step the lanes that still read 1
                    wl_ce  <= wl_dq_primes;
                    wait_0 <= 1;
                end
                GO_0: begin
                    wl_dqs_go[0] <= ~wl_dqs_go[0];
                    wait_0 <= 1;
                end
                INCREMENT_1: begin
                    // step the lanes that still read 0
                    wl_ce  <= ~wl_dq_primes;
                    wait_1 <= 1;
                end
                GO_1: begin
                    wl_dqs_go[0] <= ~wl_dqs_go[0];
                    wait_1 <= 1;
                end
                DONE: begin
                    done <= ~done;
                end
            endcase
        end
    end

    always @ (*) begin
        next_state = IDLE;
        case (state)
            IDLE: begin
                next_state = IDLE;
                if (start_in[2] ^ start_in[1])
                    next_state = CHECK_0;
            end
            CHECK_0: begin
                if (wl_dq_primes == {8{1'b0}})
                    next_state = INCREMENT_1;
                else
                    next_state = INCREMENT_0;
            end
            INCREMENT_0: begin
                next_state = WAIT_INCREMENT_0;
            end
            WAIT_INCREMENT_0: begin
                next_state = WAIT_INCREMENT_0;
                if (wait_0[9])
                    next_state = GO_0;
            end
            GO_0: begin
                next_state = WAIT_GO_0;
            end
            WAIT_GO_0: begin
                next_state = WAIT_GO_0;
                if (wait_0[9])
                    next_state = CHECK_0;
            end
            CHECK_1: begin
                if (wl_dq_primes == {8{1'b1}})
                    next_state = DONE;
                else
                    next_state = INCREMENT_1;
            end
            INCREMENT_1: begin
                next_state = WAIT_INCREMENT_1;
            end
            WAIT_INCREMENT_1: begin
                next_state = WAIT_INCREMENT_1;
                if (wait_1[9])
                    next_state = GO_1;
            end
            GO_1: begin
                next_state = WAIT_GO_1;
            end
            WAIT_GO_1: begin
                next_state = WAIT_GO_1;
                // Must watch wait_1 (the timer GO_1 just re-armed). Watching
                // wait_0 here would end the wait after an arbitrary 1..10
                // clocks because wait_0 is free-running in phase 1.
                if (wait_1[9])
                    next_state = CHECK_1;
            end
            DONE: begin
                next_state = IDLE;
            end
        endcase
    end

endmodule
top.v:
`timescale 1ns / 1ps
// Debug sink: forwards the 1024-bit input bus `d` unchanged to `q`, giving a
// single place where signals of interest can be gathered for observation.
module my_debug(d, q);
    input  [1023:0] d;
    output [1023:0] q;

    // pure combinational pass-through
    assign q[1023:0] = d[1023:0];
endmodule
// Top level: brings a DDR3 SODIMM out of reset, programs its mode registers,
// runs ZQ calibration, enters write-leveling mode and then repeatedly kicks
// the `wl` module, which steps the per-lane DQS output delays.
//
// Clocks:
//   sysclk  - 200 MHz board clock; IODELAY reference and `wl` control clock.
//   clkout0 - 300 MHz from the MMCM; command/DQS logic clock.
//   clkout1 - 300 MHz at -90 degrees; forwarded to the DIMM as CK.
module top(
    input SYSCLK_P, //200 MHz
    input SYSCLK_N,
    input CPU_RESET,
    input SW7,
    //DDR3 in use: SODIMM: MT4JSF6464HY-1G1B1, 4 Components: MT41J64M16BLA-187E
    //Datasheets: jsf4c64x64hy.pdf (SODIMM), 1Gb_DDR3_SDRAM.pdf (Components)
    output [1:0] DDR3_CLK_P,
    output [1:0] DDR3_CLK_N,
    output reg [1:0] DDR3_CKE,
    output reg [1:0] DDR3_ODT,
    output reg [1:0] DDR3_CS_N,
    output reg DDR3_RESET_N,
    input DDR3_TEMP_EVENT_N,
    output reg DDR3_WE_N,
    output reg DDR3_CAS_N,
    output reg DDR3_RAS_N,
    inout [7:0] DDR3_DQS_N,
    inout [7:0] DDR3_DQS_P,
    output [7:0] DDR3_DM,
    inout [63:0] DDR3_DQ,
    output reg [2:0] DDR3_BA,
    output reg [15:0] DDR3_A //SODIMM in general has 16 address bits, this SODIMM only has 13... top 3 unused
);
    //Current design:
    //tCK = 3.3 ns
    //CL = 5, CWL = 5
    wire sysclk;
    wire [7:0] dqs_out;
    wire [7:0] dqs_in_p;
    wire [7:0] dqs_in_n;
    reg [7:0] dqs_tri_p;
    reg [7:0] dqs_tri_n;
    reg [63:0] dq_out;
    (* S = "TRUE" *)
    wire [63:0] dq_in;
    reg [63:0] dq_tri;

    wire clkout0;
    wire clkout0_bufg_in;
    BUFG clkout0_bufg_inst(
        .I(clkout0_bufg_in),
        .O(clkout0)
    );

    wire clkout1;
    wire clkout1_bufg_in;
    BUFG clkout1_bufg_inst(
        .I(clkout1_bufg_in),
        .O(clkout1)
    );

    // MMCM status/feedback nets, declared before first use.
    wire sysclk_mmcm_inst_locked;
    wire sysclk_fbout;

    // Global asynchronous reset: button, DIMM temperature event, or MMCM unlocked.
    wire reset;
    assign reset = CPU_RESET | ~DDR3_TEMP_EVENT_N | ~sysclk_mmcm_inst_locked;

    // Reset synchronizer into the clkout0 domain (async assert, sync release).
    reg [1:0] clkout0_reset_delay;
    wire clkout0_reset_sync;
    assign clkout0_reset_sync = clkout0_reset_delay[1];
    always @ (posedge clkout0 or posedge reset) begin
        if(reset) begin
            clkout0_reset_delay <= {2{1'b1}};
        end else begin
            clkout0_reset_delay <= {clkout0_reset_delay[0], reset};
        end
    end

    // Reset synchronizer into the sysclk domain (async assert, sync release).
    reg [1:0] sysclk_reset_delay;
    wire sysclk_reset_sync;
    assign sysclk_reset_sync = sysclk_reset_delay[1];
    always @ (posedge sysclk or posedge reset) begin
        if(reset) begin
            sysclk_reset_delay <= {2{1'b1}};
        end else begin
            sysclk_reset_delay <= {sysclk_reset_delay[0], reset};
        end
    end

    // SW7 push detector is still instantiated, but sw7_pushed is not consumed
    // anywhere in this module.
    wire sw7_pushed;
    sw_push_detect sw_push_detect_inst(
        .clock(sysclk),
        .reset(sysclk_reset_sync),
        .SW(SW7),
        .sw_pushed(sw7_pushed)
    );

    // Initialization / write-leveling entry sequencer states.
    reg [31:0] state;
    localparam WAIT_200US = (0);
    localparam WAIT_500US = (WAIT_200US + 1);
    localparam WAIT_110NS = (WAIT_500US + 1);
    localparam ISSUE_MR2 = (WAIT_110NS + 1);
    localparam WAIT_MR2 = (ISSUE_MR2 + 1);
    localparam ISSUE_MR3 = (WAIT_MR2 + 1);
    localparam WAIT_MR3 = (ISSUE_MR3 + 1);
    localparam ISSUE_MR1 = (WAIT_MR3 + 1);
    localparam WAIT_MR1 = (ISSUE_MR1 + 1);
    localparam ISSUE_MR0 = (WAIT_MR1 + 1);
    localparam WAIT_MR0 = (ISSUE_MR0 + 1);
    localparam ISSUE_ZQCL = (WAIT_MR0 + 1);
    localparam WAIT_ZQCL = (ISSUE_ZQCL + 1);
    localparam ISSUE_MR1_WRITE_LEVELIZATION = (WAIT_ZQCL + 1);
    localparam WAIT_MR1_WRITE_LEVELIZATION = (ISSUE_MR1_WRITE_LEVELIZATION + 1);
    localparam TURN_ON_ODT = (WAIT_MR1_WRITE_LEVELIZATION + 1);
    localparam WAIT_ODTL = (TURN_ON_ODT + 1);
    localparam WAIT_TWLDQSEN = (WAIT_ODTL + 1);
    localparam WAIT_TWLMRD = (WAIT_TWLDQSEN + 1);
    localparam IDLE = (WAIT_TWLMRD + 1);

    // Free-running delay counter. Each wait state tests a single bit, so the
    // actual wait is the next power of two at or above the required count.
    reg [18:0] count;

    (* KEEP = "TRUE" *)
    wire [4:0] cntvalueout [7:0];
    wire [7:0] dqs_out_delay_ce;
    reg start;
    (* KEEP = "TRUE" *)
    wire done;

    wl wl_inst(
        .iodelay_clk(sysclk),
        .iodelay_rst(sysclk_reset_sync),
        .wl_ce(dqs_out_delay_ce),
        .start(start),
        .done(done),
        .ddr3_clk(clkout0),
        .ddr3_rst(clkout0_reset_sync),
        .wl_dqs_out(dqs_out),
        .wl_dq_primes_in({dq_in[57], dq_in[50], dq_in[41], dq_in[38], dq_in[26], dq_in[16], dq_in[10], dq_in[4]})
    );

    always @ (posedge clkout0 or posedge clkout0_reset_sync) begin
        if(clkout0_reset_sync) begin
            DDR3_RESET_N <= 0;
            DDR3_CKE <= 2'h0;
            DDR3_ODT <= 2'h0;
            DDR3_CS_N <= 2'h3;
            DDR3_RAS_N <= 1;
            DDR3_CAS_N <= 1;
            DDR3_WE_N <= 1;
            DDR3_BA <= 3'h0;
            DDR3_A <= 0;
            count <= 0;
            state <= WAIT_200US;
            dqs_tri_p <= {8{1'b1}};
            dqs_tri_n <= {8{1'b1}};
            dq_out <= 0;
            dq_tri <= {64{1'b1}};
            start <= 0;
        end else begin
            // Defaults (deselected / NOP, ODT off, DQS tristated); individual
            // states override what they need.
            count <= count + 1;
            DDR3_RESET_N <= 1;
            DDR3_CKE <= 2'h3;
            DDR3_CS_N <= 2'h3;
            DDR3_RAS_N <= 1;
            DDR3_CAS_N <= 1;
            DDR3_WE_N <= 1;
            DDR3_BA <= 3'h0;
            DDR3_A <= 0;
            DDR3_ODT <= 2'h0;
            dqs_tri_p <= {8{1'b1}};
            dqs_tri_n <= {8{1'b1}};
            case (state)
                WAIT_200US: begin //initialization step 2, 3 (see micron ddr3 datasheet)
                    DDR3_RESET_N <= 0;
                    DDR3_CKE <= 2'h0;
                    if(count[16]) begin //needs 60000 clocks for 200us; count[16] waits 65536
                        count <= 0;
                        state <= WAIT_500US;
                    end
                end
                WAIT_500US: begin //initialization step 4, 5 (see micron ddr3 datasheet)
                    DDR3_CKE <= 2'h0;
                    if(count[18]) begin //needs 150000 clocks for 500us; count[18] waits 262144
                        count <= 0;
                        state <= WAIT_110NS;
                    end
                end
                WAIT_110NS: begin //initialization step 5, 6 (see micron ddr3 datasheet)
                    if(count[6]) //needs 33 clocks for 110ns; count[6] waits 64
                        state <= ISSUE_MR2;
                end
                ISSUE_MR2: begin //initialization step 6 (see micron ddr3 datasheet)
                    //The MIG disables dynamic ODT
                    DDR3_CS_N <= 2'h0;
                    DDR3_RAS_N <= 0;
                    DDR3_CAS_N <= 0;
                    DDR3_WE_N <= 0;
                    DDR3_BA <= 3'h2;
                    count <= 0;
                    state <= WAIT_MR2;
                end
                WAIT_MR2: begin //tMRD time between Mode Register updates
                    if(count[2]) //tMRD = 4 clocks
                        state <= ISSUE_MR3;
                end
                ISSUE_MR3: begin //initialization step 7 (see micron ddr3 datasheet)
                    DDR3_CS_N <= 2'h0;
                    DDR3_RAS_N <= 0;
                    DDR3_CAS_N <= 0;
                    DDR3_WE_N <= 0;
                    DDR3_BA <= 3'h3;
                    count <= 0;
                    state <= WAIT_MR3;
                end
                WAIT_MR3: begin //tMRD time between Mode Register updates
                    if(count[2]) //tMRD = 4 clocks
                        state <= ISSUE_MR1;
                end
                ISSUE_MR1: begin //initialization step 8 (see micron ddr3 datasheet)
                    DDR3_CS_N <= 2'h0;
                    DDR3_RAS_N <= 0;
                    DDR3_CAS_N <= 0;
                    DDR3_WE_N <= 0;
                    DDR3_BA <= 3'h1;
                    //DLL enabled
                    DDR3_A[0] <= 0;
                    //According to ML605 example design - HIGH
                    {DDR3_A[5], DDR3_A[1]} <= 2'h1; //Output Drive Strength
                    //According to ML605 example design - 40 ohm
                    {DDR3_A[9], DDR3_A[6], DDR3_A[2]} <= 3'h3; //RTT Nom
                    //Output enabled (1 == TRISTATED)
                    DDR3_A[12] <= 0;
                    count <= 0;
                    state <= WAIT_MR1;
                end
                WAIT_MR1: begin //tMRD time between Mode Register updates
                    if(count[2]) //tMRD = 4 clocks
                        state <= ISSUE_MR0;
                end
                ISSUE_MR0: begin //initialization step 9 (see micron ddr3 datasheet)
                    DDR3_CS_N <= 2'h0;
                    DDR3_RAS_N <= 0;
                    DDR3_CAS_N <= 0;
                    DDR3_WE_N <= 0;
                    DDR3_BA <= 3'h0;
                    //CL = 5
                    DDR3_A[6:4] <= 1;
                    //DLL Reset
                    DDR3_A[8] <= 1;
                    //WR = 5 = 15ns / 3.3ns
                    DDR3_A[11:9] <= 1;
                    count <= 0;
                    state <= WAIT_MR0;
                end
                WAIT_MR0: begin //DLL lock time after the DLL reset in MR0
                    if(count[9]) //tDLLK = 512 clocks
                        state <= ISSUE_ZQCL;
                end
                ISSUE_ZQCL: begin
                    DDR3_CS_N <= 2'h0;
                    DDR3_RAS_N <= 1;
                    DDR3_CAS_N <= 1;
                    DDR3_WE_N <= 0;
                    DDR3_A[10] <= 1;
                    // Restart the timer: count[9] is still set from the tDLLK
                    // wait, so without this WAIT_ZQCL would exit immediately
                    // and tZQinit would not be honoured.
                    count <= 0;
                    state <= WAIT_ZQCL;
                end
                WAIT_ZQCL: begin //tZQinit
                    if(count[9]) //tZQinit = 512 clocks
                        state <= ISSUE_MR1_WRITE_LEVELIZATION;
                end
                ISSUE_MR1_WRITE_LEVELIZATION: begin
                    DDR3_CS_N <= 2'h0;
                    DDR3_RAS_N <= 0;
                    DDR3_CAS_N <= 0;
                    DDR3_WE_N <= 0;
                    DDR3_BA <= 3'h1;
                    //DLL enabled
                    DDR3_A[0] <= 0;
                    //According to ML605 example design - HIGH
                    {DDR3_A[5], DDR3_A[1]} <= 2'h1; //Output Drive Strength
                    //According to ML605 example design - 40 ohm
                    {DDR3_A[9], DDR3_A[6], DDR3_A[2]} <= 3'h3; //RTT Nom
                    //Output enabled (1 == TRISTATED)
                    DDR3_A[12] <= 0;
                    //Write levelization enabled
                    DDR3_A[7] <= 1;
                    count <= 0;
                    state <= WAIT_MR1_WRITE_LEVELIZATION;
                end
                WAIT_MR1_WRITE_LEVELIZATION: begin //tMOD after the MR1 update
                    if(count[4]) //tMOD = 12 clocks; count[4] waits 16
                        state <= TURN_ON_ODT;
                end
                TURN_ON_ODT: begin
                    DDR3_ODT <= {2{1'b1}};
                    count <= 0;
                    state <= WAIT_ODTL;
                end
                WAIT_ODTL: begin
                    DDR3_ODT <= {2{1'b1}};
                    if(count[2]) begin //ODTL = WL - 2 = 3
                        count <= 0;
                        state <= WAIT_TWLDQSEN;
                    end
                end
                WAIT_TWLDQSEN: begin
                    DDR3_ODT <= {2{1'b1}};
                    if(count[5]) begin //tWLDQSEN = 25
                        count <= 0;
                        state <= WAIT_TWLMRD;
                    end
                end
                WAIT_TWLMRD: begin
                    DDR3_ODT <= {2{1'b1}};
                    if(count[6]) begin //tWLMRD = 40
                        count <= 0;
                        state <= IDLE;
                    end
                end
                IDLE: begin
                    // Write-leveling mode: keep ODT on, drive DQS, and toggle
                    // `start` every 256 clocks to (re)trigger the wl module.
                    DDR3_ODT <= {2{1'b1}};
                    dqs_tri_p <= 0;
                    dqs_tri_n <= 0;
                    if(count[8]) begin
                        start <= ~start;
                        count <= 0;
                    end
                end
            endcase
        end
    end

    assign DDR3_DM = dqs_in_p | dqs_in_n;

    IBUFGDS sysclk_ibufgds_inst(
        .I(SYSCLK_P),
        .IB(SYSCLK_N),
        .O(sysclk)
    );

    //CLKOUT[n] = CLKIN * CLKFBOUT_MULT_F / CLKOUT[n]_DIVIDE_F
    //CLKIN = 200 MHz
    //Currently configuring CLKOUT0 as (200 * 6) / 4 = 300
    //CLKOUT0 = 300 MHz
    //CLKOUT1 = 300 MHz @ -90 degrees
    MMCM_ADV # (
        .BANDWIDTH("OPTIMIZED"),
        .CLKFBOUT_MULT_F(6),
        .CLKFBOUT_PHASE(0.000),
        .CLKIN1_PERIOD(5.000),
        .CLKIN2_PERIOD(0),
        .CLKOUT0_DIVIDE_F(4.000),
        .CLKOUT1_DIVIDE(4),
        .CLKOUT2_DIVIDE(1),
        .CLKOUT3_DIVIDE(1),
        .CLKOUT4_DIVIDE(1),
        .CLKOUT5_DIVIDE(1),
        .CLKOUT6_DIVIDE(1),
        .CLKOUT0_DUTY_CYCLE(0.5),
        .CLKOUT1_DUTY_CYCLE(0.5),
        .CLKOUT2_DUTY_CYCLE(0.5),
        .CLKOUT3_DUTY_CYCLE(0.5),
        .CLKOUT4_DUTY_CYCLE(0.5),
        .CLKOUT5_DUTY_CYCLE(0.5),
        .CLKOUT6_DUTY_CYCLE(0.5),
        .CLKOUT0_PHASE(0),
        .CLKOUT1_PHASE(-90.0),
        .CLKOUT2_PHASE(0),
        .CLKOUT3_PHASE(0),
        .CLKOUT4_PHASE(0),
        .CLKOUT5_PHASE(0),
        .CLKOUT6_PHASE(0),
        .CLKOUT4_CASCADE("FALSE"),
        .CLOCK_HOLD("FALSE"),
        .COMPENSATION("INTERNAL"),
        .DIVCLK_DIVIDE(1),
        .REF_JITTER1(0.005),
        .REF_JITTER2(0.005),
        .STARTUP_WAIT("FALSE"),
        .CLKFBOUT_USE_FINE_PS("TRUE"),
        .CLKOUT0_USE_FINE_PS("TRUE"),
        .CLKOUT1_USE_FINE_PS("TRUE"),
        .CLKOUT2_USE_FINE_PS("FALSE"),
        .CLKOUT3_USE_FINE_PS("FALSE"),
        .CLKOUT4_USE_FINE_PS("FALSE"),
        .CLKOUT5_USE_FINE_PS("FALSE"),
        .CLKOUT6_USE_FINE_PS("FALSE")
    ) sysclk_mmcm_inst(
        .CLKFBIN(sysclk_fbout),
        .CLKFBOUT(sysclk_fbout),
        .CLKFBOUTB(),
        .CLKFBSTOPPED(),
        .CLKINSEL(1'b1),
        .CLKINSTOPPED(),
        .CLKIN1(sysclk),
        .CLKIN2(1'b0),
        .CLKOUT0(clkout0_bufg_in),
        .CLKOUT1(clkout1_bufg_in),
        .CLKOUT2(),
        .CLKOUT3(),
        .CLKOUT4(),
        .CLKOUT5(),
        .CLKOUT6(),
        .CLKOUT0B(),
        .CLKOUT1B(),
        .CLKOUT2B(),
        .CLKOUT3B(),
        .DADDR(7'h0),
        .DCLK(1'b0),
        .DEN(1'b0),
        .DI(16'h0),
        .DO(),
        .DRDY(),
        .DWE(1'b0),
        .LOCKED(sysclk_mmcm_inst_locked),
        .PSCLK(1'b0),
        .PSDONE(),
        .PSEN(1'b0),
        .PSINCDEC(1'b0),
        .PWRDWN(1'b0),
        .RST(1'b0)
    );

    (* IODELAY_GROUP = "IODELAY_GROUP_INST" *)
    IDELAYCTRL idelayctrl_inst(
        .RDY(),
        .REFCLK(sysclk),
        .RST(sysclk_reset_sync)
    );

    // Per-lane DQS output path: ODDR -> variable output delay -> differential IO buffer.
    genvar dqs_i;
    generate
        for(dqs_i = 0; dqs_i < 8; dqs_i = dqs_i + 1) begin : dqs_gen
            wire dqs_out_oddr;
            wire dqs_out_delay;
            ODDR # (
                .DDR_CLK_EDGE("SAME_EDGE")
            ) dqs_oddr_inst(
                .Q(dqs_out_oddr),
                .C(clkout0),
                .CE(1'b1),
                .D1(dqs_out[dqs_i]),
                .D2(1'b0),
                .R(1'b0),
                .S(1'b0)
            );
            (* IODELAY_GROUP = "IODELAY_GROUP_INST" *)
            IODELAYE1 # (
                .DELAY_SRC("O"),
                .IDELAY_TYPE("VARIABLE"),
                .ODELAY_TYPE("VARIABLE"),
                .ODELAY_VALUE(0),
                .REFCLK_FREQUENCY(200.0),
                .SIGNAL_PATTERN("DATA")
            ) dqs_iodelaye1_inst(
                .C(sysclk),
                .CE(dqs_out_delay_ce[dqs_i]),   // one increment per wl_ce strobe
                .CINVCTRL(1'b0),
                .CLKIN(1'b0),
                .CNTVALUEIN(5'h0),
                .CNTVALUEOUT(cntvalueout[dqs_i]),
                .DATAIN(1'b0),
                .DATAOUT(dqs_out_delay),
                .IDATAIN(1'b0),
                .INC(1'b1),
                .ODATAIN(dqs_out_oddr),
                .RST(sysclk_reset_sync),
                .T()
            );
            IOBUFDS_DIFF_OUT dqs_iobufds_diff_out_inst(
                .O(dqs_in_p[dqs_i]),
                .OB(dqs_in_n[dqs_i]),
                .IO(DDR3_DQS_P[dqs_i]),
                .IOB(DDR3_DQS_N[dqs_i]),
                .I(dqs_out_delay),
                .TM(dqs_tri_p[dqs_i]),
                .TS(dqs_tri_n[dqs_i])
            );
        end
    endgenerate

    // DQ pins: bidirectional buffers, tristated after reset (dq_tri is all ones).
    genvar dq_i;
    generate
        for(dq_i = 0; dq_i < 64; dq_i = dq_i + 1) begin : dq_gen
            IOBUF dq_iobuf_inst(
                .O(dq_in[dq_i]),
                .IO(DDR3_DQ[dq_i]),
                .I(dq_out[dq_i]),
                .T(dq_tri[dq_i])
            );
        end
    endgenerate

    // Forward the phase-shifted clock to both DIMM clock pairs.
    genvar clk_i;
    generate
        for(clk_i = 0; clk_i < 2; clk_i = clk_i + 1) begin : clk_gen
            OBUFDS clk_obufds_inst(
                .O(DDR3_CLK_P[clk_i]),
                .OB(DDR3_CLK_N[clk_i]),
                .I(clkout1)
            );
        end
    endgenerate

    my_debug debug_inst(
        .d({cntvalueout[0], dqs_in_p, done}),
        .q()
    );
endmodule