]>
Commit | Line | Data |
---|---|---|
58e5e9af KG |
1 | /* |
2 | * Copyright 2008 Freescale Semiconductor, Inc. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU General Public License | |
6 | * Version 2 as published by the Free Software Foundation. | |
7 | */ | |
8 | ||
9 | #include <common.h> | |
10 | #include <asm/fsl_ddr_sdram.h> | |
11 | ||
12 | #include "ddr.h" | |
13 | ||
c360ceac DL |
14 | unsigned int |
15 | compute_cas_latency_ddr3(const dimm_params_t *dimm_params, | |
16 | common_timing_params_t *outpdimm, | |
17 | unsigned int number_of_dimms) | |
18 | { | |
19 | unsigned int i; | |
20 | unsigned int tAAmin_ps = 0; | |
21 | unsigned int tCKmin_X_ps = 0; | |
22 | unsigned int common_caslat; | |
23 | unsigned int caslat_actual; | |
24 | unsigned int retry = 16; | |
25 | unsigned int tmp; | |
26 | const unsigned int mclk_ps = get_memory_clk_period_ps(); | |
27 | ||
28 | /* compute the common CAS latency supported between slots */ | |
29 | tmp = dimm_params[0].caslat_X; | |
30 | for (i = 1; i < number_of_dimms; i++) | |
31 | tmp &= dimm_params[i].caslat_X; | |
32 | common_caslat = tmp; | |
33 | ||
34 | /* compute the max tAAmin tCKmin between slots */ | |
35 | for (i = 0; i < number_of_dimms; i++) { | |
36 | tAAmin_ps = max(tAAmin_ps, dimm_params[i].tAA_ps); | |
37 | tCKmin_X_ps = max(tCKmin_X_ps, dimm_params[i].tCKmin_X_ps); | |
38 | } | |
39 | /* validate if the memory clk is in the range of dimms */ | |
40 | if (mclk_ps < tCKmin_X_ps) { | |
41 | printf("The DIMM max tCKmin is %d ps," | |
42 | "doesn't support the MCLK cycle %d ps\n", | |
43 | tCKmin_X_ps, mclk_ps); | |
44 | return 1; | |
45 | } | |
46 | /* determine the acutal cas latency */ | |
47 | caslat_actual = (tAAmin_ps + mclk_ps - 1) / mclk_ps; | |
48 | /* check if the dimms support the CAS latency */ | |
49 | while (!(common_caslat & (1 << caslat_actual)) && retry > 0) { | |
50 | caslat_actual++; | |
51 | retry--; | |
52 | } | |
53 | /* once the caculation of caslat_actual is completed | |
54 | * we must verify that this CAS latency value does not | |
55 | * exceed tAAmax, which is 20 ns for all DDR3 speed grades | |
56 | */ | |
57 | if (caslat_actual * mclk_ps > 20000) { | |
58 | printf("The choosen cas latency %d is too large\n", | |
59 | caslat_actual); | |
60 | return 1; | |
61 | } | |
62 | outpdimm->lowest_common_SPD_caslat = caslat_actual; | |
63 | ||
64 | return 0; | |
65 | } | |
66 | ||
58e5e9af KG |
67 | /* |
68 | * compute_lowest_common_dimm_parameters() | |
69 | * | |
70 | * Determine the worst-case DIMM timing parameters from the set of DIMMs | |
71 | * whose parameters have been computed into the array pointed to | |
72 | * by dimm_params. | |
73 | */ | |
74 | unsigned int | |
75 | compute_lowest_common_dimm_parameters(const dimm_params_t *dimm_params, | |
76 | common_timing_params_t *outpdimm, | |
77 | unsigned int number_of_dimms) | |
78 | { | |
79 | unsigned int i; | |
80 | ||
81 | unsigned int tCKmin_X_ps = 0; | |
82 | unsigned int tCKmax_ps = 0xFFFFFFFF; | |
83 | unsigned int tCKmax_max_ps = 0; | |
84 | unsigned int tRCD_ps = 0; | |
85 | unsigned int tRP_ps = 0; | |
86 | unsigned int tRAS_ps = 0; | |
87 | unsigned int tWR_ps = 0; | |
88 | unsigned int tWTR_ps = 0; | |
89 | unsigned int tRFC_ps = 0; | |
90 | unsigned int tRRD_ps = 0; | |
91 | unsigned int tRC_ps = 0; | |
92 | unsigned int refresh_rate_ps = 0; | |
93 | unsigned int tIS_ps = 0; | |
94 | unsigned int tIH_ps = 0; | |
95 | unsigned int tDS_ps = 0; | |
96 | unsigned int tDH_ps = 0; | |
97 | unsigned int tRTP_ps = 0; | |
98 | unsigned int tDQSQ_max_ps = 0; | |
99 | unsigned int tQHS_ps = 0; | |
100 | ||
101 | unsigned int temp1, temp2; | |
58e5e9af | 102 | unsigned int additive_latency = 0; |
c360ceac | 103 | #if !defined(CONFIG_FSL_DDR3) |
58e5e9af | 104 | const unsigned int mclk_ps = get_memory_clk_period_ps(); |
c360ceac | 105 | unsigned int lowest_good_caslat; |
58e5e9af KG |
106 | unsigned int not_ok; |
107 | ||
108 | debug("using mclk_ps = %u\n", mclk_ps); | |
c360ceac | 109 | #endif |
58e5e9af KG |
110 | |
111 | temp1 = 0; | |
112 | for (i = 0; i < number_of_dimms; i++) { | |
113 | /* | |
114 | * If there are no ranks on this DIMM, | |
115 | * it probably doesn't exist, so skip it. | |
116 | */ | |
117 | if (dimm_params[i].n_ranks == 0) { | |
118 | temp1++; | |
119 | continue; | |
120 | } | |
121 | ||
122 | /* | |
123 | * Find minimum tCKmax_ps to find fastest slow speed, | |
124 | * i.e., this is the slowest the whole system can go. | |
125 | */ | |
126 | tCKmax_ps = min(tCKmax_ps, dimm_params[i].tCKmax_ps); | |
127 | ||
128 | /* Either find maximum value to determine slowest | |
129 | * speed, delay, time, period, etc */ | |
130 | tCKmin_X_ps = max(tCKmin_X_ps, dimm_params[i].tCKmin_X_ps); | |
131 | tCKmax_max_ps = max(tCKmax_max_ps, dimm_params[i].tCKmax_ps); | |
132 | tRCD_ps = max(tRCD_ps, dimm_params[i].tRCD_ps); | |
133 | tRP_ps = max(tRP_ps, dimm_params[i].tRP_ps); | |
134 | tRAS_ps = max(tRAS_ps, dimm_params[i].tRAS_ps); | |
135 | tWR_ps = max(tWR_ps, dimm_params[i].tWR_ps); | |
136 | tWTR_ps = max(tWTR_ps, dimm_params[i].tWTR_ps); | |
137 | tRFC_ps = max(tRFC_ps, dimm_params[i].tRFC_ps); | |
138 | tRRD_ps = max(tRRD_ps, dimm_params[i].tRRD_ps); | |
139 | tRC_ps = max(tRC_ps, dimm_params[i].tRC_ps); | |
140 | tIS_ps = max(tIS_ps, dimm_params[i].tIS_ps); | |
141 | tIH_ps = max(tIH_ps, dimm_params[i].tIH_ps); | |
142 | tDS_ps = max(tDS_ps, dimm_params[i].tDS_ps); | |
143 | tDH_ps = max(tDH_ps, dimm_params[i].tDH_ps); | |
144 | tRTP_ps = max(tRTP_ps, dimm_params[i].tRTP_ps); | |
145 | tQHS_ps = max(tQHS_ps, dimm_params[i].tQHS_ps); | |
146 | refresh_rate_ps = max(refresh_rate_ps, | |
147 | dimm_params[i].refresh_rate_ps); | |
148 | ||
149 | /* | |
150 | * Find maximum tDQSQ_max_ps to find slowest. | |
151 | * | |
152 | * FIXME: is finding the slowest value the correct | |
153 | * strategy for this parameter? | |
154 | */ | |
155 | tDQSQ_max_ps = max(tDQSQ_max_ps, dimm_params[i].tDQSQ_max_ps); | |
156 | } | |
157 | ||
158 | outpdimm->ndimms_present = number_of_dimms - temp1; | |
159 | ||
160 | if (temp1 == number_of_dimms) { | |
161 | debug("no dimms this memory controller\n"); | |
162 | return 0; | |
163 | } | |
164 | ||
165 | outpdimm->tCKmin_X_ps = tCKmin_X_ps; | |
166 | outpdimm->tCKmax_ps = tCKmax_ps; | |
167 | outpdimm->tCKmax_max_ps = tCKmax_max_ps; | |
168 | outpdimm->tRCD_ps = tRCD_ps; | |
169 | outpdimm->tRP_ps = tRP_ps; | |
170 | outpdimm->tRAS_ps = tRAS_ps; | |
171 | outpdimm->tWR_ps = tWR_ps; | |
172 | outpdimm->tWTR_ps = tWTR_ps; | |
173 | outpdimm->tRFC_ps = tRFC_ps; | |
174 | outpdimm->tRRD_ps = tRRD_ps; | |
175 | outpdimm->tRC_ps = tRC_ps; | |
176 | outpdimm->refresh_rate_ps = refresh_rate_ps; | |
177 | outpdimm->tIS_ps = tIS_ps; | |
178 | outpdimm->tIH_ps = tIH_ps; | |
179 | outpdimm->tDS_ps = tDS_ps; | |
180 | outpdimm->tDH_ps = tDH_ps; | |
181 | outpdimm->tRTP_ps = tRTP_ps; | |
182 | outpdimm->tDQSQ_max_ps = tDQSQ_max_ps; | |
183 | outpdimm->tQHS_ps = tQHS_ps; | |
184 | ||
185 | /* Determine common burst length for all DIMMs. */ | |
186 | temp1 = 0xff; | |
187 | for (i = 0; i < number_of_dimms; i++) { | |
188 | if (dimm_params[i].n_ranks) { | |
189 | temp1 &= dimm_params[i].burst_lengths_bitmask; | |
190 | } | |
191 | } | |
192 | outpdimm->all_DIMMs_burst_lengths_bitmask = temp1; | |
193 | ||
194 | /* Determine if all DIMMs registered buffered. */ | |
195 | temp1 = temp2 = 0; | |
196 | for (i = 0; i < number_of_dimms; i++) { | |
197 | if (dimm_params[i].n_ranks) { | |
198 | if (dimm_params[i].registered_dimm) | |
199 | temp1 = 1; | |
200 | if (!dimm_params[i].registered_dimm) | |
201 | temp2 = 1; | |
202 | } | |
203 | } | |
204 | ||
205 | outpdimm->all_DIMMs_registered = 0; | |
206 | if (temp1 && !temp2) { | |
207 | outpdimm->all_DIMMs_registered = 1; | |
208 | } | |
209 | ||
210 | outpdimm->all_DIMMs_unbuffered = 0; | |
211 | if (!temp1 && temp2) { | |
212 | outpdimm->all_DIMMs_unbuffered = 1; | |
213 | } | |
214 | ||
215 | /* CHECKME: */ | |
216 | if (!outpdimm->all_DIMMs_registered | |
217 | && !outpdimm->all_DIMMs_unbuffered) { | |
218 | printf("ERROR: Mix of registered buffered and unbuffered " | |
219 | "DIMMs detected!\n"); | |
220 | } | |
221 | ||
c360ceac DL |
222 | #if defined(CONFIG_FSL_DDR3) |
223 | if (compute_cas_latency_ddr3(dimm_params, outpdimm, number_of_dimms)) | |
224 | return 1; | |
225 | #else | |
58e5e9af KG |
226 | /* |
227 | * Compute a CAS latency suitable for all DIMMs | |
228 | * | |
229 | * Strategy for SPD-defined latencies: compute only | |
230 | * CAS latency defined by all DIMMs. | |
231 | */ | |
232 | ||
233 | /* | |
234 | * Step 1: find CAS latency common to all DIMMs using bitwise | |
235 | * operation. | |
236 | */ | |
237 | temp1 = 0xFF; | |
238 | for (i = 0; i < number_of_dimms; i++) { | |
239 | if (dimm_params[i].n_ranks) { | |
240 | temp2 = 0; | |
241 | temp2 |= 1 << dimm_params[i].caslat_X; | |
242 | temp2 |= 1 << dimm_params[i].caslat_X_minus_1; | |
243 | temp2 |= 1 << dimm_params[i].caslat_X_minus_2; | |
244 | /* | |
245 | * FIXME: If there was no entry for X-2 (X-1) in | |
246 | * the SPD, then caslat_X_minus_2 | |
247 | * (caslat_X_minus_1) contains either 255 or | |
248 | * 0xFFFFFFFF because that's what the glorious | |
249 | * __ilog2 function returns for an input of 0. | |
250 | * On 32-bit PowerPC, left shift counts with bit | |
251 | * 26 set (that the value of 255 or 0xFFFFFFFF | |
252 | * will have), cause the destination register to | |
253 | * be 0. That is why this works. | |
254 | */ | |
255 | temp1 &= temp2; | |
256 | } | |
257 | } | |
258 | ||
259 | /* | |
260 | * Step 2: check each common CAS latency against tCK of each | |
261 | * DIMM's SPD. | |
262 | */ | |
263 | lowest_good_caslat = 0; | |
264 | temp2 = 0; | |
265 | while (temp1) { | |
266 | not_ok = 0; | |
267 | temp2 = __ilog2(temp1); | |
268 | debug("checking common caslat = %u\n", temp2); | |
269 | ||
270 | /* Check if this CAS latency will work on all DIMMs at tCK. */ | |
271 | for (i = 0; i < number_of_dimms; i++) { | |
272 | if (!dimm_params[i].n_ranks) { | |
273 | continue; | |
274 | } | |
275 | if (dimm_params[i].caslat_X == temp2) { | |
276 | if (mclk_ps >= dimm_params[i].tCKmin_X_ps) { | |
277 | debug("CL = %u ok on DIMM %u at tCK=%u" | |
278 | " ps with its tCKmin_X_ps of %u\n", | |
279 | temp2, i, mclk_ps, | |
280 | dimm_params[i].tCKmin_X_ps); | |
281 | continue; | |
282 | } else { | |
283 | not_ok++; | |
284 | } | |
285 | } | |
286 | ||
287 | if (dimm_params[i].caslat_X_minus_1 == temp2) { | |
288 | unsigned int tCKmin_X_minus_1_ps | |
289 | = dimm_params[i].tCKmin_X_minus_1_ps; | |
290 | if (mclk_ps >= tCKmin_X_minus_1_ps) { | |
291 | debug("CL = %u ok on DIMM %u at " | |
292 | "tCK=%u ps with its " | |
293 | "tCKmin_X_minus_1_ps of %u\n", | |
294 | temp2, i, mclk_ps, | |
295 | tCKmin_X_minus_1_ps); | |
296 | continue; | |
297 | } else { | |
298 | not_ok++; | |
299 | } | |
300 | } | |
301 | ||
302 | if (dimm_params[i].caslat_X_minus_2 == temp2) { | |
303 | unsigned int tCKmin_X_minus_2_ps | |
304 | = dimm_params[i].tCKmin_X_minus_2_ps; | |
305 | if (mclk_ps >= tCKmin_X_minus_2_ps) { | |
306 | debug("CL = %u ok on DIMM %u at " | |
307 | "tCK=%u ps with its " | |
308 | "tCKmin_X_minus_2_ps of %u\n", | |
309 | temp2, i, mclk_ps, | |
310 | tCKmin_X_minus_2_ps); | |
311 | continue; | |
312 | } else { | |
313 | not_ok++; | |
314 | } | |
315 | } | |
316 | } | |
317 | ||
318 | if (!not_ok) { | |
319 | lowest_good_caslat = temp2; | |
320 | } | |
321 | ||
322 | temp1 &= ~(1 << temp2); | |
323 | } | |
324 | ||
325 | debug("lowest common SPD-defined CAS latency = %u\n", | |
326 | lowest_good_caslat); | |
327 | outpdimm->lowest_common_SPD_caslat = lowest_good_caslat; | |
328 | ||
329 | ||
330 | /* | |
331 | * Compute a common 'de-rated' CAS latency. | |
332 | * | |
333 | * The strategy here is to find the *highest* dereated cas latency | |
334 | * with the assumption that all of the DIMMs will support a dereated | |
335 | * CAS latency higher than or equal to their lowest dereated value. | |
336 | */ | |
337 | temp1 = 0; | |
338 | for (i = 0; i < number_of_dimms; i++) { | |
339 | temp1 = max(temp1, dimm_params[i].caslat_lowest_derated); | |
340 | } | |
341 | outpdimm->highest_common_derated_caslat = temp1; | |
342 | debug("highest common dereated CAS latency = %u\n", temp1); | |
c360ceac | 343 | #endif /* #if defined(CONFIG_FSL_DDR3) */ |
58e5e9af KG |
344 | |
345 | /* Determine if all DIMMs ECC capable. */ | |
346 | temp1 = 1; | |
347 | for (i = 0; i < number_of_dimms; i++) { | |
348 | if (dimm_params[i].n_ranks && dimm_params[i].edc_config != 2) { | |
349 | temp1 = 0; | |
350 | break; | |
351 | } | |
352 | } | |
353 | if (temp1) { | |
354 | debug("all DIMMs ECC capable\n"); | |
355 | } else { | |
356 | debug("Warning: not all DIMMs ECC capable, cant enable ECC\n"); | |
357 | } | |
358 | outpdimm->all_DIMMs_ECC_capable = temp1; | |
359 | ||
c360ceac | 360 | #ifndef CONFIG_FSL_DDR3 |
58e5e9af KG |
361 | /* FIXME: move to somewhere else to validate. */ |
362 | if (mclk_ps > tCKmax_max_ps) { | |
363 | printf("Warning: some of the installed DIMMs " | |
364 | "can not operate this slowly.\n"); | |
365 | return 1; | |
366 | } | |
c360ceac | 367 | #endif |
58e5e9af KG |
368 | /* |
369 | * Compute additive latency. | |
370 | * | |
371 | * For DDR1, additive latency should be 0. | |
372 | * | |
373 | * For DDR2, with ODT enabled, use "a value" less than ACTTORW, | |
374 | * which comes from Trcd, and also note that: | |
375 | * add_lat + caslat must be >= 4 | |
376 | * | |
c360ceac | 377 | * For DDR3, we use the AL=0 |
58e5e9af KG |
378 | * |
379 | * When to use additive latency for DDR2: | |
380 | * | |
381 | * I. Because you are using CL=3 and need to do ODT on writes and | |
382 | * want functionality. | |
383 | * 1. Are you going to use ODT? (Does your board not have | |
384 | * additional termination circuitry for DQ, DQS, DQS_, | |
385 | * DM, RDQS, RDQS_ for x4/x8 configs?) | |
386 | * 2. If so, is your lowest supported CL going to be 3? | |
387 | * 3. If so, then you must set AL=1 because | |
388 | * | |
389 | * WL >= 3 for ODT on writes | |
390 | * RL = AL + CL | |
391 | * WL = RL - 1 | |
392 | * -> | |
393 | * WL = AL + CL - 1 | |
394 | * AL + CL - 1 >= 3 | |
395 | * AL + CL >= 4 | |
396 | * QED | |
397 | * | |
398 | * RL >= 3 for ODT on reads | |
399 | * RL = AL + CL | |
400 | * | |
401 | * Since CL aren't usually less than 2, AL=0 is a minimum, | |
402 | * so the WL-derived AL should be the -- FIXME? | |
403 | * | |
404 | * II. Because you are using auto-precharge globally and want to | |
405 | * use additive latency (posted CAS) to get more bandwidth. | |
406 | * 1. Are you going to use auto-precharge mode globally? | |
407 | * | |
408 | * Use addtivie latency and compute AL to be 1 cycle less than | |
409 | * tRCD, i.e. the READ or WRITE command is in the cycle | |
410 | * immediately following the ACTIVATE command.. | |
411 | * | |
412 | * III. Because you feel like it or want to do some sort of | |
413 | * degraded-performance experiment. | |
414 | * 1. Do you just want to use additive latency because you feel | |
415 | * like it? | |
416 | * | |
417 | * Validation: AL is less than tRCD, and within the other | |
418 | * read-to-precharge constraints. | |
419 | */ | |
420 | ||
421 | additive_latency = 0; | |
422 | ||
423 | #if defined(CONFIG_FSL_DDR2) | |
424 | if (lowest_good_caslat < 4) { | |
425 | additive_latency = picos_to_mclk(tRCD_ps) - lowest_good_caslat; | |
426 | if (mclk_to_picos(additive_latency) > tRCD_ps) { | |
427 | additive_latency = picos_to_mclk(tRCD_ps); | |
428 | debug("setting additive_latency to %u because it was " | |
429 | " greater than tRCD_ps\n", additive_latency); | |
430 | } | |
431 | } | |
432 | ||
433 | #elif defined(CONFIG_FSL_DDR3) | |
c360ceac DL |
434 | /* |
435 | * The system will not use the global auto-precharge mode. | |
436 | * However, it uses the page mode, so we set AL=0 | |
437 | */ | |
438 | additive_latency = 0; | |
58e5e9af KG |
439 | #endif |
440 | ||
441 | /* | |
442 | * Validate additive latency | |
443 | * FIXME: move to somewhere else to validate | |
444 | * | |
445 | * AL <= tRCD(min) | |
446 | */ | |
447 | if (mclk_to_picos(additive_latency) > tRCD_ps) { | |
448 | printf("Error: invalid additive latency exceeds tRCD(min).\n"); | |
449 | return 1; | |
450 | } | |
451 | ||
452 | /* | |
453 | * RL = CL + AL; RL >= 3 for ODT_RD_CFG to be enabled | |
454 | * WL = RL - 1; WL >= 3 for ODT_WL_CFG to be enabled | |
455 | * ADD_LAT (the register) must be set to a value less | |
456 | * than ACTTORW if WL = 1, then AL must be set to 1 | |
457 | * RD_TO_PRE (the register) must be set to a minimum | |
458 | * tRTP + AL if AL is nonzero | |
459 | */ | |
460 | ||
461 | /* | |
462 | * Additive latency will be applied only if the memctl option to | |
463 | * use it. | |
464 | */ | |
465 | outpdimm->additive_latency = additive_latency; | |
466 | ||
467 | return 0; | |
468 | } |