]> git.ipfire.org Git - people/teissler/ipfire-2.x.git/blob - src/patches/suse-2.6.27.31/patches.drivers/cxgb3i-fix-skb-overrun
Merge branch 'master' of git://git.ipfire.org/ipfire-2.x
[people/teissler/ipfire-2.x.git] / src / patches / suse-2.6.27.31 / patches.drivers / cxgb3i-fix-skb-overrun
1 Subject: cxgb3i - fixes over-run of skb MAX_SKB_FRAGS
2 From: Karen Xie <kxie@chelsio.com>
3 References: bnc#468314
4
5 This patch fixes the over-run of skb's MAX_SKB_FRAGS between the cxgb3i and
6 cxgb3 drivers on PPC64 systems.
7
8 Signed-off-by: Karen Xie <kxie@chelsio.com>
9 Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
10 Acked-by: Hannes Reinecke <hare@suse.de>
11 ---
12
13 diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i.h linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i.h
14 --- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i.h 2009-01-14 15:17:57.000000000 -0800
15 +++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i.h 2009-01-26 22:00:17.000000000 -0800
16 @@ -36,6 +36,12 @@
17 #define CXGB3I_MAX_TARGET CXGB3I_MAX_CONN
18 #define CXGB3I_MAX_LUN 512
19 #define ISCSI_PDU_HEADER_MAX (56 + 256) /* bhs + digests + ahs */
20 +#define ULP2_MAX_PKT_SIZE 16224
21 +#define ISCSI_PDU_NONPAYLOAD_MAX \
22 + (sizeof(struct iscsi_hdr) + ISCSI_MAX_AHS_SIZE + 2*ISCSI_DIGEST_SIZE)
23 +#define ULP2_MAX_PDU_PAYLOAD \
24 + (ULP2_MAX_PKT_SIZE - ISCSI_PDU_NONPAYLOAD_MAX)
25 +
26
27 struct cxgb3i_adapter;
28 struct cxgb3i_hba;
29 @@ -53,12 +59,11 @@
30 *
31 */
32 struct cxgb3i_tag_format {
33 - unsigned char idx_bits;
34 - unsigned char age_bits;
35 + unsigned char sw_bits;
36 unsigned char rsvd_bits;
37 unsigned char rsvd_shift;
38 + unsigned char filler[1];
39 u32 rsvd_mask;
40 - u32 rsvd_tag_mask;
41 };
42
43 /**
44 @@ -95,11 +100,137 @@
45 unsigned int ulimit;
46 unsigned int nppods;
47 unsigned int idx_last;
48 + unsigned char idx_bits;
49 + unsigned char filler[3];
50 + u32 idx_mask;
51 + u32 rsvd_tag_mask;
52 spinlock_t map_lock;
53 struct cxgb3i_gather_list **gl_map;
54 struct sk_buff **gl_skb;
55 };
56
57 +/*
58 + * cxgb3i ddp tag are 32 bits, it consists of reserved bits used by h/w and
59 + * non-reserved bits that can be used by the iscsi s/w.
60 + * The reserved bits are identified by the rsvd_bits and rsvd_shift fields
61 + * in struct cxgb3i_tag_format.
62 + *
63 + * The uppermost reserved bit can be used to check if a tag is a ddp tag or not:
64 + * if the bit is 0, the tag is a valid ddp tag
65 + */
66 +
67 +/**
68 + * cxgb3i_is_ddp_tag - check if a given tag is a hw/ddp tag
69 + * @tformat: tag format information
70 + * @tag: tag to be checked
71 + *
72 + * return true if the tag is a ddp tag, false otherwise.
73 + */
74 +static inline int cxgb3i_is_ddp_tag(struct cxgb3i_tag_format *tformat, u32 tag)
75 +{
76 + return !(tag & (1 << (tformat->rsvd_bits + tformat->rsvd_shift - 1)));
77 +}
78 +
79 +/**
80 + * cxgb3i_sw_tag_usable - check if a given s/w tag has enough bits left for
81 + * the reserved/hw bits
82 + * @tformat: tag format information
83 + * @sw_tag: s/w tag to be checked
84 + *
85 + * return true if the s/w tag fits within the non-reserved bits (i.e. is
86 + */
87 +static inline int cxgb3i_sw_tag_usable(struct cxgb3i_tag_format *tformat,
88 + u32 sw_tag)
89 +{
90 + sw_tag >>= (32 - tformat->rsvd_bits);
91 + return !sw_tag;
92 +}
93 +
94 +/**
95 + * cxgb3i_set_non_ddp_tag - mark a given s/w tag as an invalid ddp tag
96 + * @tformat: tag format information
97 + * @sw_tag: s/w tag to be checked
98 + *
99 + * insert 1 at the uppermost reserved bit to mark it as an invalid ddp tag.
100 + */
101 +static inline u32 cxgb3i_set_non_ddp_tag(struct cxgb3i_tag_format *tformat,
102 + u32 sw_tag)
103 +{
104 + unsigned char shift = tformat->rsvd_bits + tformat->rsvd_shift - 1;
105 + u32 mask = (1 << shift) - 1;
106 +
107 + if (sw_tag && (sw_tag & ~mask)) {
108 + u32 v1 = sw_tag & ((1 << shift) - 1);
109 + u32 v2 = (sw_tag >> (shift - 1)) << shift;
110 +
111 + return v2 | v1 | 1 << shift;
112 + }
113 + return sw_tag | 1 << shift;
114 +}
115 +
116 +/**
117 + * cxgb3i_ddp_tag_base - shift the s/w tag bits so that reserved bits are not
118 + * used.
119 + * @tformat: tag format information
120 + * @sw_tag: s/w tag to be checked
121 + */
122 +static inline u32 cxgb3i_ddp_tag_base(struct cxgb3i_tag_format *tformat,
123 + u32 sw_tag)
124 +{
125 + u32 mask = (1 << tformat->rsvd_shift) - 1;
126 +
127 + if (sw_tag && (sw_tag & ~mask)) {
128 + u32 v1 = sw_tag & mask;
129 + u32 v2 = sw_tag >> tformat->rsvd_shift;
130 +
131 + v2 <<= tformat->rsvd_shift + tformat->rsvd_bits;
132 + return v2 | v1;
133 + }
134 + return sw_tag;
135 +}
136 +
137 +/**
138 + * cxgb3i_tag_rsvd_bits - get the reserved bits used by the h/w
139 + * @tformat: tag format information
140 + * @tag: tag to be checked
141 + *
142 + * return the reserved bits in the tag
143 + */
144 +static inline u32 cxgb3i_tag_rsvd_bits(struct cxgb3i_tag_format *tformat,
145 + u32 tag)
146 +{
147 + if (cxgb3i_is_ddp_tag(tformat, tag))
148 + return (tag >> tformat->rsvd_shift) & tformat->rsvd_mask;
149 + return 0;
150 +}
151 +
152 +/**
153 + * cxgb3i_tag_nonrsvd_bits - get the non-reserved bits used by the s/w
154 + * @tformat: tag format information
155 + * @tag: tag to be checked
156 + *
157 + * return the non-reserved bits in the tag.
158 + */
159 +static inline u32 cxgb3i_tag_nonrsvd_bits(struct cxgb3i_tag_format *tformat,
160 + u32 tag)
161 +{
162 + unsigned char shift = tformat->rsvd_bits + tformat->rsvd_shift - 1;
163 + u32 v1, v2;
164 +
165 + if (cxgb3i_is_ddp_tag(tformat, tag)) {
166 + v1 = tag & ((1 << tformat->rsvd_shift) - 1);
167 + v2 = (tag >> (shift + 1)) << tformat->rsvd_shift;
168 + } else {
169 + u32 mask = (1 << shift) - 1;
170 +
171 + tag &= ~(1 << shift);
172 + v1 = tag & mask;
173 + v2 = (tag >> 1) & ~mask;
174 + }
175 + return v1 | v2;
176 +}
177 +
178 +
179 /**
180 * struct cxgb3i_hba - cxgb3i iscsi structure (per port)
181 *
182 @@ -146,16 +277,22 @@
183 * struct cxgb3i_conn - cxgb3i iscsi connection
184 *
185 * @tcp_conn: pointer to iscsi_tcp_conn structure
186 - * @listhead: list head to link elements
187 + * @list_head: list head to link elements
188 + * @cep: pointer to iscsi_endpoint structure
189 * @conn: pointer to iscsi_conn structure
190 * @hba: pointer to the hba this conn. is going through
191 + * @task_idx_bits: # of bits needed for session->cmds_max
192 + * @frags: temp. holding area for tx coalesced sg list pages.
193 */
194 +#define TX_PDU_PAGES_MAX (16384/512 + 1)
195 struct cxgb3i_conn {
196 struct iscsi_tcp_conn tcp_conn;
197 struct list_head list_head;
198 struct cxgb3i_endpoint *cep;
199 struct iscsi_conn *conn;
200 struct cxgb3i_hba *hba;
201 + unsigned int task_idx_bits;
202 + skb_frag_t frags[TX_PDU_PAGES_MAX];
203 };
204
205 /**
206 @@ -190,8 +327,7 @@
207 int cxgb3i_ulp2_init(void);
208 void cxgb3i_ulp2_cleanup(void);
209 int cxgb3i_conn_ulp_setup(struct cxgb3i_conn *, int, int);
210 -void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *, u32,
211 - struct scatterlist *, unsigned int);
212 +void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *, u32);
213 u32 cxgb3i_ddp_tag_reserve(struct cxgb3i_adapter *, unsigned int,
214 u32, unsigned int, struct scatterlist *,
215 unsigned int, int);
216 diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_init.c linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_init.c
217 --- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_init.c 2009-01-14 15:17:57.000000000 -0800
218 +++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_init.c 2009-01-26 22:00:17.000000000 -0800
219 @@ -11,8 +11,8 @@
220
221 #include "cxgb3i.h"
222
223 -#define DRV_MODULE_NAME "cxgb3i"
224 -#define DRV_MODULE_VERSION "0.1.0"
225 +#define DRV_MODULE_NAME "cxgb3i"
226 +#define DRV_MODULE_VERSION "0.9.0"
227 #define DRV_MODULE_RELDATE "Jun. 1, 2008"
228
229 static char version[] =
230 diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_iscsi.c linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
231 --- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_iscsi.c 2009-01-14 15:17:57.000000000 -0800
232 +++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_iscsi.c 2009-01-26 22:00:17.000000000 -0800
233 @@ -36,10 +36,10 @@
234 #define cxgb3i_api_debug(fmt...)
235 #endif
236
237 -#define align_to_4k_boundary(n) \
238 - do { \
239 - n = (n) & ~((1 << 12) - 1); \
240 - } while(0)
241 +/*
242 + * align pdu size to multiple of 512 for better performance
243 + */
244 +#define align_pdu_size(n) do { n = (n) & (~511); } while (0)
245
246 static struct scsi_transport_template *cxgb3i_scsi_transport;
247 static struct scsi_host_template cxgb3i_host_template;
248 @@ -102,7 +102,7 @@
249 struct cxgb3i_adapter *snic;
250
251 /* remove from the list */
252 - read_lock(&cxgb3i_snic_rwlock);
253 + write_lock(&cxgb3i_snic_rwlock);
254 list_for_each_entry(snic, &cxgb3i_snic_list, list_head) {
255 if (snic->tdev == t3dev) {
256 list_del(&snic->list_head);
257 @@ -295,6 +295,8 @@
258 * stop the xmit path so the xmit_segment function is
259 * not being called
260 */
261 + iscsi_suspend_tx(cconn->conn);
262 +
263 write_lock_bh(&cep->c3cn->callback_lock);
264 set_bit(ISCSI_SUSPEND_BIT, &cconn->conn->suspend_rx);
265 cep->c3cn->user_data = NULL;
266 @@ -391,20 +393,17 @@
267 static inline int cxgb3i_conn_max_xmit_dlength(struct iscsi_conn *conn)
268 {
269 struct cxgb3i_conn *cconn = conn->dd_data;
270 - unsigned int max = min_t(unsigned int, ULP2_MAX_PDU_SIZE,
271 - cconn->hba->snic->tx_max_size -
272 - ISCSI_PDU_HEADER_MAX);
273 -
274 - cxgb3i_api_debug("conn 0x%p, max xmit %u.\n",
275 - conn, conn->max_xmit_dlength);
276 + unsigned int max = min_t(unsigned int,
277 + ULP2_MAX_PDU_PAYLOAD,
278 + cconn->hba->snic->tx_max_size -
279 + ISCSI_PDU_NONPAYLOAD_MAX);
280
281 if (conn->max_xmit_dlength)
282 conn->max_xmit_dlength = min_t(unsigned int,
283 - conn->max_xmit_dlength, max);
284 + conn->max_xmit_dlength, max);
285 else
286 conn->max_xmit_dlength = max;
287 -
288 - align_to_4k_boundary(conn->max_xmit_dlength);
289 + align_pdu_size(conn->max_xmit_dlength);
290
291 cxgb3i_api_debug("conn 0x%p, set max xmit %u.\n",
292 conn, conn->max_xmit_dlength);
293 @@ -415,14 +414,10 @@
294 static inline int cxgb3i_conn_max_recv_dlength(struct iscsi_conn *conn)
295 {
296 struct cxgb3i_conn *cconn = conn->dd_data;
297 - unsigned int max = min_t(unsigned int, ULP2_MAX_PDU_SIZE,
298 - cconn->hba->snic->rx_max_size -
299 - ISCSI_PDU_HEADER_MAX);
300 -
301 - cxgb3i_api_debug("conn 0x%p, max recv %u.\n",
302 - conn, conn->max_recv_dlength);
303 -
304 - align_to_4k_boundary(max);
305 + unsigned int max = min_t(unsigned int,
306 + ULP2_MAX_PDU_PAYLOAD,
307 + cconn->hba->snic->tx_max_size -
308 + ISCSI_PDU_NONPAYLOAD_MAX);
309
310 if (conn->max_recv_dlength) {
311 if (conn->max_recv_dlength > max) {
312 @@ -433,9 +428,9 @@
313 }
314 conn->max_recv_dlength = min_t(unsigned int,
315 conn->max_recv_dlength, max);
316 - align_to_4k_boundary(conn->max_recv_dlength);
317 } else
318 conn->max_recv_dlength = max;
319 + align_pdu_size(conn->max_recv_dlength);
320
321 cxgb3i_api_debug("conn 0x%p, set max recv %u.\n",
322 conn, conn->max_recv_dlength);
323 @@ -516,12 +511,14 @@
324
325 cep = ep->dd_data;
326 c3cn = cep->c3cn;
327 + /* calculate the tag idx bits needed for this conn based on cmds_max */
328 + cconn->task_idx_bits = (__ilog2_u32(conn->session->cmds_max - 1)) + 1;
329
330 - read_lock(&c3cn->callback_lock);
331 + write_lock(&c3cn->callback_lock);
332 /* mnc: TODO don't abuse iscsi_tcp fields */
333 tcp_conn->sock = (struct socket *)c3cn;
334 c3cn->user_data = conn;
335 - read_unlock(&c3cn->callback_lock);
336 + write_unlock(&c3cn->callback_lock);
337
338 cconn->hba = cep->hba;
339 cconn->cep = cep;
340 @@ -609,11 +606,13 @@
341 return -ENOMEM;
342 case ISCSI_PARAM_MAX_RECV_DLENGTH:
343 err = iscsi_set_param(cls_conn, param, buf, buflen);
344 - err = cxgb3i_conn_max_recv_dlength(conn);
345 + if (!err)
346 + err = cxgb3i_conn_max_recv_dlength(conn);
347 break;
348 case ISCSI_PARAM_MAX_XMIT_DLENGTH:
349 err = iscsi_set_param(cls_conn, param, buf, buflen);
350 - err = cxgb3i_conn_max_xmit_dlength(conn);
351 + if (!err)
352 + err = cxgb3i_conn_max_xmit_dlength(conn);
353 break;
354 default:
355 return iscsi_set_param(cls_conn, param, buf, buflen);
356 @@ -718,49 +717,23 @@
357 stats->custom[0].value = conn->eh_abort_cnt;
358 }
359
360 -static inline u32 tag_base(struct cxgb3i_tag_format *format,
361 - unsigned int idx, unsigned int age)
362 -{
363 - u32 sw_bits = idx | (age << format->idx_bits);
364 - u32 tag = sw_bits >> format->rsvd_shift;
365 -
366 - tag <<= format->rsvd_bits + format->rsvd_shift;
367 - tag |= sw_bits & ((1 << format->rsvd_shift) - 1);
368 - return tag;
369 -}
370 -
371 -static inline void cxgb3i_parse_tag(struct cxgb3i_tag_format *format,
372 - u32 tag, u32 *rsvd_bits, u32 *sw_bits)
373 -{
374 - if (rsvd_bits)
375 - *rsvd_bits = (tag >> format->rsvd_shift) & format->rsvd_mask;
376 - if (sw_bits) {
377 - *sw_bits = (tag >> (format->rsvd_shift + format->rsvd_bits))
378 - << format->rsvd_shift;
379 - *sw_bits |= tag & ((1 << format->rsvd_shift) - 1);
380 - }
381 -
382 - cxgb3i_tag_debug("parse tag 0x%x, rsvd 0x%x, sw 0x%x.\n",
383 - tag, rsvd_bits ? *rsvd_bits : 0xFFFFFFFF,
384 - sw_bits ? *sw_bits : 0xFFFFFFFF);
385 -}
386 -
387 -
388 static void cxgb3i_parse_itt(struct iscsi_conn *conn, itt_t itt,
389 int *idx, int *age)
390 {
391 struct cxgb3i_conn *cconn = conn->dd_data;
392 struct cxgb3i_adapter *snic = cconn->hba->snic;
393 + u32 tag = itt;
394 u32 sw_bits;
395
396 - cxgb3i_parse_tag(&snic->tag_format, itt, NULL, &sw_bits);
397 + sw_bits = cxgb3i_tag_nonrsvd_bits(&snic->tag_format, tag);
398 if (idx)
399 - *idx = sw_bits & ISCSI_ITT_MASK;
400 + *idx = sw_bits & ((1 << cconn->task_idx_bits) - 1);
401 if (age)
402 - *age = (sw_bits >> snic->tag_format.idx_bits) & ISCSI_AGE_MASK;
403 + *age = (sw_bits >> cconn->task_idx_bits) & ISCSI_AGE_MASK;
404
405 - cxgb3i_tag_debug("parse itt 0x%x, idx 0x%x, age 0x%x.\n",
406 - itt, idx ? *idx : 0xFFFFF, age ? *age : 0xFF);
407 + cxgb3i_tag_debug("parse tag 0x%x/0x%x, sw 0x%x, idx 0x%x, age 0x%x.\n",
408 + tag, itt, sw_bits, idx ? *idx : 0xFFFFF,
409 + age ? *age : 0xFF);
410 }
411
412 static int cxgb3i_reserve_itt(struct iscsi_task *task, itt_t *hdr_itt)
413 @@ -771,26 +744,40 @@
414 struct cxgb3i_conn *cconn = conn->dd_data;
415 struct iscsi_tcp_conn *tcp_conn = &cconn->tcp_conn;
416 struct cxgb3i_adapter *snic = cconn->hba->snic;
417 - u32 sw_tag = tag_base(&snic->tag_format, task->itt, sess->age);
418 + struct cxgb3i_tag_format *tformat = &snic->tag_format;
419 + u32 sw_tag = (sess->age << cconn->task_idx_bits) | task->itt;
420 u32 tag = RESERVED_ITT;
421
422 - if (sc && (sc->sc_data_direction == DMA_FROM_DEVICE)) {
423 + if (sc && (scsi_bidi_cmnd(sc) ||
424 + sc->sc_data_direction == DMA_FROM_DEVICE) &&
425 + cxgb3i_sw_tag_usable(tformat, sw_tag)) {
426 struct s3_conn *c3cn = (struct s3_conn *)(tcp_conn->sock);
427 - tag =
428 - cxgb3i_ddp_tag_reserve(snic, c3cn->tid, sw_tag,
429 + tag = cxgb3i_ddp_tag_reserve(snic, c3cn->tid, sw_tag,
430 scsi_in(sc)->length,
431 scsi_in(sc)->table.sgl,
432 scsi_in(sc)->table.nents,
433 GFP_ATOMIC);
434 }
435 if (tag == RESERVED_ITT)
436 - tag = sw_tag | (snic->tag_format.rsvd_mask <<
437 - snic->tag_format.rsvd_shift);
438 + tag = cxgb3i_set_non_ddp_tag(tformat, sw_tag);
439 + /* the itt needs to be sent in big-endian order */
440 *hdr_itt = htonl(tag);
441
442 - cxgb3i_tag_debug("new tag 0x%x/0x%x (itt 0x%x, age 0x%x).\n",
443 - tag, *hdr_itt, task->itt, sess->age);
444 -
445 + if (sc) {
446 + if (sc->sc_data_direction == DMA_FROM_DEVICE)
447 + cxgb3i_tag_debug("read, len %u, tag 0x%x/0x%x "
448 + "(itt 0x%x, age 0x%x, sw 0x%x).\n",
449 + scsi_in(sc)->length, tag, *hdr_itt,
450 + task->itt, sess->age, sw_tag);
451 + else
452 + cxgb3i_tag_debug("write, len %u, tag 0x%x/0x%x "
453 + "(itt 0x%x, age 0x%x, sw 0x%x).\n",
454 + scsi_out(sc)->length, tag, *hdr_itt,
455 + task->itt, sess->age, sw_tag);
456 + } else
457 + cxgb3i_tag_debug("ctrl, tag 0x%x/0x%x (itt 0x%x, age 0x%x, "
458 + "sw 0x%x).\n",
459 + tag, *hdr_itt, task->itt, sess->age, sw_tag);
460 return 0;
461 }
462
463 @@ -800,14 +787,15 @@
464 struct iscsi_conn *conn = task->conn;
465 struct cxgb3i_conn *cconn = conn->dd_data;
466 struct cxgb3i_adapter *snic = cconn->hba->snic;
467 + struct cxgb3i_tag_format *tformat = &snic->tag_format;
468 u32 tag = ntohl(hdr_itt);
469
470 - cxgb3i_tag_debug("release tag 0x%x.\n", tag);
471 + cxgb3i_tag_debug("release %s tag 0x%x.\n", sc ? "scsi" : "ctrl", tag);
472
473 - if (sc && (sc->sc_data_direction == DMA_FROM_DEVICE))
474 - cxgb3i_ddp_tag_release(snic, tag,
475 - scsi_in(sc)->table.sgl,
476 - scsi_in(sc)->table.nents);
477 + if (sc && (scsi_bidi_cmnd(sc) ||
478 + sc->sc_data_direction == DMA_FROM_DEVICE) &&
479 + cxgb3i_is_ddp_tag(tformat, tag))
480 + cxgb3i_ddp_tag_release(snic, tag);
481 }
482
483 /**
484 @@ -820,7 +808,7 @@
485 .proc_name = "cxgb3i",
486 .queuecommand = iscsi_queuecommand,
487 .change_queue_depth = iscsi_change_queue_depth,
488 - .can_queue = 128 * (ISCSI_DEF_XMIT_CMDS_MAX - 1),
489 + .can_queue = CXGB3I_SCSI_QDEPTH_DFLT - 1,
490 .sg_tablesize = SG_ALL,
491 .max_sectors = 0xFFFF,
492 .cmd_per_lun = ISCSI_DEF_CMD_PER_LUN,
493 diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_offload.c linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_offload.c
494 --- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_offload.c 2009-01-14 15:17:57.000000000 -0800
495 +++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_offload.c 2009-01-26 22:00:17.000000000 -0800
496 @@ -22,19 +22,19 @@
497 #include "cxgb3i_ulp2.h"
498
499 #ifdef __DEBUG_C3CN_CONN__
500 -#define c3cn_conn_debug cxgb3i_log_debug
501 +#define c3cn_conn_debug cxgb3i_log_debug
502 #else
503 #define c3cn_conn_debug(fmt...)
504 #endif
505
506 #ifdef __DEBUG_C3CN_TX__
507 -#define c3cn_tx_debug cxgb3i_log_debug
508 +#define c3cn_tx_debug cxgb3i_log_debug
509 #else
510 #define c3cn_tx_debug(fmt...)
511 #endif
512
513 #ifdef __DEBUG_C3CN_RX__
514 -#define c3cn_rx_debug cxgb3i_log_debug
515 +#define c3cn_rx_debug cxgb3i_log_debug
516 #else
517 #define c3cn_rx_debug(fmt...)
518 #endif
519 @@ -42,9 +42,9 @@
520 /*
521 * module parameters releated to offloaded iscsi connection
522 */
523 -static int cxgb3_rcv_win = 256 * 1024;
524 +static int cxgb3_rcv_win = 128 * 1024;
525 module_param(cxgb3_rcv_win, int, 0644);
526 -MODULE_PARM_DESC(cxgb3_rcv_win, "TCP receive window in bytes (default=256KB)");
527 +MODULE_PARM_DESC(cxgb3_rcv_win, "TCP receive window in bytes (default=128KB)");
528
529 static int cxgb3_snd_win = 64 * 1024;
530 module_param(cxgb3_snd_win, int, 0644);
531 @@ -456,12 +456,9 @@
532 * The number of WRs needed for an skb depends on the number of fragments
533 * in the skb and whether it has any payload in its main body. This maps the
534 * length of the gather list represented by an skb into the # of necessary WRs.
535 - *
536 - * The max. length of an skb is controlled by the max pdu size which is ~16K.
537 - * Also, assume the min. fragment length is the sector size (512), then add
538 - * extra fragment counts for iscsi bhs and payload padding.
539 + * The extra two fragments are for iscsi bhs and payload padding.
540 */
541 -#define SKB_WR_LIST_SIZE (16384/512 + 3)
542 +#define SKB_WR_LIST_SIZE (MAX_SKB_FRAGS + 2)
543 static unsigned int skb_wrs[SKB_WR_LIST_SIZE] __read_mostly;
544
545 static void s3_init_wr_tab(unsigned int wr_len)
546 @@ -484,7 +481,7 @@
547
548 static inline void reset_wr_list(struct s3_conn *c3cn)
549 {
550 - c3cn->wr_pending_head = NULL;
551 + c3cn->wr_pending_head = c3cn->wr_pending_tail = NULL;
552 }
553
554 /*
555 @@ -495,7 +492,7 @@
556 static inline void enqueue_wr(struct s3_conn *c3cn,
557 struct sk_buff *skb)
558 {
559 - skb->sp = NULL;
560 + skb_wr_next(skb) = NULL;
561
562 /*
563 * We want to take an extra reference since both us and the driver
564 @@ -508,10 +505,22 @@
565 if (!c3cn->wr_pending_head)
566 c3cn->wr_pending_head = skb;
567 else
568 - c3cn->wr_pending_tail->sp = (void *)skb;
569 + skb_wr_next(c3cn->wr_pending_tail) = skb;
570 c3cn->wr_pending_tail = skb;
571 }
572
573 +static int count_pending_wrs(struct s3_conn *c3cn)
574 +{
575 + int n = 0;
576 + const struct sk_buff *skb = c3cn->wr_pending_head;
577 +
578 + while (skb) {
579 + n += skb->csum;
580 + skb = skb_wr_next(skb);
581 + }
582 + return n;
583 +}
584 +
585 static inline struct sk_buff *peek_wr(const struct s3_conn *c3cn)
586 {
587 return c3cn->wr_pending_head;
588 @@ -528,8 +537,8 @@
589
590 if (likely(skb)) {
591 /* Don't bother clearing the tail */
592 - c3cn->wr_pending_head = (struct sk_buff *)skb->sp;
593 - skb->sp = NULL;
594 + c3cn->wr_pending_head = skb_wr_next(skb);
595 + skb_wr_next(skb) = NULL;
596 }
597 return skb;
598 }
599 @@ -542,13 +551,15 @@
600 }
601
602 static inline void make_tx_data_wr(struct s3_conn *c3cn, struct sk_buff *skb,
603 - int len)
604 + int len, int req_completion)
605 {
606 struct tx_data_wr *req;
607
608 skb_reset_transport_header(skb);
609 req = (struct tx_data_wr *)__skb_push(skb, sizeof(*req));
610 - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
611 +
612 + req->wr_hi = htonl((V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)) |
613 + (req_completion ? F_WR_COMPL : 0));
614 req->wr_lo = htonl(V_WR_TID(c3cn->tid));
615 req->sndseq = htonl(c3cn->snd_nxt);
616 /* len includes the length of any HW ULP additions */
617 @@ -556,11 +567,11 @@
618 req->param = htonl(V_TX_PORT(c3cn->l2t->smt_idx));
619 /* V_TX_ULP_SUBMODE sets both the mode and submode */
620 req->flags = htonl(V_TX_ULP_SUBMODE(skb_ulp_mode(skb)) |
621 - V_TX_SHOVE((skb_peek(&c3cn->write_queue) ? 0 : 1)));
622 + V_TX_SHOVE((skb_peek(&c3cn->write_queue) ? 0 : 1)));
623
624 if (!c3cn_flag(c3cn, C3CN_TX_DATA_SENT)) {
625 - req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
626 - V_TX_CPU_IDX(c3cn->qset));
627 + req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
628 + V_TX_CPU_IDX(c3cn->qset));
629 /* Sendbuffer is in units of 32KB. */
630 req->param |= htonl(V_TX_SNDBUF(cxgb3_snd_win >> 15));
631 c3cn_set_flag(c3cn, C3CN_TX_DATA_SENT);
632 @@ -591,7 +602,7 @@
633
634 if (unlikely(c3cn->state == C3CN_STATE_CONNECTING ||
635 c3cn->state == C3CN_STATE_CLOSE_WAIT_1 ||
636 - c3cn->state == C3CN_STATE_ABORTING)) {
637 + c3cn->state >= C3CN_STATE_ABORTING)) {
638 c3cn_tx_debug("c3cn 0x%p, in closing state %u.\n",
639 c3cn, c3cn->state);
640 return 0;
641 @@ -626,19 +637,22 @@
642 c3cn->wr_unacked += wrs_needed;
643 enqueue_wr(c3cn, skb);
644
645 - if (likely(CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_NEED_HDR)) {
646 - len += ulp_extra_len(skb);
647 - make_tx_data_wr(c3cn, skb, len);
648 - c3cn->snd_nxt += len;
649 - if ((req_completion
650 - && c3cn->wr_unacked == wrs_needed)
651 - || (CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_COMPL)
652 - || c3cn->wr_unacked >= c3cn->wr_max / 2) {
653 - struct work_request_hdr *wr = cplhdr(skb);
654 + c3cn_tx_debug("c3cn 0x%p, enqueue, skb len %u/%u, frag %u, "
655 + "wr %d, left %u, unack %u.\n",
656 + c3cn, skb->len, skb->data_len, frags,
657 + wrs_needed, c3cn->wr_avail, c3cn->wr_unacked);
658
659 - wr->wr_hi |= htonl(F_WR_COMPL);
660 + if (likely(CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_NEED_HDR)) {
661 + if ((req_completion &&
662 + c3cn->wr_unacked == wrs_needed) ||
663 + (CXGB3_SKB_CB(skb)->flags & C3CB_FLAG_COMPL) ||
664 + c3cn->wr_unacked >= c3cn->wr_max / 2) {
665 + req_completion = 1;
666 c3cn->wr_unacked = 0;
667 }
668 + len += ulp_extra_len(skb);
669 + make_tx_data_wr(c3cn, skb, len, req_completion);
670 + c3cn->snd_nxt += len;
671 CXGB3_SKB_CB(skb)->flags &= ~C3CB_FLAG_NEED_HDR;
672 }
673
674 @@ -1153,12 +1167,28 @@
675 * Process an acknowledgment of WR completion. Advance snd_una and send the
676 * next batch of work requests from the write queue.
677 */
678 +
679 +static void check_wr_invariants(struct s3_conn *c3cn)
680 +{
681 + int pending = count_pending_wrs(c3cn);
682 +
683 + if (unlikely(c3cn->wr_avail + pending != c3cn->wr_max))
684 + cxgb3i_log_error("TID %u: credit imbalance: avail %u, "
685 + "pending %u, total should be %u\n",
686 + c3cn->tid, c3cn->wr_avail, pending,
687 + c3cn->wr_max);
688 +}
689 +
690 static void process_wr_ack(struct s3_conn *c3cn, struct sk_buff *skb)
691 {
692 struct cpl_wr_ack *hdr = cplhdr(skb);
693 unsigned int credits = ntohs(hdr->credits);
694 u32 snd_una = ntohl(hdr->snd_una);
695
696 + c3cn_tx_debug("%u WR credits, avail %u, unack %u, TID %u, state %u.\n",
697 + credits, c3cn->wr_avail, c3cn->wr_unacked,
698 + c3cn->tid, c3cn->state);
699 +
700 c3cn->wr_avail += credits;
701 if (c3cn->wr_unacked > c3cn->wr_max - c3cn->wr_avail)
702 c3cn->wr_unacked = c3cn->wr_max - c3cn->wr_avail;
703 @@ -1173,6 +1203,17 @@
704 break;
705 }
706 if (unlikely(credits < p->csum)) {
707 + struct tx_data_wr *w = cplhdr(p);
708 + cxgb3i_log_error("TID %u got %u WR credits need %u, "
709 + "len %u, main body %u, frags %u, "
710 + "seq # %u, ACK una %u, ACK nxt %u, "
711 + "WR_AVAIL %u, WRs pending %u\n",
712 + c3cn->tid, credits, p->csum, p->len,
713 + p->len - p->data_len,
714 + skb_shinfo(p)->nr_frags,
715 + ntohl(w->sndseq), snd_una,
716 + ntohl(hdr->snd_nxt), c3cn->wr_avail,
717 + count_pending_wrs(c3cn) - credits);
718 p->csum -= credits;
719 break;
720 } else {
721 @@ -1182,8 +1223,14 @@
722 }
723 }
724
725 - if (unlikely(before(snd_una, c3cn->snd_una)))
726 + check_wr_invariants(c3cn);
727 +
728 + if (unlikely(before(snd_una, c3cn->snd_una))) {
729 + cxgb3i_log_error("TID %u, unexpected sequence # %u in WR_ACK "
730 + "snd_una %u\n",
731 + c3cn->tid, snd_una, c3cn->snd_una);
732 goto out_free;
733 + }
734
735 if (c3cn->snd_una != snd_una) {
736 c3cn->snd_una = snd_una;
737 @@ -1454,11 +1501,14 @@
738 struct dst_entry *dst)
739 {
740 BUG_ON(c3cn->cdev != cdev);
741 - c3cn->wr_max = c3cn->wr_avail = T3C_DATA(cdev)->max_wrs;
742 + c3cn->wr_max = c3cn->wr_avail = T3C_DATA(cdev)->max_wrs - 1;
743 c3cn->wr_unacked = 0;
744 c3cn->mss_idx = select_mss(c3cn, dst_mtu(dst));
745
746 reset_wr_list(c3cn);
747 +
748 + c3cn_conn_debug("c3cn 0x%p, wr max %u, avail %u.\n",
749 + c3cn, c3cn->wr_max, c3cn->wr_avail);
750 }
751
752 static int initiate_act_open(struct s3_conn *c3cn, struct net_device *dev)
753 @@ -1673,9 +1723,17 @@
754 goto out_err;
755 }
756
757 - err = -EPIPE;
758 if (c3cn->err) {
759 c3cn_tx_debug("c3cn 0x%p, err %d.\n", c3cn, c3cn->err);
760 + err = -EPIPE;
761 + goto out_err;
762 + }
763 +
764 + if (c3cn->write_seq - c3cn->snd_una >= cxgb3_snd_win) {
765 + c3cn_tx_debug("c3cn 0x%p, snd %u - %u > %u.\n",
766 + c3cn, c3cn->write_seq, c3cn->snd_una,
767 + cxgb3_snd_win);
768 + err = -EAGAIN;
769 goto out_err;
770 }
771
772 diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_offload.h linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_offload.h
773 --- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_offload.h 2009-01-14 15:17:57.000000000 -0800
774 +++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_offload.h 2009-01-26 22:00:17.000000000 -0800
775 @@ -180,7 +180,7 @@
776 * @seq: tcp sequence number
777 * @ddigest: pdu data digest
778 * @pdulen: recovered pdu length
779 - * @ulp_data: scratch area for ULP
780 + * @wr_next: scratch area for tx wr
781 */
782 struct cxgb3_skb_cb {
783 __u8 flags;
784 @@ -188,7 +188,7 @@
785 __u32 seq;
786 __u32 ddigest;
787 __u32 pdulen;
788 - __u8 ulp_data[16];
789 + struct sk_buff *wr_next;
790 };
791
792 #define CXGB3_SKB_CB(skb) ((struct cxgb3_skb_cb *)&((skb)->cb[0]))
793 @@ -196,7 +196,7 @@
794 #define skb_ulp_mode(skb) (CXGB3_SKB_CB(skb)->ulp_mode)
795 #define skb_ulp_ddigest(skb) (CXGB3_SKB_CB(skb)->ddigest)
796 #define skb_ulp_pdulen(skb) (CXGB3_SKB_CB(skb)->pdulen)
797 -#define skb_ulp_data(skb) (CXGB3_SKB_CB(skb)->ulp_data)
798 +#define skb_wr_next(skb) (CXGB3_SKB_CB(skb)->wr_next)
799
800 enum c3cb_flags {
801 C3CB_FLAG_NEED_HDR = 1 << 0, /* packet needs a TX_DATA_WR header */
802 diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_ulp2.c linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_ulp2.c
803 --- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_ulp2.c 2009-01-14 15:17:57.000000000 -0800
804 +++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_ulp2.c 2009-01-26 22:00:17.000000000 -0800
805 @@ -51,6 +51,7 @@
806 static unsigned char sw_tag_idx_bits;
807 static unsigned char sw_tag_age_bits;
808 static unsigned char page_idx = ULP2_PGIDX_MAX;
809 +static unsigned int skb_copymax = SKB_MAX_HEAD(TX_HEADER_LEN);
810
811 static void cxgb3i_ddp_page_init(void)
812 {
813 @@ -59,6 +60,10 @@
814 sw_tag_idx_bits = (__ilog2_u32(ISCSI_ITT_MASK)) + 1;
815 sw_tag_age_bits = (__ilog2_u32(ISCSI_AGE_MASK)) + 1;
816
817 + cxgb3i_log_info("tag itt 0x%x, %u bits, age 0x%x, %u bits.\n",
818 + ISCSI_ITT_MASK, sw_tag_idx_bits,
819 + ISCSI_AGE_MASK, sw_tag_age_bits);
820 +
821 for (i = 0; i < ULP2_PGIDX_MAX; i++) {
822 if (PAGE_SIZE == (1UL << ddp_page_shift[i])) {
823 page_idx = i;
824 @@ -312,7 +317,6 @@
825 page_idx, sgcnt, xferlen, ULP2_DDP_THRESHOLD);
826 return RESERVED_ITT;
827 }
828 - return RESERVED_ITT;
829
830 gl = ddp_make_gl(xferlen, sgl, sgcnt, gfp);
831 if (!gl) {
832 @@ -322,9 +326,9 @@
833 }
834
835 npods = (gl->nelem + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
836 - idx_max = ddp->nppods - npods + 1;
837 + idx_max = ddp->nppods - npods;
838
839 - if (ddp->idx_last == ddp->nppods)
840 + if (ddp->idx_last >= idx_max)
841 idx = ddp_find_unused_entries(ddp, 0, idx_max, npods, gl);
842 else {
843 idx = ddp_find_unused_entries(ddp, ddp->idx_last + 1, idx_max,
844 @@ -345,12 +349,13 @@
845
846 if (ddp_gl_map(snic->pdev, gl) < 0)
847 goto unmap_sgl;
848 -
849 - tag = sw_tag | (idx << snic->tag_format.rsvd_shift);
850 +
851 + tag = cxgb3i_ddp_tag_base(&snic->tag_format, sw_tag);
852 + tag |= idx << PPOD_IDX_SHIFT;
853
854 hdr.rsvd = 0;
855 hdr.vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid));
856 - hdr.pgsz_tag_clr = htonl(tag & snic->tag_format.rsvd_tag_mask);
857 + hdr.pgsz_tag_clr = htonl(tag & ddp->rsvd_tag_mask);
858 hdr.maxoffset = htonl(xferlen);
859 hdr.pgoffset = htonl(gl->offset);
860
861 @@ -372,30 +377,35 @@
862 return RESERVED_ITT;
863 }
864
865 -void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *snic, u32 tag,
866 - struct scatterlist *sgl, unsigned int sgcnt)
867 +void cxgb3i_ddp_tag_release(struct cxgb3i_adapter *snic, u32 tag)
868 {
869 - u32 idx = (tag >> snic->tag_format.rsvd_shift) &
870 - snic->tag_format.rsvd_mask;
871 + struct cxgb3i_ddp_info *ddp = snic->ddp;
872 + u32 idx;
873
874 - if (idx < snic->tag_format.rsvd_mask) {
875 - struct cxgb3i_ddp_info *ddp = snic->ddp;
876 + if (!ddp) {
877 + cxgb3i_log_error("release ddp tag 0x%x, ddp NULL.\n", tag);
878 + return;
879 + }
880 +
881 + idx = (tag >> PPOD_IDX_SHIFT) & ddp->idx_mask;
882 + if (idx < ddp->nppods) {
883 struct cxgb3i_gather_list *gl = ddp->gl_map[idx];
884 unsigned int npods;
885
886 if (!gl || !gl->nelem) {
887 - cxgb3i_log_warn("release tag 0x%x, idx 0x%x, no gl.\n",
888 - tag, idx);
889 + cxgb3i_log_error("release tag 0x%x, idx 0x%x, no gl.\n",
890 + tag, idx);
891 return;
892 }
893 npods = (gl->nelem + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
894 -
895 cxgb3i_tag_debug("ddp tag 0x%x, release idx 0x%x, npods %u.\n",
896 tag, idx, npods);
897 clear_ddp_map(snic, idx, npods);
898 ddp_unmark_entries(ddp, idx, npods);
899 ddp_gl_unmap(snic->pdev, gl);
900 - }
901 + } else
902 + cxgb3i_log_error("ddp tag 0x%x, idx 0x%x > max 0x%x.\n",
903 + tag, idx, ddp->nppods);
904 }
905
906 int cxgb3i_conn_ulp_setup(struct cxgb3i_conn *cconn, int hcrc, int dcrc)
907 @@ -403,12 +413,18 @@
908 struct iscsi_tcp_conn *tcp_conn = cconn->conn->dd_data;
909 struct s3_conn *c3cn = (struct s3_conn *)(tcp_conn->sock);
910 struct sk_buff *skb = alloc_skb(sizeof(struct cpl_set_tcb_field),
911 - GFP_KERNEL | __GFP_NOFAIL);
912 + GFP_KERNEL);
913 struct cpl_set_tcb_field *req;
914 u64 val = (hcrc ? 1 : 0) | (dcrc ? 2 : 0);
915
916 + if (!skb)
917 + return -ENOMEM;
918 +
919 if (page_idx < ULP2_PGIDX_MAX)
920 val |= page_idx << 4;
921 + else
922 + cxgb3i_log_warn("TID 0x%x, host page 0x%lx default to 4K.\n",
923 + c3cn->tid, PAGE_SIZE);
924
925 /* set up ulp submode and page size */
926 req = (struct cpl_set_tcb_field *)skb_put(skb, sizeof(*req));
927 @@ -476,14 +492,14 @@
928 (skb_ulp_mode(skb) & ULP2_FLAG_DCRC_ERROR)) ?
929 ISCSI_SEGMENT_DGST_ERR : 0;
930 if (skb_ulp_mode(skb) & ULP2_FLAG_DATA_DDPED) {
931 - cxgb3i_ddp_debug("skb 0x%p, opcode 0x%x, data %u, "
932 - "ddp'ed, itt 0x%x.\n",
933 - skb, hdr->opcode & ISCSI_OPCODE_MASK,
934 - tcp_conn->in.datalen, hdr->itt);
935 + cxgb3i_rx_debug("skb 0x%p, opcode 0x%x, data %u, "
936 + "ddp'ed, itt 0x%x.\n",
937 + skb, hdr->opcode & ISCSI_OPCODE_MASK,
938 + tcp_conn->in.datalen, hdr->itt);
939 segment->total_copied = segment->total_size;
940 } else {
941 - cxgb3i_ddp_debug("skb 0x%p, opcode 0x%x, data %u, "
942 - "not ddp'ed, itt 0x%x.\n",
943 + cxgb3i_rx_debug("skb 0x%p, opcode 0x%x, data %u, "
944 + "not ddp'ed, itt 0x%x.\n",
945 skb, hdr->opcode & ISCSI_OPCODE_MASK,
946 tcp_conn->in.datalen, hdr->itt);
947 offset += sizeof(struct cpl_iscsi_hdr_norss);
948 @@ -520,24 +536,141 @@
949 skb_ulp_mode(skb) = (ULP_MODE_ISCSI << 4) | submode;
950 }
951
952 +static int sg_page_coalesce(struct scatterlist *sg, unsigned int offset,
953 + unsigned int dlen, skb_frag_t *frags, int frag_max)
954 +{
955 + unsigned int sglen = sg->length - offset;
956 + struct page *page = sg_page(sg);
957 + unsigned int datalen = dlen, copy;
958 + int i;
959 +
960 + i = 0;
961 + do {
962 + if (!sglen) {
963 + sg = sg_next(sg);
964 + offset = 0;
965 + sglen = sg->length;
966 + page = sg_page(sg);
967 + }
968 + copy = min(datalen, sglen);
969 + if (i && page == frags[i - 1].page &&
970 + offset + sg->offset ==
971 + frags[i - 1].page_offset + frags[i - 1].size) {
972 + frags[i - 1].size += copy;
973 + } else {
974 + if (i >= frag_max) {
975 + cxgb3i_log_error("%s, too many pages > %u, "
976 + "dlen %u.\n", __func__,
977 + frag_max, dlen);
978 + return -EINVAL;
979 + }
980 +
981 + frags[i].page = page;
982 + frags[i].page_offset = sg->offset + offset;
983 + frags[i].size = copy;
984 + i++;
985 + }
986 + datalen -= copy;
987 + offset += copy;
988 + sglen -= copy;
989 + } while (datalen);
990 +
991 + return i;
992 +}
993 +
994 +static int copy_frags_to_skb_pages(struct sk_buff *skb, skb_frag_t *frags,
995 + int frag_cnt, unsigned int datalen)
996 +{
997 + struct page *page = NULL;
998 + unsigned char *dp;
999 + unsigned int pg_left = 0;
1000 + unsigned int copy_total = 0;
1001 + int i;
1002 +
1003 + for (i = 0; i < frag_cnt; i++, frags++) {
1004 + while (frags->size) {
1005 + unsigned char *sp = page_address(frags->page);
1006 + unsigned int copy;
1007 +
1008 + if (!pg_left) {
1009 + int cnt = skb_shinfo(skb)->nr_frags;
1010 +
1011 + if (cnt >= MAX_SKB_FRAGS) {
1012 + cxgb3i_log_error("%s: pdu data %u.\n",
1013 + __func__, datalen);
1014 + return -EINVAL;
1015 + }
1016 + page = alloc_page(GFP_ATOMIC);
1017 + if (!page)
1018 + return -ENOMEM;
1019 + dp = page_address(page);
1020 + pg_left = PAGE_SIZE;
1021 +
1022 + copy = min(pg_left, datalen);
1023 + skb_fill_page_desc(skb, cnt, page, 0, copy);
1024 +
1025 + skb->len += copy;
1026 + skb->data_len += copy;
1027 + skb->truesize += copy;
1028 + datalen -= copy;
1029 + }
1030 + copy = min(pg_left, frags->size);
1031 + memcpy(dp, sp + frags->page_offset, copy);
1032 +
1033 + frags->size -= copy;
1034 + frags->page_offset += copy;
1035 + dp += copy;
1036 + pg_left -= copy;
1037 + copy_total += copy;
1038 + }
1039 + }
1040 +
1041 + return copy_total;
1042 +}
1043 +
1044 int cxgb3i_conn_ulp2_xmit(struct iscsi_conn *conn)
1045 {
1046 - struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
1047 + struct cxgb3i_conn *cconn = conn->dd_data;
1048 + struct iscsi_tcp_conn *tcp_conn = &cconn->tcp_conn;
1049 struct iscsi_segment *hdr_seg = &tcp_conn->out.segment;
1050 struct iscsi_segment *data_seg = &tcp_conn->out.data_segment;
1051 unsigned int hdrlen = hdr_seg->total_size;
1052 unsigned int datalen = data_seg->total_size;
1053 unsigned int padlen = iscsi_padding(datalen);
1054 - unsigned int copymax = SKB_MAX_HEAD(TX_HEADER_LEN);
1055 - unsigned int copylen;
1056 + unsigned int copylen = hdrlen;
1057 + unsigned int copy_dlen = 0;
1058 struct sk_buff *skb;
1059 unsigned char *dst;
1060 + int i, frag_cnt = 0;
1061 int err = -EAGAIN;
1062
1063 - if (data_seg->data && ((datalen + padlen) < copymax))
1064 - copylen = hdrlen + datalen + padlen;
1065 - else
1066 - copylen = hdrlen;
1067 + /*
1068 + * the whole pdu needs to fit into one skb, make sure we don't overrun
1069 + * the skb's frag_list. If there are more sg pages than MAX_SKB_FRAGS,
1070 + * we have to copy the data either to the head or newly allocated
1071 + * whole new page(s). This could happen if the sg contains a lot of
1072 + * fragmented data chunks (pages).
1073 + */
1074 + if (datalen) {
1075 + if (!data_seg->data) {
1076 + err = sg_page_coalesce(data_seg->sg,
1077 + data_seg->sg_offset,
1078 + data_seg->total_size,
1079 + cconn->frags,
1080 + TX_PDU_PAGES_MAX);
1081 + if (err < 0)
1082 + return err;
1083 + frag_cnt = err;
1084 +
1085 + if (frag_cnt > MAX_SKB_FRAGS ||
1086 + (padlen && frag_cnt + 1 > MAX_SKB_FRAGS))
1087 + copy_dlen = datalen + padlen;
1088 + } else
1089 + copy_dlen += datalen + padlen;
1090 + }
1091 +
1092 + if (copylen + copy_dlen < skb_copymax)
1093 + copylen += copy_dlen;
1094
1095 /* supports max. 16K pdus, so one skb is enough to hold all the data */
1096 skb = alloc_skb(TX_HEADER_LEN + copylen, GFP_ATOMIC);
1097 @@ -575,70 +708,84 @@
1098 skb->data_len += datalen;
1099 skb->truesize += datalen;
1100 }
1101 - } else {
1102 - struct scatterlist *sg = data_seg->sg;
1103 - unsigned int offset = data_seg->sg_offset;
1104 - struct page *page = sg_page(sg);
1105 - unsigned int sglen = sg->length - offset;
1106 -
1107 - do {
1108 - int i = skb_shinfo(skb)->nr_frags;
1109 - unsigned int copy;
1110 + } else if (copy_dlen) {
1111 + /* need to copy the page fragments */
1112 + if (copylen > hdrlen) {
1113 + skb_frag_t *frag = cconn->frags;
1114
1115 - if (!sglen) {
1116 - sg = sg_next(sg);
1117 - page = sg_page(sg);
1118 - offset = 0;
1119 - sglen = sg->length;
1120 + /* data fits in the skb's headroom */
1121 + for (i = 0; i < frag_cnt; i++, frag++) {
1122 + memcpy(dst,
1123 + page_address(frag->page) +
1124 + frag->page_offset,
1125 + frag->size);
1126 + dst += frag->size;
1127 }
1128 - copy = min(sglen, datalen);
1129 -
1130 - if (i && skb_can_coalesce(skb, i, page,
1131 - sg->offset + offset)) {
1132 - skb_shinfo(skb)->frags[i - 1].size += copy;
1133 - } else {
1134 - get_page(page);
1135 - skb_fill_page_desc(skb, i, page,
1136 - sg->offset + offset, copy);
1137 + if (padlen)
1138 + memset(dst, 0, padlen);
1139 + } else {
1140 + /* allocate pages to hold the data */
1141 + err = copy_frags_to_skb_pages(skb, cconn->frags,
1142 + frag_cnt, datalen);
1143 + if (err < 0) {
1144 + err = -EAGAIN;
1145 + goto free_skb;
1146 }
1147 - skb->len += copy;
1148 - skb->data_len += copy;
1149 - skb->truesize += copy;
1150 - offset += copy;
1151 - sglen -= copy;
1152 - datalen -= copy;
1153 - } while (datalen);
1154 - }
1155 -
1156 - if (padlen && skb_shinfo(skb)->nr_frags) {
1157 - int idx = skb_shinfo(skb)->nr_frags;
1158 - get_page(pad_page);
1159 - skb_fill_page_desc(skb, idx, pad_page, 0, padlen);
1160 - skb->data_len += padlen;
1161 - skb->truesize += padlen;
1162 - skb->len += padlen;
1163 + WARN_ON(err != datalen);
1164 + if (padlen) {
1165 + skb_frag_t *frag;
1166 +
1167 + i = skb_shinfo(skb)->nr_frags;
1168 + frag = &skb_shinfo(skb)->frags[i];
1169 + dst = page_address(frag->page);
1170 +
1171 + memset(dst + frag->page_offset + frag->size,
1172 + 0, padlen);
1173 + frag->size += padlen;
1174 + }
1175 + }
1176 + } else {
1177 + /* sg pages fit into frag_list */
1178 + for (i = 0; i < frag_cnt; i++)
1179 + get_page(cconn->frags[i].page);
1180 + memcpy(skb_shinfo(skb)->frags, cconn->frags,
1181 + sizeof(skb_frag_t) * frag_cnt);
1182 + skb_shinfo(skb)->nr_frags = frag_cnt;
1183 + skb->len += datalen;
1184 + skb->data_len += datalen;
1185 + skb->truesize += datalen;
1186 +
1187 + if (padlen) {
1188 + i = skb_shinfo(skb)->nr_frags;
1189 + get_page(pad_page);
1190 + skb_fill_page_desc(skb, i, pad_page, 0, padlen);
1191 + skb->len += padlen;
1192 + skb->data_len += padlen;
1193 + skb->truesize += padlen;
1194 + }
1195 }
1196
1197 send_pdu:
1198 err = cxgb3i_c3cn_send_pdus((struct s3_conn *)tcp_conn->sock, skb);
1199 -
1200 if (err > 0) {
1201 int pdulen = hdrlen + datalen + padlen;
1202 +
1203 if (conn->hdrdgst_en)
1204 pdulen += ISCSI_DIGEST_SIZE;
1205 if (datalen && conn->datadgst_en)
1206 pdulen += ISCSI_DIGEST_SIZE;
1207
1208 hdr_seg->total_copied = hdr_seg->total_size;
1209 - if (datalen)
1210 - data_seg->total_copied = data_seg->total_size;
1211 + data_seg->total_copied = data_seg->total_size;
1212 conn->txdata_octets += pdulen;
1213 return pdulen;
1214 }
1215
1216 +free_skb:
1217 kfree_skb(skb);
1218 if (err < 0 && err != -EAGAIN) {
1219 - cxgb3i_log_error("conn 0x%p, xmit err %d.\n", conn, err);
1220 + cxgb3i_log_error("conn 0x%p, xmit err %d, skb len %u/%u.\n",
1221 + conn, err, skb->len, skb->data_len);
1222 iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
1223 return err;
1224 }
1225 @@ -652,6 +799,9 @@
1226 return -ENOMEM;
1227 memset(page_address(pad_page), 0, PAGE_SIZE);
1228 cxgb3i_ddp_page_init();
1229 + cxgb3i_log_info("skb max. frag %u, head %u.\n",
1230 + (unsigned int)MAX_SKB_FRAGS,
1231 + (unsigned int)skb_copymax);
1232 return 0;
1233 }
1234
1235 @@ -720,7 +870,7 @@
1236
1237 read_lock(&c3cn->callback_lock);
1238 conn = c3cn->user_data;
1239 - if (conn && c3cn->state != C3CN_STATE_ESTABLISHED)
1240 + if (conn)
1241 iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
1242 read_unlock(&c3cn->callback_lock);
1243 }
1244 @@ -730,7 +880,7 @@
1245 struct t3cdev *tdev = snic->tdev;
1246 struct cxgb3i_ddp_info *ddp;
1247 struct ulp_iscsi_info uinfo;
1248 - unsigned int ppmax, bits, max_bits;
1249 + unsigned int ppmax, bits;
1250 int i, err;
1251
1252 err = tdev->ctl(tdev, ULP_ISCSI_GET_PARAMS, &uinfo);
1253 @@ -740,26 +890,21 @@
1254 return err;
1255 }
1256
1257 - ppmax = (uinfo.ulimit - uinfo.llimit + 1) >> PPOD_SIZE_SHIFT;
1258 - max_bits = min(PPOD_IDX_MAX_SIZE,
1259 - (32 - sw_tag_idx_bits - sw_tag_age_bits));
1260 - bits = __ilog2_u32(ppmax) + 1;
1261 - if (bits > max_bits)
1262 - bits = max_bits;
1263 - ppmax = (1 << bits) - 1;
1264 -
1265 snic->tx_max_size = min_t(unsigned int,
1266 uinfo.max_txsz, ULP2_MAX_PKT_SIZE);
1267 snic->rx_max_size = min_t(unsigned int,
1268 uinfo.max_rxsz, ULP2_MAX_PKT_SIZE);
1269 + cxgb3i_log_info("ddp max pkt size: %u/%u,%u, %u/%u,%u.\n",
1270 + snic->tx_max_size, uinfo.max_txsz, ULP2_MAX_PKT_SIZE,
1271 + snic->rx_max_size, uinfo.max_rxsz, ULP2_MAX_PKT_SIZE);
1272
1273 - snic->tag_format.idx_bits = sw_tag_idx_bits;
1274 - snic->tag_format.age_bits = sw_tag_age_bits;
1275 - snic->tag_format.rsvd_bits = bits;
1276 - snic->tag_format.rsvd_shift = PPOD_IDX_SHIFT;
1277 - snic->tag_format.rsvd_mask = (1 << snic->tag_format.rsvd_bits) - 1;
1278 - snic->tag_format.rsvd_tag_mask =
1279 - (1 << (snic->tag_format.rsvd_bits + PPOD_IDX_SHIFT)) - 1;
1280 + snic->tag_format.sw_bits = sw_tag_idx_bits + sw_tag_age_bits;
1281 +
1282 + ppmax = (uinfo.ulimit - uinfo.llimit + 1) >> PPOD_SIZE_SHIFT;
1283 + bits = __ilog2_u32(ppmax) + 1;
1284 + if (bits > PPOD_IDX_MAX_SIZE)
1285 + bits = PPOD_IDX_MAX_SIZE;
1286 + ppmax = (1 << (bits - 1)) - 1;
1287
1288 ddp = cxgb3i_alloc_big_mem(sizeof(struct cxgb3i_ddp_info) +
1289 ppmax *
1290 @@ -779,12 +924,15 @@
1291 spin_lock_init(&ddp->map_lock);
1292 ddp->llimit = uinfo.llimit;
1293 ddp->ulimit = uinfo.ulimit;
1294 + ddp->nppods = ppmax;
1295 + ddp->idx_last = ppmax;
1296 + ddp->idx_bits = bits;
1297 + ddp->idx_mask = (1 << bits) - 1;
1298 + ddp->rsvd_tag_mask = (1 << (bits + PPOD_IDX_SHIFT)) - 1;
1299
1300 - uinfo.tagmask =
1301 - snic->tag_format.rsvd_mask << snic->tag_format.rsvd_shift;
1302 + uinfo.tagmask = ddp->idx_mask << PPOD_IDX_SHIFT;
1303 for (i = 0; i < ULP2_PGIDX_MAX; i++)
1304 uinfo.pgsz_factor[i] = ddp_page_order[i];
1305 -
1306 uinfo.ulimit = uinfo.llimit + (ppmax << PPOD_SIZE_SHIFT);
1307
1308 err = tdev->ctl(tdev, ULP_ISCSI_SET_PARAMS, &uinfo);
1309 @@ -794,19 +942,20 @@
1310 goto free_ppod_map;
1311 }
1312
1313 - ddp->nppods = ppmax;
1314 - ddp->idx_last = ppmax;
1315 -
1316 tdev->ulp_iscsi = snic->ddp = ddp;
1317
1318 - cxgb3i_log_info("snic nppods %u (0x%x ~ 0x%x), rsvd shift %u, "
1319 - "bits %u, mask 0x%x, 0x%x, pkt %u,%u.\n",
1320 - ppmax, ddp->llimit, ddp->ulimit,
1321 - snic->tag_format.rsvd_shift,
1322 - snic->tag_format.rsvd_bits,
1323 - snic->tag_format.rsvd_mask, uinfo.tagmask,
1324 - snic->tx_max_size, snic->rx_max_size);
1325 + cxgb3i_log_info("nppods %u (0x%x ~ 0x%x), bits %u, mask 0x%x,0x%x.\n",
1326 + ppmax, ddp->llimit, ddp->ulimit, ddp->idx_bits,
1327 + ddp->idx_mask, ddp->rsvd_tag_mask);
1328
1329 + snic->tag_format.rsvd_bits = ddp->idx_bits;
1330 + snic->tag_format.rsvd_shift = PPOD_IDX_SHIFT;
1331 + snic->tag_format.rsvd_mask = (1 << snic->tag_format.rsvd_bits) - 1;
1332 +
1333 + cxgb3i_log_info("tag format: sw %u, rsvd %u,%u, mask 0x%x.\n",
1334 + snic->tag_format.sw_bits, snic->tag_format.rsvd_bits,
1335 + snic->tag_format.rsvd_shift,
1336 + snic->tag_format.rsvd_mask);
1337 return 0;
1338
1339 free_ppod_map:
1340 diff -uNr linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_ulp2.h linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_ulp2.h
1341 --- linux-2.6.27.11-1.orig/drivers/scsi/cxgb3i/cxgb3i_ulp2.h 2009-01-14 15:17:57.000000000 -0800
1342 +++ linux-2.6.27.11-1.chng/drivers/scsi/cxgb3i/cxgb3i_ulp2.h 2009-01-26 22:00:17.000000000 -0800
1343 @@ -13,7 +13,6 @@
1344 #ifndef __CXGB3I_ULP2_H__
1345 #define __CXGB3I_ULP2_H__
1346
1347 -#define ULP2_PDU_PAYLOAD_DFLT (16224 - ISCSI_PDU_HEADER_MAX)
1348 #define PPOD_PAGES_MAX 4
1349 #define PPOD_PAGES_SHIFT 2 /* 4 pages per pod */
1350
1351 @@ -100,9 +99,6 @@
1352 #define ULP2_FLAG_DCRC_ERROR 0x20
1353 #define ULP2_FLAG_PAD_ERROR 0x40
1354
1355 -#define ULP2_MAX_PKT_SIZE 16224
1356 -#define ULP2_MAX_PDU_SIZE 8192
1357 -
1358 void cxgb3i_conn_closing(struct s3_conn *);
1359 void cxgb3i_conn_pdu_ready(struct s3_conn *c3cn);
1360 void cxgb3i_conn_tx_open(struct s3_conn *c3cn);