]> git.ipfire.org Git - thirdparty/qemu.git/blame - net/eth.c
hw/net/net_tx_pkt: Implement TCP segmentation
[thirdparty/qemu.git] / net / eth.c
CommitLineData
75020a70
DF
1/*
2 * QEMU network structures definitions and helper functions
3 *
4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
5 *
6 * Developed by Daynix Computing LTD (http://www.daynix.com)
7 *
8 * Authors:
9 * Dmitry Fleytman <dmitry@daynix.com>
10 * Tamir Shomer <tamirs@daynix.com>
11 * Yan Vugenfirer <yan@daynix.com>
12 *
13 * This work is licensed under the terms of the GNU GPL, version 2 or later.
14 * See the COPYING file in the top-level directory.
15 *
16 */
17
2744d920 18#include "qemu/osdep.h"
7564bf77 19#include "qemu/log.h"
75020a70
DF
20#include "net/eth.h"
21#include "net/checksum.h"
75020a70
DF
22#include "net/tap.h"
23
eb700029
DF
24void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag,
25 uint16_t vlan_ethtype, bool *is_new)
75020a70
DF
26{
27 struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
28
29 switch (be16_to_cpu(ehdr->h_proto)) {
30 case ETH_P_VLAN:
31 case ETH_P_DVLAN:
32 /* vlan hdr exists */
33 *is_new = false;
34 break;
35
36 default:
37 /* No VLAN header, put a new one */
38 vhdr->h_proto = ehdr->h_proto;
eb700029 39 ehdr->h_proto = cpu_to_be16(vlan_ethtype);
75020a70
DF
40 *is_new = true;
41 break;
42 }
43 vhdr->h_tci = cpu_to_be16(vlan_tag);
44}
45
46uint8_t
47eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
48{
49 uint8_t ecn_state = 0;
50
51 if (l3_proto == ETH_P_IP) {
52 struct ip_header *iphdr = (struct ip_header *) l3_hdr;
53
54 if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
55 if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) {
56 ecn_state = VIRTIO_NET_HDR_GSO_ECN;
57 }
58 if (l4proto == IP_PROTO_TCP) {
59 return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state;
60 } else if (l4proto == IP_PROTO_UDP) {
61 return VIRTIO_NET_HDR_GSO_UDP | ecn_state;
62 }
63 }
64 } else if (l3_proto == ETH_P_IPV6) {
65 struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr;
66
67 if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) {
68 ecn_state = VIRTIO_NET_HDR_GSO_ECN;
69 }
70
71 if (l4proto == IP_PROTO_TCP) {
72 return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
73 }
74 }
7564bf77
PP
75 qemu_log_mask(LOG_UNIMP, "%s: probably not GSO frame, "
76 "unknown L3 protocol: 0x%04"PRIx16"\n", __func__, l3_proto);
75020a70
DF
77
78 return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
79}
80
eb700029
DF
81uint16_t
82eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
83{
84 uint16_t proto;
85 size_t copied;
86 size_t size = iov_size(l2hdr_iov, iovcnt);
87 size_t proto_offset = l2hdr_len - sizeof(proto);
88
89 if (size < proto_offset) {
90 return ETH_P_UNKNOWN;
91 }
92
93 copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset,
94 &proto, sizeof(proto));
95
96 return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN;
97}
98
/*
 * Copy @length bytes located at @offset of the vector into @buffer.
 *
 * @input_size is the total size of the vector as computed by the caller;
 * an @offset beyond it is rejected before touching the vector.
 *
 * Returns true only when all @length bytes were copied.
 */
static bool
_eth_copy_chunk(size_t input_size,
                const struct iovec *iov, int iovcnt,
                size_t offset, size_t length,
                void *buffer)
{
    if (offset > input_size) {
        return false;
    }

    return iov_to_buf(iov, iovcnt, offset, buffer, length) >= length;
}
119
120static bool
121_eth_tcp_has_data(bool is_ip4,
122 const struct ip_header *ip4_hdr,
123 const struct ip6_header *ip6_hdr,
124 size_t full_ip6hdr_len,
125 const struct tcp_header *tcp)
126{
127 uint32_t l4len;
128
129 if (is_ip4) {
130 l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
131 } else {
132 size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
133 l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len;
134 }
135
136 return l4len > TCP_HEADER_DATA_OFFSET(tcp);
137}
138
139void eth_get_protocols(const struct iovec *iov, int iovcnt,
75020a70 140 bool *isip4, bool *isip6,
eb700029
DF
141 bool *isudp, bool *istcp,
142 size_t *l3hdr_off,
143 size_t *l4hdr_off,
144 size_t *l5hdr_off,
145 eth_ip6_hdr_info *ip6hdr_info,
146 eth_ip4_hdr_info *ip4hdr_info,
147 eth_l4_hdr_info *l4hdr_info)
75020a70
DF
148{
149 int proto;
eb700029
DF
150 bool fragment = false;
151 size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt);
152 size_t input_size = iov_size(iov, iovcnt);
153 size_t copied;
154
75020a70
DF
155 *isip4 = *isip6 = *isudp = *istcp = false;
156
eb700029
DF
157 proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len);
158
159 *l3hdr_off = l2hdr_len;
160
75020a70 161 if (proto == ETH_P_IP) {
eb700029 162 struct ip_header *iphdr = &ip4hdr_info->ip4_hdr;
75020a70 163
eb700029
DF
164 if (input_size < l2hdr_len) {
165 return;
166 }
167
168 copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr));
75020a70 169
eb700029 170 *isip4 = true;
75020a70 171
eb700029
DF
172 if (copied < sizeof(*iphdr)) {
173 return;
174 }
75020a70
DF
175
176 if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
177 if (iphdr->ip_p == IP_PROTO_TCP) {
178 *istcp = true;
179 } else if (iphdr->ip_p == IP_PROTO_UDP) {
180 *isudp = true;
181 }
182 }
75020a70 183
eb700029
DF
184 ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
185 *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr);
186
187 fragment = ip4hdr_info->fragment;
188 } else if (proto == ETH_P_IPV6) {
75020a70
DF
189
190 *isip6 = true;
eb700029
DF
191 if (eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len,
192 ip6hdr_info)) {
193 if (ip6hdr_info->l4proto == IP_PROTO_TCP) {
75020a70 194 *istcp = true;
eb700029 195 } else if (ip6hdr_info->l4proto == IP_PROTO_UDP) {
75020a70
DF
196 *isudp = true;
197 }
eb700029
DF
198 } else {
199 return;
200 }
201
202 *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len;
203 fragment = ip6hdr_info->fragment;
204 }
205
206 if (!fragment) {
207 if (*istcp) {
208 *istcp = _eth_copy_chunk(input_size,
209 iov, iovcnt,
210 *l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
211 &l4hdr_info->hdr.tcp);
212
2c5e564f 213 if (*istcp) {
eb700029
DF
214 *l5hdr_off = *l4hdr_off +
215 TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);
216
217 l4hdr_info->has_tcp_data =
218 _eth_tcp_has_data(proto == ETH_P_IP,
219 &ip4hdr_info->ip4_hdr,
220 &ip6hdr_info->ip6_hdr,
221 *l4hdr_off - *l3hdr_off,
222 &l4hdr_info->hdr.tcp);
223 }
224 } else if (*isudp) {
225 *isudp = _eth_copy_chunk(input_size,
226 iov, iovcnt,
227 *l4hdr_off, sizeof(l4hdr_info->hdr.udp),
228 &l4hdr_info->hdr.udp);
229 *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
230 }
231 }
232}
233
566342c3 234size_t
eb700029
DF
235eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
236 uint8_t *new_ehdr_buf,
237 uint16_t *payload_offset, uint16_t *tci)
238{
239 struct vlan_header vlan_hdr;
240 struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
241
242 size_t copied = iov_to_buf(iov, iovcnt, iovoff,
243 new_ehdr, sizeof(*new_ehdr));
244
245 if (copied < sizeof(*new_ehdr)) {
566342c3 246 return 0;
eb700029
DF
247 }
248
249 switch (be16_to_cpu(new_ehdr->h_proto)) {
250 case ETH_P_VLAN:
251 case ETH_P_DVLAN:
252 copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
253 &vlan_hdr, sizeof(vlan_hdr));
254
255 if (copied < sizeof(vlan_hdr)) {
566342c3 256 return 0;
eb700029
DF
257 }
258
259 new_ehdr->h_proto = vlan_hdr.h_proto;
260
261 *tci = be16_to_cpu(vlan_hdr.h_tci);
262 *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
263
264 if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {
265
266 copied = iov_to_buf(iov, iovcnt, *payload_offset,
267 PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));
268
269 if (copied < sizeof(vlan_hdr)) {
566342c3 270 return 0;
eb700029
DF
271 }
272
273 *payload_offset += sizeof(vlan_hdr);
566342c3
DF
274
275 return sizeof(struct eth_header) + sizeof(struct vlan_header);
276 } else {
277 return sizeof(struct eth_header);
eb700029 278 }
eb700029 279 default:
566342c3 280 return 0;
eb700029
DF
281 }
282}
283
566342c3 284size_t
eb700029
DF
285eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
286 uint16_t vet, uint8_t *new_ehdr_buf,
287 uint16_t *payload_offset, uint16_t *tci)
288{
289 struct vlan_header vlan_hdr;
290 struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
291
292 size_t copied = iov_to_buf(iov, iovcnt, iovoff,
293 new_ehdr, sizeof(*new_ehdr));
294
295 if (copied < sizeof(*new_ehdr)) {
566342c3 296 return 0;
eb700029
DF
297 }
298
299 if (be16_to_cpu(new_ehdr->h_proto) == vet) {
300 copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
301 &vlan_hdr, sizeof(vlan_hdr));
302
303 if (copied < sizeof(vlan_hdr)) {
566342c3 304 return 0;
75020a70 305 }
eb700029
DF
306
307 new_ehdr->h_proto = vlan_hdr.h_proto;
308
309 *tci = be16_to_cpu(vlan_hdr.h_tci);
310 *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
566342c3 311 return sizeof(struct eth_header);
75020a70 312 }
eb700029 313
566342c3 314 return 0;
75020a70
DF
315}
316
75020a70
DF
317void
318eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
319{
320 struct ip_header *iphdr = (struct ip_header *) l3hdr;
321 iphdr->ip_sum = 0;
322 iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len));
323}
324
325uint32_t
eb700029
DF
326eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
327 uint16_t csl,
328 uint32_t *cso)
75020a70
DF
329{
330 struct ip_pseudo_header ipph;
331 ipph.ip_src = iphdr->ip_src;
332 ipph.ip_dst = iphdr->ip_dst;
333 ipph.ip_payload = cpu_to_be16(csl);
334 ipph.ip_proto = iphdr->ip_p;
335 ipph.zeros = 0;
eb700029
DF
336 *cso = sizeof(ipph);
337 return net_checksum_add(*cso, (uint8_t *) &ipph);
338}
339
340uint32_t
341eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
342 uint16_t csl,
343 uint8_t l4_proto,
344 uint32_t *cso)
345{
346 struct ip6_pseudo_header ipph;
347 ipph.ip6_src = iphdr->ip6_src;
348 ipph.ip6_dst = iphdr->ip6_dst;
349 ipph.len = cpu_to_be16(csl);
350 ipph.zero[0] = 0;
351 ipph.zero[1] = 0;
352 ipph.zero[2] = 0;
353 ipph.next_hdr = l4_proto;
354 *cso = sizeof(ipph);
355 return net_checksum_add(*cso, (uint8_t *)&ipph);
75020a70
DF
356}
357
358static bool
359eth_is_ip6_extension_header_type(uint8_t hdr_type)
360{
361 switch (hdr_type) {
362 case IP6_HOP_BY_HOP:
363 case IP6_ROUTING:
364 case IP6_FRAGMENT:
75020a70
DF
365 case IP6_AUTHENTICATION:
366 case IP6_DESTINATON:
367 case IP6_MOBILITY:
368 return true;
369 default:
370 return false;
371 }
372}
373
eb700029
DF
374static bool
375_eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
dbd8d3f9 376 size_t ext_hdr_offset,
eb700029
DF
377 struct ip6_ext_hdr *ext_hdr,
378 struct in6_address *dst_addr)
379{
7d6a4f12 380 struct ip6_ext_hdr_routing rt_hdr;
6f10f77d
PMD
381 size_t input_size = iov_size(pkt, pkt_frags);
382 size_t bytes_read;
eb700029 383
7d6a4f12 384 if (input_size < ext_hdr_offset + sizeof(rt_hdr) + sizeof(*dst_addr)) {
6f10f77d
PMD
385 return false;
386 }
eb700029 387
7d6a4f12
PMD
388 bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset,
389 &rt_hdr, sizeof(rt_hdr));
390 assert(bytes_read == sizeof(rt_hdr));
c7274b5e
PMD
391 if ((rt_hdr.rtype != 2) || (rt_hdr.segleft != 1)) {
392 return false;
eb700029 393 }
c7274b5e
PMD
394 bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset + sizeof(rt_hdr),
395 dst_addr, sizeof(*dst_addr));
396 assert(bytes_read == sizeof(*dst_addr));
eb700029 397
c7274b5e 398 return true;
eb700029
DF
399}
400
401static bool
402_eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
403 size_t dsthdr_offset,
404 struct ip6_ext_hdr *ext_hdr,
405 struct in6_address *src_addr)
406{
407 size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
408 struct ip6_option_hdr opthdr;
409 size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);
410
411 while (bytes_left > sizeof(opthdr)) {
412 size_t input_size = iov_size(pkt, pkt_frags);
413 size_t bytes_read, optlen;
414
415 if (input_size < opt_offset) {
416 return false;
417 }
418
419 bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
420 &opthdr, sizeof(opthdr));
421
422 if (bytes_read != sizeof(opthdr)) {
423 return false;
424 }
425
426 optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
427 : (opthdr.len + sizeof(opthdr));
428
429 if (optlen > bytes_left) {
430 return false;
431 }
432
433 if (opthdr.type == IP6_OPT_HOME) {
434 size_t input_size = iov_size(pkt, pkt_frags);
435
436 if (input_size < opt_offset + sizeof(opthdr)) {
437 return false;
438 }
439
440 bytes_read = iov_to_buf(pkt, pkt_frags,
441 opt_offset + sizeof(opthdr),
4555ca68 442 src_addr, sizeof(*src_addr));
eb700029 443
b2caa3b8 444 return bytes_read == sizeof(*src_addr);
eb700029
DF
445 }
446
447 opt_offset += optlen;
448 bytes_left -= optlen;
449 }
450
451 return false;
452}
453
454bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
455 size_t ip6hdr_off, eth_ip6_hdr_info *info)
75020a70 456{
75020a70
DF
457 struct ip6_ext_hdr ext_hdr;
458 size_t bytes_read;
eb700029
DF
459 uint8_t curr_ext_hdr_type;
460 size_t input_size = iov_size(pkt, pkt_frags);
461
462 info->rss_ex_dst_valid = false;
463 info->rss_ex_src_valid = false;
464 info->fragment = false;
465
466 if (input_size < ip6hdr_off) {
467 return false;
468 }
75020a70
DF
469
470 bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off,
eb700029
DF
471 &info->ip6_hdr, sizeof(info->ip6_hdr));
472 if (bytes_read < sizeof(info->ip6_hdr)) {
75020a70
DF
473 return false;
474 }
475
eb700029
DF
476 info->full_hdr_len = sizeof(struct ip6_header);
477
478 curr_ext_hdr_type = info->ip6_hdr.ip6_nxt;
75020a70 479
eb700029
DF
480 if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
481 info->l4proto = info->ip6_hdr.ip6_nxt;
482 info->has_ext_hdrs = false;
75020a70
DF
483 return true;
484 }
485
eb700029
DF
486 info->has_ext_hdrs = true;
487
75020a70 488 do {
eb700029
DF
489 if (input_size < ip6hdr_off + info->full_hdr_len) {
490 return false;
491 }
492
493 bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len,
75020a70 494 &ext_hdr, sizeof(ext_hdr));
75020a70 495
eb700029
DF
496 if (bytes_read < sizeof(ext_hdr)) {
497 return false;
498 }
499
500 if (curr_ext_hdr_type == IP6_ROUTING) {
38462440
PMD
501 if (ext_hdr.ip6r_len == sizeof(struct in6_address) / 8) {
502 info->rss_ex_dst_valid =
503 _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
504 ip6hdr_off + info->full_hdr_len,
505 &ext_hdr, &info->rss_ex_dst);
506 }
eb700029
DF
507 } else if (curr_ext_hdr_type == IP6_DESTINATON) {
508 info->rss_ex_src_valid =
509 _eth_get_rss_ex_src_addr(pkt, pkt_frags,
510 ip6hdr_off + info->full_hdr_len,
511 &ext_hdr, &info->rss_ex_src);
512 } else if (curr_ext_hdr_type == IP6_FRAGMENT) {
513 info->fragment = true;
514 }
515
516 info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
517 curr_ext_hdr_type = ext_hdr.ip6r_nxt;
518 } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type));
519
520 info->l4proto = ext_hdr.ip6r_nxt;
75020a70
DF
521 return true;
522}
af774513
BM
523
524bool eth_pad_short_frame(uint8_t *padded_pkt, size_t *padded_buflen,
525 const void *pkt, size_t pkt_size)
526{
527 assert(padded_buflen && *padded_buflen >= ETH_ZLEN);
528
529 if (pkt_size >= ETH_ZLEN) {
530 return false;
531 }
532
533 /* pad to minimum Ethernet frame length */
534 memcpy(padded_pkt, pkt, pkt_size);
535 memset(&padded_pkt[pkt_size], 0, ETH_ZLEN - pkt_size);
536 *padded_buflen = ETH_ZLEN;
537
538 return true;
539}