]> git.ipfire.org Git - thirdparty/linux.git/blame - net/openvswitch/datapath.c
Linux 6.16-rc5
[thirdparty/linux.git] / net / openvswitch / datapath.c
CommitLineData
c9422999 1// SPDX-License-Identifier: GPL-2.0-only
ccb1352e 2/*
ad552007 3 * Copyright (c) 2007-2014 Nicira, Inc.
ccb1352e
JG
4 */
5
6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8#include <linux/init.h>
9#include <linux/module.h>
10#include <linux/if_arp.h>
11#include <linux/if_vlan.h>
12#include <linux/in.h>
13#include <linux/ip.h>
14#include <linux/jhash.h>
15#include <linux/delay.h>
16#include <linux/time.h>
17#include <linux/etherdevice.h>
ccb1352e
JG
18#include <linux/kernel.h>
19#include <linux/kthread.h>
20#include <linux/mutex.h>
21#include <linux/percpu.h>
22#include <linux/rcupdate.h>
23#include <linux/tcp.h>
24#include <linux/udp.h>
ccb1352e
JG
25#include <linux/ethtool.h>
26#include <linux/wait.h>
ccb1352e
JG
27#include <asm/div64.h>
28#include <linux/highmem.h>
29#include <linux/netfilter_bridge.h>
30#include <linux/netfilter_ipv4.h>
31#include <linux/inetdevice.h>
32#include <linux/list.h>
33#include <linux/openvswitch.h>
34#include <linux/rculist.h>
35#include <linux/dmi.h>
ccb1352e 36#include <net/genetlink.h>
d457a0e3 37#include <net/gso.h>
46df7b81
PS
38#include <net/net_namespace.h>
39#include <net/netns/generic.h>
35d39fec 40#include <net/pkt_cls.h>
ccb1352e
JG
41
42#include "datapath.h"
9d802da4 43#include "drop.h"
ccb1352e 44#include "flow.h"
e80857cc 45#include "flow_table.h"
e6445719 46#include "flow_netlink.h"
96fbc13d 47#include "meter.h"
c4ab7b56 48#include "openvswitch_trace.h"
ccb1352e 49#include "vport-internal_dev.h"
cff63a52 50#include "vport-netdev.h"
ccb1352e 51
c7d03a00 52unsigned int ovs_net_id __read_mostly;
8e4e1713 53
0c200ef9
PS
54static struct genl_family dp_packet_genl_family;
55static struct genl_family dp_flow_genl_family;
56static struct genl_family dp_datapath_genl_family;
57
74ed7ab9
JS
58static const struct nla_policy flow_policy[];
59
48e48a70 60static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
61 .name = OVS_FLOW_MCGROUP,
0c200ef9
PS
62};
63
48e48a70 64static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
65 .name = OVS_DATAPATH_MCGROUP,
0c200ef9
PS
66};
67
48e48a70 68static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
69 .name = OVS_VPORT_MCGROUP,
0c200ef9
PS
70};
71
fb5d1e9e
JR
72/* Check if need to build a reply message.
73 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
9b67aa4a
SG
74static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
75 unsigned int group)
fb5d1e9e
JR
76{
77 return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
f8403a2e 78 genl_has_listeners(family, genl_info_net(info), group);
fb5d1e9e
JR
79}
80
/* Deliver 'skb' as a notification for 'family'; may sleep (GFP_KERNEL). */
static void ovs_notify(struct genl_family *family,
		       struct sk_buff *skb, struct genl_info *info)
{
	genl_notify(family, skb, info, 0, GFP_KERNEL);
}
86
ccb1352e
JG
87/**
88 * DOC: Locking:
89 *
8e4e1713
PS
90 * All writes e.g. Writes to device state (add/remove datapath, port, set
91 * operations on vports, etc.), Writes to other state (flow table
92 * modifications, set miscellaneous datapath parameters, etc.) are protected
93 * by ovs_lock.
ccb1352e
JG
94 *
95 * Reads are protected by RCU.
96 *
97 * There are a few special cases (mostly stats) that have their own
98 * synchronization but they nest under all of above and don't interact with
99 * each other.
8e4e1713
PS
100 *
101 * The RTNL lock nests inside ovs_mutex.
ccb1352e
JG
102 */
103
8e4e1713
PS
static DEFINE_MUTEX(ovs_mutex);

/* Acquire the global OVS mutex protecting all writes (see "DOC: Locking"). */
void ovs_lock(void)
{
	mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
	mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
/* Lockdep helper: non-zero if ovs_mutex is held.  When lockdep has shut
 * itself off (debug_locks == 0) the answer is unreliable, so claim "held"
 * to avoid false-positive assertions.
 */
int lockdep_ovsl_is_held(void)
{
	if (debug_locks)
		return lockdep_is_held(&ovs_mutex);
	else
		return 1;
}
#endif
125
ccb1352e 126static struct vport *new_vport(const struct vport_parms *);
8055a89c 127static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
e8eedb85 128 const struct sw_flow_key *,
f2a4d086
WT
129 const struct dp_upcall_info *,
130 uint32_t cutlen);
8055a89c 131static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
e8eedb85 132 const struct sw_flow_key *,
f2a4d086
WT
133 const struct dp_upcall_info *,
134 uint32_t cutlen);
ccb1352e 135
eac87c41
EC
136static void ovs_dp_masks_rebalance(struct work_struct *work);
137
b83d23a2
MG
138static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);
139
/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
	/* The datapath is named after its local (OVSP_LOCAL) port's device. */
	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
	return ovs_vport_name(vport);
}
146
12eb18f7 147static int get_dpifindex(const struct datapath *dp)
ccb1352e
JG
148{
149 struct vport *local;
150 int ifindex;
151
152 rcu_read_lock();
153
15eac2a7 154 local = ovs_vport_rcu(dp, OVSP_LOCAL);
ccb1352e 155 if (local)
be4ace6e 156 ifindex = local->dev->ifindex;
ccb1352e
JG
157 else
158 ifindex = 0;
159
160 rcu_read_unlock();
161
162 return ifindex;
163}
164
/* RCU callback: free all datapath memory once no readers remain. */
static void destroy_dp_rcu(struct rcu_head *rcu)
{
	struct datapath *dp = container_of(rcu, struct datapath, rcu);

	ovs_flow_tbl_destroy(&dp->table);
	free_percpu(dp->stats_percpu);
	kfree(dp->ports);
	ovs_meters_exit(dp);
	/* Grace period has elapsed, so a raw dereference is safe here. */
	kfree(rcu_dereference_raw(dp->upcall_portids));
	kfree(dp);
}
176
15eac2a7
PS
/* Map a port number to its bucket in the datapath's vport hash table.
 * Relies on DP_VPORT_HASH_BUCKETS being a power of two for the mask.
 */
static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
					    u16 port_no)
{
	return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}
182
bb6f9a70 183/* Called with ovs_mutex or RCU read lock. */
15eac2a7
PS
184struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
185{
186 struct vport *vport;
15eac2a7
PS
187 struct hlist_head *head;
188
189 head = vport_hash_bucket(dp, port_no);
53742e69 190 hlist_for_each_entry_rcu(vport, head, dp_hash_node,
cf3266ad 191 lockdep_ovsl_is_held()) {
15eac2a7
PS
192 if (vport->port_no == port_no)
193 return vport;
194 }
195 return NULL;
196}
197
8e4e1713 198/* Called with ovs_mutex. */
ccb1352e
JG
199static struct vport *new_vport(const struct vport_parms *parms)
200{
201 struct vport *vport;
202
203 vport = ovs_vport_add(parms);
204 if (!IS_ERR(vport)) {
205 struct datapath *dp = parms->dp;
15eac2a7 206 struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
ccb1352e 207
15eac2a7 208 hlist_add_head_rcu(&vport->dp_hash_node, head);
ccb1352e 209 }
ccb1352e
JG
210 return vport;
211}
212
1933ea36 213static void ovs_vport_update_upcall_stats(struct sk_buff *skb,
214 const struct dp_upcall_info *upcall_info,
215 bool upcall_result)
216{
217 struct vport *p = OVS_CB(skb)->input_vport;
218 struct vport_upcall_stats_percpu *stats;
219
220 if (upcall_info->cmd != OVS_PACKET_CMD_MISS &&
221 upcall_info->cmd != OVS_PACKET_CMD_ACTION)
222 return;
223
224 stats = this_cpu_ptr(p->upcall_stats);
225 u64_stats_update_begin(&stats->syncp);
226 if (upcall_result)
227 u64_stats_inc(&stats->n_success);
228 else
229 u64_stats_inc(&stats->n_fail);
230 u64_stats_update_end(&stats->syncp);
231}
232
ccb1352e
JG
/* Unhash 'p' from its datapath and destroy it.  Caller holds ovs_mutex. */
void ovs_dp_detach_port(struct vport *p)
{
	ASSERT_OVSL();

	/* First drop references to device. */
	hlist_del_rcu(&p->dp_hash_node);

	/* Then destroy it. */
	ovs_vport_del(p);
}
243
/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
	struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
	const struct vport *p = OVS_CB(skb)->input_vport;
	struct datapath *dp = p->dp;
	struct sw_flow *flow;
	struct sw_flow_actions *sf_acts;
	struct dp_stats_percpu *stats;
	bool ovs_pcpu_locked = false;
	u64 *stats_counter;
	u32 n_mask_hit;
	u32 n_cache_hit;
	int error;

	stats = this_cpu_ptr(dp->stats_percpu);

	/* Look up flow. */
	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
					 &n_mask_hit, &n_cache_hit);
	if (unlikely(!flow)) {
		/* No matching flow: send the packet to userspace as a miss
		 * upcall and account it as such.
		 */
		struct dp_upcall_info upcall;

		memset(&upcall, 0, sizeof(upcall));
		upcall.cmd = OVS_PACKET_CMD_MISS;

		if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
			upcall.portid =
				ovs_dp_get_upcall_portid(dp, smp_processor_id());
		else
			upcall.portid = ovs_vport_find_upcall_portid(p, skb);

		upcall.mru = OVS_CB(skb)->mru;
		error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
		/* Transient delivery failures are not treated as drops. */
		switch (error) {
		case 0:
		case -EAGAIN:
		case -ERESTARTSYS:
		case -EINTR:
			consume_skb(skb);
			break;
		default:
			kfree_skb(skb);
			break;
		}
		stats_counter = &stats->n_missed;
		goto out;
	}

	ovs_flow_stats_update(flow, key->tp.flags, skb);
	sf_acts = rcu_dereference(flow->sf_acts);
	/* This path can be invoked recursively: Use the current task to
	 * identify recursive invocation - the lock must be acquired only once.
	 * Even with disabled bottom halves this can be preempted on PREEMPT_RT.
	 * Limit the locking to RT to avoid assigning `owner' if it can be
	 * avoided.
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) {
		local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
		ovs_pcpu->owner = current;
		ovs_pcpu_locked = true;
	}

	error = ovs_execute_actions(dp, skb, sf_acts, key);
	if (unlikely(error))
		net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
				    ovs_dp_name(dp), error);
	if (ovs_pcpu_locked) {
		ovs_pcpu->owner = NULL;
		local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
	}

	stats_counter = &stats->n_hit;

out:
	/* Update datapath statistics. */
	u64_stats_update_begin(&stats->syncp);
	(*stats_counter)++;
	stats->n_mask_hit += n_mask_hit;
	stats->n_cache_hit += n_cache_hit;
	u64_stats_update_end(&stats->syncp);
}
326
/* Send 'skb' to userspace over the Netlink port in 'upcall_info'.
 * GSO packets are segmented first; non-GSO packets go out directly.
 * Returns 0 on success or a negative errno; failures bump the per-CPU
 * n_lost counter.  The caller retains ownership of 'skb'.
 */
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
		  const struct sw_flow_key *key,
		  const struct dp_upcall_info *upcall_info,
		  uint32_t cutlen)
{
	struct dp_stats_percpu *stats;
	int err;

	if (trace_ovs_dp_upcall_enabled())
		trace_ovs_dp_upcall(dp, skb, key, upcall_info);

	/* portid 0 means no userspace consumer is attached. */
	if (upcall_info->portid == 0) {
		err = -ENOTCONN;
		goto err;
	}

	if (!skb_is_gso(skb))
		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
	else
		err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);

	ovs_vport_update_upcall_stats(skb, upcall_info, !err);
	if (err)
		goto err;

	return 0;

err:
	stats = this_cpu_ptr(dp->stats_percpu);

	u64_stats_update_begin(&stats->syncp);
	stats->n_lost++;
	u64_stats_update_end(&stats->syncp);

	return err;
}
363
/* Segment a GSO skb and queue each resulting segment to userspace.
 * On the first failure the remaining segments are not queued; all
 * segments are then freed (kfree_skb on error, consume_skb otherwise).
 */
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
			     const struct sw_flow_key *key,
			     const struct dp_upcall_info *upcall_info,
			     uint32_t cutlen)
{
	unsigned int gso_type = skb_shinfo(skb)->gso_type;
	struct sw_flow_key later_key;
	struct sk_buff *segs, *nskb;
	int err;

	BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET);
	segs = __skb_gso_segment(skb, NETIF_F_SG, false);
	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (segs == NULL)
		return -EINVAL;

	if (gso_type & SKB_GSO_UDP) {
		/* The initial flow key extracted by ovs_flow_key_extract()
		 * in this case is for a first fragment, so we need to
		 * properly mark later fragments.
		 */
		later_key = *key;
		later_key.ip.frag = OVS_FRAG_TYPE_LATER;
	}

	/* Queue all of the segments. */
	skb_list_walk_safe(segs, skb, nskb) {
		if (gso_type & SKB_GSO_UDP && skb != segs)
			key = &later_key;

		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
		if (err)
			break;

	}

	/* Free all of the segments. */
	skb_list_walk_safe(segs, skb, nskb) {
		if (err)
			kfree_skb(skb);
		else
			consume_skb(skb);
	}
	return err;
}
410
8f0aad6f 411static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
494bea39 412 unsigned int hdrlen, int actions_attrlen)
c3ff8cfe
TG
413{
414 size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
bda56f14 415 + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
b95e5928 416 + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
bd1903b7
TZ
417 + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
418 + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */
c3ff8cfe
TG
419
420 /* OVS_PACKET_ATTR_USERDATA */
8f0aad6f
WZ
421 if (upcall_info->userdata)
422 size += NLA_ALIGN(upcall_info->userdata->nla_len);
423
424 /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
425 if (upcall_info->egress_tun_info)
426 size += nla_total_size(ovs_tun_key_attr_size());
c3ff8cfe 427
ccea7445
NM
428 /* OVS_PACKET_ATTR_ACTIONS */
429 if (upcall_info->actions_len)
494bea39 430 size += nla_total_size(actions_attrlen);
ccea7445 431
7f8a436e
JS
432 /* OVS_PACKET_ATTR_MRU */
433 if (upcall_info->mru)
434 size += nla_total_size(sizeof(upcall_info->mru));
435
c3ff8cfe
TG
436 return size;
437}
438
7f8a436e
JS
439static void pad_packet(struct datapath *dp, struct sk_buff *skb)
440{
441 if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
442 size_t plen = NLA_ALIGN(skb->len) - skb->len;
443
444 if (plen > 0)
b080db58 445 skb_put_zero(skb, plen);
7f8a436e
JS
446 }
447}
448
/* Build a Netlink upcall message for one (non-GSO) packet and unicast it
 * to userspace.  'cutlen' bytes are trimmed from the end of the packet
 * data; the original length is then reported in OVS_PACKET_ATTR_LEN.
 * The caller keeps ownership of 'skb'; any clone made here is freed.
 */
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
				  const struct sw_flow_key *key,
				  const struct dp_upcall_info *upcall_info,
				  uint32_t cutlen)
{
	struct ovs_header *upcall;
	struct sk_buff *nskb = NULL;
	struct sk_buff *user_skb = NULL; /* to be queued to userspace */
	struct nlattr *nla;
	size_t len;
	unsigned int hlen;
	int err, dp_ifindex;
	u64 hash;

	dp_ifindex = get_dpifindex(dp);
	if (!dp_ifindex)
		return -ENODEV;

	if (skb_vlan_tag_present(skb)) {
		/* Work on a clone so the VLAN tag can be pushed back into
		 * the payload without touching the caller's skb.
		 */
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (!nskb)
			return -ENOMEM;

		nskb = __vlan_hwaccel_push_inside(nskb);
		if (!nskb)
			return -ENOMEM;

		skb = nskb;
	}

	if (nla_attr_size(skb->len) > USHRT_MAX) {
		err = -EFBIG;
		goto out;
	}

	/* Complete checksum if needed */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_csum_hwoffload_help(skb, 0)))
		goto out;

	/* Older versions of OVS user space enforce alignment of the last
	 * Netlink attribute to NLA_ALIGNTO which would require extensive
	 * padding logic. Only perform zerocopy if padding is not required.
	 */
	if (dp->user_features & OVS_DP_F_UNALIGNED)
		hlen = skb_zerocopy_headlen(skb);
	else
		hlen = skb->len;

	len = upcall_msg_size(upcall_info, hlen - cutlen,
			      OVS_CB(skb)->acts_origlen);
	user_skb = genlmsg_new(len, GFP_ATOMIC);
	if (!user_skb) {
		err = -ENOMEM;
		goto out;
	}

	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
			     0, upcall_info->cmd);
	if (!upcall) {
		err = -EINVAL;
		goto out;
	}
	upcall->dp_ifindex = dp_ifindex;

	err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
	if (err)
		goto out;

	if (upcall_info->userdata)
		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
			  nla_len(upcall_info->userdata),
			  nla_data(upcall_info->userdata));

	if (upcall_info->egress_tun_info) {
		nla = nla_nest_start_noflag(user_skb,
					    OVS_PACKET_ATTR_EGRESS_TUN_KEY);
		if (!nla) {
			err = -EMSGSIZE;
			goto out;
		}
		err = ovs_nla_put_tunnel_info(user_skb,
					      upcall_info->egress_tun_info);
		if (err)
			goto out;

		nla_nest_end(user_skb, nla);
	}

	if (upcall_info->actions_len) {
		nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
		if (!nla) {
			err = -EMSGSIZE;
			goto out;
		}
		err = ovs_nla_put_actions(upcall_info->actions,
					  upcall_info->actions_len,
					  user_skb);
		if (!err)
			nla_nest_end(user_skb, nla);
		else
			nla_nest_cancel(user_skb, nla);
	}

	/* Add OVS_PACKET_ATTR_MRU */
	if (upcall_info->mru &&
	    nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
		err = -ENOBUFS;
		goto out;
	}

	/* Add OVS_PACKET_ATTR_LEN when packet is truncated */
	if (cutlen > 0 &&
	    nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
		err = -ENOBUFS;
		goto out;
	}

	/* Add OVS_PACKET_ATTR_HASH */
	hash = skb_get_hash_raw(skb);
	if (skb->sw_hash)
		hash |= OVS_PACKET_HASH_SW_BIT;

	if (skb->l4_hash)
		hash |= OVS_PACKET_HASH_L4_BIT;

	if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) {
		err = -ENOBUFS;
		goto out;
	}

	/* Only reserve room for attribute header, packet data is added
	 * in skb_zerocopy() */
	if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
		err = -ENOBUFS;
		goto out;
	}
	nla->nla_len = nla_attr_size(skb->len - cutlen);

	err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
	if (err)
		goto out;

	/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
	pad_packet(dp, user_skb);

	/* Fix up the total message length now that all attributes are in. */
	((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

	err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
	user_skb = NULL;
out:
	if (err)
		skb_tx_error(skb);
	consume_skb(user_skb);
	consume_skb(nskb);

	return err;
}
607
ccb1352e
JG
/* Genetlink handler for OVS_PACKET_CMD_EXECUTE: rebuild a packet from the
 * userspace-supplied attributes, construct a temporary flow for it, and
 * run the given actions against the named datapath.
 */
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
	struct ovs_header *ovs_header = genl_info_userhdr(info);
	struct net *net = sock_net(skb->sk);
	struct nlattr **a = info->attrs;
	struct sw_flow_actions *acts;
	struct sk_buff *packet;
	struct sw_flow *flow;
	struct sw_flow_actions *sf_acts;
	struct datapath *dp;
	struct vport *input_vport;
	u16 mru = 0;
	u64 hash;
	int len;
	int err;
	/* PROBE requests come from feature detection; suppress error logs. */
	bool log = !a[OVS_PACKET_ATTR_PROBE];

	err = -EINVAL;
	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
	    !a[OVS_PACKET_ATTR_ACTIONS])
		goto err;

	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
	err = -ENOMEM;
	if (!packet)
		goto err;
	skb_reserve(packet, NET_IP_ALIGN);

	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

	/* Set packet's mru */
	if (a[OVS_PACKET_ATTR_MRU]) {
		mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
		packet->ignore_df = 1;
	}
	OVS_CB(packet)->mru = mru;

	if (a[OVS_PACKET_ATTR_HASH]) {
		hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);

		/* Restore the flow hash plus its SW/L4 qualifier bits. */
		__skb_set_hash(packet, hash & 0xFFFFFFFFULL,
			       !!(hash & OVS_PACKET_HASH_SW_BIT),
			       !!(hash & OVS_PACKET_HASH_L4_BIT));
	}

	/* Build an sw_flow for sending this packet. */
	flow = ovs_flow_alloc();
	err = PTR_ERR(flow);
	if (IS_ERR(flow))
		goto err_kfree_skb;

	err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
					     packet, &flow->key, log);
	if (err)
		goto err_flow_free;

	err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
				   &flow->key, &acts, log);
	if (err)
		goto err_flow_free;

	rcu_assign_pointer(flow->sf_acts, acts);
	packet->priority = flow->key.phy.priority;
	packet->mark = flow->key.phy.skb_mark;

	rcu_read_lock();
	dp = get_dp_rcu(net, ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto err_unlock;

	/* Inject on the key's input port, falling back to the local port. */
	input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
	if (!input_vport)
		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

	if (!input_vport)
		goto err_unlock;

	packet->dev = input_vport->dev;
	OVS_CB(packet)->input_vport = input_vport;
	sf_acts = rcu_dereference(flow->sf_acts);

	local_bh_disable();
	local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		this_cpu_write(ovs_pcpu_storage->owner, current);
	err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		this_cpu_write(ovs_pcpu_storage->owner, NULL);
	local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
	local_bh_enable();
	rcu_read_unlock();

	ovs_flow_free(flow, false);
	return err;

err_unlock:
	rcu_read_unlock();
err_flow_free:
	ovs_flow_free(flow, false);
err_kfree_skb:
	kfree_skb(packet);
err:
	return err;
}
714
/* Netlink attribute validation policy for OVS_PACKET_* commands. */
static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
	[OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
	[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
	[OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
};
723
/* Operations table for the OVS packet Genetlink family. */
static const struct genl_small_ops dp_packet_genl_ops[] = {
	{ .cmd = OVS_PACKET_CMD_EXECUTE,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_packet_cmd_execute
	}
};
731
/* Genetlink family definition for OVS_PACKET_* commands. */
static struct genl_family dp_packet_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_PACKET_FAMILY,
	.version = OVS_PACKET_VERSION,
	.maxattr = OVS_PACKET_ATTR_MAX,
	.policy = packet_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_packet_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
	.resv_start_op = OVS_PACKET_CMD_EXECUTE + 1,
	.module = THIS_MODULE,
};
745
/* Aggregate the per-CPU datapath counters into 'stats'/'mega_stats'. */
static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
			 struct ovs_dp_megaflow_stats *mega_stats)
{
	int i;

	memset(mega_stats, 0, sizeof(*mega_stats));

	stats->n_flows = ovs_flow_tbl_count(&dp->table);
	mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

	stats->n_hit = stats->n_missed = stats->n_lost = 0;

	for_each_possible_cpu(i) {
		const struct dp_stats_percpu *percpu_stats;
		struct dp_stats_percpu local_stats;
		unsigned int start;

		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

		/* Snapshot under the u64_stats seqcount so a concurrent
		 * writer on that CPU cannot give us a torn read.
		 */
		do {
			start = u64_stats_fetch_begin(&percpu_stats->syncp);
			local_stats = *percpu_stats;
		} while (u64_stats_fetch_retry(&percpu_stats->syncp, start));

		stats->n_hit += local_stats.n_hit;
		stats->n_missed += local_stats.n_missed;
		stats->n_lost += local_stats.n_lost;
		mega_stats->n_mask_hit += local_stats.n_mask_hit;
		mega_stats->n_cache_hit += local_stats.n_cache_hit;
	}
}
777
74ed7ab9
JS
778static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
779{
780 return ovs_identifier_is_ufid(sfid) &&
781 !(ufid_flags & OVS_UFID_F_OMIT_KEY);
782}
783
784static bool should_fill_mask(uint32_t ufid_flags)
785{
786 return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
787}
788
789static bool should_fill_actions(uint32_t ufid_flags)
c3ff8cfe 790{
74ed7ab9
JS
791 return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
792}
793
/* Worst-case Netlink message size for one flow reply, given the action
 * list, flow identifier, and the caller's OVS_UFID_F_* omission flags.
 */
static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
				    const struct sw_flow_id *sfid,
				    uint32_t ufid_flags)
{
	size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));

	/* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
	 * see ovs_nla_put_identifier()
	 */
	if (sfid && ovs_identifier_is_ufid(sfid))
		len += nla_total_size(sfid->ufid_len);
	else
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_KEY */
	if (!sfid || should_fill_key(sfid, ufid_flags))
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_MASK */
	if (should_fill_mask(ufid_flags))
		len += nla_total_size(ovs_key_attr_size());

	/* OVS_FLOW_ATTR_ACTIONS */
	if (should_fill_actions(ufid_flags))
		len += nla_total_size(acts->orig_len);

	return len
		+ nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
		+ nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
}
825
ca7105f2
JS
826/* Called with ovs_mutex or RCU read lock. */
827static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
828 struct sk_buff *skb)
829{
830 struct ovs_flow_stats stats;
831 __be16 tcp_flags;
832 unsigned long used;
ccb1352e 833
e298e505 834 ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
0e9796b4 835
028d6a67 836 if (used &&
0238b720
ND
837 nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
838 OVS_FLOW_ATTR_PAD))
ca7105f2 839 return -EMSGSIZE;
ccb1352e 840
028d6a67 841 if (stats.n_packets &&
66c7a5ee
ND
842 nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
843 sizeof(struct ovs_flow_stats), &stats,
844 OVS_FLOW_ATTR_PAD))
ca7105f2 845 return -EMSGSIZE;
ccb1352e 846
e298e505
PS
847 if ((u8)ntohs(tcp_flags) &&
848 nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
ca7105f2
JS
849 return -EMSGSIZE;
850
851 return 0;
852}
853
/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
				     struct sk_buff *skb, int skb_orig_len)
{
	struct nlattr *start;
	int err;

	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
	 * this is the first flow to be dumped into 'skb'. This is unusual for
	 * Netlink but individual action lists can be longer than
	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
	 * The userspace caller can always fetch the actions separately if it
	 * really wants them. (Most userspace callers in fact don't care.)
	 *
	 * This can only fail for dump operations because the skb is always
	 * properly sized for single flows.
	 */
	start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
	if (start) {
		const struct sw_flow_actions *sf_acts;

		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
		err = ovs_nla_put_actions(sf_acts->actions,
					  sf_acts->actions_len, skb);

		if (!err)
			nla_nest_end(skb, start);
		else {
			/* skb_orig_len != 0 means other flows are already in
			 * this dump skb; report the error so the flow is
			 * retried in a fresh message.
			 */
			if (skb_orig_len)
				return err;

			nla_nest_cancel(skb, start);
		}
	} else if (skb_orig_len) {
		return -EMSGSIZE;
	}

	return 0;
}
893
/* Called with ovs_mutex or RCU read lock.
 * Serialize one flow (identifier, key, mask, stats, actions — subject to
 * 'ufid_flags') into 'skb' as a complete Genetlink message.
 */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
				  struct sk_buff *skb, u32 portid,
				  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
	const int skb_orig_len = skb->len;
	struct ovs_header *ovs_header;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = dp_ifindex;

	err = ovs_nla_put_identifier(flow, skb);
	if (err)
		goto error;

	if (should_fill_key(&flow->id, ufid_flags)) {
		err = ovs_nla_put_masked_key(flow, skb);
		if (err)
			goto error;
	}

	if (should_fill_mask(ufid_flags)) {
		err = ovs_nla_put_mask(flow, skb);
		if (err)
			goto error;
	}

	err = ovs_flow_cmd_fill_stats(flow, skb);
	if (err)
		goto error;

	if (should_fill_actions(ufid_flags)) {
		err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
		if (err)
			goto error;
	}

	genlmsg_end(skb, ovs_header);
	return 0;

error:
	/* Roll back the partially-built message. */
	genlmsg_cancel(skb, ovs_header);
	return err;
}
943
0e9796b4
JR
944/* May not be called with RCU read lock. */
945static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
74ed7ab9 946 const struct sw_flow_id *sfid,
fb5d1e9e 947 struct genl_info *info,
74ed7ab9
JS
948 bool always,
949 uint32_t ufid_flags)
ccb1352e 950{
fb5d1e9e 951 struct sk_buff *skb;
74ed7ab9 952 size_t len;
ccb1352e 953
9b67aa4a 954 if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
fb5d1e9e
JR
955 return NULL;
956
74ed7ab9 957 len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
551ddc05 958 skb = genlmsg_new(len, GFP_KERNEL);
fb5d1e9e
JR
959 if (!skb)
960 return ERR_PTR(-ENOMEM);
961
962 return skb;
ccb1352e
JG
963}
964
0e9796b4
JR
/* Called with ovs_mutex.
 * Allocate and fill a complete reply message for 'flow'.  May return NULL
 * (no reply needed) or an ERR_PTR on allocation/serialization failure.
 */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
					       int dp_ifindex,
					       struct genl_info *info, u8 cmd,
					       bool always, u32 ufid_flags)
{
	struct sk_buff *skb;
	int retval;

	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
				      &flow->id, info, always, ufid_flags);
	if (IS_ERR_OR_NULL(skb))
		return skb;

	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
					info->snd_portid, info->snd_seq, 0,
					cmd, ufid_flags);
	/* The skb was sized by ovs_flow_cmd_msg_size(), so filling it
	 * should never fail; warn loudly if it somehow does.
	 */
	if (WARN_ON_ONCE(retval < 0)) {
		kfree_skb(skb);
		skb = ERR_PTR(retval);
	}
	return skb;
}
988
/* OVS_FLOW_CMD_NEW handler: install a new flow, or update the actions of
 * an existing one when the netlink flags permit it.
 *
 * Allocations (flow, key, actions, reply skb) happen before taking
 * ovs_mutex so the locked section stays short.  On success a reply is
 * multicast via ovs_notify() when one was allocated.
 * Returns 0 or a negative errno.
 */
static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = genl_info_userhdr(info);
	struct sw_flow *flow = NULL, *new_flow;
	struct sw_flow_mask mask;
	struct sk_buff *reply;
	struct datapath *dp;
	struct sw_flow_key *key;
	struct sw_flow_actions *acts;
	struct sw_flow_match match;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int error;
	bool log = !a[OVS_FLOW_ATTR_PROBE];

	/* Must have key and actions. */
	error = -EINVAL;
	if (!a[OVS_FLOW_ATTR_KEY]) {
		OVS_NLERR(log, "Flow key attr not present in new flow.");
		goto error;
	}
	if (!a[OVS_FLOW_ATTR_ACTIONS]) {
		OVS_NLERR(log, "Flow actions attr not present in new flow.");
		goto error;
	}

	/* Most of the time we need to allocate a new flow, do it before
	 * locking.
	 */
	new_flow = ovs_flow_alloc();
	if (IS_ERR(new_flow)) {
		error = PTR_ERR(new_flow);
		goto error;
	}

	/* Extract key.  Heap-allocated to keep the stack frame small. */
	key = kzalloc(sizeof(*key), GFP_KERNEL);
	if (!key) {
		error = -ENOMEM;
		goto err_kfree_flow;
	}

	ovs_match_init(&match, key, false, &mask);
	error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
				  a[OVS_FLOW_ATTR_MASK], log);
	if (error)
		goto err_kfree_key;

	/* The flow is inserted with the masked key. */
	ovs_flow_mask_key(&new_flow->key, key, true, &mask);

	/* Extract flow identifier. */
	error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
				       key, log);
	if (error)
		goto err_kfree_key;

	/* Validate actions. */
	error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
				     &new_flow->key, &acts, log);
	if (error) {
		OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
		goto err_kfree_key;
	}

	/* May legitimately return NULL when no listener needs a reply. */
	reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
					ufid_flags);
	if (IS_ERR(reply)) {
		error = PTR_ERR(reply);
		goto err_kfree_acts;
	}

	ovs_lock();
	dp = get_dp(net, ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		error = -ENODEV;
		goto err_unlock_ovs;
	}

	/* Check if this is a duplicate flow */
	if (ovs_identifier_is_ufid(&new_flow->id))
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
	if (!flow)
		flow = ovs_flow_tbl_lookup(&dp->table, key);
	if (likely(!flow)) {
		rcu_assign_pointer(new_flow->sf_acts, acts);

		/* Put flow in bucket. */
		error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
		if (unlikely(error)) {
			/* acts now belongs to new_flow; don't double-free
			 * it in the unwind path.
			 */
			acts = NULL;
			goto err_unlock_ovs;
		}

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(new_flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_NEW,
						       ufid_flags);
			BUG_ON(error < 0);
		}
		ovs_unlock();
	} else {
		struct sw_flow_actions *old_acts;

		/* Bail out if we're not allowed to modify an existing flow.
		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
		 * because Generic Netlink treats the latter as a dump
		 * request. We also accept NLM_F_EXCL in case that bug ever
		 * gets fixed.
		 */
		if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
							 | NLM_F_EXCL))) {
			error = -EEXIST;
			goto err_unlock_ovs;
		}
		/* The flow identifier has to be the same for flow updates.
		 * Look for any overlapping flow.
		 */
		if (unlikely(!ovs_flow_cmp(flow, &match))) {
			if (ovs_identifier_is_key(&flow->id))
				flow = ovs_flow_tbl_lookup_exact(&dp->table,
								 &match);
			else /* UFID matches but key is different */
				flow = NULL;
			if (!flow) {
				error = -ENOENT;
				goto err_unlock_ovs;
			}
		}
		/* Update actions. */
		old_acts = ovsl_dereference(flow->sf_acts);
		rcu_assign_pointer(flow->sf_acts, acts);

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_NEW,
						       ufid_flags);
			BUG_ON(error < 0);
		}
		ovs_unlock();

		/* Old actions may still be referenced by readers; free after
		 * a grace period.  new_flow was never published.
		 */
		ovs_nla_free_flow_actions_rcu(old_acts);
		ovs_flow_free(new_flow, false);
	}

	if (reply)
		ovs_notify(&dp_flow_genl_family, reply, info);

	kfree(key);
	return 0;

err_unlock_ovs:
	ovs_unlock();
	kfree_skb(reply);
err_kfree_acts:
	ovs_nla_free_flow_actions(acts);
err_kfree_key:
	kfree(key);
err_kfree_flow:
	ovs_flow_free(new_flow, false);
error:
	return error;
}
ccb1352e 1158
/* Factor out action copy to avoid "Wframe-larger-than=1024" warning.
 *
 * Builds a masked copy of @key and validates/copies the actions
 * attribute @a against it.  Returns the copied actions or an ERR_PTR.
 */
static noinline_for_stack
struct sw_flow_actions *get_flow_actions(struct net *net,
					 const struct nlattr *a,
					 const struct sw_flow_key *key,
					 const struct sw_flow_mask *mask,
					 bool log)
{
	struct sw_flow_actions *acts;
	struct sw_flow_key masked_key;
	int error;

	ovs_flow_mask_key(&masked_key, key, true, mask);
	error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
	if (error) {
		OVS_NLERR(log,
			  "Actions may not be safe on all matching packets");
		return ERR_PTR(error);
	}

	return acts;
}
1181
/* Factor out match-init and action-copy to avoid
 * "Wframe-larger-than=1024" warning.  Because the mask is only used to
 * derive the actions, it lives on this function's stack.
 *
 * If neither key nor action attrs are present, returns 0 directly and
 * the caller must not use the match.  If an actions attr is present, the
 * copied actions are stored in *acts.  Before returning, match->mask is
 * reset so no match object escapes with a dangling reference to the
 * stack-allocated mask.
 */
static noinline_for_stack int
ovs_nla_init_match_and_action(struct net *net,
			      struct sw_flow_match *match,
			      struct sw_flow_key *key,
			      struct nlattr **a,
			      struct sw_flow_actions **acts,
			      bool log)
{
	struct sw_flow_mask mask;
	int error = 0;

	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(match, key, true, &mask);
		error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
					  a[OVS_FLOW_ATTR_MASK], log);
		if (error)
			goto error;
	}

	if (a[OVS_FLOW_ATTR_ACTIONS]) {
		/* Actions cannot be validated without the key they apply to. */
		if (!a[OVS_FLOW_ATTR_KEY]) {
			OVS_NLERR(log,
				  "Flow key attribute not present in set flow.");
			error = -EINVAL;
			goto error;
		}

		*acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
					 &mask, log);
		if (IS_ERR(*acts)) {
			error = PTR_ERR(*acts);
			goto error;
		}
	}

	/* On success, error is 0. */
error:
	match->mask = NULL;
	return error;
}
1235
/* OVS_FLOW_CMD_SET handler: replace the actions of an existing flow
 * (identified by UFID or exact key) and/or clear its statistics.
 *
 * When new actions are supplied the reply skb is allocated before
 * ovs_lock(); otherwise it must be built under the lock from the flow's
 * current actions.  Returns 0 or a negative errno.
 */
static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = genl_info_userhdr(info);
	struct sw_flow_key key;
	struct sw_flow *flow;
	struct sk_buff *reply = NULL;
	struct datapath *dp;
	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
	struct sw_flow_match match;
	struct sw_flow_id sfid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int error = 0;
	bool log = !a[OVS_FLOW_ATTR_PROBE];
	bool ufid_present;

	/* The flow must be identifiable by either its UFID or its key. */
	ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
	if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
		OVS_NLERR(log,
			  "Flow set message rejected, Key attribute missing.");
		return -EINVAL;
	}

	error = ovs_nla_init_match_and_action(net, &match, &key, a,
					      &acts, log);
	if (error)
		goto error;

	if (acts) {
		/* Can allocate before locking if have acts. */
		reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
						ufid_flags);
		if (IS_ERR(reply)) {
			error = PTR_ERR(reply);
			goto err_kfree_acts;
		}
	}

	ovs_lock();
	dp = get_dp(net, ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		error = -ENODEV;
		goto err_unlock_ovs;
	}
	/* Check that the flow exists. */
	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (unlikely(!flow)) {
		error = -ENOENT;
		goto err_unlock_ovs;
	}

	/* Update actions, if present. */
	if (likely(acts)) {
		old_acts = ovsl_dereference(flow->sf_acts);
		rcu_assign_pointer(flow->sf_acts, acts);

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_SET,
						       ufid_flags);
			BUG_ON(error < 0);
		}
	} else {
		/* Could not alloc without acts before locking. */
		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
						info, OVS_FLOW_CMD_SET, false,
						ufid_flags);

		if (IS_ERR(reply)) {
			error = PTR_ERR(reply);
			goto err_unlock_ovs;
		}
	}

	/* Clear stats. */
	if (a[OVS_FLOW_ATTR_CLEAR])
		ovs_flow_stats_clear(flow);
	ovs_unlock();

	if (reply)
		ovs_notify(&dp_flow_genl_family, reply, info);
	/* Old actions may still be in use by RCU readers. */
	if (old_acts)
		ovs_nla_free_flow_actions_rcu(old_acts);

	return 0;

err_unlock_ovs:
	ovs_unlock();
	kfree_skb(reply);
err_kfree_acts:
	ovs_nla_free_flow_actions(acts);
error:
	return error;
}
1337
/* OVS_FLOW_CMD_GET handler: look up one flow by UFID or exact key and
 * unicast a reply to the requester.  Returns 0 or a negative errno.
 */
static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = genl_info_userhdr(info);
	struct net *net = sock_net(skb->sk);
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct sw_flow_match match;
	struct sw_flow_id ufid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int err = 0;
	bool log = !a[OVS_FLOW_ATTR_PROBE];
	bool ufid_present;

	/* At least one of UFID or key must identify the flow. */
	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(&match, &key, true, NULL);
		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
					log);
	} else if (!ufid_present) {
		OVS_NLERR(log,
			  "Flow get message rejected, Key attribute missing.");
		err = -EINVAL;
	}
	if (err)
		return err;

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		err = -ENODEV;
		goto unlock;
	}

	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (!flow) {
		err = -ENOENT;
		goto unlock;
	}

	/* always=true: a get must produce a reply even with no listeners. */
	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
					OVS_FLOW_CMD_GET, true, ufid_flags);
	if (IS_ERR(reply)) {
		err = PTR_ERR(reply);
		goto unlock;
	}

	ovs_unlock();
	return genlmsg_reply(reply, info);
unlock:
	ovs_unlock();
	return err;
}
1396
/* OVS_FLOW_CMD_DEL handler: delete one flow (by UFID or exact key), or
 * flush the whole table when neither identifier is supplied.
 *
 * The flow is removed under ovs_mutex but the notification is built
 * after unlocking; the flow itself is only freed (deferred, RCU) at the
 * end, so dereferencing it there is still safe.  Returns 0 or a
 * negative errno.
 */
static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = genl_info_userhdr(info);
	struct net *net = sock_net(skb->sk);
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow = NULL;
	struct datapath *dp;
	struct sw_flow_match match;
	struct sw_flow_id ufid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int err;
	bool log = !a[OVS_FLOW_ATTR_PROBE];
	bool ufid_present;

	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(&match, &key, true, NULL);
		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
					NULL, log);
		if (unlikely(err))
			return err;
	}

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		err = -ENODEV;
		goto unlock;
	}

	/* No identifier at all means "flush every flow". */
	if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
		err = ovs_flow_tbl_flush(&dp->table);
		goto unlock;
	}

	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (unlikely(!flow)) {
		err = -ENOENT;
		goto unlock;
	}

	ovs_flow_tbl_remove(&dp->table, flow);
	ovs_unlock();

	/* Best-effort notification: a failed allocation here must not fail
	 * the deletion, which has already happened.
	 */
	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
					&flow->id, info, false, ufid_flags);
	if (likely(reply)) {
		if (!IS_ERR(reply)) {
			rcu_read_lock();	/*To keep RCU checker happy. */
			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
						     reply, info->snd_portid,
						     info->snd_seq, 0,
						     OVS_FLOW_CMD_DEL,
						     ufid_flags);
			rcu_read_unlock();
			if (WARN_ON_ONCE(err < 0)) {
				kfree_skb(reply);
				goto out_free;
			}

			ovs_notify(&dp_flow_genl_family, reply, info);
		} else {
			/* Tell listeners the notification was lost. */
			netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
					PTR_ERR(reply));
		}
	}

out_free:
	/* Deferred (RCU) free; concurrent readers may still hold the flow. */
	ovs_flow_free(flow, true);
	return 0;
unlock:
	ovs_unlock();
	return err;
}
1476
/* Netlink dump callback for OVS_FLOW_CMD_GET: walk the flow table and
 * emit one message per flow until the skb fills up.
 *
 * Runs under rcu_read_lock(); the (bucket, obj) cursor is persisted in
 * cb->args[0..1] across invocations.  Returns skb->len (netlink dump
 * convention) or a negative errno from attribute parsing.
 */
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nlattr *a[__OVS_FLOW_ATTR_MAX];
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct table_instance *ti;
	struct datapath *dp;
	u32 ufid_flags;
	int err;

	err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
				       OVS_FLOW_ATTR_MAX, flow_policy, NULL);
	if (err)
		return err;
	ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);

	rcu_read_lock();
	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		rcu_read_unlock();
		return -ENODEV;
	}

	ti = rcu_dereference(dp->table.ti);
	for (;;) {
		struct sw_flow *flow;
		u32 bucket, obj;

		bucket = cb->args[0];
		obj = cb->args[1];
		flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
		if (!flow)
			break;

		/* Stop (without advancing the cursor) when the skb is full;
		 * the next dump call resumes from the same position.
		 */
		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
					   NETLINK_CB(cb->skb).portid,
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
					   OVS_FLOW_CMD_GET, ufid_flags) < 0)
			break;

		cb->args[0] = bucket;
		cb->args[1] = obj;
	}
	rcu_read_unlock();
	return skb->len;
}
1522
/* Netlink attribute policy for the flow family.  UFID is opaque binary
 * data, so only a minimum length (1 byte) is enforced here.
 */
static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
	[OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
	[OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
	[OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
};
1532
/* Generic netlink operations for the flow family.  GET is open to
 * unprivileged users; all mutating commands require CAP_NET_ADMIN in
 * the user namespace (GENL_UNS_ADMIN_PERM).
 */
static const struct genl_small_ops dp_flow_genl_ops[] = {
	{ .cmd = OVS_FLOW_CMD_NEW,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_flow_cmd_new
	},
	{ .cmd = OVS_FLOW_CMD_DEL,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_flow_cmd_del
	},
	{ .cmd = OVS_FLOW_CMD_GET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .doit = ovs_flow_cmd_get,
	  .dumpit = ovs_flow_cmd_dump
	},
	{ .cmd = OVS_FLOW_CMD_SET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_flow_cmd_set,
	},
};
1556
/* Generic netlink family definition for flow commands.  parallel_ops
 * allows concurrent handler execution; serialization is done internally
 * with ovs_mutex and RCU.
 */
static struct genl_family dp_flow_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_FLOW_FAMILY,
	.version = OVS_FLOW_VERSION,
	.maxattr = OVS_FLOW_ATTR_MAX,
	.policy = flow_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_flow_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
	.resv_start_op = OVS_FLOW_CMD_SET + 1,
	.mcgrps = &ovs_dp_flow_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};
1572
c3ff8cfe
TG
1573static size_t ovs_dp_cmd_msg_size(void)
1574{
1575 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1576
1577 msgsize += nla_total_size(IFNAMSIZ);
66c7a5ee
ND
1578 msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
1579 msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
45fb9c35 1580 msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
9bf24f59 1581 msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
347541e2 1582 msgsize += nla_total_size(sizeof(u32) * nr_cpu_ids); /* OVS_DP_ATTR_PER_CPU_PIDS */
c3ff8cfe
TG
1583
1584 return msgsize;
1585}
1586
/* Called with ovs_mutex.
 *
 * Fill @skb with a complete datapath message for @dp: name, stats,
 * megaflow stats, user features, masks-cache size and (when per-CPU
 * upcall dispatch is enabled) the per-CPU upcall portids.
 * Returns 0 or -EMSGSIZE.
 */
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
				u32 portid, u32 seq, u32 flags, u8 cmd)
{
	struct ovs_header *ovs_header;
	struct ovs_dp_stats dp_stats;
	struct ovs_dp_megaflow_stats dp_megaflow_stats;
	struct dp_nlsk_pids *pids = ovsl_dereference(dp->upcall_portids);
	int err, pids_len;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
				 flags, cmd);
	if (!ovs_header)
		goto error;

	ovs_header->dp_ifindex = get_dpifindex(dp);

	err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
	if (err)
		goto nla_put_failure;

	get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
	if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
			  &dp_stats, OVS_DP_ATTR_PAD))
		goto nla_put_failure;

	if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
			  sizeof(struct ovs_dp_megaflow_stats),
			  &dp_megaflow_stats, OVS_DP_ATTR_PAD))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
			ovs_flow_tbl_masks_cache_size(&dp->table)))
		goto nla_put_failure;

	if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && pids) {
		/* Never report more pids than the kernel has CPUs. */
		pids_len = min(pids->n_pids, nr_cpu_ids) * sizeof(u32);
		if (nla_put(skb, OVS_DP_ATTR_PER_CPU_PIDS, pids_len, &pids->pids))
			goto nla_put_failure;
	}

	genlmsg_end(skb, ovs_header);
	return 0;

nla_put_failure:
	genlmsg_cancel(skb, ovs_header);
error:
	return -EMSGSIZE;
}
1639
263ea090 1640static struct sk_buff *ovs_dp_cmd_alloc_info(void)
ccb1352e 1641{
551ddc05 1642 return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
ccb1352e
JG
1643}
1644
bb6f9a70 1645/* Called with rcu_read_lock or ovs_mutex. */
46df7b81 1646static struct datapath *lookup_datapath(struct net *net,
12eb18f7 1647 const struct ovs_header *ovs_header,
ccb1352e
JG
1648 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1649{
1650 struct datapath *dp;
1651
1652 if (!a[OVS_DP_ATTR_NAME])
46df7b81 1653 dp = get_dp(net, ovs_header->dp_ifindex);
ccb1352e
JG
1654 else {
1655 struct vport *vport;
1656
46df7b81 1657 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
ccb1352e 1658 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
ccb1352e
JG
1659 }
1660 return dp ? dp : ERR_PTR(-ENODEV);
1661}
1662
/* Drop all user features on an existing datapath.
 *
 * Used when an outdated userspace (genl version < OVS_DP_VER_FEATURES)
 * tries to re-create a datapath it is about to reuse; silently keeping
 * features it does not understand would be unsafe.
 */
static void ovs_dp_reset_user_features(struct sk_buff *skb,
				       struct genl_info *info)
{
	struct datapath *dp;

	dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
			     info->attrs);
	if (IS_ERR(dp))
		return;

	pr_warn("%s: Dropping previously announced user features\n",
		ovs_dp_name(dp));
	dp->user_features = 0;
}
1677
/* Replace the per-CPU upcall netlink portid array from the
 * OVS_DP_ATTR_PER_CPU_PIDS attribute @ids.
 *
 * The attribute must be a non-empty multiple of sizeof(u32).  The old
 * array is freed after an RCU grace period since readers access it via
 * rcu_dereference (see ovs_dp_get_upcall_portid).
 * Returns 0, -EINVAL or -ENOMEM.
 */
static int ovs_dp_set_upcall_portids(struct datapath *dp,
				     const struct nlattr *ids)
{
	struct dp_nlsk_pids *old, *dp_nlsk_pids;

	if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
		return -EINVAL;

	old = ovsl_dereference(dp->upcall_portids);

	/* Flexible array member: header plus one u32 per pid. */
	dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids),
			       GFP_KERNEL);
	if (!dp_nlsk_pids)
		return -ENOMEM;

	dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32);
	nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids));

	rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids);

	kfree_rcu(old, rcu);

	return 0;
}
1702
1703u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
1704{
1705 struct dp_nlsk_pids *dp_nlsk_pids;
1706
1707 dp_nlsk_pids = rcu_dereference(dp->upcall_portids);
1708
1709 if (dp_nlsk_pids) {
1710 if (cpu_id < dp_nlsk_pids->n_pids) {
1711 return dp_nlsk_pids->pids[cpu_id];
784dcfa5
MG
1712 } else if (dp_nlsk_pids->n_pids > 0 &&
1713 cpu_id >= dp_nlsk_pids->n_pids) {
1714 /* If the number of netlink PIDs is mismatched with
1715 * the number of CPUs as seen by the kernel, log this
1716 * and send the upcall to an arbitrary socket (0) in
1717 * order to not drop packets
b83d23a2
MG
1718 */
1719 pr_info_ratelimited("cpu_id mismatch with handler threads");
784dcfa5
MG
1720 return dp_nlsk_pids->pids[cpu_id %
1721 dp_nlsk_pids->n_pids];
b83d23a2
MG
1722 } else {
1723 return 0;
1724 }
1725 } else {
1726 return 0;
1727 }
1728}
1729
95a7233c 1730static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
43d4be9c 1731{
35d39fec 1732 u32 user_features = 0, old_features = dp->user_features;
b83d23a2 1733 int err;
95a7233c
PB
1734
1735 if (a[OVS_DP_ATTR_USER_FEATURES]) {
1736 user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1737
1738 if (user_features & ~(OVS_DP_F_VPORT_PIDS |
1739 OVS_DP_F_UNALIGNED |
b83d23a2
MG
1740 OVS_DP_F_TC_RECIRC_SHARING |
1741 OVS_DP_F_DISPATCH_UPCALL_PER_CPU))
95a7233c
PB
1742 return -EOPNOTSUPP;
1743
1744#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
1745 if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
1746 return -EOPNOTSUPP;
1747#endif
1748 }
1749
9bf24f59
EC
1750 if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
1751 int err;
1752 u32 cache_size;
1753
1754 cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
1755 err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
1756 if (err)
1757 return err;
1758 }
1759
95a7233c
PB
1760 dp->user_features = user_features;
1761
b83d23a2
MG
1762 if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU &&
1763 a[OVS_DP_ATTR_PER_CPU_PIDS]) {
1764 /* Upcall Netlink Port IDs have been updated */
1765 err = ovs_dp_set_upcall_portids(dp,
1766 a[OVS_DP_ATTR_PER_CPU_PIDS]);
1767 if (err)
1768 return err;
1769 }
1770
35d39fec
PB
1771 if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
1772 !(old_features & OVS_DP_F_TC_RECIRC_SHARING))
1773 tc_skb_ext_tc_enable();
1774 else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
1775 (old_features & OVS_DP_F_TC_RECIRC_SHARING))
1776 tc_skb_ext_tc_disable();
95a7233c
PB
1777
1778 return 0;
43d4be9c
TG
1779}
1780
eec62ead
TZ
1781static int ovs_dp_stats_init(struct datapath *dp)
1782{
1783 dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1784 if (!dp->stats_percpu)
1785 return -ENOMEM;
1786
1787 return 0;
1788}
1789
1790static int ovs_dp_vport_init(struct datapath *dp)
1791{
1792 int i;
1793
1794 dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
1795 sizeof(struct hlist_head),
1796 GFP_KERNEL);
1797 if (!dp->ports)
1798 return -ENOMEM;
1799
1800 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1801 INIT_HLIST_HEAD(&dp->ports[i]);
1802
1803 return 0;
1804}
1805
/* OVS_DP_CMD_NEW handler: create a datapath and its internal
 * (OVSP_LOCAL) vport.
 *
 * All allocations (reply skb, dp, flow table, stats, vport table,
 * meters) happen before taking ovs_mutex; the unwind labels release
 * them in strict reverse order.  Returns 0 or a negative errno.
 */
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct datapath *dp;
	struct vport *vport;
	struct ovs_net *ovs_net;
	int err;

	err = -EINVAL;
	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
		goto err;

	reply = ovs_dp_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	err = -ENOMEM;
	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
	if (dp == NULL)
		goto err_destroy_reply;

	ovs_dp_set_net(dp, sock_net(skb->sk));

	/* Allocate table. */
	err = ovs_flow_tbl_init(&dp->table);
	if (err)
		goto err_destroy_dp;

	err = ovs_dp_stats_init(dp);
	if (err)
		goto err_destroy_table;

	err = ovs_dp_vport_init(dp);
	if (err)
		goto err_destroy_stats;

	err = ovs_meters_init(dp);
	if (err)
		goto err_destroy_ports;

	/* Set up our datapath device. */
	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
	parms.type = OVS_VPORT_TYPE_INTERNAL;
	parms.options = NULL;
	parms.dp = dp;
	parms.port_no = OVSP_LOCAL;
	parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
	parms.desired_ifindex = nla_get_s32_default(a[OVS_DP_ATTR_IFINDEX], 0);

	/* So far only local changes have been made, now need the lock. */
	ovs_lock();

	err = ovs_dp_change(dp, a);
	if (err)
		goto err_unlock_and_destroy_meters;

	vport = new_vport(&parms);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		if (err == -EBUSY)
			err = -EEXIST;

		if (err == -EEXIST) {
			/* An outdated user space instance that does not understand
			 * the concept of user_features has attempted to create a new
			 * datapath and is likely to reuse it. Drop all user features.
			 */
			if (info->genlhdr->version < OVS_DP_VER_FEATURES)
				ovs_dp_reset_user_features(skb, info);
		}

		goto err_destroy_portids;
	}

	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_NEW);
	BUG_ON(err < 0);	/* Reply skb was pre-sized for this message. */

	/* Publish the datapath on the per-netns list. */
	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
	list_add_tail_rcu(&dp->list_node, &ovs_net->dps);

	ovs_unlock();

	ovs_notify(&dp_datapath_genl_family, reply, info);
	return 0;

err_destroy_portids:
	kfree(rcu_dereference_raw(dp->upcall_portids));
err_unlock_and_destroy_meters:
	ovs_unlock();
	ovs_meters_exit(dp);
err_destroy_ports:
	kfree(dp->ports);
err_destroy_stats:
	free_percpu(dp->stats_percpu);
err_destroy_table:
	ovs_flow_tbl_destroy(&dp->table);
err_destroy_dp:
	kfree(dp);
err_destroy_reply:
	kfree_skb(reply);
err:
	return err;
}
1912
/* Called with ovs_mutex.
 *
 * Tear down a datapath: detach every vport (OVSP_LOCAL last), unlink it
 * from the per-netns list, flush the flow tables, and schedule the
 * remaining memory for RCU destruction.
 */
static void __dp_destroy(struct datapath *dp)
{
	struct flow_table *table = &dp->table;
	int i;

	if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
		tc_skb_ext_tc_disable();

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
		struct vport *vport;
		struct hlist_node *n;

		hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
			if (vport->port_no != OVSP_LOCAL)
				ovs_dp_detach_port(vport);
	}

	list_del_rcu(&dp->list_node);

	/* OVSP_LOCAL is datapath internal port. We need to make sure that
	 * all ports in datapath are destroyed first before freeing datapath.
	 */
	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));

	/* Flush sw_flow in the tables. RCU cb only releases resource
	 * such as dp, ports and tables. That may avoid some issues
	 * such as RCU usage warning.
	 */
	table_instance_flow_flush(table, ovsl_dereference(table->ti),
				  ovsl_dereference(table->ufid_ti));

	/* RCU destroy the ports, meters and flow tables. */
	call_rcu(&dp->rcu, destroy_dp_rcu);
}
1947}
1948
/* Genetlink OVS_DP_CMD_DEL handler: destroy a datapath and notify
 * listeners.  The reply is filled before __dp_destroy() so it can still
 * read the datapath's state.
 */
static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	reply = ovs_dp_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
			     info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		goto err_unlock_free;

	/* Cannot fail: reply skb was sized for a full datapath message. */
	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_DEL);
	BUG_ON(err < 0);

	__dp_destroy(dp);
	ovs_unlock();

	ovs_notify(&dp_datapath_genl_family, reply, info);

	return 0;

err_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
1982
/* Genetlink OVS_DP_CMD_SET handler: apply attribute changes to an
 * existing datapath (via ovs_dp_change()) and broadcast the new state.
 */
static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	reply = ovs_dp_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
			     info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		goto err_unlock_free;

	err = ovs_dp_change(dp, info->attrs);
	if (err)
		goto err_unlock_free;

	/* Cannot fail: reply skb was sized for a full datapath message. */
	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_SET);
	BUG_ON(err < 0);

	ovs_unlock();
	ovs_notify(&dp_datapath_genl_family, reply, info);

	return 0;

err_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
2018
/* Genetlink OVS_DP_CMD_GET handler: look up one datapath and send its
 * attributes back to the requesting socket via genlmsg_reply().
 */
static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	reply = ovs_dp_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
			     info->attrs);
	if (IS_ERR(dp)) {
		err = PTR_ERR(dp);
		goto err_unlock_free;
	}
	/* Cannot fail: reply skb was sized for a full datapath message. */
	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_GET);
	BUG_ON(err < 0);
	ovs_unlock();

	return genlmsg_reply(reply, info);

err_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
2048
/* Genetlink dump callback for datapaths: walk the per-netns dp list,
 * skipping the first cb->args[0] entries already emitted by earlier
 * dump passes, and record the resume position back into cb->args[0].
 */
static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
	struct datapath *dp;
	int skip = cb->args[0];
	int i = 0;

	ovs_lock();
	list_for_each_entry(dp, &ovs_net->dps, list_node) {
		/* Stop when the skb fills up; resume from here next call. */
		if (i >= skip &&
		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 OVS_DP_CMD_GET) < 0)
			break;
		i++;
	}
	ovs_unlock();

	cb->args[0] = i;

	return skb->len;
}
2071
0c200ef9
PS
/* Netlink attribute validation policy for datapath commands. */
static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
	/* Cache size is bounded so the per-CPU allocation cannot exceed
	 * the minimum per-CPU chunk size.
	 */
	[OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0,
		PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
	[OVS_DP_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
};
2080
/* Command table for the datapath genetlink family.  Mutating commands
 * require CAP_NET_ADMIN in the target namespace; GET is unprivileged.
 */
static const struct genl_small_ops dp_datapath_genl_ops[] = {
	{ .cmd = OVS_DP_CMD_NEW,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_dp_cmd_new
	},
	{ .cmd = OVS_DP_CMD_DEL,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_dp_cmd_del
	},
	{ .cmd = OVS_DP_CMD_GET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0, /* OK for unprivileged users. */
	  .doit = ovs_dp_cmd_get,
	  .dumpit = ovs_dp_cmd_dump
	},
	{ .cmd = OVS_DP_CMD_SET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_dp_cmd_set,
	},
};
2104
/* Genetlink family definition for datapath management (netns-aware,
 * parallel_ops so handlers rely on ovs_lock rather than genl_mutex).
 */
static struct genl_family dp_datapath_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_DATAPATH_FAMILY,
	.version = OVS_DATAPATH_VERSION,
	.maxattr = OVS_DP_ATTR_MAX,
	.policy = datapath_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_datapath_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
	.resv_start_op = OVS_DP_CMD_SET + 1,
	.mcgrps = &ovs_dp_datapath_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};
2120
/* Called with ovs_mutex or RCU read lock.
 *
 * Serialize one vport into @skb as a genetlink message: port number,
 * type, name, ifindex, optional peer-netns id, stats, upcall info and
 * vport options.  Returns 0 or -EMSGSIZE; on failure the partially
 * written message is cancelled.
 */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
				   struct net *net, u32 portid, u32 seq,
				   u32 flags, u8 cmd, gfp_t gfp)
{
	struct ovs_header *ovs_header;
	struct ovs_vport_stats vport_stats;
	struct net *net_vport;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = get_dpifindex(vport->dp);

	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
	    nla_put_string(skb, OVS_VPORT_ATTR_NAME,
			   ovs_vport_name(vport)) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
		goto nla_put_failure;

	/* The device's netns is read under RCU; if it differs from the
	 * requester's, report a peer-netns id for it.
	 */
	rcu_read_lock();
	net_vport = dev_net_rcu(vport->dev);
	if (!net_eq(net, net_vport)) {
		int id = peernet2id_alloc(net, net_vport, GFP_ATOMIC);

		if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
			goto nla_put_failure_unlock;
	}
	rcu_read_unlock();

	ovs_vport_get_stats(vport, &vport_stats);
	if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
			  sizeof(struct ovs_vport_stats), &vport_stats,
			  OVS_VPORT_ATTR_PAD))
		goto nla_put_failure;

	if (ovs_vport_get_upcall_stats(vport, skb))
		goto nla_put_failure;

	if (ovs_vport_get_upcall_portids(vport, skb))
		goto nla_put_failure;

	err = ovs_vport_get_options(vport, skb);
	if (err == -EMSGSIZE)
		goto error;

	genlmsg_end(skb, ovs_header);
	return 0;

nla_put_failure_unlock:
	rcu_read_unlock();
nla_put_failure:
	err = -EMSGSIZE;
error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}
2182
6093ae9a
JR
/* Allocate a default-sized netlink skb for a vport reply/notification.
 * Returns NULL on allocation failure.
 */
static struct sk_buff *ovs_vport_cmd_alloc_info(void)
{
	return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
}
2187
/* Called with ovs_mutex, only via ovs_dp_notify_wq().
 *
 * Build a complete vport notification message.  Returns the skb or an
 * ERR_PTR on allocation failure; fill failure is a bug (the skb is
 * sized for a full message), hence the BUG_ON.
 */
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
					 u32 portid, u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
					 GFP_KERNEL);
	BUG_ON(retval < 0);

	return skb;
}
2205
/* Called with ovs_mutex or RCU read lock.
 *
 * Resolve a vport from netlink attributes: by name (optionally cross-
 * checked against the header's dp_ifindex) or by port number within the
 * datapath identified by the header.  Returns the vport or an ERR_PTR:
 * -EOPNOTSUPP if an ifindex was supplied, -ENODEV when not found,
 * -EFBIG for an out-of-range port number, -EINVAL if neither selector
 * attribute is present.
 */
static struct vport *lookup_vport(struct net *net,
				  const struct ovs_header *ovs_header,
				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
{
	struct datapath *dp;
	struct vport *vport;

	if (a[OVS_VPORT_ATTR_IFINDEX])
		return ERR_PTR(-EOPNOTSUPP);
	if (a[OVS_VPORT_ATTR_NAME]) {
		vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
		if (!vport)
			return ERR_PTR(-ENODEV);
		/* A nonzero dp_ifindex must match the vport's datapath. */
		if (ovs_header->dp_ifindex &&
		    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
			return ERR_PTR(-ENODEV);
		return vport;
	} else if (a[OVS_VPORT_ATTR_PORT_NO]) {
		u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);

		if (port_no >= DP_MAX_PORTS)
			return ERR_PTR(-EFBIG);

		dp = get_dp(net, ovs_header->dp_ifindex);
		if (!dp)
			return ERR_PTR(-ENODEV);

		vport = ovs_vport_ovsl_rcu(dp, port_no);
		if (!vport)
			return ERR_PTR(-ENODEV);
		return vport;
	} else
		return ERR_PTR(-EINVAL);

}
2242
6b660c41 2243static unsigned int ovs_get_max_headroom(struct datapath *dp)
3a927bc7 2244{
6b660c41 2245 unsigned int dev_headroom, max_headroom = 0;
3a927bc7
PA
2246 struct net_device *dev;
2247 struct vport *vport;
2248 int i;
2249
2250 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
53742e69 2251 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
cf3266ad 2252 lockdep_ovsl_is_held()) {
3a927bc7
PA
2253 dev = vport->dev;
2254 dev_headroom = netdev_get_fwd_headroom(dev);
2255 if (dev_headroom > max_headroom)
2256 max_headroom = dev_headroom;
2257 }
2258 }
2259
6b660c41
TY
2260 return max_headroom;
2261}
2262
/* Called with ovs_mutex */
/* Record @new_headroom on the datapath and propagate it as the rx
 * headroom of every vport's underlying device.
 */
static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
{
	struct vport *vport;
	int i;

	dp->max_headroom = new_headroom;
	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
					 lockdep_ovsl_is_held())
			netdev_set_rx_headroom(vport->dev, new_headroom);
	}
}
2276
ccb1352e
JG
/* Genetlink OVS_VPORT_CMD_NEW handler: create a vport on a datapath.
 *
 * Requires NAME, TYPE and UPCALL_PID attributes.  A requested IFINDEX
 * is only honoured for internal ports.  If no port number is given the
 * first free one (starting at 1) is used.  new_vport() may return
 * -EAGAIN (e.g. after dropping ovs_lock internally), in which case the
 * datapath is re-looked-up and the whole attempt restarted.
 */
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = genl_info_userhdr(info);
	struct vport_parms parms;
	struct sk_buff *reply;
	struct vport *vport;
	struct datapath *dp;
	unsigned int new_headroom;
	u32 port_no;
	int err;

	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
	    !a[OVS_VPORT_ATTR_UPCALL_PID])
		return -EINVAL;

	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);

	/* Explicit ifindex is only supported for internal devices. */
	if (a[OVS_VPORT_ATTR_IFINDEX] && parms.type != OVS_VPORT_TYPE_INTERNAL)
		return -EOPNOTSUPP;

	port_no = nla_get_u32_default(a[OVS_VPORT_ATTR_PORT_NO], 0);
	if (port_no >= DP_MAX_PORTS)
		return -EFBIG;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
restart:
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto exit_unlock_free;

	if (port_no) {
		/* Caller picked a port number; it must be free. */
		vport = ovs_vport_ovsl(dp, port_no);
		err = -EBUSY;
		if (vport)
			goto exit_unlock_free;
	} else {
		/* Auto-assign: first unused port number >= 1. */
		for (port_no = 1; ; port_no++) {
			if (port_no >= DP_MAX_PORTS) {
				err = -EFBIG;
				goto exit_unlock_free;
			}
			vport = ovs_vport_ovsl(dp, port_no);
			if (!vport)
				break;
		}
	}

	parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
	parms.dp = dp;
	parms.port_no = port_no;
	parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
	parms.desired_ifindex = nla_get_s32_default(a[OVS_VPORT_ATTR_IFINDEX],
						    0);

	vport = new_vport(&parms);
	err = PTR_ERR(vport);
	if (IS_ERR(vport)) {
		if (err == -EAGAIN)
			goto restart;
		goto exit_unlock_free;
	}

	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_NEW, GFP_KERNEL);

	/* The new device may raise the datapath-wide headroom; otherwise
	 * it inherits the current maximum.
	 */
	new_headroom = netdev_get_fwd_headroom(vport->dev);

	if (new_headroom > dp->max_headroom)
		ovs_update_headroom(dp, new_headroom);
	else
		netdev_set_rx_headroom(vport->dev, dp->max_headroom);

	BUG_ON(err < 0);
	ovs_unlock();

	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
2368
/* Genetlink OVS_VPORT_CMD_SET handler: update an existing vport's
 * options and/or upcall portids.  The vport type itself cannot be
 * changed; a TYPE attribute must match the current type.
 */
static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;

	if (a[OVS_VPORT_ATTR_TYPE] &&
	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
		err = -EINVAL;
		goto exit_unlock_free;
	}

	if (a[OVS_VPORT_ATTR_OPTIONS]) {
		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
		if (err)
			goto exit_unlock_free;
	}


	if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
		struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];

		err = ovs_vport_set_upcall_portids(vport, ids);
		if (err)
			goto exit_unlock_free;
	}

	/* Cannot fail: reply skb was sized for a full vport message. */
	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_SET, GFP_KERNEL);
	BUG_ON(err < 0);

	ovs_unlock();
	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
2421
2422static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2423{
6b660c41 2424 bool update_headroom = false;
ccb1352e
JG
2425 struct nlattr **a = info->attrs;
2426 struct sk_buff *reply;
3a927bc7 2427 struct datapath *dp;
ccb1352e 2428 struct vport *vport;
6b660c41 2429 unsigned int new_headroom;
ccb1352e
JG
2430 int err;
2431
6093ae9a
JR
2432 reply = ovs_vport_cmd_alloc_info();
2433 if (!reply)
2434 return -ENOMEM;
2435
8e4e1713 2436 ovs_lock();
bffcc688 2437 vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
ccb1352e
JG
2438 err = PTR_ERR(vport);
2439 if (IS_ERR(vport))
6093ae9a 2440 goto exit_unlock_free;
ccb1352e
JG
2441
2442 if (vport->port_no == OVSP_LOCAL) {
2443 err = -EINVAL;
6093ae9a 2444 goto exit_unlock_free;
ccb1352e
JG
2445 }
2446
9354d452
JB
2447 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2448 info->snd_portid, info->snd_seq, 0,
d4e4fdf9 2449 OVS_VPORT_CMD_DEL, GFP_KERNEL);
6093ae9a 2450 BUG_ON(err < 0);
3a927bc7
PA
2451
2452 /* the vport deletion may trigger dp headroom update */
2453 dp = vport->dp;
2454 if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
6b660c41
TY
2455 update_headroom = true;
2456
3a927bc7 2457 netdev_reset_rx_headroom(vport->dev);
ccb1352e 2458 ovs_dp_detach_port(vport);
3a927bc7 2459
6b660c41
TY
2460 if (update_headroom) {
2461 new_headroom = ovs_get_max_headroom(dp);
2462
2463 if (new_headroom < dp->max_headroom)
2464 ovs_update_headroom(dp, new_headroom);
2465 }
6093ae9a 2466 ovs_unlock();
ccb1352e 2467
2a94fe48 2468 ovs_notify(&dp_vport_genl_family, reply, info);
6093ae9a 2469 return 0;
ccb1352e 2470
6093ae9a 2471exit_unlock_free:
8e4e1713 2472 ovs_unlock();
6093ae9a 2473 kfree_skb(reply);
ccb1352e
JG
2474 return err;
2475}
2476
/* Genetlink OVS_VPORT_CMD_GET handler: look up a vport under the RCU
 * read lock (no ovs_mutex needed for a read) and reply with its
 * attributes; the fill uses GFP_ATOMIC accordingly.
 */
static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = genl_info_userhdr(info);
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	rcu_read_lock();
	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;
	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_GET, GFP_ATOMIC);
	BUG_ON(err < 0);
	rcu_read_unlock();

	return genlmsg_reply(reply, info);

exit_unlock_free:
	rcu_read_unlock();
	kfree_skb(reply);
	return err;
}
2507
/* Genetlink dump callback for vports: iterate the datapath's port hash
 * table under RCU.  cb->args[0]/args[1] hold the (bucket, index-within-
 * bucket) resume position across dump passes.
 */
static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct datapath *dp;
	int bucket = cb->args[0], skip = cb->args[1];
	int i, j = 0;

	rcu_read_lock();
	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		rcu_read_unlock();
		return -ENODEV;
	}
	for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
		struct vport *vport;

		j = 0;
		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
			/* Stop when the skb fills; position is saved below. */
			if (j >= skip &&
			    ovs_vport_cmd_fill_info(vport, skb,
						    sock_net(skb->sk),
						    NETLINK_CB(cb->skb).portid,
						    cb->nlh->nlmsg_seq,
						    NLM_F_MULTI,
						    OVS_VPORT_CMD_GET,
						    GFP_ATOMIC) < 0)
				goto out;

			j++;
		}
		/* skip only applies within the bucket we resumed in. */
		skip = 0;
	}
out:
	rcu_read_unlock();

	cb->args[0] = i;
	cb->args[1] = j;

	return skb->len;
}
2548
eac87c41
EC
/* Periodic (self-rescheduling) work item: rebalance the flow-mask
 * usage counters of every datapath in this netns, then re-arm itself
 * to run again after DP_MASKS_REBALANCE_INTERVAL ms.
 */
static void ovs_dp_masks_rebalance(struct work_struct *work)
{
	struct ovs_net *ovs_net = container_of(work, struct ovs_net,
					       masks_rebalance.work);
	struct datapath *dp;

	ovs_lock();

	list_for_each_entry(dp, &ovs_net->dps, list_node)
		ovs_flow_masks_rebalance(&dp->table);

	ovs_unlock();

	schedule_delayed_work(&ovs_net->masks_rebalance,
			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
}
2565
0c200ef9
PS
/* Netlink attribute validation policy for vport commands. */
static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
	/* UPCALL_PID is a variable-length array of u32 portids. */
	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
	[OVS_VPORT_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
	[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
	[OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED },
};
2577
/* Command table for the vport genetlink family.  Mutating commands
 * require CAP_NET_ADMIN in the target namespace; GET is unprivileged.
 */
static const struct genl_small_ops dp_vport_genl_ops[] = {
	{ .cmd = OVS_VPORT_CMD_NEW,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_new
	},
	{ .cmd = OVS_VPORT_CMD_DEL,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_del
	},
	{ .cmd = OVS_VPORT_CMD_GET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0, /* OK for unprivileged users. */
	  .doit = ovs_vport_cmd_get,
	  .dumpit = ovs_vport_cmd_dump
	},
	{ .cmd = OVS_VPORT_CMD_SET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_set,
	},
};
2601
/* Genetlink family definition for vport management.  Non-static: it is
 * referenced from other openvswitch files (e.g. for notifications).
 */
struct genl_family dp_vport_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_VPORT_FAMILY,
	.version = OVS_VPORT_VERSION,
	.maxattr = OVS_VPORT_ATTR_MAX,
	.policy = vport_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_vport_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_vport_genl_ops),
	.resv_start_op = OVS_VPORT_CMD_SET + 1,
	.mcgrps = &ovs_dp_vport_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};
2617
0c200ef9
PS
/* All genetlink families registered by this module, in registration
 * order (dp_register_genl()/dp_unregister_genl() walk this array).
 */
static struct genl_family * const dp_genl_families[] = {
	&dp_datapath_genl_family,
	&dp_vport_genl_family,
	&dp_flow_genl_family,
	&dp_packet_genl_family,
	&dp_meter_genl_family,
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
	&dp_ct_limit_genl_family,
#endif
};
2628
2629static void dp_unregister_genl(int n_families)
2630{
2631 int i;
2632
2633 for (i = 0; i < n_families; i++)
0c200ef9 2634 genl_unregister_family(dp_genl_families[i]);
ccb1352e
JG
2635}
2636
/* Register every genetlink family in dp_genl_families[].  On failure,
 * unregister the i families that did register and return the error.
 */
static int __init dp_register_genl(void)
{
	int err;
	int i;

	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {

		err = genl_register_family(dp_genl_families[i]);
		if (err)
			goto error;
	}

	return 0;

error:
	dp_unregister_genl(i);
	return err;
}
2655
46df7b81
PS
/* Per-netns init: set up the datapath list and work items, initialize
 * conntrack integration, and kick off the periodic masks-rebalance
 * work.  Returns 0 or the ovs_ct_init() error.
 */
static int __net_init ovs_init_net(struct net *net)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
	int err;

	INIT_LIST_HEAD(&ovs_net->dps);
	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
	INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);

	err = ovs_ct_init(net);
	if (err)
		return err;

	schedule_delayed_work(&ovs_net->masks_rebalance,
			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
	return 0;
}
2673
7b4577a9
PS
/* Collect, onto @head, every internal vport registered in @net whose
 * underlying device lives in the exiting namespace @dnet.  Helper for
 * ovs_exit_net(); called with ovs_mutex held (list walk is unlocked
 * otherwise).
 */
static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
					    struct list_head *head)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
	struct datapath *dp;

	list_for_each_entry(dp, &ovs_net->dps, list_node) {
		int i;

		for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
			struct vport *vport;

			hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
				if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
					continue;

				if (dev_net(vport->dev) == dnet)
					list_add(&vport->detach_list, head);
			}
		}
	}
}
2696
/* Per-netns exit: tear down conntrack state and all datapaths in the
 * exiting namespace, then detach any internal vports in OTHER
 * namespaces whose device belonged to this one, and finally cancel the
 * namespace's deferred work.  Work cancellation happens after
 * ovs_unlock() since the work items themselves take ovs_lock.
 */
static void __net_exit ovs_exit_net(struct net *dnet)
{
	struct datapath *dp, *dp_next;
	struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
	struct vport *vport, *vport_next;
	struct net *net;
	LIST_HEAD(head);

	ovs_lock();

	ovs_ct_exit(dnet);

	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
		__dp_destroy(dp);

	/* net_rwsem protects the for_each_net() iteration. */
	down_read(&net_rwsem);
	for_each_net(net)
		list_vports_from_net(net, dnet, &head);
	up_read(&net_rwsem);

	/* Detach all vports from given namespace. */
	list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
		list_del(&vport->detach_list);
		ovs_dp_detach_port(vport);
	}

	ovs_unlock();

	cancel_delayed_work_sync(&ovs_net->masks_rebalance);
	cancel_work_sync(&ovs_net->dp_notify_work);
}
2728
/* Per-network-namespace lifecycle hooks; the pernet core allocates
 * sizeof(struct ovs_net) of private storage keyed by ovs_net_id.
 */
static struct pernet_operations ovs_net_ops = {
	.init = ovs_init_net,
	.exit = ovs_exit_net,
	.id = &ovs_net_id,
	.size = sizeof(struct ovs_net),
};
2735
/* Human-readable names for OVS skb drop reasons, indexed with the
 * subsystem bits masked off; registered with the drop-reason core in
 * dp_init() via drop_reason_list_ovs below.
 */
static const char * const ovs_drop_reasons[] = {
#define S(x) [(x) & ~SKB_DROP_REASON_SUBSYS_MASK] = (#x),
	OVS_DROP_REASONS(S)
#undef S
};

static struct drop_reason_list drop_reason_list_ovs = {
	.reasons = ovs_drop_reasons,
	.n_reasons = ARRAY_SIZE(ovs_drop_reasons),
};
2746
7b4ac12c
SAS
/* Allocate the module-wide per-CPU scratch storage and initialize each
 * CPU's local lock.  Returns 0 or -ENOMEM.
 */
static int __init ovs_alloc_percpu_storage(void)
{
	unsigned int cpu;

	ovs_pcpu_storage = alloc_percpu(*ovs_pcpu_storage);
	if (!ovs_pcpu_storage)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct ovs_pcpu_storage *ovs_pcpu;

		ovs_pcpu = per_cpu_ptr(ovs_pcpu_storage, cpu);
		local_lock_init(&ovs_pcpu->bh_lock);
	}
	return 0;
}
2763
/* Release the per-CPU storage allocated by ovs_alloc_percpu_storage().
 * free_percpu(NULL) is a no-op, so this is safe on the error path
 * before allocation succeeded.
 */
static void ovs_free_percpu_storage(void)
{
	free_percpu(ovs_pcpu_storage);
}
2768
ccb1352e
JG
/* Module init: bring up each subsystem in dependency order, unwinding
 * in exact reverse order on failure (note the error labels mirror the
 * registration sequence one-for-one).
 */
static int __init dp_init(void)
{
	int err;

	/* ovs_skb_cb must fit in skb->cb. */
	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) >
		     sizeof_field(struct sk_buff, cb));

	pr_info("Open vSwitch switching datapath\n");

	err = ovs_alloc_percpu_storage();
	if (err)
		goto error;

	err = ovs_internal_dev_rtnl_link_register();
	if (err)
		goto error;

	err = ovs_flow_init();
	if (err)
		goto error_unreg_rtnl_link;

	err = ovs_vport_init();
	if (err)
		goto error_flow_exit;

	err = register_pernet_device(&ovs_net_ops);
	if (err)
		goto error_vport_exit;

	err = register_netdevice_notifier(&ovs_dp_device_notifier);
	if (err)
		goto error_netns_exit;

	err = ovs_netdev_init();
	if (err)
		goto error_unreg_notifier;

	err = dp_register_genl();
	if (err < 0)
		goto error_unreg_netdev;

	drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH,
				     &drop_reason_list_ovs);

	return 0;

error_unreg_netdev:
	ovs_netdev_exit();
error_unreg_notifier:
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
error_netns_exit:
	unregister_pernet_device(&ovs_net_ops);
error_vport_exit:
	ovs_vport_exit();
error_flow_exit:
	ovs_flow_exit();
error_unreg_rtnl_link:
	ovs_internal_dev_rtnl_link_unregister();
error:
	ovs_free_percpu_storage();
	return err;
}
2831
/* Module exit: tear down in reverse of dp_init().  rcu_barrier() waits
 * for in-flight RCU callbacks (e.g. destroy_dp_rcu) before the vport
 * and flow caches they use are destroyed.
 */
static void dp_cleanup(void)
{
	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
	ovs_netdev_exit();
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
	unregister_pernet_device(&ovs_net_ops);
	drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH);
	rcu_barrier();
	ovs_vport_exit();
	ovs_flow_exit();
	ovs_internal_dev_rtnl_link_unregister();
	ovs_free_percpu_storage();
}
2845
2846module_init(dp_init);
2847module_exit(dp_cleanup);
2848
2849MODULE_DESCRIPTION("Open vSwitch switching datapath");
2850MODULE_LICENSE("GPL");
ed227099
TLSC
2851MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
2852MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
2853MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
2854MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
96fbc13d 2855MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
11efd5cb 2856MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);