Mirror of https://github.com/Ysurac/openmptcprouter.git (synced 2025-03-09 15:40:20 +00:00)

Commit 0a55523ea5: Update Linux 5.4 kernel and clean some files
Parent: 272324aa1a
26 changed files with 870 additions and 4059 deletions
(File diff suppressed because it is too large.)
@@ -1,53 +0,0 @@
From: Bui Quang Minh @ 2021-01-26 8:26 UTC (permalink / raw)
To: ast, daniel, davem, kuba, hawk, john.fastabend, andrii, kafai,
  songliubraving, yhs, kpsingh, jakub, lmb
Cc: netdev, bpf, linux-kernel, minhquangbui99

On 32-bit architectures, the result of sizeof() is a 32-bit integer, so
the expression becomes a multiplication of two 32-bit integers, which can
potentially lead to integer overflow. As a result, bpf_map_area_alloc()
allocates less memory than needed.

Fix this by casting one operand to u64.

Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
---
kernel/bpf/devmap.c | 4 ++--
net/core/sock_map.c | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)

Index: linux-5.4.147/kernel/bpf/devmap.c
===================================================================
--- linux-5.4.147.orig/kernel/bpf/devmap.c
+++ linux-5.4.147/kernel/bpf/devmap.c
@@ -94,7 +94,7 @@ static struct hlist_head *dev_map_create
int i;
struct hlist_head *hash;

- hash = bpf_map_area_alloc(entries * sizeof(*hash), numa_node);
+ hash = bpf_map_area_alloc((u64) entries * sizeof(*hash), numa_node);
if (hash != NULL)
for (i = 0; i < entries; i++)
INIT_HLIST_HEAD(&hash[i]);
@@ -159,7 +159,7 @@ static int dev_map_init_map(struct bpf_d

spin_lock_init(&dtab->index_lock);
} else {
- dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
+ dtab->netdev_map = bpf_map_area_alloc((u64) dtab->map.max_entries *
sizeof(struct bpf_dtab_netdev *),
dtab->map.numa_node);
if (!dtab->netdev_map)
Index: linux-5.4.147/net/core/sock_map.c
===================================================================
--- linux-5.4.147.orig/net/core/sock_map.c
+++ linux-5.4.147/net/core/sock_map.c
@@ -48,7 +48,7 @@ static struct bpf_map *sock_map_alloc(un
if (err)
goto free_stab;

- stab->sks = bpf_map_area_alloc(stab->map.max_entries *
+ stab->sks = bpf_map_area_alloc((u64) stab->map.max_entries *
sizeof(struct sock *),
stab->map.numa_node);
if (stab->sks)
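For illustration only (not part of the patch above), here is a minimal user-space sketch of the overflow class the (u64) cast avoids; the variable names are hypothetical stand-ins for the kernel's entries * sizeof(*hash) pattern:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical stand-ins: a user-controlled entry count and a
	 * per-entry size, both 32-bit, as they would be on a 32-bit build.
	 */
	uint32_t entries = 1U << 30;
	uint32_t elem_size = 8;

	uint32_t wrapped = entries * elem_size;             /* wraps to 0 */
	uint64_t widened = (uint64_t)entries * elem_size;   /* 8589934592 */

	printf("wrapped=%u widened=%llu\n", wrapped,
	       (unsigned long long)widened);
	return 0;
}

The point of the commit message is that on 32-bit kernels sizeof() yields a 32-bit size_t, so the product wraps modulo 2^32 before bpf_map_area_alloc() ever sees it; the cast forces the multiplication to be done in 64 bits.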
@@ -1,115 +0,0 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 13 Jun 2018 12:33:39 +0200
Subject: [PATCH] netfilter: nf_flow_table: fix offloaded connection timeout
 corner case

The full teardown of offloaded flows is deferred to a gc work item;
however, processing of packets by netfilter needs to happen immediately
after a teardown is requested, because the conntrack state needs to be
fixed up.

Since the IPS_OFFLOAD_BIT is still kept until the teardown is complete,
the netfilter conntrack gc can accidentally bump the timeout of a
connection where offload was just stopped, causing a conntrack entry
leak.

Fix this by moving the conntrack timeout bumping from the conntrack core
to nf_flow_offload and adding a check to prevent bogus timeout bumps.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---

--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1207,18 +1207,6 @@ static bool gc_worker_can_early_drop(con
return false;
}

-#define DAY (86400 * HZ)
-
-/* Set an arbitrary timeout large enough not to ever expire, this save
- * us a check for the IPS_OFFLOAD_BIT from the packet path via
- * nf_ct_is_expired().
- */
-static void nf_ct_offload_timeout(struct nf_conn *ct)
-{
- if (nf_ct_expires(ct) < DAY / 2)
- ct->timeout = nfct_time_stamp + DAY;
-}
-
static void gc_worker(struct work_struct *work)
{
unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION;
@@ -1250,11 +1238,9 @@ static void gc_worker(struct work_struct

tmp = nf_ct_tuplehash_to_ctrack(h);

scanned++;
- if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
- nf_ct_offload_timeout(tmp);
+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
continue;
- }

if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -198,10 +198,29 @@ static const struct rhashtable_params nf
.automatic_shrinking = true,
};

+#define DAY (86400 * HZ)
+
+/* Set an arbitrary timeout large enough not to ever expire, this save
+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
+ * nf_ct_is_expired().
+ */
+static void nf_ct_offload_timeout(struct flow_offload *flow)
+{
+ struct flow_offload_entry *entry;
+ struct nf_conn *ct;
+
+ entry = container_of(flow, struct flow_offload_entry, flow);
+ ct = entry->ct;
+
+ if (nf_ct_expires(ct) < DAY / 2)
+ ct->timeout = nfct_time_stamp + DAY;
+}
+
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
int err;

+ nf_ct_offload_timeout(flow);
flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;

err = rhashtable_insert_fast(&flow_table->rhashtable,
@@ -304,6 +323,7 @@ nf_flow_table_iterate(struct nf_flowtabl
rhashtable_walk_start(&hti);

while ((tuplehash = rhashtable_walk_next(&hti))) {
+
if (IS_ERR(tuplehash)) {
if (PTR_ERR(tuplehash) != -EAGAIN) {
err = PTR_ERR(tuplehash);
@@ -328,10 +348,17 @@ static void nf_flow_offload_gc_step(stru
{
struct nf_flowtable *flow_table = data;
struct flow_offload_entry *e;
+ bool teardown;

e = container_of(flow, struct flow_offload_entry, flow);
- if (nf_flow_has_expired(flow) || nf_ct_is_dying(e->ct) ||
-     (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN)))
+
+ teardown = flow->flags & (FLOW_OFFLOAD_DYING |
+                           FLOW_OFFLOAD_TEARDOWN);
+
+ if (!teardown)
+  nf_ct_offload_timeout(flow);
+
+ if (nf_flow_has_expired(flow) || teardown)
  flow_offload_del(flow_table, flow);
}
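Condensed for readability, the ordering this patch establishes in nf_flow_offload_gc_step() amounts to the following sketch; it reuses the names from the diff above and is not a standalone build:

/* Sketch of the gc-step logic after the patch: the offload timeout is
 * only refreshed while no teardown has been requested, so an entry whose
 * offload was just stopped can expire normally instead of leaking.
 */
static void gc_step_sketch(struct nf_flowtable *flow_table,
			   struct flow_offload *flow)
{
	bool teardown = flow->flags & (FLOW_OFFLOAD_DYING |
				       FLOW_OFFLOAD_TEARDOWN);

	if (!teardown)
		nf_ct_offload_timeout(flow);	/* bump ct->timeout */

	if (nf_flow_has_expired(flow) || teardown)
		flow_offload_del(flow_table, flow);
}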
(File diff suppressed because it is too large.)
@@ -0,0 +1,554 @@
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 11 Jan 2018 16:32:00 +0100
Subject: [PATCH] netfilter: nf_flow_table: add hardware offload support

This patch adds the infrastructure to offload flows to hardware, in case
the nic/switch comes with built-in flow table capabilities.

If the hardware comes with no hardware flow tables or they have
limitations in terms of features, the existing infrastructure falls back
to the software flow table implementation.

The software flow table garbage collector skips entries that reside in
the hardware, so the hardware will be responsible for releasing this
flow table entry too via flow_offload_dead().

Hardware configuration, either to add or to delete entries, is done from
the hardware offload workqueue, to ensure this is done from user context
given that we may sleep when grabbing the mdio mutex.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
create mode 100644 net/netfilter/nf_flow_table_hw.c

--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -922,6 +922,13 @@ struct devlink;
struct tlsdev_ops;


+struct flow_offload;
+
+enum flow_offload_type {
+ FLOW_OFFLOAD_ADD = 0,
+ FLOW_OFFLOAD_DEL,
+};
+
/*
* This structure defines the management hooks for network devices.
* The following hooks can be defined; unless noted otherwise, they are
@@ -1154,6 +1161,10 @@ struct tlsdev_ops;
* int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh,
*                           u16 flags);
*
+ * int (*ndo_flow_offload)(enum flow_offload_type type,
+ *                         struct flow_offload *flow);
+ * Adds/deletes flow entry to/from net device flowtable.
+ *
* int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier);
* Called to change device carrier. Soft-devices (like dummy, team, etc)
* which do not represent real hardware may define this to allow their
@@ -1401,6 +1412,8 @@ struct net_device_ops {
int (*ndo_bridge_dellink)(struct net_device *dev,
struct nlmsghdr *nlh,
u16 flags);
+ int (*ndo_flow_offload)(enum flow_offload_type type,
+                         struct flow_offload *flow);
int (*ndo_change_carrier)(struct net_device *dev,
bool new_carrier);
int (*ndo_get_phys_port_id)(struct net_device *dev,
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -21,11 +21,17 @@ struct nf_flowtable_type {
struct module *owner;
};

+enum nf_flowtable_flags {
+ NF_FLOWTABLE_F_HW = 0x1,
+};
+
struct nf_flowtable {
struct list_head list;
struct rhashtable rhashtable;
const struct nf_flowtable_type *type;
+ u32 flags;
struct delayed_work gc_work;
+ possible_net_t ft_net;
};

enum flow_offload_tuple_dir {
@@ -68,6 +74,7 @@ struct flow_offload_tuple_rhash {
#define FLOW_OFFLOAD_DNAT 0x2
#define FLOW_OFFLOAD_DYING 0x4
#define FLOW_OFFLOAD_TEARDOWN 0x8
+#define FLOW_OFFLOAD_HW 0x10

struct flow_offload {
struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
@@ -120,6 +127,22 @@ unsigned int nf_flow_offload_ip_hook(voi
unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);

+void nf_flow_offload_hw_add(struct net *net, struct flow_offload *flow,
+                            struct nf_conn *ct);
+void nf_flow_offload_hw_del(struct net *net, struct flow_offload *flow);
+
+struct nf_flow_table_hw {
+ struct module *owner;
+ void (*add)(struct net *net, struct flow_offload *flow,
+             struct nf_conn *ct);
+ void (*del)(struct net *net, struct flow_offload *flow);
+};
+
+int nf_flow_table_hw_register(const struct nf_flow_table_hw *offload);
+void nf_flow_table_hw_unregister(const struct nf_flow_table_hw *offload);
+
+extern struct work_struct nf_flow_offload_hw_work;
+
#define MODULE_ALIAS_NF_FLOWTABLE(family) \
MODULE_ALIAS("nf-flowtable-" __stringify(family))

--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1516,6 +1516,7 @@ enum nft_object_attributes {
* @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32)
* @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32)
* @NFTA_FLOWTABLE_HANDLE: object handle (NLA_U64)
+ * @NFTA_FLOWTABLE_FLAGS: flags (NLA_U32)
*/
enum nft_flowtable_attributes {
NFTA_FLOWTABLE_UNSPEC,
@@ -1525,6 +1526,7 @@ enum nft_flowtable_attributes {
NFTA_FLOWTABLE_USE,
NFTA_FLOWTABLE_HANDLE,
NFTA_FLOWTABLE_PAD,
+ NFTA_FLOWTABLE_FLAGS,
__NFTA_FLOWTABLE_MAX
};
#define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1)
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -711,6 +711,15 @@ config NF_FLOW_TABLE

To compile it as a module, choose M here.

+config NF_FLOW_TABLE_HW
+ tristate "Netfilter flow table hardware offload module"
+ depends on NF_FLOW_TABLE
+ help
+   This option adds hardware offload support for the flow table core
+   infrastructure.
+
+   To compile it as a module, choose M here.
+
config NETFILTER_XTABLES
tristate "Netfilter Xtables support (required for ip_tables)"
default m if NETFILTER_ADVANCED=n
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -123,6 +123,7 @@ obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_t
nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o

obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
+obj-$(CONFIG_NF_FLOW_TABLE_HW) += nf_flow_table_hw.o

# generic X tables
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -248,10 +248,16 @@ static inline bool nf_flow_has_expired(c
return nf_flow_timeout_delta(flow->timeout) <= 0;
}

+static inline bool nf_flow_in_hw(const struct flow_offload *flow)
+{
+ return flow->flags & FLOW_OFFLOAD_HW;
+}
+
static void flow_offload_del(struct nf_flowtable *flow_table,
struct flow_offload *flow)
{
struct flow_offload_entry *e;
+ struct net *net = read_pnet(&flow_table->ft_net);

rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
@@ -271,6 +277,9 @@ static void flow_offload_del(struct nf_f
if (!(flow->flags & FLOW_OFFLOAD_TEARDOWN))
flow_offload_fixup_ct_state(e->ct);

+ if (nf_flow_in_hw(flow))
+  nf_flow_offload_hw_del(net, flow);
+
flow_offload_free(flow);
}

@@ -490,10 +502,43 @@ int nf_flow_dnat_port(const struct flow_
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);

+static const struct nf_flow_table_hw __rcu *nf_flow_table_hw_hook __read_mostly;
+
+static int nf_flow_offload_hw_init(struct nf_flowtable *flow_table)
+{
+ const struct nf_flow_table_hw *offload;
+
+ if (!rcu_access_pointer(nf_flow_table_hw_hook))
+  request_module("nf-flow-table-hw");
+
+ rcu_read_lock();
+ offload = rcu_dereference(nf_flow_table_hw_hook);
+ if (!offload)
+  goto err_no_hw_offload;
+
+ if (!try_module_get(offload->owner))
+  goto err_no_hw_offload;
+
+ rcu_read_unlock();
+
+ return 0;
+
+err_no_hw_offload:
+ rcu_read_unlock();
+
+ return -EOPNOTSUPP;
+}
+
int nf_flow_table_init(struct nf_flowtable *flowtable)
{
int err;

+ if (flowtable->flags & NF_FLOWTABLE_F_HW) {
+  err = nf_flow_offload_hw_init(flowtable);
+  if (err)
+   return err;
+ }
+
INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);

err = rhashtable_init(&flowtable->rhashtable,
@@ -534,6 +579,8 @@ static void nf_flow_table_iterate_cleanu
{
nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
flush_delayed_work(&flowtable->gc_work);
+ if (flowtable->flags & NF_FLOWTABLE_F_HW)
+  flush_work(&nf_flow_offload_hw_work);
}

void nf_flow_table_cleanup(struct net_device *dev)
@@ -547,6 +594,26 @@ void nf_flow_table_cleanup(struct net_de
}
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);

+struct work_struct nf_flow_offload_hw_work;
+EXPORT_SYMBOL_GPL(nf_flow_offload_hw_work);
+
+/* Give the hardware workqueue the chance to remove entries from hardware.*/
+static void nf_flow_offload_hw_free(struct nf_flowtable *flowtable)
+{
+ const struct nf_flow_table_hw *offload;
+
+ flush_work(&nf_flow_offload_hw_work);
+
+ rcu_read_lock();
+ offload = rcu_dereference(nf_flow_table_hw_hook);
+ if (!offload) {
+  rcu_read_unlock();
+  return;
+ }
+ module_put(offload->owner);
+ rcu_read_unlock();
+}
+
void nf_flow_table_free(struct nf_flowtable *flow_table)
{
mutex_lock(&flowtable_lock);
@@ -556,9 +623,58 @@ void nf_flow_table_free(struct nf_flowta
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
rhashtable_destroy(&flow_table->rhashtable);
+ if (flow_table->flags & NF_FLOWTABLE_F_HW)
+  nf_flow_offload_hw_free(flow_table);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);

+/* Must be called from user context. */
+void nf_flow_offload_hw_add(struct net *net, struct flow_offload *flow,
+                            struct nf_conn *ct)
+{
+ const struct nf_flow_table_hw *offload;
+
+ rcu_read_lock();
+ offload = rcu_dereference(nf_flow_table_hw_hook);
+ if (offload)
+  offload->add(net, flow, ct);
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_hw_add);
+
+/* Must be called from user context. */
+void nf_flow_offload_hw_del(struct net *net, struct flow_offload *flow)
+{
+ const struct nf_flow_table_hw *offload;
+
+ rcu_read_lock();
+ offload = rcu_dereference(nf_flow_table_hw_hook);
+ if (offload)
+  offload->del(net, flow);
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_hw_del);
+
+int nf_flow_table_hw_register(const struct nf_flow_table_hw *offload)
+{
+ if (rcu_access_pointer(nf_flow_table_hw_hook))
+  return -EBUSY;
+
+ rcu_assign_pointer(nf_flow_table_hw_hook, offload);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_hw_register);
+
+void nf_flow_table_hw_unregister(const struct nf_flow_table_hw *offload)
+{
+ WARN_ON(rcu_access_pointer(nf_flow_table_hw_hook) != offload);
+ rcu_assign_pointer(nf_flow_table_hw_hook, NULL);
+
+ synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_hw_unregister);
+
static int nf_flow_table_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
--- /dev/null
+++ b/net/netfilter/nf_flow_table_hw.c
@@ -0,0 +1,169 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+
+static DEFINE_SPINLOCK(flow_offload_hw_pending_list_lock);
+static LIST_HEAD(flow_offload_hw_pending_list);
+
+static DEFINE_MUTEX(nf_flow_offload_hw_mutex);
+
+struct flow_offload_hw {
+ struct list_head list;
+ enum flow_offload_type type;
+ struct flow_offload *flow;
+ struct nf_conn *ct;
+ possible_net_t flow_hw_net;
+};
+
+static int do_flow_offload_hw(struct net *net, struct flow_offload *flow,
+                              int type)
+{
+ struct net_device *indev;
+ int ret, ifindex;
+
+ ifindex = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx;
+ indev = dev_get_by_index(net, ifindex);
+ if (WARN_ON(!indev))
+  return 0;
+
+ mutex_lock(&nf_flow_offload_hw_mutex);
+ ret = indev->netdev_ops->ndo_flow_offload(type, flow);
+ mutex_unlock(&nf_flow_offload_hw_mutex);
+
+ dev_put(indev);
+
+ return ret;
+}
+
+static void flow_offload_hw_work_add(struct flow_offload_hw *offload)
+{
+ struct net *net;
+ int ret;
+
+ if (nf_ct_is_dying(offload->ct))
+  return;
+
+ net = read_pnet(&offload->flow_hw_net);
+ ret = do_flow_offload_hw(net, offload->flow, FLOW_OFFLOAD_ADD);
+ if (ret >= 0)
+  offload->flow->flags |= FLOW_OFFLOAD_HW;
+}
+
+static void flow_offload_hw_work_del(struct flow_offload_hw *offload)
+{
+ struct net *net = read_pnet(&offload->flow_hw_net);
+
+ do_flow_offload_hw(net, offload->flow, FLOW_OFFLOAD_DEL);
+}
+
+static void flow_offload_hw_work(struct work_struct *work)
+{
+ struct flow_offload_hw *offload, *next;
+ LIST_HEAD(hw_offload_pending);
+
+ spin_lock_bh(&flow_offload_hw_pending_list_lock);
+ list_replace_init(&flow_offload_hw_pending_list, &hw_offload_pending);
+ spin_unlock_bh(&flow_offload_hw_pending_list_lock);
+
+ list_for_each_entry_safe(offload, next, &hw_offload_pending, list) {
+  switch (offload->type) {
+  case FLOW_OFFLOAD_ADD:
+   flow_offload_hw_work_add(offload);
+   break;
+  case FLOW_OFFLOAD_DEL:
+   flow_offload_hw_work_del(offload);
+   break;
+  }
+  if (offload->ct)
+   nf_conntrack_put(&offload->ct->ct_general);
+  list_del(&offload->list);
+  kfree(offload);
+ }
+}
+
+static void flow_offload_queue_work(struct flow_offload_hw *offload)
+{
+ spin_lock_bh(&flow_offload_hw_pending_list_lock);
+ list_add_tail(&offload->list, &flow_offload_hw_pending_list);
+ spin_unlock_bh(&flow_offload_hw_pending_list_lock);
+
+ schedule_work(&nf_flow_offload_hw_work);
+}
+
+static void flow_offload_hw_add(struct net *net, struct flow_offload *flow,
+                                struct nf_conn *ct)
+{
+ struct flow_offload_hw *offload;
+
+ offload = kmalloc(sizeof(struct flow_offload_hw), GFP_ATOMIC);
+ if (!offload)
+  return;
+
+ nf_conntrack_get(&ct->ct_general);
+ offload->type = FLOW_OFFLOAD_ADD;
+ offload->ct = ct;
+ offload->flow = flow;
+ write_pnet(&offload->flow_hw_net, net);
+
+ flow_offload_queue_work(offload);
+}
+
+static void flow_offload_hw_del(struct net *net, struct flow_offload *flow)
+{
+ struct flow_offload_hw *offload;
+
+ offload = kmalloc(sizeof(struct flow_offload_hw), GFP_ATOMIC);
+ if (!offload)
+  return;
+
+ offload->type = FLOW_OFFLOAD_DEL;
+ offload->ct = NULL;
+ offload->flow = flow;
+ write_pnet(&offload->flow_hw_net, net);
+
+ flow_offload_queue_work(offload);
+}
+
+static const struct nf_flow_table_hw flow_offload_hw = {
+ .add = flow_offload_hw_add,
+ .del = flow_offload_hw_del,
+ .owner = THIS_MODULE,
+};
+
+static int __init nf_flow_table_hw_module_init(void)
+{
+ INIT_WORK(&nf_flow_offload_hw_work, flow_offload_hw_work);
+ nf_flow_table_hw_register(&flow_offload_hw);
+
+ return 0;
+}
+
+static void __exit nf_flow_table_hw_module_exit(void)
+{
+ struct flow_offload_hw *offload, *next;
+ LIST_HEAD(hw_offload_pending);
+
+ nf_flow_table_hw_unregister(&flow_offload_hw);
+ cancel_work_sync(&nf_flow_offload_hw_work);
+
+ list_for_each_entry_safe(offload, next, &hw_offload_pending, list) {
+  if (offload->ct)
+   nf_conntrack_put(&offload->ct->ct_general);
+  list_del(&offload->list);
+  kfree(offload);
+ }
+}
+
+module_init(nf_flow_table_hw_module_init);
+module_exit(nf_flow_table_hw_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS("nf-flow-table-hw");
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5743,6 +5743,13 @@ static int nf_tables_flowtable_parse_hoo
if (err < 0)
return err;

+ for (i = 0; i < n; i++) {
+  if (flowtable->data.flags & NF_FLOWTABLE_F_HW &&
+      !dev_array[i]->netdev_ops->ndo_flow_offload) {
+   return -EOPNOTSUPP;
+  }
+ }
+
ops = kcalloc(n, sizeof(struct nf_hook_ops), GFP_KERNEL);
if (!ops)
return -ENOMEM;
@@ -5873,10 +5880,19 @@ static int nf_tables_newflowtable(struct
}

flowtable->data.type = type;
+ write_pnet(&flowtable->data.ft_net, net);
+
err = type->init(&flowtable->data);
if (err < 0)
goto err3;

+ if (nla[NFTA_FLOWTABLE_FLAGS]) {
+  flowtable->data.flags =
+   ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS]));
+  if (flowtable->data.flags & ~NF_FLOWTABLE_F_HW)
+   goto err4;
+ }
+
err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
flowtable);
if (err < 0)
@@ -6002,7 +6018,8 @@ static int nf_tables_fill_flowtable_info
nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle),
-             NFTA_FLOWTABLE_PAD))
+             NFTA_FLOWTABLE_PAD) ||
+ nla_put_be32(skb, NFTA_FLOWTABLE_FLAGS, htonl(flowtable->data.flags)))
goto nla_put_failure;

nest = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK);
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -128,6 +128,9 @@ static void nft_flow_offload_eval(const
if (ret < 0)
goto err_flow_add;

+ if (flowtable->flags & NF_FLOWTABLE_F_HW)
+  nf_flow_offload_hw_add(nft_net(pkt), flow, ct);
+
dst_release(route.tuple[!dir].dst);
return;
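For context, a hypothetical driver-side sketch (not part of this commit) of how a NIC driver could wire up the new hook; foo_hw_flow_insert() and foo_hw_flow_remove() are made-up placeholders for the device-specific programming, while the enum and the ndo slot are the ones added by the patch:

static int foo_ndo_flow_offload(enum flow_offload_type type,
				struct flow_offload *flow)
{
	switch (type) {
	case FLOW_OFFLOAD_ADD:
		/* program both tuple directions into the hardware flow table */
		return foo_hw_flow_insert(flow);
	case FLOW_OFFLOAD_DEL:
		/* called from the nf_flow_offload_hw_work workqueue, so
		 * sleeping (e.g. for an mdio access) is allowed here
		 */
		return foo_hw_flow_remove(flow);
	}
	return -EOPNOTSUPP;
}

static const struct net_device_ops foo_netdev_ops = {
	.ndo_flow_offload	= foo_ndo_flow_offload,
	/* ... remaining ops ... */
};

A flowtable then opts in from user space by setting the new NFTA_FLOWTABLE_FLAGS attribute to NF_FLOWTABLE_F_HW; per the nf_tables_api.c hunk above, devices whose netdev_ops lack ndo_flow_offload are rejected with -EOPNOTSUPP.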
@@ -1,344 +0,0 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 26 Jul 2020 14:03:21 +0200
Subject: [PATCH] net: add support for threaded NAPI polling

For some drivers (especially 802.11 drivers), doing a lot of work in the NAPI
poll function does not perform well. Since NAPI poll is bound to the CPU it
was scheduled from, we can easily end up with a few very busy CPUs spending
most of their time in softirq/ksoftirqd and some idle ones.

Introduce threaded NAPI for such drivers based on a workqueue. The API is the
same except for using netif_threaded_napi_add instead of netif_napi_add.

In my tests with mt76 on MT7621 using threaded NAPI + a thread for tx scheduling
improves LAN->WLAN bridging throughput by 10-50%. Throughput without threaded
NAPI is wildly inconsistent, depending on the CPU that runs the tx scheduling
thread.

With threaded NAPI it seems stable and consistent (and higher than the best
results I got without it).

Based on a patch by Hillf Danton

Cc: Hillf Danton <hdanton@sina.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---

--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -340,6 +340,7 @@ struct napi_struct {
struct list_head dev_list;
struct hlist_node napi_hash_node;
unsigned int napi_id;
+ struct work_struct work;
};

enum {
@@ -350,6 +351,7 @@ enum {
NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */
NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
+ NAPI_STATE_THREADED, /* Use threaded NAPI */
};

enum {
@@ -360,6 +362,7 @@ enum {
NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED),
NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
+ NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
};

enum gro_result {
@@ -2101,6 +2104,7 @@ struct net_device {
struct lock_class_key addr_list_lock_key;
bool proto_down;
unsigned wol_enabled:1;
+ unsigned threaded:1;
};
#define to_net_dev(d) container_of(d, struct net_device, dev)

@@ -2281,6 +2285,26 @@ void netif_napi_add(struct net_device *d
int (*poll)(struct napi_struct *, int), int weight);

/**
+ * netif_threaded_napi_add - initialize a NAPI context
+ * @dev: network device
+ * @napi: NAPI context
+ * @poll: polling function
+ * @weight: default weight
+ *
+ * This variant of netif_napi_add() should be used from drivers using NAPI
+ * with CPU intensive poll functions.
+ * This will schedule polling from a high priority workqueue
+ */
+static inline void netif_threaded_napi_add(struct net_device *dev,
+                                           struct napi_struct *napi,
+                                           int (*poll)(struct napi_struct *, int),
+                                           int weight)
+{
+ set_bit(NAPI_STATE_THREADED, &napi->state);
+ netif_napi_add(dev, napi, poll, weight);
+}
+
+/**
* netif_tx_napi_add - initialize a NAPI context
* @dev: network device
* @napi: NAPI context
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -156,6 +156,7 @@ static DEFINE_SPINLOCK(offload_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
struct list_head ptype_all __read_mostly; /* Taps */
static struct list_head offload_base __read_mostly;
+static struct workqueue_struct *napi_workq __read_mostly;

static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
@@ -5931,6 +5932,11 @@ void __napi_schedule(struct napi_struct
{
unsigned long flags;

+ if (test_bit(NAPI_STATE_THREADED, &n->state)) {
+  queue_work(napi_workq, &n->work);
+  return;
+ }
+
local_irq_save(flags);
____napi_schedule(this_cpu_ptr(&softnet_data), n);
local_irq_restore(flags);
@@ -6246,9 +6256,89 @@ static void init_gro_hash(struct napi_st
napi->gro_bitmask = 0;
}

+static int __napi_poll(struct napi_struct *n, bool *repoll)
+{
+ int work, weight;
+
+ weight = n->weight;
+
+ /* This NAPI_STATE_SCHED test is for avoiding a race
+  * with netpoll's poll_napi(). Only the entity which
+  * obtains the lock and sees NAPI_STATE_SCHED set will
+  * actually make the ->poll() call. Therefore we avoid
+  * accidentally calling ->poll() when NAPI is not scheduled.
+  */
+ work = 0;
+ if (test_bit(NAPI_STATE_SCHED, &n->state)) {
+  work = n->poll(n, weight);
+  trace_napi_poll(n, work, weight);
+ }
+
+ WARN_ON_ONCE(work > weight);
+
+ if (likely(work < weight))
+  return work;
+
+ /* Drivers must not modify the NAPI state if they
+  * consume the entire weight. In such cases this code
+  * still "owns" the NAPI instance and therefore can
+  * move the instance around on the list at-will.
+  */
+ if (unlikely(napi_disable_pending(n))) {
+  napi_complete(n);
+  return work;
+ }
+
+ if (n->gro_bitmask) {
+  /* flush too old packets
+   * If HZ < 1000, flush all packets.
+   */
+  napi_gro_flush(n, HZ >= 1000);
+ }
+
+ gro_normal_list(n);
+
+ *repoll = true;
+
+ return work;
+}
+
+static void napi_workfn(struct work_struct *work)
+{
+ struct napi_struct *n = container_of(work, struct napi_struct, work);
+ void *have;
+
+ for (;;) {
+  bool repoll = false;
+
+  local_bh_disable();
+
+  have = netpoll_poll_lock(n);
+  __napi_poll(n, &repoll);
+  netpoll_poll_unlock(have);
+
+  local_bh_enable();
+
+  if (!repoll)
+   return;
+
+  if (!need_resched())
+   continue;
+
+  /*
+   * have to pay for the latency of task switch even if
+   * napi is scheduled
+   */
+  queue_work(napi_workq, work);
+  return;
+ }
+}
+
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
+ if (dev->threaded)
+  set_bit(NAPI_STATE_THREADED, &napi->state);
INIT_LIST_HEAD(&napi->poll_list);
hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
napi->timer.function = napi_watchdog;
@@ -6265,6 +6355,7 @@ void netif_napi_add(struct net_device *d
#ifdef CONFIG_NETPOLL
napi->poll_owner = -1;
#endif
+ INIT_WORK(&napi->work, napi_workfn);
set_bit(NAPI_STATE_SCHED, &napi->state);
set_bit(NAPI_STATE_NPSVC, &napi->state);
list_add_rcu(&napi->dev_list, &dev->napi_list);
@@ -6305,6 +6396,7 @@ static void flush_gro_hash(struct napi_s
void netif_napi_del(struct napi_struct *napi)
{
might_sleep();
+ cancel_work_sync(&napi->work);
if (napi_hash_del(napi))
synchronize_net();
list_del_init(&napi->dev_list);
@@ -6317,50 +6409,18 @@ EXPORT_SYMBOL(netif_napi_del);

static int napi_poll(struct napi_struct *n, struct list_head *repoll)
{
+ bool do_repoll = false;
void *have;
- int work, weight;
+ int work;

list_del_init(&n->poll_list);

have = netpoll_poll_lock(n);

- weight = n->weight;
-
- /* This NAPI_STATE_SCHED test is for avoiding a race
-  * with netpoll's poll_napi(). Only the entity which
-  * obtains the lock and sees NAPI_STATE_SCHED set will
-  * actually make the ->poll() call. Therefore we avoid
-  * accidentally calling ->poll() when NAPI is not scheduled.
-  */
- work = 0;
- if (test_bit(NAPI_STATE_SCHED, &n->state)) {
-  work = n->poll(n, weight);
-  trace_napi_poll(n, work, weight);
- }
-
- WARN_ON_ONCE(work > weight);
+ work = __napi_poll(n, &do_repoll);

- if (likely(work < weight))
-  goto out_unlock;
-
- /* Drivers must not modify the NAPI state if they
-  * consume the entire weight. In such cases this code
-  * still "owns" the NAPI instance and therefore can
-  * move the instance around on the list at-will.
-  */
- if (unlikely(napi_disable_pending(n))) {
-  napi_complete(n);
+ if (!do_repoll)
  goto out_unlock;
- }
-
- if (n->gro_bitmask) {
-  /* flush too old packets
-   * If HZ < 1000, flush all packets.
-   */
-  napi_gro_flush(n, HZ >= 1000);
- }
-
- gro_normal_list(n);

/* Some drivers may have called napi_schedule
 * prior to exhausting their budget.
@@ -10340,6 +10400,10 @@ static int __init net_dev_init(void)
sd->backlog.weight = weight_p;
}

+ napi_workq = alloc_workqueue("napi_workq", WQ_UNBOUND | WQ_HIGHPRI,
+                              WQ_UNBOUND_MAX_ACTIVE | WQ_SYSFS);
+ BUG_ON(!napi_workq);
+
dev_boot_phase = 0;

/* The loopback device is special if any other network devices
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -442,6 +442,52 @@ static ssize_t proto_down_store(struct d
}
NETDEVICE_SHOW_RW(proto_down, fmt_dec);

+static int change_napi_threaded(struct net_device *dev, unsigned long val)
+{
+ struct napi_struct *napi;
+
+ if (list_empty(&dev->napi_list))
+  return -EOPNOTSUPP;
+
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
+  if (val)
+   set_bit(NAPI_STATE_THREADED, &napi->state);
+  else
+   clear_bit(NAPI_STATE_THREADED, &napi->state);
+ }
+
+ return 0;
+}
+
+static ssize_t napi_threaded_store(struct device *dev,
+                                   struct device_attribute *attr,
+                                   const char *buf, size_t len)
+{
+ return netdev_store(dev, attr, buf, len, change_napi_threaded);
+}
+
+static ssize_t napi_threaded_show(struct device *dev,
+                                  struct device_attribute *attr,
+                                  char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+ struct napi_struct *napi;
+ bool enabled = false;
+
+ if (!rtnl_trylock())
+  return restart_syscall();
+
+ list_for_each_entry(napi, &netdev->napi_list, dev_list) {
+  if (test_bit(NAPI_STATE_THREADED, &napi->state))
+   enabled = true;
+ }
+
+ rtnl_unlock();
+
+ return sprintf(buf, fmt_dec, enabled);
+}
+static DEVICE_ATTR_RW(napi_threaded);
+
static ssize_t phys_port_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -532,6 +578,7 @@ static struct attribute *net_class_attrs
&dev_attr_flags.attr,
&dev_attr_tx_queue_len.attr,
&dev_attr_gro_flush_timeout.attr,
+ &dev_attr_napi_threaded.attr,
&dev_attr_phys_port_id.attr,
&dev_attr_phys_port_name.attr,
&dev_attr_phys_switch_id.attr,
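For context, a hypothetical driver-side sketch (not from this patch) of opting a device into threaded NAPI; foo_priv, foo_poll and foo_setup_rx are made-up names, while netif_threaded_napi_add() is the API the patch introduces with the same signature as netif_napi_add():

struct foo_priv {
	struct napi_struct rx_napi;
	/* ... device state ... */
};

static int foo_poll(struct napi_struct *napi, int budget)
{
	/* process up to 'budget' received packets, return the number handled */
	return 0;
}

static void foo_setup_rx(struct net_device *dev, struct foo_priv *priv)
{
	/* The THREADED state bit makes __napi_schedule() queue napi->work on
	 * the napi_workq workqueue instead of raising the RX softirq on the
	 * CPU that scheduled the poll.
	 */
	netif_threaded_napi_add(dev, &priv->rx_napi, foo_poll, NAPI_POLL_WEIGHT);
	napi_enable(&priv->rx_napi);
}

The net-sysfs.c hunk above also exposes a per-device napi_threaded attribute, so the mode can be toggled at runtime for all of a device's NAPI contexts without driver changes.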
@@ -1,57 +0,0 @@
From 63e4b45c82ed1bde979da7052229a4229ce9cabf Mon Sep 17 00:00:00 2001
From: Georgi Valkov <gvalkov@abv.bg>
Date: Tue, 1 Feb 2022 08:16:18 +0100
Subject: ipheth: fix EOVERFLOW in ipheth_rcvbulk_callback

When rx_buf is allocated we need to account for IPHETH_IP_ALIGN,
which reduces the usable size by 2 bytes. Otherwise we have 1512
bytes usable instead of 1514, and if we receive more than 1512
bytes, ipheth_rcvbulk_callback is called with status -EOVERFLOW,
after which the driver malfunctions and all communication stops.

Resolves ipheth 2-1:4.2: ipheth_rcvbulk_callback: urb status: -75

Fixes: f33d9e2b48a3 ("usbnet: ipheth: fix connectivity with iOS 14")
Signed-off-by: Georgi Valkov <gvalkov@abv.bg>
Tested-by: Jan Kiszka <jan.kiszka@siemens.com>
Link: https://lore.kernel.org/all/B60B8A4B-92A0-49B3-805D-809A2433B46C@abv.bg/
Link: https://lore.kernel.org/all/24851bd2769434a5fc24730dce8e8a984c5a4505.1643699778.git.jan.kiszka@siemens.com/
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
drivers/net/usb/ipheth.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c
index cd33955df0b65f..6a769df0b4213c 100644
--- a/drivers/net/usb/ipheth.c
+++ b/drivers/net/usb/ipheth.c
@@ -121,7 +121,7 @@ static int ipheth_alloc_urbs(struct ipheth_device *iphone)
if (tx_buf == NULL)
goto free_rx_urb;

- rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE,
+ rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN,
GFP_KERNEL, &rx_urb->transfer_dma);
if (rx_buf == NULL)
goto free_tx_buf;
@@ -146,7 +146,7 @@ error_nomem:

static void ipheth_free_urbs(struct ipheth_device *iphone)
{
- usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->rx_buf,
+ usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, iphone->rx_buf,
iphone->rx_urb->transfer_dma);
usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->tx_buf,
iphone->tx_urb->transfer_dma);
@@ -317,7 +317,7 @@ static int ipheth_rx_submit(struct ipheth_device *dev, gfp_t mem_flags)

usb_fill_bulk_urb(dev->rx_urb, udev,
usb_rcvbulkpipe(udev, dev->bulk_in),
- dev->rx_buf, IPHETH_BUF_SIZE,
+ dev->rx_buf, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN,
ipheth_rcvbulk_callback,
dev);
dev->rx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
--
cgit
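For illustration only (not part of the patch), the arithmetic behind the fix, assuming the driver's usual IPHETH_BUF_SIZE of 1514 and IPHETH_IP_ALIGN of 2; those defines are not shown in the diff above:

#include <stdio.h>

#define IPHETH_BUF_SIZE		1514	/* assumed full Ethernet frame size */
#define IPHETH_IP_ALIGN		2	/* assumed IP header alignment offset */

int main(void)
{
	/* Before the fix the URB buffer is IPHETH_BUF_SIZE bytes, but the
	 * frame is written IPHETH_IP_ALIGN bytes in, leaving only 1512
	 * usable; a 1513- or 1514-byte frame then completes with -EOVERFLOW.
	 */
	printf("usable before fix: %d\n", IPHETH_BUF_SIZE - IPHETH_IP_ALIGN);

	/* After the fix the allocation and the URB length both include the
	 * alignment, so the full 1514-byte frame fits again.
	 */
	printf("usable after fix:  %d\n",
	       (IPHETH_BUF_SIZE + IPHETH_IP_ALIGN) - IPHETH_IP_ALIGN);
	return 0;
}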