mirror of
https://github.com/Ysurac/openmptcprouter.git
synced 2025-02-15 04:42:02 +00:00
2043 lines
61 KiB
Diff
2043 lines
61 KiB
Diff
From 29913eae8451264716a71485652e9230508cfde6 Mon Sep 17 00:00:00 2001
|
|
From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
|
|
Date: Mon, 16 Sep 2024 05:52:07 +0000
|
|
Subject: [PATCH 08/28] mptcp: pm: send ACK on non stale subflows
|
|
|
|
If the subflow is considered "stale", it is better not to use it to
|
|
send an ACK carrying an ADD_ADDR or RM_ADDR. Another subflow, if any,
|
|
will then be selected.
|
|
|
|
Reviewed-by: Mat Martineau <martineau@kernel.org>
|
|
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
|
|
---
|
|
net/mptcp/pm_netlink.c | 14 +++++++++++---
|
|
1 file changed, 11 insertions(+), 3 deletions(-)
|
|
|
|
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
|
|
index 64fe0e7d87d7..fe34297ea6dc 100644
|
|
--- a/net/mptcp/pm_netlink.c
|
|
+++ b/net/mptcp/pm_netlink.c
|
|
@@ -781,7 +781,7 @@ bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk,
|
|
|
|
void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
|
|
{
|
|
- struct mptcp_subflow_context *subflow;
|
|
+ struct mptcp_subflow_context *subflow, *alt = NULL;
|
|
|
|
msk_owned_by_me(msk);
|
|
lockdep_assert_held(&msk->pm.lock);
|
|
@@ -792,10 +792,18 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
|
|
|
|
mptcp_for_each_subflow(msk, subflow) {
|
|
if (__mptcp_subflow_active(subflow)) {
|
|
- mptcp_pm_send_ack(msk, subflow, false, false);
|
|
- break;
|
|
+ if (!subflow->stale) {
|
|
+ mptcp_pm_send_ack(msk, subflow, false, false);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (!alt)
|
|
+ alt = subflow;
|
|
}
|
|
}
|
|
+
|
|
+ if (alt)
|
|
+ mptcp_pm_send_ack(msk, alt, false, false);
|
|
}
|
|
|
|
int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
|
|
--
|
|
2.46.0
|
|
|
|
From 2ef0370d529d8d17e63fb196ba097b684535b5c4 Mon Sep 17 00:00:00 2001
|
|
From: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Date: Mon, 16 Sep 2024 05:52:09 +0000
|
|
Subject: [PATCH 10/28] mptcp: implement mptcp_pm_connection_closed
|
|
|
|
The MPTCP path manager event handler mptcp_pm_connection_closed
|
|
interface has been added in the commit 1b1c7a0ef7f3 ("mptcp: Add path
|
|
manager interface") but it was an empty function from then on.
|
|
|
|
With such a name, it makes sense to invoke mptcp_event with the
|
|
MPTCP_EVENT_CLOSED event type from it. It also removes a bit of
|
|
duplicated code.
|
|
|
|
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
|
|
---
|
|
net/mptcp/pm.c | 3 +++
|
|
net/mptcp/protocol.c | 6 ++----
|
|
2 files changed, 5 insertions(+), 4 deletions(-)
|
|
|
|
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
|
|
index 620264c75dc2..16c336c51940 100644
|
|
--- a/net/mptcp/pm.c
|
|
+++ b/net/mptcp/pm.c
|
|
@@ -154,6 +154,9 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk)
|
|
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
|
|
{
|
|
pr_debug("msk=%p\n", msk);
|
|
+
|
|
+ if (msk->token)
|
|
+ mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
|
|
}
|
|
|
|
void mptcp_pm_subflow_established(struct mptcp_sock *msk)
|
|
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
|
|
index 833fb28d8936..7cc8d81ee605 100644
|
|
--- a/net/mptcp/protocol.c
|
|
+++ b/net/mptcp/protocol.c
|
|
@@ -3121,8 +3121,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
|
|
|
|
sock_hold(sk);
|
|
pr_debug("msk=%p state=%d\n", sk, sk->sk_state);
|
|
- if (msk->token)
|
|
- mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
|
|
+ mptcp_pm_connection_closed(msk);
|
|
|
|
if (sk->sk_state == TCP_CLOSE) {
|
|
__mptcp_destroy_sock(sk);
|
|
@@ -3188,8 +3187,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
|
|
mptcp_stop_rtx_timer(sk);
|
|
mptcp_stop_tout_timer(sk);
|
|
|
|
- if (msk->token)
|
|
- mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
|
|
+ mptcp_pm_connection_closed(msk);
|
|
|
|
/* msk->subflow is still intact, the following will not free the first
|
|
* subflow
|
|
--
|
|
2.46.0
|
|
|
|
From cc4cbde1802daaac692d1bc6f15fd470c51f987b Mon Sep 17 00:00:00 2001
|
|
From: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Date: Mon, 16 Sep 2024 05:52:12 +0000
|
|
Subject: [PATCH 13/28] mptcp: add sched_data helpers
|
|
|
|
Add a new helper mptcp_sched_data_set_contexts() to set the subflow
|
|
pointers array in struct mptcp_sched_data. Add a new helper
|
|
bpf_mptcp_subflow_ctx_by_pos() to get the subflow at a given position from the
|
|
contexts array in struct mptcp_sched_data. They will be invoked by
|
|
the BPF schedulers to export the subflow pointers to the BPF contexts.
|
|
|
|
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Reviewed-by: Mat Martineau <martineau@kernel.org>
|
|
---
|
|
net/mptcp/bpf.c | 14 ++++++++++++++
|
|
net/mptcp/protocol.h | 2 ++
|
|
net/mptcp/sched.c | 22 ++++++++++++++++++++++
|
|
3 files changed, 38 insertions(+)
|
|
|
|
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
|
|
index 8a16672b94e2..c3d62535eb0c 100644
|
|
--- a/net/mptcp/bpf.c
|
|
+++ b/net/mptcp/bpf.c
|
|
@@ -29,6 +29,20 @@ static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = {
|
|
.set = &bpf_mptcp_fmodret_ids,
|
|
};
|
|
|
|
+__diag_push();
|
|
+__diag_ignore_all("-Wmissing-prototypes",
|
|
+ "kfuncs which will be used in BPF programs");
|
|
+
|
|
+__bpf_kfunc struct mptcp_subflow_context *
|
|
+bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos)
|
|
+{
|
|
+ if (pos >= MPTCP_SUBFLOWS_MAX)
|
|
+ return NULL;
|
|
+ return data->contexts[pos];
|
|
+}
|
|
+
|
|
+__diag_pop();
|
|
+
|
|
static int __init bpf_mptcp_kfunc_init(void)
|
|
{
|
|
return register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set);
|
|
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
|
|
index bbbf200b0c94..a1d06e7e3544 100644
|
|
--- a/net/mptcp/protocol.h
|
|
+++ b/net/mptcp/protocol.h
|
|
@@ -719,6 +719,8 @@ void __mptcp_subflow_send_ack(struct sock *ssk);
|
|
void mptcp_subflow_reset(struct sock *ssk);
|
|
void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
|
|
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
|
|
+struct mptcp_subflow_context *
|
|
+bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos);
|
|
struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk);
|
|
bool __mptcp_close(struct sock *sk, long timeout);
|
|
void mptcp_cancel_work(struct sock *sk);
|
|
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
|
|
index 78ed508ebc1b..5257bc6c8cd6 100644
|
|
--- a/net/mptcp/sched.c
|
|
+++ b/net/mptcp/sched.c
|
|
@@ -143,6 +143,26 @@ void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
|
|
WRITE_ONCE(subflow->scheduled, scheduled);
|
|
}
|
|
|
|
+static void mptcp_sched_data_set_contexts(const struct mptcp_sock *msk,
|
|
+ struct mptcp_sched_data *data)
|
|
+{
|
|
+ struct mptcp_subflow_context *subflow;
|
|
+ int i = 0;
|
|
+
|
|
+ mptcp_for_each_subflow(msk, subflow) {
|
|
+ if (i == MPTCP_SUBFLOWS_MAX) {
|
|
+ pr_warn_once("too many subflows");
|
|
+ break;
|
|
+ }
|
|
+ mptcp_subflow_set_scheduled(subflow, false);
|
|
+ data->contexts[i++] = subflow;
|
|
+ }
|
|
+ data->subflows = i;
|
|
+
|
|
+ for (; i < MPTCP_SUBFLOWS_MAX; i++)
|
|
+ data->contexts[i] = NULL;
|
|
+}
|
|
+
|
|
int mptcp_sched_get_send(struct mptcp_sock *msk)
|
|
{
|
|
struct mptcp_subflow_context *subflow;
|
|
@@ -169,6 +189,7 @@ int mptcp_sched_get_send(struct mptcp_sock *msk)
|
|
data.reinject = false;
|
|
if (msk->sched == &mptcp_sched_default || !msk->sched)
|
|
return mptcp_sched_default_get_subflow(msk, &data);
|
|
+ mptcp_sched_data_set_contexts(msk, &data);
|
|
return msk->sched->get_subflow(msk, &data);
|
|
}
|
|
|
|
@@ -191,5 +212,6 @@ int mptcp_sched_get_retrans(struct mptcp_sock *msk)
|
|
data.reinject = true;
|
|
if (msk->sched == &mptcp_sched_default || !msk->sched)
|
|
return mptcp_sched_default_get_subflow(msk, &data);
|
|
+ mptcp_sched_data_set_contexts(msk, &data);
|
|
return msk->sched->get_subflow(msk, &data);
|
|
}
|
|
--
|
|
2.46.0
|
|
|
|
From a6f63a6b7b8076b59098b684577327a32bf0f5a8 Mon Sep 17 00:00:00 2001
|
|
From: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Date: Mon, 16 Sep 2024 05:52:13 +0000
|
|
Subject: [PATCH 14/28] bpf: Add bpf_mptcp_sched_ops
|
|
|
|
This patch implements a new struct bpf_struct_ops: bpf_mptcp_sched_ops.
|
|
Register and unregister the bpf scheduler in .reg and .unreg.
|
|
|
|
Add write access for the scheduled flag of struct mptcp_subflow_context
|
|
in .btf_struct_access.
|
|
|
|
This MPTCP BPF scheduler implementation is similar to BPF TCP CC. And
|
|
net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch.
|
|
|
|
Acked-by: Paolo Abeni <pabeni@redhat.com>
|
|
Reviewed-by: Mat Martineau <martineau@kernel.org>
|
|
Co-developed-by: Matthieu Baerts <matttbe@kernel.org>
|
|
Signed-off-by: Matthieu Baerts <matttbe@kernel.org>
|
|
Co-developed-by: Gregory Detal <gregory.detal@gmail.com>
|
|
Signed-off-by: Gregory Detal <gregory.detal@gmail.com>
|
|
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
|
|
---
|
|
net/mptcp/bpf.c | 181 +++++++++++++++++++++++++++++++++++++++++++++++-
|
|
1 file changed, 180 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
|
|
index c3d62535eb0c..89b69ab1cf8e 100644
|
|
--- a/net/mptcp/bpf.c
|
|
+++ b/net/mptcp/bpf.c
|
|
@@ -10,8 +10,180 @@
|
|
#define pr_fmt(fmt) "MPTCP: " fmt
|
|
|
|
#include <linux/bpf.h>
|
|
+#include <linux/bpf_verifier.h>
|
|
+#include <linux/btf.h>
|
|
+#include <linux/btf_ids.h>
|
|
+#include <net/bpf_sk_storage.h>
|
|
#include "protocol.h"
|
|
|
|
+#ifdef CONFIG_BPF_JIT
|
|
+static struct bpf_struct_ops bpf_mptcp_sched_ops;
|
|
+static const struct btf_type *mptcp_sock_type, *mptcp_subflow_type __read_mostly;
|
|
+static u32 mptcp_sock_id, mptcp_subflow_id;
|
|
+
|
|
+static const struct bpf_func_proto *
|
|
+bpf_mptcp_sched_get_func_proto(enum bpf_func_id func_id,
|
|
+ const struct bpf_prog *prog)
|
|
+{
|
|
+ switch (func_id) {
|
|
+ case BPF_FUNC_sk_storage_get:
|
|
+ return &bpf_sk_storage_get_proto;
|
|
+ case BPF_FUNC_sk_storage_delete:
|
|
+ return &bpf_sk_storage_delete_proto;
|
|
+ case BPF_FUNC_skc_to_tcp6_sock:
|
|
+ return &bpf_skc_to_tcp6_sock_proto;
|
|
+ case BPF_FUNC_skc_to_tcp_sock:
|
|
+ return &bpf_skc_to_tcp_sock_proto;
|
|
+ default:
|
|
+ return bpf_base_func_proto(func_id, prog);
|
|
+ }
|
|
+}
|
|
+
|
|
+static int bpf_mptcp_sched_btf_struct_access(struct bpf_verifier_log *log,
|
|
+ const struct bpf_reg_state *reg,
|
|
+ int off, int size)
|
|
+{
|
|
+ const struct btf_type *t;
|
|
+ size_t end;
|
|
+
|
|
+ t = btf_type_by_id(reg->btf, reg->btf_id);
|
|
+
|
|
+ if (t == mptcp_sock_type) {
|
|
+ switch (off) {
|
|
+ case offsetof(struct mptcp_sock, snd_burst):
|
|
+ end = offsetofend(struct mptcp_sock, snd_burst);
|
|
+ break;
|
|
+ default:
|
|
+ bpf_log(log, "no write support to mptcp_sock at off %d\n",
|
|
+ off);
|
|
+ return -EACCES;
|
|
+ }
|
|
+ } else if (t == mptcp_subflow_type) {
|
|
+ switch (off) {
|
|
+ case offsetof(struct mptcp_subflow_context, avg_pacing_rate):
|
|
+ end = offsetofend(struct mptcp_subflow_context, avg_pacing_rate);
|
|
+ break;
|
|
+ default:
|
|
+ bpf_log(log, "no write support to mptcp_subflow_context at off %d\n",
|
|
+ off);
|
|
+ return -EACCES;
|
|
+ }
|
|
+ } else {
|
|
+ bpf_log(log, "only access to mptcp sock or subflow is supported\n");
|
|
+ return -EACCES;
|
|
+ }
|
|
+
|
|
+ if (off + size > end) {
|
|
+ bpf_log(log, "access beyond %s at off %u size %u ended at %zu",
|
|
+ t == mptcp_sock_type ? "mptcp_sock" : "mptcp_subflow_context",
|
|
+ off, size, end);
|
|
+ return -EACCES;
|
|
+ }
|
|
+
|
|
+ return NOT_INIT;
|
|
+}
|
|
+
|
|
+static const struct bpf_verifier_ops bpf_mptcp_sched_verifier_ops = {
|
|
+ .get_func_proto = bpf_mptcp_sched_get_func_proto,
|
|
+ .is_valid_access = bpf_tracing_btf_ctx_access,
|
|
+ .btf_struct_access = bpf_mptcp_sched_btf_struct_access,
|
|
+};
|
|
+
|
|
+static int bpf_mptcp_sched_reg(void *kdata, struct bpf_link *link)
|
|
+{
|
|
+ return mptcp_register_scheduler(kdata);
|
|
+}
|
|
+
|
|
+static void bpf_mptcp_sched_unreg(void *kdata, struct bpf_link *link)
|
|
+{
|
|
+ mptcp_unregister_scheduler(kdata);
|
|
+}
|
|
+
|
|
+static int bpf_mptcp_sched_check_member(const struct btf_type *t,
|
|
+ const struct btf_member *member,
|
|
+ const struct bpf_prog *prog)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int bpf_mptcp_sched_init_member(const struct btf_type *t,
|
|
+ const struct btf_member *member,
|
|
+ void *kdata, const void *udata)
|
|
+{
|
|
+ const struct mptcp_sched_ops *usched;
|
|
+ struct mptcp_sched_ops *sched;
|
|
+ u32 moff;
|
|
+
|
|
+ usched = (const struct mptcp_sched_ops *)udata;
|
|
+ sched = (struct mptcp_sched_ops *)kdata;
|
|
+
|
|
+ moff = __btf_member_bit_offset(t, member) / 8;
|
|
+ switch (moff) {
|
|
+ case offsetof(struct mptcp_sched_ops, name):
|
|
+ if (bpf_obj_name_cpy(sched->name, usched->name,
|
|
+ sizeof(sched->name)) <= 0)
|
|
+ return -EINVAL;
|
|
+ if (mptcp_sched_find(usched->name))
|
|
+ return -EEXIST;
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int bpf_mptcp_sched_init(struct btf *btf)
|
|
+{
|
|
+ s32 type_id;
|
|
+
|
|
+ type_id = btf_find_by_name_kind(btf, "mptcp_sock",
|
|
+ BTF_KIND_STRUCT);
|
|
+ if (type_id < 0)
|
|
+ return -EINVAL;
|
|
+ mptcp_sock_id = type_id;
|
|
+ mptcp_sock_type = btf_type_by_id(btf, mptcp_sock_id);
|
|
+
|
|
+ type_id = btf_find_by_name_kind(btf, "mptcp_subflow_context",
|
|
+ BTF_KIND_STRUCT);
|
|
+ if (type_id < 0)
|
|
+ return -EINVAL;
|
|
+ mptcp_subflow_id = type_id;
|
|
+ mptcp_subflow_type = btf_type_by_id(btf, mptcp_subflow_id);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int __bpf_mptcp_sched_get_subflow(struct mptcp_sock *msk,
|
|
+ struct mptcp_sched_data *data)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void __bpf_mptcp_sched_init(struct mptcp_sock *msk)
|
|
+{
|
|
+}
|
|
+
|
|
+static void __bpf_mptcp_sched_release(struct mptcp_sock *msk)
|
|
+{
|
|
+}
|
|
+
|
|
+static struct mptcp_sched_ops __bpf_mptcp_sched_ops = {
|
|
+ .get_subflow = __bpf_mptcp_sched_get_subflow,
|
|
+ .init = __bpf_mptcp_sched_init,
|
|
+ .release = __bpf_mptcp_sched_release,
|
|
+};
|
|
+
|
|
+static struct bpf_struct_ops bpf_mptcp_sched_ops = {
|
|
+ .verifier_ops = &bpf_mptcp_sched_verifier_ops,
|
|
+ .reg = bpf_mptcp_sched_reg,
|
|
+ .unreg = bpf_mptcp_sched_unreg,
|
|
+ .check_member = bpf_mptcp_sched_check_member,
|
|
+ .init_member = bpf_mptcp_sched_init_member,
|
|
+ .init = bpf_mptcp_sched_init,
|
|
+ .name = "mptcp_sched_ops",
|
|
+ .cfi_stubs = &__bpf_mptcp_sched_ops,
|
|
+};
|
|
+#endif /* CONFIG_BPF_JIT */
|
|
+
|
|
struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
|
|
{
|
|
if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk))
|
|
@@ -45,6 +217,13 @@ __diag_pop();
|
|
|
|
static int __init bpf_mptcp_kfunc_init(void)
|
|
{
|
|
- return register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set);
|
|
+ int ret;
|
|
+
|
|
+ ret = register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set);
|
|
+#ifdef CONFIG_BPF_JIT
|
|
+ ret = ret ?: register_bpf_struct_ops(&bpf_mptcp_sched_ops, mptcp_sched_ops);
|
|
+#endif
|
|
+
|
|
+ return ret;
|
|
}
|
|
late_initcall(bpf_mptcp_kfunc_init);
|
|
--
|
|
2.46.0
|
|
|
|
From 6e68551820459adac18dd50d189e8bb56f70b5aa Mon Sep 17 00:00:00 2001
|
|
From: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Date: Mon, 16 Sep 2024 05:52:14 +0000
|
|
Subject: [PATCH 15/28] bpf: Add bpf_mptcp_sched_kfunc_set
|
|
|
|
This patch adds a new struct btf_kfunc_id_set for MPTCP scheduler. Add
|
|
mptcp_subflow_set_scheduled() and bpf_mptcp_subflow_ctx_by_pos() helpers
|
|
into this id_set, and register it in bpf_mptcp_kfunc_init() to make sure
|
|
these helpers can be accessed from the BPF context.
|
|
|
|
Reviewed-by: Mat Martineau <martineau@kernel.org>
|
|
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
|
|
---
|
|
net/mptcp/bpf.c | 12 ++++++++++++
|
|
1 file changed, 12 insertions(+)
|
|
|
|
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
|
|
index 89b69ab1cf8e..2c0fb9bddb9d 100644
|
|
--- a/net/mptcp/bpf.c
|
|
+++ b/net/mptcp/bpf.c
|
|
@@ -215,11 +215,23 @@ bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int p
|
|
|
|
__diag_pop();
|
|
|
|
+BTF_KFUNCS_START(bpf_mptcp_sched_kfunc_ids)
|
|
+BTF_ID_FLAGS(func, mptcp_subflow_set_scheduled)
|
|
+BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx_by_pos)
|
|
+BTF_KFUNCS_END(bpf_mptcp_sched_kfunc_ids)
|
|
+
|
|
+static const struct btf_kfunc_id_set bpf_mptcp_sched_kfunc_set = {
|
|
+ .owner = THIS_MODULE,
|
|
+ .set = &bpf_mptcp_sched_kfunc_ids,
|
|
+};
|
|
+
|
|
static int __init bpf_mptcp_kfunc_init(void)
|
|
{
|
|
int ret;
|
|
|
|
ret = register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set);
|
|
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
|
|
+ &bpf_mptcp_sched_kfunc_set);
|
|
#ifdef CONFIG_BPF_JIT
|
|
ret = ret ?: register_bpf_struct_ops(&bpf_mptcp_sched_ops, mptcp_sched_ops);
|
|
#endif
|
|
--
|
|
2.46.0
|
|
|
|
From 53d163b4553529381a7a50e06eabe7b1e70d27d0 Mon Sep 17 00:00:00 2001
|
|
From: Nicolas Rybowski <nicolas.rybowski@tessares.net>
|
|
Date: Mon, 16 Sep 2024 05:52:15 +0000
|
|
Subject: [PATCH 16/28] selftests/bpf: Add mptcp subflow example
|
|
|
|
Move Nicolas' patch into bpf selftests directory. This example adds a
|
|
different mark (SO_MARK) on each subflow, and changes the TCP CC only on
|
|
the first subflow.
|
|
|
|
From the userspace, an application can do a setsockopt() on an MPTCP
|
|
socket, and typically the same value will be propagated to all subflows
|
|
(paths). If someone wants to have different values per subflow, the
|
|
recommended way is to use BPF. So it is good to add such example here,
|
|
and make sure there are no regressions.
|
|
|
|
This example shows how it is possible to:
|
|
|
|
Identify the parent msk of an MPTCP subflow.
|
|
Set different socket options on each subflow of the same MPTCP connection.
|
|
|
|
Here especially, two different behaviours are implemented:
|
|
|
|
A socket mark (SOL_SOCKET SO_MARK) is put on each subflow of the same
|
|
MPTCP connection. The order of creation of the current subflow defines
|
|
its mark. The TCP CC algorithm of the subflow marked 2 of an MPTCP
|
|
connection is set to "reno".
|
|
|
|
This is just to show it is possible to identify an MPTCP connection, and
|
|
set socket options, from different SOL levels, per subflow. It is easy
|
|
to verify with 'ss' that these modifications have been applied
|
|
correctly. That's what the next patch is going to do.
|
|
|
|
Nicolas' code comes from:
|
|
|
|
commit 4d120186e4d6 ("bpf:examples: update mptcp_set_mark_kern.c")
|
|
|
|
from the MPTCP repo https://github.com/multipath-tcp/mptcp_net-next (the
|
|
"scripts" branch), and it has been adapted by Geliang.
|
|
|
|
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/76
|
|
Co-developed-by: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Signed-off-by: Nicolas Rybowski <nicolas.rybowski@tessares.net>
|
|
Reviewed-by: Mat Martineau <martineau@kernel.org>
|
|
---
|
|
.../selftests/bpf/progs/mptcp_subflow.c | 59 +++++++++++++++++++
|
|
1 file changed, 59 insertions(+)
|
|
create mode 100644 tools/testing/selftests/bpf/progs/mptcp_subflow.c
|
|
|
|
diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
|
|
new file mode 100644
|
|
index 000000000000..2e28f4a215b5
|
|
--- /dev/null
|
|
+++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
|
|
@@ -0,0 +1,59 @@
|
|
+// SPDX-License-Identifier: GPL-2.0
|
|
+/* Copyright (c) 2020, Tessares SA. */
|
|
+/* Copyright (c) 2024, Kylin Software */
|
|
+
|
|
+/* vmlinux.h, bpf_helpers.h and other 'define' */
|
|
+#include "bpf_tracing_net.h"
|
|
+
|
|
+char _license[] SEC("license") = "GPL";
|
|
+
|
|
+char cc[TCP_CA_NAME_MAX] = "reno";
|
|
+
|
|
+/* Associate a subflow counter to each token */
|
|
+struct {
|
|
+ __uint(type, BPF_MAP_TYPE_HASH);
|
|
+ __uint(key_size, sizeof(__u32));
|
|
+ __uint(value_size, sizeof(__u32));
|
|
+ __uint(max_entries, 100);
|
|
+} mptcp_sf SEC(".maps");
|
|
+
|
|
+SEC("sockops")
|
|
+int mptcp_subflow(struct bpf_sock_ops *skops)
|
|
+{
|
|
+ __u32 init = 1, key, mark, *cnt;
|
|
+ struct mptcp_sock *msk;
|
|
+ struct bpf_sock *sk;
|
|
+ int err;
|
|
+
|
|
+ if (skops->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
|
|
+ return 1;
|
|
+
|
|
+ sk = skops->sk;
|
|
+ if (!sk)
|
|
+ return 1;
|
|
+
|
|
+ msk = bpf_skc_to_mptcp_sock(sk);
|
|
+ if (!msk)
|
|
+ return 1;
|
|
+
|
|
+ key = msk->token;
|
|
+ cnt = bpf_map_lookup_elem(&mptcp_sf, &key);
|
|
+ if (cnt) {
|
|
+ /* A new subflow is added to an existing MPTCP connection */
|
|
+ __sync_fetch_and_add(cnt, 1);
|
|
+ mark = *cnt;
|
|
+ } else {
|
|
+ /* A new MPTCP connection is just initiated and this is its primary subflow */
|
|
+ bpf_map_update_elem(&mptcp_sf, &key, &init, BPF_ANY);
|
|
+ mark = init;
|
|
+ }
|
|
+
|
|
+ /* Set the mark of the subflow's socket based on appearance order */
|
|
+ err = bpf_setsockopt(skops, SOL_SOCKET, SO_MARK, &mark, sizeof(mark));
|
|
+ if (err < 0)
|
|
+ return 1;
|
|
+ if (mark == 2)
|
|
+ err = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION, cc, TCP_CA_NAME_MAX);
|
|
+
|
|
+ return 1;
|
|
+}
|
|
--
|
|
2.46.0
|
|
|
|
From 6cda8081edf4e3ac7f8ed4353c666db7a09446a8 Mon Sep 17 00:00:00 2001
|
|
From: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Date: Mon, 16 Sep 2024 05:52:16 +0000
|
|
Subject: [PATCH 17/28] selftests/bpf: Add getsockopt to inspect mptcp subflow
|
|
|
|
This patch adds a "cgroup/getsockopt" way to inspect the subflows of an
|
|
mptcp socket. That will be used by the next commit to verify the socket
|
|
options set on each subflow.
|
|
|
|
This extra "cgroup/getsockopt" prog walks the msk->conn_list and uses
|
|
bpf_core_cast to cast a pointer for read-only access. It allows inspecting all
|
|
the fields of a structure.
|
|
|
|
mptcp_subflow_tcp_sock(), mptcp_for_each_subflow() and other helpers
|
|
related to list_entry have been added into a new progs/mptcp_bpf.h file.
|
|
|
|
Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
|
|
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
|
|
---
|
|
MAINTAINERS | 2 +-
|
|
tools/testing/selftests/bpf/progs/mptcp_bpf.h | 42 +++++++++++
|
|
.../selftests/bpf/progs/mptcp_subflow.c | 69 +++++++++++++++++++
|
|
3 files changed, 112 insertions(+), 1 deletion(-)
|
|
create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf.h
|
|
|
|
diff --git a/MAINTAINERS b/MAINTAINERS
|
|
index 77fcd6f802a5..93d705098220 100644
|
|
--- a/MAINTAINERS
|
|
+++ b/MAINTAINERS
|
|
@@ -16097,7 +16097,7 @@ F: include/net/mptcp.h
|
|
F: include/trace/events/mptcp.h
|
|
F: include/uapi/linux/mptcp*.h
|
|
F: net/mptcp/
|
|
-F: tools/testing/selftests/bpf/*/*mptcp*.c
|
|
+F: tools/testing/selftests/bpf/*/*mptcp*.[ch]
|
|
F: tools/testing/selftests/net/mptcp/
|
|
|
|
NETWORKING [TCP]
|
|
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
|
|
new file mode 100644
|
|
index 000000000000..179b74c1205f
|
|
--- /dev/null
|
|
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
|
|
@@ -0,0 +1,42 @@
|
|
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
|
|
+#ifndef __MPTCP_BPF_H__
|
|
+#define __MPTCP_BPF_H__
|
|
+
|
|
+#include "bpf_experimental.h"
|
|
+
|
|
+/* list helpers from include/linux/list.h */
|
|
+static inline int list_is_head(const struct list_head *list,
|
|
+ const struct list_head *head)
|
|
+{
|
|
+ return list == head;
|
|
+}
|
|
+
|
|
+#define list_entry(ptr, type, member) \
|
|
+ container_of(ptr, type, member)
|
|
+
|
|
+#define list_first_entry(ptr, type, member) \
|
|
+ list_entry((ptr)->next, type, member)
|
|
+
|
|
+#define list_next_entry(pos, member) \
|
|
+ list_entry((pos)->member.next, typeof(*(pos)), member)
|
|
+
|
|
+#define list_entry_is_head(pos, head, member) \
|
|
+ list_is_head(&pos->member, (head))
|
|
+
|
|
+/* small difference: 'cond_break' has been added in the conditions */
|
|
+#define list_for_each_entry(pos, head, member) \
|
|
+ for (pos = list_first_entry(head, typeof(*pos), member); \
|
|
+ cond_break, !list_entry_is_head(pos, head, member); \
|
|
+ pos = list_next_entry(pos, member))
|
|
+
|
|
+/* mptcp helpers from protocol.h */
|
|
+#define mptcp_for_each_subflow(__msk, __subflow) \
|
|
+ list_for_each_entry(__subflow, &((__msk)->conn_list), node)
|
|
+
|
|
+static __always_inline struct sock *
|
|
+mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
|
|
+{
|
|
+ return subflow->tcp_sock;
|
|
+}
|
|
+
|
|
+#endif
|
|
diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
|
|
index 2e28f4a215b5..70302477e326 100644
|
|
--- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
|
|
+++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
|
|
@@ -4,10 +4,12 @@
|
|
|
|
/* vmlinux.h, bpf_helpers.h and other 'define' */
|
|
#include "bpf_tracing_net.h"
|
|
+#include "mptcp_bpf.h"
|
|
|
|
char _license[] SEC("license") = "GPL";
|
|
|
|
char cc[TCP_CA_NAME_MAX] = "reno";
|
|
+int pid;
|
|
|
|
/* Associate a subflow counter to each token */
|
|
struct {
|
|
@@ -57,3 +59,70 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
|
|
|
|
return 1;
|
|
}
|
|
+
|
|
+static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
|
|
+{
|
|
+ struct mptcp_subflow_context *subflow;
|
|
+ int i = 0;
|
|
+
|
|
+ mptcp_for_each_subflow(msk, subflow) {
|
|
+ struct sock *ssk;
|
|
+
|
|
+ ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
|
|
+ struct mptcp_subflow_context));
|
|
+
|
|
+ if (ssk->sk_mark != ++i) {
|
|
+ ctx->retval = -2;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
|
|
+{
|
|
+ struct mptcp_subflow_context *subflow;
|
|
+
|
|
+ mptcp_for_each_subflow(msk, subflow) {
|
|
+ struct inet_connection_sock *icsk;
|
|
+ struct sock *ssk;
|
|
+
|
|
+ ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
|
|
+ struct mptcp_subflow_context));
|
|
+ icsk = bpf_core_cast(ssk, struct inet_connection_sock);
|
|
+
|
|
+ if (ssk->sk_mark == 2 &&
|
|
+ __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX)) {
|
|
+ ctx->retval = -2;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+SEC("cgroup/getsockopt")
|
|
+int _getsockopt_subflow(struct bpf_sockopt *ctx)
|
|
+{
|
|
+ struct bpf_sock *sk = ctx->sk;
|
|
+ struct mptcp_sock *msk;
|
|
+
|
|
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
|
|
+ return 1;
|
|
+
|
|
+ if (!sk || sk->protocol != IPPROTO_MPTCP ||
|
|
+ (!(ctx->level == SOL_SOCKET && ctx->optname == SO_MARK) &&
|
|
+ !(ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION)))
|
|
+ return 1;
|
|
+
|
|
+ msk = bpf_core_cast(sk, struct mptcp_sock);
|
|
+ if (msk->pm.subflows != 1) {
|
|
+ ctx->retval = -1;
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ if (ctx->optname == SO_MARK)
|
|
+ return _check_getsockopt_subflow_mark(msk, ctx);
|
|
+ return _check_getsockopt_subflow_cc(msk, ctx);
|
|
+}
|
|
--
|
|
2.46.0
|
|
|
|
From 88c9717cb6d32d931aabf69eee0d7fea30118466 Mon Sep 17 00:00:00 2001
|
|
From: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Date: Mon, 16 Sep 2024 05:52:17 +0000
|
|
Subject: [PATCH 18/28] selftests/bpf: Add mptcp subflow subtest
|
|
|
|
This patch adds a subtest named test_subflow to load and verify the newly
|
|
added mptcp subflow example in test_mptcp. Add a helper endpoint_init()
|
|
to add a new subflow endpoint. Add another helper ss_search() to verify the
|
|
fwmark and congestion values set by mptcp_subflow prog using setsockopts.
|
|
|
|
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/76
|
|
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Reviewed-by: Mat Martineau <martineau@kernel.org>
|
|
---
|
|
.../testing/selftests/bpf/prog_tests/mptcp.c | 127 ++++++++++++++++++
|
|
1 file changed, 127 insertions(+)
|
|
|
|
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
index d2ca32fa3b21..c76a0d8c8f93 100644
|
|
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
@@ -5,12 +5,17 @@
|
|
#include <linux/const.h>
|
|
#include <netinet/in.h>
|
|
#include <test_progs.h>
|
|
+#include <unistd.h>
|
|
#include "cgroup_helpers.h"
|
|
#include "network_helpers.h"
|
|
#include "mptcp_sock.skel.h"
|
|
#include "mptcpify.skel.h"
|
|
+#include "mptcp_subflow.skel.h"
|
|
|
|
#define NS_TEST "mptcp_ns"
|
|
+#define ADDR_1 "10.0.1.1"
|
|
+#define ADDR_2 "10.0.1.2"
|
|
+#define PORT_1 10001
|
|
|
|
#ifndef IPPROTO_MPTCP
|
|
#define IPPROTO_MPTCP 262
|
|
@@ -335,10 +340,132 @@ static void test_mptcpify(void)
|
|
close(cgroup_fd);
|
|
}
|
|
|
|
+static int endpoint_init(char *flags)
|
|
+{
|
|
+ SYS(fail, "ip -net %s link add veth1 type veth peer name veth2", NS_TEST);
|
|
+ SYS(fail, "ip -net %s addr add %s/24 dev veth1", NS_TEST, ADDR_1);
|
|
+ SYS(fail, "ip -net %s link set dev veth1 up", NS_TEST);
|
|
+ SYS(fail, "ip -net %s addr add %s/24 dev veth2", NS_TEST, ADDR_2);
|
|
+ SYS(fail, "ip -net %s link set dev veth2 up", NS_TEST);
|
|
+ if (SYS_NOFAIL("ip -net %s mptcp endpoint add %s %s", NS_TEST, ADDR_2, flags)) {
|
|
+ printf("'ip mptcp' not supported, skip this test.\n");
|
|
+ test__skip();
|
|
+ goto fail;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+fail:
|
|
+ return -1;
|
|
+}
|
|
+
|
|
+static void wait_for_new_subflows(int fd)
|
|
+{
|
|
+ socklen_t len;
|
|
+ u8 subflows;
|
|
+ int err, i;
|
|
+
|
|
+ len = sizeof(subflows);
|
|
+ /* Wait max 1 sec for new subflows to be created */
|
|
+ for (i = 0; i < 10; i++) {
|
|
+ err = getsockopt(fd, SOL_MPTCP, MPTCP_INFO, &subflows, &len);
|
|
+ if (!err && subflows > 0)
|
|
+ break;
|
|
+
|
|
+ usleep(100000); /* 0.1s */
|
|
+ }
|
|
+}
|
|
+
|
|
+static void run_subflow(void)
|
|
+{
|
|
+ int server_fd, client_fd, err;
|
|
+ char new[TCP_CA_NAME_MAX];
|
|
+ char cc[TCP_CA_NAME_MAX];
|
|
+ unsigned int mark;
|
|
+ socklen_t len;
|
|
+
|
|
+ server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, 0);
|
|
+ if (!ASSERT_OK_FD(server_fd, "start_mptcp_server"))
|
|
+ return;
|
|
+
|
|
+ client_fd = connect_to_fd(server_fd, 0);
|
|
+ if (!ASSERT_OK_FD(client_fd, "connect_to_fd"))
|
|
+ goto close_server;
|
|
+
|
|
+ send_byte(client_fd);
|
|
+ wait_for_new_subflows(client_fd);
|
|
+
|
|
+ len = sizeof(mark);
|
|
+ err = getsockopt(client_fd, SOL_SOCKET, SO_MARK, &mark, &len);
|
|
+ if (ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)"))
|
|
+ ASSERT_EQ(mark, 0, "mark");
|
|
+
|
|
+ len = sizeof(new);
|
|
+ err = getsockopt(client_fd, SOL_TCP, TCP_CONGESTION, new, &len);
|
|
+ if (ASSERT_OK(err, "getsockopt(client_fd, TCP_CONGESTION)")) {
|
|
+ get_msk_ca_name(cc);
|
|
+ ASSERT_STREQ(new, cc, "cc");
|
|
+ }
|
|
+
|
|
+ close(client_fd);
|
|
+close_server:
|
|
+ close(server_fd);
|
|
+}
|
|
+
|
|
+static void test_subflow(void)
|
|
+{
|
|
+ int cgroup_fd, prog_fd, err;
|
|
+ struct mptcp_subflow *skel;
|
|
+ struct nstoken *nstoken;
|
|
+ struct bpf_link *link;
|
|
+
|
|
+ cgroup_fd = test__join_cgroup("/mptcp_subflow");
|
|
+ if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup: mptcp_subflow"))
|
|
+ return;
|
|
+
|
|
+ skel = mptcp_subflow__open_and_load();
|
|
+ if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_subflow"))
|
|
+ goto close_cgroup;
|
|
+
|
|
+ skel->bss->pid = getpid();
|
|
+
|
|
+ err = mptcp_subflow__attach(skel);
|
|
+ if (!ASSERT_OK(err, "skel_attach: mptcp_subflow"))
|
|
+ goto skel_destroy;
|
|
+
|
|
+ prog_fd = bpf_program__fd(skel->progs.mptcp_subflow);
|
|
+ err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0);
|
|
+ if (!ASSERT_OK(err, "prog_attach"))
|
|
+ goto skel_destroy;
|
|
+
|
|
+ nstoken = create_netns();
|
|
+ if (!ASSERT_OK_PTR(nstoken, "create_netns: mptcp_subflow"))
|
|
+ goto skel_destroy;
|
|
+
|
|
+ if (endpoint_init("subflow") < 0)
|
|
+ goto close_netns;
|
|
+
|
|
+ link = bpf_program__attach_cgroup(skel->progs._getsockopt_subflow,
|
|
+ cgroup_fd);
|
|
+ if (!ASSERT_OK_PTR(link, "getsockopt prog"))
|
|
+ goto close_netns;
|
|
+
|
|
+ run_subflow();
|
|
+
|
|
+ bpf_link__destroy(link);
|
|
+close_netns:
|
|
+ cleanup_netns(nstoken);
|
|
+skel_destroy:
|
|
+ mptcp_subflow__destroy(skel);
|
|
+close_cgroup:
|
|
+ close(cgroup_fd);
|
|
+}
|
|
+
|
|
void test_mptcp(void)
|
|
{
|
|
if (test__start_subtest("base"))
|
|
test_base();
|
|
if (test__start_subtest("mptcpify"))
|
|
test_mptcpify();
|
|
+ if (test__start_subtest("subflow"))
|
|
+ test_subflow();
|
|
}
|
|
--
|
|
2.46.0
|
|
|
|
From e80fa7af7531ac183afe0d2ccd248faab335892b Mon Sep 17 00:00:00 2001
|
|
From: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Date: Mon, 16 Sep 2024 05:52:18 +0000
|
|
Subject: [PATCH 19/28] selftests/bpf: Add bpf scheduler test
|
|
|
|
This patch extends the MPTCP test base to support MPTCP packet scheduler
|
|
tests. Add a new test to use the default in-kernel scheduler.
|
|
|
|
In the new helper sched_init(), add two veth net devices to simulate the
|
|
multiple addresses case. Use 'ip mptcp endpoint' command to add the new
|
|
endpoint ADDR_2 to PM netlink. Use sysctl to set net.mptcp.scheduler to
|
|
use the given sched.
|
|
|
|
Invoke start_mptcp_server() to start the server on ADDR_1, and invoke
|
|
connect_to_fd() to connect with the server from the client. Then invoke
|
|
send_data() to send data.
|
|
|
|
Some code in send_data() is from prog_tests/bpf_tcp_ca.c.
|
|
|
|
Add time metrics for BPF tests to compare the performance of each
|
|
scheduler. Running prog_tests with the '-v' option prints out the running
|
|
time of each test.
|
|
|
|
Use the new helper has_bytes_sent() to check the bytes_sent field of 'ss'
|
|
output after send_data() to make sure no data has been sent on ADDR_2.
|
|
All data has been sent on the first subflow.
|
|
|
|
Invoke the new helper sched_cleanup() to set back net.mptcp.scheduler to
|
|
default, flush all mptcp endpoints, and delete the veth net devices.
|
|
|
|
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Reviewed-by: Mat Martineau <martineau@kernel.org>
|
|
---
|
|
.../testing/selftests/bpf/prog_tests/mptcp.c | 92 +++++++++++++++++++
|
|
1 file changed, 92 insertions(+)
|
|
|
|
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
index c76a0d8c8f93..aff6986f84ac 100644
|
|
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
@@ -16,6 +16,8 @@
|
|
#define ADDR_1 "10.0.1.1"
|
|
#define ADDR_2 "10.0.1.2"
|
|
#define PORT_1 10001
|
|
+#define WITH_DATA true
|
|
+#define WITHOUT_DATA false
|
|
|
|
#ifndef IPPROTO_MPTCP
|
|
#define IPPROTO_MPTCP 262
|
|
@@ -38,6 +40,9 @@
|
|
#define TCP_CA_NAME_MAX 16
|
|
#endif
|
|
|
|
+static const unsigned int total_bytes = 10 * 1024 * 1024;
|
|
+static int duration;
|
|
+
|
|
struct __mptcp_info {
|
|
__u8 mptcpi_subflows;
|
|
__u8 mptcpi_add_addr_signal;
|
|
@@ -460,6 +465,91 @@ static void test_subflow(void)
|
|
close(cgroup_fd);
|
|
}
|
|
|
|
+static struct nstoken *sched_init(char *flags, char *sched)
|
|
+{
|
|
+ struct nstoken *nstoken;
|
|
+
|
|
+ nstoken = create_netns();
|
|
+ if (!ASSERT_OK_PTR(nstoken, "create_netns"))
|
|
+ return NULL;
|
|
+
|
|
+ if (endpoint_init("subflow") < 0)
|
|
+ goto fail;
|
|
+
|
|
+ SYS(fail, "ip netns exec %s sysctl -qw net.mptcp.scheduler=%s", NS_TEST, sched);
|
|
+
|
|
+ return nstoken;
|
|
+fail:
|
|
+ cleanup_netns(nstoken);
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+static int ss_search(char *src, char *dst, char *port, char *keyword)
|
|
+{
|
|
+ return SYS_NOFAIL("ip netns exec %s ss -enita src %s dst %s %s %d | grep -q '%s'",
|
|
+ NS_TEST, src, dst, port, PORT_1, keyword);
|
|
+}
|
|
+
|
|
+static int has_bytes_sent(char *dst)
|
|
+{
|
|
+ return ss_search(ADDR_1, dst, "sport", "bytes_sent:");
|
|
+}
|
|
+
|
|
+static void send_data_and_verify(char *sched, bool addr1, bool addr2)
|
|
+{
|
|
+ struct timespec start, end;
|
|
+ int server_fd, client_fd;
|
|
+ unsigned int delta_ms;
|
|
+
|
|
+ server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, 0);
|
|
+ if (!ASSERT_OK_FD(server_fd, "start_mptcp_server"))
|
|
+ return;
|
|
+
|
|
+ client_fd = connect_to_fd(server_fd, 0);
|
|
+ if (!ASSERT_OK_FD(client_fd, "connect_to_fd"))
|
|
+ goto fail;
|
|
+
|
|
+ if (clock_gettime(CLOCK_MONOTONIC, &start) < 0)
|
|
+ goto fail;
|
|
+
|
|
+ if (!ASSERT_OK(send_recv_data(server_fd, client_fd, total_bytes),
|
|
+ "send_recv_data"))
|
|
+ goto fail;
|
|
+
|
|
+ if (clock_gettime(CLOCK_MONOTONIC, &end) < 0)
|
|
+ goto fail;
|
|
+
|
|
+ delta_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000;
|
|
+ printf("%s: %u ms\n", sched, delta_ms);
|
|
+
|
|
+ if (addr1)
|
|
+ CHECK(has_bytes_sent(ADDR_1), sched, "should have bytes_sent on addr1\n");
|
|
+ else
|
|
+ CHECK(!has_bytes_sent(ADDR_1), sched, "shouldn't have bytes_sent on addr1\n");
|
|
+ if (addr2)
|
|
+ CHECK(has_bytes_sent(ADDR_2), sched, "should have bytes_sent on addr2\n");
|
|
+ else
|
|
+ CHECK(!has_bytes_sent(ADDR_2), sched, "shouldn't have bytes_sent on addr2\n");
|
|
+
|
|
+ close(client_fd);
|
|
+fail:
|
|
+ close(server_fd);
|
|
+}
|
|
+
|
|
+static void test_default(void)
|
|
+{
|
|
+ struct nstoken *nstoken;
|
|
+
|
|
+ nstoken = sched_init("subflow", "default");
|
|
+ if (!nstoken)
|
|
+ goto fail;
|
|
+
|
|
+ send_data_and_verify("default", WITH_DATA, WITH_DATA);
|
|
+
|
|
+fail:
|
|
+ cleanup_netns(nstoken);
|
|
+}
|
|
+
|
|
void test_mptcp(void)
|
|
{
|
|
if (test__start_subtest("base"))
|
|
@@ -468,4 +558,6 @@ void test_mptcp(void)
|
|
test_mptcpify();
|
|
if (test__start_subtest("subflow"))
|
|
test_subflow();
|
|
+ if (test__start_subtest("default"))
|
|
+ test_default();
|
|
}
|
|
--
|
|
2.46.0
|
|
|
|
From 98a4df409f3862b6bb7b5f246752b05e3ccc55af Mon Sep 17 00:00:00 2001
|
|
From: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Date: Mon, 16 Sep 2024 05:52:19 +0000
|
|
Subject: [PATCH 20/28] selftests/bpf: Add bpf_first scheduler & test
|
|
|
|
This patch implements the simplest MPTCP scheduler, named bpf_first,
|
|
which always picks the first subflow to send data. It's a sample of
|
|
MPTCP BPF scheduler implementations.
|
|
|
|
This patch defines MPTCP_SCHED_TEST macro, a template for all scheduler
|
|
tests. Every scheduler is identified by the name argument, and sysctl is used
|
|
to set net.mptcp.scheduler as "bpf_name" to use this sched. Add two
|
|
veth net devices to simulate the multiple addresses case. Use 'ip mptcp
|
|
endpoint' command to add the new endpoint ADDR2 to PM netlink. Arguments
|
|
addr1/addr2 indicate whether data has been sent on the first/second
|
|
subflow or not. Send data and check bytes_sent of 'ss' output after it
|
|
using send_data_and_verify().
|
|
|
|
Use the MPTCP_SCHED_TEST macro to add a new test for this bpf_first
|
|
scheduler; the arguments "1 0" mean data has been sent only on the
|
|
first subflow ADDR1. Run this test by RUN_MPTCP_TEST macro.
|
|
|
|
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Acked-by: Paolo Abeni <pabeni@redhat.com>
|
|
Reviewed-by: Mat Martineau <martineau@kernel.org>
|
|
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
|
|
---
|
|
.../testing/selftests/bpf/prog_tests/mptcp.c | 44 +++++++++++++++++++
|
|
tools/testing/selftests/bpf/progs/mptcp_bpf.h | 7 +++
|
|
.../selftests/bpf/progs/mptcp_bpf_first.c | 33 ++++++++++++++
|
|
3 files changed, 84 insertions(+)
|
|
create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_first.c
|
|
|
|
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
index aff6986f84ac..ee3fab606855 100644
|
|
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
@@ -11,6 +11,7 @@
|
|
#include "mptcp_sock.skel.h"
|
|
#include "mptcpify.skel.h"
|
|
#include "mptcp_subflow.skel.h"
|
|
+#include "mptcp_bpf_first.skel.h"
|
|
|
|
#define NS_TEST "mptcp_ns"
|
|
#define ADDR_1 "10.0.1.1"
|
|
@@ -39,6 +40,7 @@
|
|
#ifndef TCP_CA_NAME_MAX
|
|
#define TCP_CA_NAME_MAX 16
|
|
#endif
|
|
+#define MPTCP_SCHED_NAME_MAX 16
|
|
|
|
static const unsigned int total_bytes = 10 * 1024 * 1024;
|
|
static int duration;
|
|
@@ -550,6 +552,46 @@ static void test_default(void)
|
|
cleanup_netns(nstoken);
|
|
}
|
|
|
|
+static void test_bpf_sched(struct bpf_object *obj, char *sched,
|
|
+ bool addr1, bool addr2)
|
|
+{
|
|
+ char bpf_sched[MPTCP_SCHED_NAME_MAX] = "bpf_";
|
|
+ struct nstoken *nstoken;
|
|
+ struct bpf_link *link;
|
|
+ struct bpf_map *map;
|
|
+
|
|
+ if (!ASSERT_LT(strlen(bpf_sched) + strlen(sched),
|
|
+ MPTCP_SCHED_NAME_MAX, "Scheduler name too long"))
|
|
+ return;
|
|
+
|
|
+ map = bpf_object__find_map_by_name(obj, sched);
|
|
+ link = bpf_map__attach_struct_ops(map);
|
|
+ if (CHECK(!link, sched, "attach_struct_ops: %d\n", errno))
|
|
+ return;
|
|
+
|
|
+ nstoken = sched_init("subflow", strcat(bpf_sched, sched));
|
|
+ if (!nstoken)
|
|
+ goto fail;
|
|
+
|
|
+ send_data_and_verify(sched, addr1, addr2);
|
|
+
|
|
+fail:
|
|
+ cleanup_netns(nstoken);
|
|
+ bpf_link__destroy(link);
|
|
+}
|
|
+
|
|
+static void test_first(void)
|
|
+{
|
|
+ struct mptcp_bpf_first *skel;
|
|
+
|
|
+ skel = mptcp_bpf_first__open_and_load();
|
|
+ if (!ASSERT_OK_PTR(skel, "open_and_load: first"))
|
|
+ return;
|
|
+
|
|
+ test_bpf_sched(skel->obj, "first", WITH_DATA, WITHOUT_DATA);
|
|
+ mptcp_bpf_first__destroy(skel);
|
|
+}
|
|
+
|
|
void test_mptcp(void)
|
|
{
|
|
if (test__start_subtest("base"))
|
|
@@ -560,4 +602,6 @@ void test_mptcp(void)
|
|
test_subflow();
|
|
if (test__start_subtest("default"))
|
|
test_default();
|
|
+ if (test__start_subtest("first"))
|
|
+ test_first();
|
|
}
|
|
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
|
|
index 179b74c1205f..95449963c1d3 100644
|
|
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
|
|
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
|
|
@@ -39,4 +39,11 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
|
|
return subflow->tcp_sock;
|
|
}
|
|
|
|
+/* ksym */
|
|
+extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
|
|
+ bool scheduled) __ksym;
|
|
+
|
|
+extern struct mptcp_subflow_context *
|
|
+bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) __ksym;
|
|
+
|
|
#endif
|
|
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c
|
|
new file mode 100644
|
|
index 000000000000..d57399b407a7
|
|
--- /dev/null
|
|
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c
|
|
@@ -0,0 +1,33 @@
|
|
+// SPDX-License-Identifier: GPL-2.0
|
|
+/* Copyright (c) 2022, SUSE. */
|
|
+
|
|
+#include "mptcp_bpf.h"
|
|
+#include <bpf/bpf_tracing.h>
|
|
+
|
|
+char _license[] SEC("license") = "GPL";
|
|
+
|
|
+SEC("struct_ops")
|
|
+void BPF_PROG(mptcp_sched_first_init, struct mptcp_sock *msk)
|
|
+{
|
|
+}
|
|
+
|
|
+SEC("struct_ops")
|
|
+void BPF_PROG(mptcp_sched_first_release, struct mptcp_sock *msk)
|
|
+{
|
|
+}
|
|
+
|
|
+SEC("struct_ops")
|
|
+int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk,
|
|
+ struct mptcp_sched_data *data)
|
|
+{
|
|
+ mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, 0), true);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+SEC(".struct_ops")
|
|
+struct mptcp_sched_ops first = {
|
|
+ .init = (void *)mptcp_sched_first_init,
|
|
+ .release = (void *)mptcp_sched_first_release,
|
|
+ .get_subflow = (void *)bpf_first_get_subflow,
|
|
+ .name = "bpf_first",
|
|
+};
|
|
--
|
|
2.46.0
|
|
|
|
From 156161b367e8fea9b012e0d2da4b816670bd3a3f Mon Sep 17 00:00:00 2001
|
|
From: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Date: Mon, 16 Sep 2024 05:52:20 +0000
|
|
Subject: [PATCH 21/28] selftests/bpf: Add bpf_bkup scheduler & test
|
|
|
|
This patch implements the backup flag test scheduler, named bpf_bkup,
|
|
which picks the first non-backup subflow to send data.
|
|
|
|
Use the MPTCP_SCHED_TEST macro to add a new test for this bpf_bkup
|
|
scheduler; the arguments "1 0" mean data has been sent only on the
|
|
first subflow ADDR1. Run this test by RUN_MPTCP_TEST macro.
|
|
|
|
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Reviewed-by: Mat Martineau <martineau@kernel.org>
|
|
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
|
|
---
|
|
.../testing/selftests/bpf/prog_tests/mptcp.c | 15 ++++++
|
|
tools/testing/selftests/bpf/progs/mptcp_bpf.h | 3 ++
|
|
.../selftests/bpf/progs/mptcp_bpf_bkup.c | 52 +++++++++++++++++++
|
|
3 files changed, 70 insertions(+)
|
|
create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c
|
|
|
|
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
index ee3fab606855..4a760efc2ede 100644
|
|
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
@@ -12,6 +12,7 @@
|
|
#include "mptcpify.skel.h"
|
|
#include "mptcp_subflow.skel.h"
|
|
#include "mptcp_bpf_first.skel.h"
|
|
+#include "mptcp_bpf_bkup.skel.h"
|
|
|
|
#define NS_TEST "mptcp_ns"
|
|
#define ADDR_1 "10.0.1.1"
|
|
@@ -592,6 +593,18 @@ static void test_first(void)
|
|
mptcp_bpf_first__destroy(skel);
|
|
}
|
|
|
|
+static void test_bkup(void)
|
|
+{
|
|
+ struct mptcp_bpf_bkup *skel;
|
|
+
|
|
+ skel = mptcp_bpf_bkup__open_and_load();
|
|
+ if (!ASSERT_OK_PTR(skel, "open_and_load: bkup"))
|
|
+ return;
|
|
+
|
|
+ test_bpf_sched(skel->obj, "bkup", WITH_DATA, WITHOUT_DATA);
|
|
+ mptcp_bpf_bkup__destroy(skel);
|
|
+}
|
|
+
|
|
void test_mptcp(void)
|
|
{
|
|
if (test__start_subtest("base"))
|
|
@@ -604,4 +617,6 @@ void test_mptcp(void)
|
|
test_default();
|
|
if (test__start_subtest("first"))
|
|
test_first();
|
|
+ if (test__start_subtest("bkup"))
|
|
+ test_bkup();
|
|
}
|
|
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
|
|
index 95449963c1d3..928a1e5ad8db 100644
|
|
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
|
|
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
|
|
@@ -4,6 +4,9 @@
|
|
|
|
#include "bpf_experimental.h"
|
|
|
|
+/* mptcp helpers from include/net/mptcp.h */
|
|
+#define MPTCP_SUBFLOWS_MAX 8
|
|
+
|
|
/* list helpers from include/linux/list.h */
|
|
static inline int list_is_head(const struct list_head *list,
|
|
const struct list_head *head)
|
|
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c
|
|
new file mode 100644
|
|
index 000000000000..296f0318d843
|
|
--- /dev/null
|
|
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_bkup.c
|
|
@@ -0,0 +1,52 @@
|
|
+// SPDX-License-Identifier: GPL-2.0
|
|
+/* Copyright (c) 2022, SUSE. */
|
|
+
|
|
+#include "mptcp_bpf.h"
|
|
+#include <bpf/bpf_tracing.h>
|
|
+
|
|
+char _license[] SEC("license") = "GPL";
|
|
+
|
|
+SEC("struct_ops")
|
|
+void BPF_PROG(mptcp_sched_bkup_init, struct mptcp_sock *msk)
|
|
+{
|
|
+}
|
|
+
|
|
+SEC("struct_ops")
|
|
+void BPF_PROG(mptcp_sched_bkup_release, struct mptcp_sock *msk)
|
|
+{
|
|
+}
|
|
+
|
|
+SEC("struct_ops")
|
|
+int BPF_PROG(bpf_bkup_get_subflow, struct mptcp_sock *msk,
|
|
+ struct mptcp_sched_data *data)
|
|
+{
|
|
+ int nr = -1;
|
|
+
|
|
+ for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) {
|
|
+ struct mptcp_subflow_context *subflow;
|
|
+
|
|
+ subflow = bpf_mptcp_subflow_ctx_by_pos(data, i);
|
|
+ if (!subflow)
|
|
+ break;
|
|
+
|
|
+ if (!BPF_CORE_READ_BITFIELD_PROBED(subflow, backup) ||
|
|
+ !BPF_CORE_READ_BITFIELD_PROBED(subflow, request_bkup)) {
|
|
+ nr = i;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (nr != -1) {
|
|
+ mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, nr), true);
|
|
+ return -1;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+SEC(".struct_ops")
|
|
+struct mptcp_sched_ops bkup = {
|
|
+ .init = (void *)mptcp_sched_bkup_init,
|
|
+ .release = (void *)mptcp_sched_bkup_release,
|
|
+ .get_subflow = (void *)bpf_bkup_get_subflow,
|
|
+ .name = "bpf_bkup",
|
|
+};
|
|
--
|
|
2.46.0
|
|
|
|
From 23be357908c466769030c111270c4438cac3e0f3 Mon Sep 17 00:00:00 2001
|
|
From: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Date: Mon, 16 Sep 2024 05:52:21 +0000
|
|
Subject: [PATCH 22/28] selftests/bpf: Add bpf_rr scheduler & test
|
|
|
|
This patch implements the round-robin BPF MPTCP scheduler, named bpf_rr,
|
|
which always picks the next available subflow to send data. If no such
|
|
next subflow is available, it picks the first one.
|
|
|
|
Use the MPTCP_SCHED_TEST macro to add a new test for this bpf_rr
|
|
scheduler; the arguments "1 1" mean data has been sent on both net
|
|
devices. Run this test by RUN_MPTCP_TEST macro.
|
|
|
|
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Reviewed-by: Mat Martineau <martineau@kernel.org>
|
|
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
|
|
---
|
|
.../testing/selftests/bpf/prog_tests/mptcp.c | 15 ++++
|
|
.../selftests/bpf/progs/mptcp_bpf_rr.c | 78 +++++++++++++++++++
|
|
2 files changed, 93 insertions(+)
|
|
create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
|
|
|
|
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
index 4a760efc2ede..d4e07c24806c 100644
|
|
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
@@ -13,6 +13,7 @@
|
|
#include "mptcp_subflow.skel.h"
|
|
#include "mptcp_bpf_first.skel.h"
|
|
#include "mptcp_bpf_bkup.skel.h"
|
|
+#include "mptcp_bpf_rr.skel.h"
|
|
|
|
#define NS_TEST "mptcp_ns"
|
|
#define ADDR_1 "10.0.1.1"
|
|
@@ -605,6 +606,18 @@ static void test_bkup(void)
|
|
mptcp_bpf_bkup__destroy(skel);
|
|
}
|
|
|
|
+static void test_rr(void)
|
|
+{
|
|
+ struct mptcp_bpf_rr *skel;
|
|
+
|
|
+ skel = mptcp_bpf_rr__open_and_load();
|
|
+ if (!ASSERT_OK_PTR(skel, "open_and_load: rr"))
|
|
+ return;
|
|
+
|
|
+ test_bpf_sched(skel->obj, "rr", WITH_DATA, WITH_DATA);
|
|
+ mptcp_bpf_rr__destroy(skel);
|
|
+}
|
|
+
|
|
void test_mptcp(void)
|
|
{
|
|
if (test__start_subtest("base"))
|
|
@@ -619,4 +632,6 @@ void test_mptcp(void)
|
|
test_first();
|
|
if (test__start_subtest("bkup"))
|
|
test_bkup();
|
|
+ if (test__start_subtest("rr"))
|
|
+ test_rr();
|
|
}
|
|
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
|
|
new file mode 100644
|
|
index 000000000000..638ea6aa63b7
|
|
--- /dev/null
|
|
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
|
|
@@ -0,0 +1,78 @@
|
|
+// SPDX-License-Identifier: GPL-2.0
|
|
+/* Copyright (c) 2022, SUSE. */
|
|
+
|
|
+#include "mptcp_bpf.h"
|
|
+#include <bpf/bpf_tracing.h>
|
|
+
|
|
+char _license[] SEC("license") = "GPL";
|
|
+
|
|
+struct mptcp_rr_storage {
|
|
+ struct sock *last_snd;
|
|
+};
|
|
+
|
|
+struct {
|
|
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
|
|
+ __uint(map_flags, BPF_F_NO_PREALLOC);
|
|
+ __type(key, int);
|
|
+ __type(value, struct mptcp_rr_storage);
|
|
+} mptcp_rr_map SEC(".maps");
|
|
+
|
|
+SEC("struct_ops")
|
|
+void BPF_PROG(mptcp_sched_rr_init, struct mptcp_sock *msk)
|
|
+{
|
|
+ bpf_sk_storage_get(&mptcp_rr_map, msk, 0,
|
|
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
|
|
+}
|
|
+
|
|
+SEC("struct_ops")
|
|
+void BPF_PROG(mptcp_sched_rr_release, struct mptcp_sock *msk)
|
|
+{
|
|
+ bpf_sk_storage_delete(&mptcp_rr_map, msk);
|
|
+}
|
|
+
|
|
+SEC("struct_ops")
|
|
+int BPF_PROG(bpf_rr_get_subflow, struct mptcp_sock *msk,
|
|
+ struct mptcp_sched_data *data)
|
|
+{
|
|
+ struct mptcp_subflow_context *subflow;
|
|
+ struct mptcp_rr_storage *ptr;
|
|
+ struct sock *last_snd = NULL;
|
|
+ int nr = 0;
|
|
+
|
|
+ ptr = bpf_sk_storage_get(&mptcp_rr_map, msk, 0,
|
|
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
|
|
+ if (!ptr)
|
|
+ return -1;
|
|
+
|
|
+ last_snd = ptr->last_snd;
|
|
+
|
|
+ for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) {
|
|
+ subflow = bpf_mptcp_subflow_ctx_by_pos(data, i);
|
|
+ if (!last_snd || !subflow)
|
|
+ break;
|
|
+
|
|
+ if (mptcp_subflow_tcp_sock(subflow) == last_snd) {
|
|
+ if (i + 1 == MPTCP_SUBFLOWS_MAX ||
|
|
+ !bpf_mptcp_subflow_ctx_by_pos(data, i + 1))
|
|
+ break;
|
|
+
|
|
+ nr = i + 1;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ subflow = bpf_mptcp_subflow_ctx_by_pos(data, nr);
|
|
+ if (!subflow)
|
|
+ return -1;
|
|
+ mptcp_subflow_set_scheduled(subflow, true);
|
|
+ ptr->last_snd = mptcp_subflow_tcp_sock(subflow);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+SEC(".struct_ops")
|
|
+struct mptcp_sched_ops rr = {
|
|
+ .init = (void *)mptcp_sched_rr_init,
|
|
+ .release = (void *)mptcp_sched_rr_release,
|
|
+ .get_subflow = (void *)bpf_rr_get_subflow,
|
|
+ .name = "bpf_rr",
|
|
+};
|
|
--
|
|
2.46.0
|
|
|
|
From 24f9dc216230966e8e7301d7ac82af04d8583566 Mon Sep 17 00:00:00 2001
|
|
From: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Date: Mon, 16 Sep 2024 05:52:22 +0000
|
|
Subject: [PATCH 23/28] selftests/bpf: Add bpf_red scheduler & test
|
|
|
|
This patch implements the redundant BPF MPTCP scheduler, named bpf_red,
|
|
which sends all packets redundantly on all available subflows.
|
|
|
|
Use the MPTCP_SCHED_TEST macro to add a new test for this bpf_red
|
|
scheduler; the arguments "1 1" mean data has been sent on both
|
|
net devices. Run this test by RUN_MPTCP_TEST macro.
|
|
|
|
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Reviewed-by: Mat Martineau <martineau@kernel.org>
|
|
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
|
|
---
|
|
.../testing/selftests/bpf/prog_tests/mptcp.c | 15 +++++++
|
|
.../selftests/bpf/progs/mptcp_bpf_red.c | 39 +++++++++++++++++++
|
|
2 files changed, 54 insertions(+)
|
|
create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_red.c
|
|
|
|
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
index d4e07c24806c..ede2d1ff9f6b 100644
|
|
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
@@ -14,6 +14,7 @@
|
|
#include "mptcp_bpf_first.skel.h"
|
|
#include "mptcp_bpf_bkup.skel.h"
|
|
#include "mptcp_bpf_rr.skel.h"
|
|
+#include "mptcp_bpf_red.skel.h"
|
|
|
|
#define NS_TEST "mptcp_ns"
|
|
#define ADDR_1 "10.0.1.1"
|
|
@@ -618,6 +619,18 @@ static void test_rr(void)
|
|
mptcp_bpf_rr__destroy(skel);
|
|
}
|
|
|
|
+static void test_red(void)
|
|
+{
|
|
+ struct mptcp_bpf_red *skel;
|
|
+
|
|
+ skel = mptcp_bpf_red__open_and_load();
|
|
+ if (!ASSERT_OK_PTR(skel, "open_and_load: red"))
|
|
+ return;
|
|
+
|
|
+ test_bpf_sched(skel->obj, "red", WITH_DATA, WITH_DATA);
|
|
+ mptcp_bpf_red__destroy(skel);
|
|
+}
|
|
+
|
|
void test_mptcp(void)
|
|
{
|
|
if (test__start_subtest("base"))
|
|
@@ -634,4 +647,6 @@ void test_mptcp(void)
|
|
test_bkup();
|
|
if (test__start_subtest("rr"))
|
|
test_rr();
|
|
+ if (test__start_subtest("red"))
|
|
+ test_red();
|
|
}
|
|
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c
|
|
new file mode 100644
|
|
index 000000000000..cc0aab732fc4
|
|
--- /dev/null
|
|
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c
|
|
@@ -0,0 +1,39 @@
|
|
+// SPDX-License-Identifier: GPL-2.0
|
|
+/* Copyright (c) 2022, SUSE. */
|
|
+
|
|
+#include "mptcp_bpf.h"
|
|
+#include <bpf/bpf_tracing.h>
|
|
+
|
|
+char _license[] SEC("license") = "GPL";
|
|
+
|
|
+SEC("struct_ops")
|
|
+void BPF_PROG(mptcp_sched_red_init, struct mptcp_sock *msk)
|
|
+{
|
|
+}
|
|
+
|
|
+SEC("struct_ops")
|
|
+void BPF_PROG(mptcp_sched_red_release, struct mptcp_sock *msk)
|
|
+{
|
|
+}
|
|
+
|
|
+SEC("struct_ops")
|
|
+int BPF_PROG(bpf_red_get_subflow, struct mptcp_sock *msk,
|
|
+ struct mptcp_sched_data *data)
|
|
+{
|
|
+ for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) {
|
|
+ if (!bpf_mptcp_subflow_ctx_by_pos(data, i))
|
|
+ break;
|
|
+
|
|
+ mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, i), true);
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+SEC(".struct_ops")
|
|
+struct mptcp_sched_ops red = {
|
|
+ .init = (void *)mptcp_sched_red_init,
|
|
+ .release = (void *)mptcp_sched_red_release,
|
|
+ .get_subflow = (void *)bpf_red_get_subflow,
|
|
+ .name = "bpf_red",
|
|
+};
|
|
--
|
|
2.46.0
|
|
|
|
From de732279a1cfc454c4d355a7dc31bfc2766383e0 Mon Sep 17 00:00:00 2001
|
|
From: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Date: Mon, 16 Sep 2024 05:52:23 +0000
|
|
Subject: [PATCH 24/28] bpf: Export more bpf_burst related functions
|
|
|
|
sk_stream_memory_free() and tcp_rtx_and_write_queues_empty() need to be
|
|
exported into the BPF context for the bpf_burst scheduler, but these two
|
|
functions are inline ones. So this patch adds two wrappers for them and
|
|
exports the wrappers into the BPF context.
|
|
|
|
Add more bpf_burst related functions into bpf_mptcp_sched_kfunc_set to make
|
|
sure these helpers can be accessed from the BPF context.
|
|
|
|
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Reviewed-by: Mat Martineau <martineau@kernel.org>
|
|
---
|
|
net/mptcp/bpf.c | 11 +++++++++++
|
|
net/mptcp/protocol.c | 4 ++--
|
|
net/mptcp/protocol.h | 3 +++
|
|
3 files changed, 16 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
|
|
index 2c0fb9bddb9d..6414824402e6 100644
|
|
--- a/net/mptcp/bpf.c
|
|
+++ b/net/mptcp/bpf.c
|
|
@@ -213,11 +213,22 @@ bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int p
|
|
return data->contexts[pos];
|
|
}
|
|
|
|
+__bpf_kfunc bool bpf_mptcp_subflow_queues_empty(struct sock *sk)
|
|
+{
|
|
+ return tcp_rtx_queue_empty(sk);
|
|
+}
|
|
+
|
|
__diag_pop();
|
|
|
|
BTF_KFUNCS_START(bpf_mptcp_sched_kfunc_ids)
|
|
BTF_ID_FLAGS(func, mptcp_subflow_set_scheduled)
|
|
BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx_by_pos)
|
|
+BTF_ID_FLAGS(func, mptcp_subflow_active)
|
|
+BTF_ID_FLAGS(func, mptcp_set_timeout)
|
|
+BTF_ID_FLAGS(func, mptcp_wnd_end)
|
|
+BTF_ID_FLAGS(func, tcp_stream_memory_free)
|
|
+BTF_ID_FLAGS(func, bpf_mptcp_subflow_queues_empty)
|
|
+BTF_ID_FLAGS(func, mptcp_pm_subflow_chk_stale)
|
|
BTF_KFUNCS_END(bpf_mptcp_sched_kfunc_ids)
|
|
|
|
static const struct btf_kfunc_id_set bpf_mptcp_sched_kfunc_set = {
|
|
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
|
|
index 7cc8d81ee605..3b837765c84b 100644
|
|
--- a/net/mptcp/protocol.c
|
|
+++ b/net/mptcp/protocol.c
|
|
@@ -50,7 +50,7 @@ DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
|
|
static struct net_device mptcp_napi_dev;
|
|
|
|
/* Returns end sequence number of the receiver's advertised window */
|
|
-static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
|
|
+u64 mptcp_wnd_end(const struct mptcp_sock *msk)
|
|
{
|
|
return READ_ONCE(msk->wnd_end);
|
|
}
|
|
@@ -489,7 +489,7 @@ static long mptcp_timeout_from_subflow(const struct mptcp_subflow_context *subfl
|
|
inet_csk(ssk)->icsk_timeout - jiffies : 0;
|
|
}
|
|
|
|
-static void mptcp_set_timeout(struct sock *sk)
|
|
+void mptcp_set_timeout(struct sock *sk)
|
|
{
|
|
struct mptcp_subflow_context *subflow;
|
|
long tout = 0;
|
|
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
|
|
index a1d06e7e3544..c3942416fa3a 100644
|
|
--- a/net/mptcp/protocol.h
|
|
+++ b/net/mptcp/protocol.h
|
|
@@ -719,6 +719,9 @@ void __mptcp_subflow_send_ack(struct sock *ssk);
|
|
void mptcp_subflow_reset(struct sock *ssk);
|
|
void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
|
|
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
|
|
+u64 mptcp_wnd_end(const struct mptcp_sock *msk);
|
|
+void mptcp_set_timeout(struct sock *sk);
|
|
+bool bpf_mptcp_subflow_queues_empty(struct sock *sk);
|
|
struct mptcp_subflow_context *
|
|
bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos);
|
|
struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk);
|
|
--
|
|
2.46.0
|
|
|
|
From 9f1d0166bff9923c5889a0db70e189f147efee50 Mon Sep 17 00:00:00 2001
|
|
From: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Date: Mon, 16 Sep 2024 05:52:24 +0000
|
|
Subject: [PATCH 25/28] selftests/bpf: Add bpf_burst scheduler & test
|
|
|
|
This patch implements the burst BPF MPTCP scheduler, named bpf_burst,
|
|
which is the default scheduler in protocol.c. bpf_burst_get_send() uses
|
|
the same logic as mptcp_subflow_get_send() and bpf_burst_get_retrans
|
|
uses the same logic as mptcp_subflow_get_retrans().
|
|
|
|
Use the MPTCP_SCHED_TEST macro to add a new test for this bpf_burst
|
|
scheduler; the arguments "1 1" mean data has been sent on both net
|
|
devices. Run this test by RUN_MPTCP_TEST macro.
|
|
|
|
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
|
|
Reviewed-by: Mat Martineau <martineau@kernel.org>
|
|
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
|
|
---
|
|
.../testing/selftests/bpf/prog_tests/mptcp.c | 15 ++
|
|
.../selftests/bpf/progs/mptcp_bpf_burst.c | 207 ++++++++++++++++++
|
|
2 files changed, 222 insertions(+)
|
|
create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c
|
|
|
|
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
index ede2d1ff9f6b..a3e68bc6afa3 100644
|
|
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
|
|
@@ -15,6 +15,7 @@
|
|
#include "mptcp_bpf_bkup.skel.h"
|
|
#include "mptcp_bpf_rr.skel.h"
|
|
#include "mptcp_bpf_red.skel.h"
|
|
+#include "mptcp_bpf_burst.skel.h"
|
|
|
|
#define NS_TEST "mptcp_ns"
|
|
#define ADDR_1 "10.0.1.1"
|
|
@@ -631,6 +632,18 @@ static void test_red(void)
|
|
mptcp_bpf_red__destroy(skel);
|
|
}
|
|
|
|
+static void test_burst(void)
|
|
+{
|
|
+ struct mptcp_bpf_burst *skel;
|
|
+
|
|
+ skel = mptcp_bpf_burst__open_and_load();
|
|
+ if (!ASSERT_OK_PTR(skel, "open_and_load: burst"))
|
|
+ return;
|
|
+
|
|
+ test_bpf_sched(skel->obj, "burst", WITH_DATA, WITH_DATA);
|
|
+ mptcp_bpf_burst__destroy(skel);
|
|
+}
|
|
+
|
|
void test_mptcp(void)
|
|
{
|
|
if (test__start_subtest("base"))
|
|
@@ -649,4 +662,6 @@ void test_mptcp(void)
|
|
test_rr();
|
|
if (test__start_subtest("red"))
|
|
test_red();
|
|
+ if (test__start_subtest("burst"))
|
|
+ test_burst();
|
|
}
|
|
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c
|
|
new file mode 100644
|
|
index 000000000000..eb21119aa8f7
|
|
--- /dev/null
|
|
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c
|
|
@@ -0,0 +1,207 @@
|
|
+// SPDX-License-Identifier: GPL-2.0
|
|
+/* Copyright (c) 2023, SUSE. */
|
|
+
|
|
+#include "mptcp_bpf.h"
|
|
+#include <bpf/bpf_tracing.h>
|
|
+#include <limits.h>
|
|
+
|
|
+char _license[] SEC("license") = "GPL";
|
|
+
|
|
+#define MPTCP_SEND_BURST_SIZE 65428 /* upper bound applied to the burst computed in bpf_burst_get_send() */
|
|
+
|
|
+#define min(a, b) ((a) < (b) ? (a) : (b)) /* NOTE: the selected argument is evaluated twice */
|
|
+
|
|
+struct bpf_subflow_send_info { /* best send candidate found so far for one SSK_MODE_* slot */
|
|
+ __u8 subflow_id; /* loop position handed to bpf_mptcp_subflow_ctx_by_pos(); MPTCP_SUBFLOWS_MAX while unset */
|
|
+ __u64 linger_time; /* (sk_wmem_queued << 32) / pacing rate of that candidate; lower wins */
|
|
+};
|
|
+
|
|
+extern bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) __ksym;
|
|
+extern void mptcp_set_timeout(struct sock *sk) __ksym;
|
|
+extern __u64 mptcp_wnd_end(const struct mptcp_sock *msk) __ksym;
|
|
+extern bool tcp_stream_memory_free(const struct sock *sk, int wake) __ksym;
|
|
+extern bool bpf_mptcp_subflow_queues_empty(struct sock *sk) __ksym;
|
|
+extern void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) __ksym;
|
|
+
|
|
+#define SSK_MODE_ACTIVE 0 /* send_info[] slot used when the subflow is not flagged as backup */
|
|
+#define SSK_MODE_BACKUP 1 /* send_info[] slot used when backup or request_bkup is set */
|
|
+#define SSK_MODE_MAX 2 /* number of send_info[] slots */
|
|
+
|
|
+static __always_inline __u64 div_u64(__u64 dividend, __u32 divisor)
|
|
+{ /* Unsigned 64-by-32 bit division returning the 64-bit quotient. */
|
|
+ return dividend / divisor; /* no zero check here: callers must pass a non-zero divisor */
|
|
+}
|
|
+
|
|
+static __always_inline bool tcp_write_queue_empty(struct sock *sk)
|
|
+{ /* True when write_seq equals snd_nxt on the TCP socket behind sk. */
|
|
+ const struct tcp_sock *tp = bpf_skc_to_tcp_sock(sk); /* may be NULL; handled by the ternary below */
|
|
+
|
|
+ return tp ? tp->write_seq == tp->snd_nxt : true; /* a failed cast is reported as "empty" */
|
|
+}
|
|
+
|
|
+static __always_inline bool tcp_rtx_and_write_queues_empty(struct sock *sk)
|
|
+{ /* True only when both the kfunc check and the local write-queue check report empty. */
|
|
+ return bpf_mptcp_subflow_queues_empty(sk) && tcp_write_queue_empty(sk); /* kfunc presumably covers the rtx queue - confirm */
|
|
+}
|
|
+
|
|
+static __always_inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
|
|
+{ /* Report whether sk still has send-buffer room; wake is forwarded to the TCP kfunc. */
|
|
+ if (sk->sk_wmem_queued >= sk->sk_sndbuf) /* queued bytes already reach the send buffer size */
|
|
+ return false;
|
|
+
|
|
+ return tcp_stream_memory_free(sk, wake); /* otherwise the TCP-level kfunc decides */
|
|
+}
|
|
+
|
|
+static __always_inline bool sk_stream_memory_free(const struct sock *sk)
|
|
+{ /* Convenience wrapper around __sk_stream_memory_free(). */
|
|
+ return __sk_stream_memory_free(sk, 0); /* wake is fixed to 0 */
|
|
+}
|
|
+
|
|
+SEC("struct_ops")
|
|
+void BPF_PROG(mptcp_sched_burst_init, struct mptcp_sock *msk)
|
|
+{ /* .init callback of the "burst" ops below; empty, this scheduler sets up no state here */
|
|
+}
|
|
+
|
|
+SEC("struct_ops")
|
|
+void BPF_PROG(mptcp_sched_burst_release, struct mptcp_sock *msk)
|
|
+{ /* .release callback of the "burst" ops below; empty, there is nothing to tear down */
|
|
+}
|
|
+
|
|
+static int bpf_burst_get_send(struct mptcp_sock *msk,
|
|
+ struct mptcp_sched_data *data)
|
|
+{ /* Schedule the subflow with the lowest linger time for new data; returns 0, or -1 if none is usable. */
|
|
+ struct bpf_subflow_send_info send_info[SSK_MODE_MAX];
|
|
+ struct mptcp_subflow_context *subflow;
|
|
+ struct sock *sk = (struct sock *)msk;
|
|
+ __u32 pace, burst, wmem;
|
|
+ int i, nr_active = 0;
|
|
+ __u64 linger_time;
|
|
+ struct sock *ssk;
|
|
+
|
|
+ /* pick, per mode, the subflow with the lowest wmem/pacing-rate ratio; start from empty slots */
|
|
+ for (i = 0; i < SSK_MODE_MAX; ++i) {
|
|
+ send_info[i].subflow_id = MPTCP_SUBFLOWS_MAX;
|
|
+ send_info[i].linger_time = -1; /* -1 converts to the largest __u64, so any real value is lower */
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) {
|
|
+ bool backup;
|
|
+
|
|
+ subflow = bpf_mptcp_subflow_ctx_by_pos(data, i);
|
|
+ if (!subflow)
|
|
+ break;
|
|
+
|
|
+ backup = subflow->backup || subflow->request_bkup; /* doubles as the send_info[] index */
|
|
+
|
|
+ ssk = mptcp_subflow_tcp_sock(subflow);
|
|
+ if (!mptcp_subflow_active(subflow))
|
|
+ continue;
|
|
+
|
|
+ nr_active += !backup; /* counts active subflows that are not backups */
|
|
+ pace = subflow->avg_pacing_rate;
|
|
+ if (!pace) {
|
|
+ /* no average yet: seed it from the socket's current pacing rate */
|
|
+ subflow->avg_pacing_rate = ssk->sk_pacing_rate;
|
|
+ pace = subflow->avg_pacing_rate;
|
|
+ if (!pace)
|
|
+ continue; /* still zero: skip, it would be the divisor below */
|
|
+ }
|
|
+
|
|
+ linger_time = div_u64((__u64)ssk->sk_wmem_queued << 32, pace);
|
|
+ if (linger_time < send_info[backup].linger_time) { /* strictly lower: the earlier subflow wins ties */
|
|
+ send_info[backup].subflow_id = i;
|
|
+ send_info[backup].linger_time = linger_time;
|
|
+ }
|
|
+ }
|
|
+ mptcp_set_timeout(sk);
|
|
+
|
|
+ /* pick the best backup if no non-backup subflow is active */
|
|
+ if (!nr_active)
|
|
+ send_info[SSK_MODE_ACTIVE].subflow_id = send_info[SSK_MODE_BACKUP].subflow_id;
|
|
+
|
|
+ subflow = bpf_mptcp_subflow_ctx_by_pos(data, send_info[SSK_MODE_ACTIVE].subflow_id);
|
|
+ if (!subflow)
|
|
+ return -1; /* no candidate could be looked up */
|
|
+ ssk = mptcp_subflow_tcp_sock(subflow);
|
|
+ if (!ssk || !sk_stream_memory_free(ssk))
|
|
+ return -1; /* the chosen subflow has no socket or no send-buffer room */
|
|
+
|
|
+ burst = min(MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt);
|
|
+ wmem = ssk->sk_wmem_queued;
|
|
+ if (!burst)
|
|
+ goto out; /* zero burst: still schedule the subflow, but leave the pacing average alone */
|
|
+
|
|
+ subflow->avg_pacing_rate = div_u64((__u64)subflow->avg_pacing_rate * wmem +
|
|
+ ssk->sk_pacing_rate * burst,
|
|
+ burst + wmem); /* mean of old average and socket rate, weighted by wmem and burst */
|
|
+ msk->snd_burst = burst;
|
|
+
|
|
+out:
|
|
+ mptcp_subflow_set_scheduled(subflow, true);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int bpf_burst_get_retrans(struct mptcp_sock *msk,
|
|
+ struct mptcp_sched_data *data)
|
|
+{ /* Schedule a subflow for reinjection: first idle non-backup one, else possibly a backup; 0 or -1. */
|
|
+ int backup = MPTCP_SUBFLOWS_MAX, pick = MPTCP_SUBFLOWS_MAX, subflow_id; /* MPTCP_SUBFLOWS_MAX means "none found" */
|
|
+ struct mptcp_subflow_context *subflow;
|
|
+ int min_stale_count = INT_MAX;
|
|
+ struct sock *ssk;
|
|
+
|
|
+ for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) {
|
|
+ subflow = bpf_mptcp_subflow_ctx_by_pos(data, i);
|
|
+ if (!subflow)
|
|
+ break;
|
|
+
|
|
+ if (!mptcp_subflow_active(subflow))
|
|
+ continue;
|
|
+
|
|
+ ssk = mptcp_subflow_tcp_sock(subflow);
|
|
+ /* data still outstanding at TCP level: skip it, but record its stale count */
|
|
+ if (!tcp_rtx_and_write_queues_empty(ssk)) {
|
|
+ mptcp_pm_subflow_chk_stale(msk, ssk);
|
|
+ min_stale_count = min(min_stale_count, subflow->stale_count);
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ if (subflow->backup || subflow->request_bkup) {
|
|
+ if (backup == MPTCP_SUBFLOWS_MAX)
|
|
+ backup = i; /* only the first idle backup subflow is remembered */
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ if (pick == MPTCP_SUBFLOWS_MAX)
|
|
+ pick = i; /* only the first idle non-backup subflow is remembered */
|
|
+ }
|
|
+
|
|
+ if (pick < MPTCP_SUBFLOWS_MAX) {
|
|
+ subflow_id = pick;
|
|
+ goto out;
|
|
+ }
|
|
+ subflow_id = min_stale_count > 1 ? backup : MPTCP_SUBFLOWS_MAX; /* no backup while a busy subflow has stale_count <= 1 */
|
|
+
|
|
+out:
|
|
+ subflow = bpf_mptcp_subflow_ctx_by_pos(data, subflow_id);
|
|
+ if (!subflow)
|
|
+ return -1; /* nothing was selected, or the lookup failed */
|
|
+ mptcp_subflow_set_scheduled(subflow, true);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+SEC("struct_ops")
|
|
+int BPF_PROG(bpf_burst_get_subflow, struct mptcp_sock *msk,
|
|
+ struct mptcp_sched_data *data)
|
|
+{ /* .get_subflow callback: dispatch on data->reinject and return the helper's result. */
|
|
+ if (data->reinject)
|
|
+ return bpf_burst_get_retrans(msk, data); /* reinjection path */
|
|
+ return bpf_burst_get_send(msk, data); /* regular send path */
|
|
+}
|
|
+
|
|
+SEC(".struct_ops")
|
|
+struct mptcp_sched_ops burst = { /* struct_ops map wiring the three programs above into one scheduler */
|
|
+ .init = (void *)mptcp_sched_burst_init,
|
|
+ .release = (void *)mptcp_sched_burst_release,
|
|
+ .get_subflow = (void *)bpf_burst_get_subflow,
|
|
+ .name = "bpf_burst", /* presumably the name the scheduler is selected by - confirm against test_bpf_sched() */
|
|
+};
|
|
--
|
|
2.46.0
|
|
|