diff --git a/root/target/linux/generic/hack-5.4/690-mptcp_trunk.patch b/root/target/linux/generic/hack-5.4/690-mptcp_trunk.patch index 2ebe3630..819773ee 100644 --- a/root/target/linux/generic/hack-5.4/690-mptcp_trunk.patch +++ b/root/target/linux/generic/hack-5.4/690-mptcp_trunk.patch @@ -1,6 +1,6 @@ diff -aurN linux-5.4/Documentation/networking/ip-sysctl.txt mptcp-mptcp_trunk/Documentation/networking/ip-sysctl.txt --- linux-5.4/Documentation/networking/ip-sysctl.txt 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/Documentation/networking/ip-sysctl.txt 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/Documentation/networking/ip-sysctl.txt 2020-06-08 19:41:07.000000000 +0200 @@ -818,6 +818,18 @@ Default: 0 (disabled) @@ -22,7 +22,7 @@ diff -aurN linux-5.4/Documentation/networking/ip-sysctl.txt mptcp-mptcp_trunk/Do udp_l3mdev_accept - BOOLEAN diff -aurN linux-5.4/drivers/infiniband/hw/cxgb4/cm.c mptcp-mptcp_trunk/drivers/infiniband/hw/cxgb4/cm.c --- linux-5.4/drivers/infiniband/hw/cxgb4/cm.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/drivers/infiniband/hw/cxgb4/cm.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/drivers/infiniband/hw/cxgb4/cm.c 2020-06-08 19:41:07.000000000 +0200 @@ -3946,7 +3946,7 @@ */ memset(&tmp_opt, 0, sizeof(tmp_opt)); @@ -34,7 +34,7 @@ diff -aurN linux-5.4/drivers/infiniband/hw/cxgb4/cm.c mptcp-mptcp_trunk/drivers/ memset(req, 0, sizeof(*req)); diff -aurN linux-5.4/include/linux/skbuff.h mptcp-mptcp_trunk/include/linux/skbuff.h --- linux-5.4/include/linux/skbuff.h 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/include/linux/skbuff.h 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/include/linux/skbuff.h 2020-06-08 19:41:07.000000000 +0200 @@ -717,7 +717,7 @@ * want to keep them across layers you have to do a skb_clone() * first. This is owned by whoever has the skb queued ATM. 
@@ -46,7 +46,7 @@ diff -aurN linux-5.4/include/linux/skbuff.h mptcp-mptcp_trunk/include/linux/skbu struct { diff -aurN linux-5.4/include/linux/tcp.h mptcp-mptcp_trunk/include/linux/tcp.h --- linux-5.4/include/linux/tcp.h 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/include/linux/tcp.h 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/include/linux/tcp.h 2020-06-08 19:41:07.000000000 +0200 @@ -54,7 +54,7 @@ /* TCP Fast Open */ #define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */ @@ -205,7 +205,7 @@ diff -aurN linux-5.4/include/linux/tcp.h mptcp-mptcp_trunk/include/linux/tcp.h static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) diff -aurN linux-5.4/include/net/inet_common.h mptcp-mptcp_trunk/include/net/inet_common.h --- linux-5.4/include/net/inet_common.h 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/include/net/inet_common.h 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/include/net/inet_common.h 2020-06-08 19:41:07.000000000 +0200 @@ -2,6 +2,7 @@ #ifndef _INET_COMMON_H #define _INET_COMMON_H @@ -225,7 +225,7 @@ diff -aurN linux-5.4/include/net/inet_common.h mptcp-mptcp_trunk/include/net/ine int addr_len, int flags); diff -aurN linux-5.4/include/net/inet_connection_sock.h mptcp-mptcp_trunk/include/net/inet_connection_sock.h --- linux-5.4/include/net/inet_connection_sock.h 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/include/net/inet_connection_sock.h 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/include/net/inet_connection_sock.h 2020-06-08 19:41:07.000000000 +0200 @@ -25,6 +25,7 @@ struct inet_bind_bucket; @@ -236,7 +236,7 @@ diff -aurN linux-5.4/include/net/inet_connection_sock.h mptcp-mptcp_trunk/includ * Pointers to address related TCP functions diff -aurN linux-5.4/include/net/inet_sock.h mptcp-mptcp_trunk/include/net/inet_sock.h --- linux-5.4/include/net/inet_sock.h 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/include/net/inet_sock.h 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/include/net/inet_sock.h 2020-06-08 19:41:07.000000000 +0200 @@ -79,7 +79,7 @@ #define ireq_state req.__req_common.skc_state #define ireq_family req.__req_common.skc_family @@ -257,8 +257,8 @@ diff -aurN linux-5.4/include/net/inet_sock.h mptcp-mptcp_trunk/include/net/inet_ union { diff -aurN linux-5.4/include/net/mptcp.h mptcp-mptcp_trunk/include/net/mptcp.h --- linux-5.4/include/net/mptcp.h 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/include/net/mptcp.h 2020-05-16 10:39:52.000000000 +0200 -@@ -0,0 +1,1519 @@ ++++ mptcp-mptcp_trunk/include/net/mptcp.h 2020-06-08 19:41:07.000000000 +0200 +@@ -0,0 +1,1542 @@ +/* + * MPTCP implementation + * @@ -305,6 +305,7 @@ diff -aurN linux-5.4/include/net/mptcp.h mptcp-mptcp_trunk/include/net/mptcp.h +#include +#include +#include ++#include +#include + +#if defined(__LITTLE_ENDIAN_BITFIELD) @@ -463,7 +464,7 @@ diff -aurN linux-5.4/include/net/mptcp.h mptcp-mptcp_trunk/include/net/mptcp.h + struct timer_list mptcp_ack_timer; + + /* HMAC of the third ack */ -+ char sender_mac[20]; ++ char sender_mac[SHA256_DIGEST_SIZE]; +}; + +struct mptcp_tw { @@ -831,11 +832,29 @@ diff -aurN linux-5.4/include/net/mptcp.h mptcp-mptcp_trunk/include/net/mptcp.h + __u8 kind; + __u8 len; +#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u8 ipver:4, -+ sub:4; ++ union { ++ struct { ++ __u8 ipver:4, ++ sub:4; ++ } v0; ++ struct { ++ __u8 echo:1, ++ rsv:3, ++ sub:4; ++ } v1; ++ } u_bit; +#elif defined(__BIG_ENDIAN_BITFIELD) -+ __u8 sub:4, -+ 
ipver:4; ++ union { ++ struct { ++ __u8 sub:4, ++ ipver:4; ++ } v0; ++ struct { ++ __u8 sub:4, ++ rsv:3, ++ echo:1; ++ } v1; ++ } u_bit; +#else +#error "Adjust your defines" +#endif @@ -1091,7 +1110,7 @@ diff -aurN linux-5.4/include/net/mptcp.h mptcp-mptcp_trunk/include/net/mptcp.h + int wscale_ok, __u8 *rcv_wscale, + __u32 init_rcv_wnd); +unsigned int mptcp_current_mss(struct sock *meta_sk); -+void mptcp_hmac(u8 ver, const u8 *key_1, const u8 *key_2, u32 *hash_out, ++void mptcp_hmac(u8 ver, const u8 *key_1, const u8 *key_2, u8 *hash_out, + int arg_num, ...); +void mptcp_clean_rtx_infinite(const struct sk_buff *skb, struct sock *sk); +void mptcp_fin(struct sock *meta_sk); @@ -1179,6 +1198,10 @@ diff -aurN linux-5.4/include/net/mptcp.h mptcp-mptcp_trunk/include/net/mptcp.h +bool subflow_is_backup(const struct tcp_sock *tp); +struct sock *get_available_subflow(struct sock *meta_sk, struct sk_buff *skb, + bool zero_wnd_test); ++struct sk_buff *mptcp_next_segment(struct sock *meta_sk, ++ int *reinject, ++ struct sock **subsk, ++ unsigned int *limit); +extern struct mptcp_sched_ops mptcp_sched_default; + +/* Initializes function-pointers and MPTCP-flags */ @@ -1780,7 +1803,7 @@ diff -aurN linux-5.4/include/net/mptcp.h mptcp-mptcp_trunk/include/net/mptcp.h +#endif /* _MPTCP_H */ diff -aurN linux-5.4/include/net/mptcp_v4.h mptcp-mptcp_trunk/include/net/mptcp_v4.h --- linux-5.4/include/net/mptcp_v4.h 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/include/net/mptcp_v4.h 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/include/net/mptcp_v4.h 2020-06-08 19:41:07.000000000 +0200 @@ -0,0 +1,76 @@ +/* + * MPTCP implementation @@ -1860,7 +1883,7 @@ diff -aurN linux-5.4/include/net/mptcp_v4.h mptcp-mptcp_trunk/include/net/mptcp_ +#endif /* MPTCP_V4_H_ */ diff -aurN linux-5.4/include/net/mptcp_v6.h mptcp-mptcp_trunk/include/net/mptcp_v6.h --- linux-5.4/include/net/mptcp_v6.h 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/include/net/mptcp_v6.h 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/include/net/mptcp_v6.h 2020-06-08 19:41:07.000000000 +0200 @@ -0,0 +1,77 @@ +/* + * MPTCP implementation @@ -1941,7 +1964,7 @@ diff -aurN linux-5.4/include/net/mptcp_v6.h mptcp-mptcp_trunk/include/net/mptcp_ +#endif /* _MPTCP_V6_H */ diff -aurN linux-5.4/include/net/net_namespace.h mptcp-mptcp_trunk/include/net/net_namespace.h --- linux-5.4/include/net/net_namespace.h 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/include/net/net_namespace.h 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/include/net/net_namespace.h 2020-06-08 19:41:07.000000000 +0200 @@ -19,6 +19,7 @@ #include #include @@ -1962,7 +1985,7 @@ diff -aurN linux-5.4/include/net/net_namespace.h mptcp-mptcp_trunk/include/net/n #endif diff -aurN linux-5.4/include/net/netns/mptcp.h mptcp-mptcp_trunk/include/net/netns/mptcp.h --- linux-5.4/include/net/netns/mptcp.h 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/include/net/netns/mptcp.h 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/include/net/netns/mptcp.h 2020-06-08 19:41:07.000000000 +0200 @@ -0,0 +1,52 @@ +/* + * MPTCP implementation - MPTCP namespace @@ -2018,7 +2041,7 @@ diff -aurN linux-5.4/include/net/netns/mptcp.h mptcp-mptcp_trunk/include/net/net +#endif /* __NETNS_MPTCP_H__ */ diff -aurN linux-5.4/include/net/snmp.h mptcp-mptcp_trunk/include/net/snmp.h --- linux-5.4/include/net/snmp.h 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/include/net/snmp.h 2020-05-16 10:39:52.000000000 +0200 ++++ 
mptcp-mptcp_trunk/include/net/snmp.h 2020-06-08 19:41:07.000000000 +0200 @@ -86,7 +86,6 @@ atomic_long_t mibs[ICMP6MSG_MIB_MAX]; }; @@ -2029,7 +2052,7 @@ diff -aurN linux-5.4/include/net/snmp.h mptcp-mptcp_trunk/include/net/snmp.h struct tcp_mib { diff -aurN linux-5.4/include/net/sock.h mptcp-mptcp_trunk/include/net/sock.h --- linux-5.4/include/net/sock.h 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/include/net/sock.h 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/include/net/sock.h 2020-06-08 19:41:07.000000000 +0200 @@ -814,6 +814,7 @@ SOCK_TXTIME, SOCK_XDP, /* XDP is attached */ @@ -2048,7 +2071,7 @@ diff -aurN linux-5.4/include/net/sock.h mptcp-mptcp_trunk/include/net/sock.h #ifdef CONFIG_PROC_FS diff -aurN linux-5.4/include/net/tcp.h mptcp-mptcp_trunk/include/net/tcp.h --- linux-5.4/include/net/tcp.h 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/include/net/tcp.h 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/include/net/tcp.h 2020-06-08 19:41:07.000000000 +0200 @@ -182,6 +182,7 @@ #define TCPOPT_SACK 5 /* SACK Block */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ @@ -2395,7 +2418,7 @@ diff -aurN linux-5.4/include/net/tcp.h mptcp-mptcp_trunk/include/net/tcp.h { diff -aurN linux-5.4/include/net/tcp_states.h mptcp-mptcp_trunk/include/net/tcp_states.h --- linux-5.4/include/net/tcp_states.h 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/include/net/tcp_states.h 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/include/net/tcp_states.h 2020-06-08 19:41:07.000000000 +0200 @@ -22,6 +22,7 @@ TCP_LISTEN, TCP_CLOSING, /* Now a valid state */ @@ -2414,7 +2437,7 @@ diff -aurN linux-5.4/include/net/tcp_states.h mptcp-mptcp_trunk/include/net/tcp_ #endif /* _LINUX_TCP_STATES_H */ diff -aurN linux-5.4/include/net/transp_v6.h mptcp-mptcp_trunk/include/net/transp_v6.h --- linux-5.4/include/net/transp_v6.h 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/include/net/transp_v6.h 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/include/net/transp_v6.h 2020-06-08 19:41:07.000000000 +0200 @@ -58,6 +58,8 @@ /* address family specific functions */ @@ -2426,7 +2449,7 @@ diff -aurN linux-5.4/include/net/transp_v6.h mptcp-mptcp_trunk/include/net/trans diff -aurN linux-5.4/include/trace/events/tcp.h mptcp-mptcp_trunk/include/trace/events/tcp.h --- linux-5.4/include/trace/events/tcp.h 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/include/trace/events/tcp.h 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/include/trace/events/tcp.h 2020-06-08 19:41:07.000000000 +0200 @@ -10,6 +10,7 @@ #include #include @@ -2477,7 +2500,7 @@ diff -aurN linux-5.4/include/trace/events/tcp.h mptcp-mptcp_trunk/include/trace/ #endif /* _TRACE_TCP_H */ diff -aurN linux-5.4/include/uapi/linux/bpf.h mptcp-mptcp_trunk/include/uapi/linux/bpf.h --- linux-5.4/include/uapi/linux/bpf.h 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/include/uapi/linux/bpf.h 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/include/uapi/linux/bpf.h 2020-06-08 19:41:07.000000000 +0200 @@ -3438,6 +3438,7 @@ BPF_TCP_LISTEN, BPF_TCP_CLOSING, /* Now a valid state */ @@ -2488,7 +2511,7 @@ diff -aurN linux-5.4/include/uapi/linux/bpf.h mptcp-mptcp_trunk/include/uapi/lin }; diff -aurN linux-5.4/include/uapi/linux/if.h mptcp-mptcp_trunk/include/uapi/linux/if.h --- linux-5.4/include/uapi/linux/if.h 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/include/uapi/linux/if.h 2020-05-16 10:39:52.000000000 +0200 ++++ 
mptcp-mptcp_trunk/include/uapi/linux/if.h 2020-06-08 19:41:07.000000000 +0200 @@ -132,6 +132,9 @@ #define IFF_ECHO IFF_ECHO #endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */ @@ -2501,7 +2524,7 @@ diff -aurN linux-5.4/include/uapi/linux/if.h mptcp-mptcp_trunk/include/uapi/linu diff -aurN linux-5.4/include/uapi/linux/mptcp.h mptcp-mptcp_trunk/include/uapi/linux/mptcp.h --- linux-5.4/include/uapi/linux/mptcp.h 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/include/uapi/linux/mptcp.h 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/include/uapi/linux/mptcp.h 2020-06-08 19:41:07.000000000 +0200 @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* @@ -2654,7 +2677,7 @@ diff -aurN linux-5.4/include/uapi/linux/mptcp.h mptcp-mptcp_trunk/include/uapi/l +#endif /* _LINUX_MPTCP_H */ diff -aurN linux-5.4/include/uapi/linux/tcp.h mptcp-mptcp_trunk/include/uapi/linux/tcp.h --- linux-5.4/include/uapi/linux/tcp.h 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/include/uapi/linux/tcp.h 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/include/uapi/linux/tcp.h 2020-06-08 19:41:07.000000000 +0200 @@ -18,9 +18,15 @@ #ifndef _UAPI_LINUX_TCP_H #define _UAPI_LINUX_TCP_H @@ -2742,7 +2765,7 @@ diff -aurN linux-5.4/include/uapi/linux/tcp.h mptcp-mptcp_trunk/include/uapi/lin diff -aurN linux-5.4/net/core/dev.c mptcp-mptcp_trunk/net/core/dev.c --- linux-5.4/net/core/dev.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/core/dev.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/core/dev.c 2020-06-08 19:41:07.000000000 +0200 @@ -7855,7 +7855,7 @@ dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | @@ -2754,7 +2777,7 @@ diff -aurN linux-5.4/net/core/dev.c mptcp-mptcp_trunk/net/core/dev.c diff -aurN linux-5.4/net/core/net-traces.c mptcp-mptcp_trunk/net/core/net-traces.c --- linux-5.4/net/core/net-traces.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/core/net-traces.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/core/net-traces.c 2020-06-08 19:41:07.000000000 +0200 @@ -60,3 +60,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll); @@ -2763,7 +2786,7 @@ diff -aurN linux-5.4/net/core/net-traces.c mptcp-mptcp_trunk/net/core/net-traces +EXPORT_TRACEPOINT_SYMBOL_GPL(mptcp_retransmit); diff -aurN linux-5.4/net/core/skbuff.c mptcp-mptcp_trunk/net/core/skbuff.c --- linux-5.4/net/core/skbuff.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/core/skbuff.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/core/skbuff.c 2020-06-08 19:41:07.000000000 +0200 @@ -573,7 +573,7 @@ skb_drop_list(&skb_shinfo(skb)->frag_list); } @@ -2775,7 +2798,7 @@ diff -aurN linux-5.4/net/core/skbuff.c mptcp-mptcp_trunk/net/core/skbuff.c diff -aurN linux-5.4/net/core/sock.c mptcp-mptcp_trunk/net/core/sock.c --- linux-5.4/net/core/sock.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/core/sock.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/core/sock.c 2020-06-08 19:41:07.000000000 +0200 @@ -135,6 +135,11 @@ #include @@ -2827,16 +2850,17 @@ diff -aurN linux-5.4/net/core/sock.c mptcp-mptcp_trunk/net/core/sock.c } else sk = kmalloc(prot->obj_size, priority); -@@ -1832,4 +1858,5 @@ +@@ -1832,6 +1858,7 @@ atomic_set(&newsk->sk_zckey, 0); sock_reset_flag(newsk, SOCK_DONE); + sock_reset_flag(newsk, SOCK_MPTCP); + mem_cgroup_sk_alloc(newsk); + cgroup_sk_alloc(&newsk->sk_cgrp_data); - /* sk->sk_memcg will be populated at accept() time */ diff -aurN 
linux-5.4/net/ipv4/af_inet.c mptcp-mptcp_trunk/net/ipv4/af_inet.c --- linux-5.4/net/ipv4/af_inet.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/ipv4/af_inet.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/ipv4/af_inet.c 2020-06-08 19:41:07.000000000 +0200 @@ -100,6 +100,7 @@ #include #include @@ -2902,7 +2926,7 @@ diff -aurN linux-5.4/net/ipv4/af_inet.c mptcp-mptcp_trunk/net/ipv4/af_inet.c diff -aurN linux-5.4/net/ipv4/inet_connection_sock.c mptcp-mptcp_trunk/net/ipv4/inet_connection_sock.c --- linux-5.4/net/ipv4/inet_connection_sock.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/ipv4/inet_connection_sock.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/ipv4/inet_connection_sock.c 2020-06-08 19:41:07.000000000 +0200 @@ -19,6 +19,7 @@ #include #include @@ -2962,7 +2986,7 @@ diff -aurN linux-5.4/net/ipv4/inet_connection_sock.c mptcp-mptcp_trunk/net/ipv4/ cond_resched(); diff -aurN linux-5.4/net/ipv4/ip_sockglue.c mptcp-mptcp_trunk/net/ipv4/ip_sockglue.c --- linux-5.4/net/ipv4/ip_sockglue.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/ipv4/ip_sockglue.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/ipv4/ip_sockglue.c 2020-06-08 19:41:07.000000000 +0200 @@ -44,6 +44,8 @@ #endif #include @@ -3004,7 +3028,7 @@ diff -aurN linux-5.4/net/ipv4/ip_sockglue.c mptcp-mptcp_trunk/net/ipv4/ip_sockgl case IP_TTL: diff -aurN linux-5.4/net/ipv4/Kconfig mptcp-mptcp_trunk/net/ipv4/Kconfig --- linux-5.4/net/ipv4/Kconfig 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/ipv4/Kconfig 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/ipv4/Kconfig 2020-06-08 19:41:07.000000000 +0200 @@ -654,6 +654,51 @@ bufferbloat, policers, or AQM schemes that do not provide a delay signal. It requires the fq ("Fair Queue") pacing packet scheduler. 
@@ -3092,7 +3116,7 @@ diff -aurN linux-5.4/net/ipv4/Kconfig mptcp-mptcp_trunk/net/ipv4/Kconfig default "cdg" if DEFAULT_CDG diff -aurN linux-5.4/net/ipv4/syncookies.c mptcp-mptcp_trunk/net/ipv4/syncookies.c --- linux-5.4/net/ipv4/syncookies.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/ipv4/syncookies.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/ipv4/syncookies.c 2020-06-08 19:41:07.000000000 +0200 @@ -12,6 +12,8 @@ #include #include @@ -3220,7 +3244,7 @@ diff -aurN linux-5.4/net/ipv4/syncookies.c mptcp-mptcp_trunk/net/ipv4/syncookies */ diff -aurN linux-5.4/net/ipv4/tcp.c mptcp-mptcp_trunk/net/ipv4/tcp.c --- linux-5.4/net/ipv4/tcp.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/ipv4/tcp.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/ipv4/tcp.c 2020-06-08 19:41:07.000000000 +0200 @@ -270,6 +270,7 @@ #include @@ -3605,7 +3629,7 @@ diff -aurN linux-5.4/net/ipv4/tcp.c mptcp-mptcp_trunk/net/ipv4/tcp.c seq = tp->write_seq + tp->max_window + 2; if (!seq) -@@ -2616,17 +2730,11 @@ +@@ -2616,15 +2730,11 @@ icsk->icsk_backoff = 0; tp->snd_cwnd = 2; icsk->icsk_probes_out = 0; @@ -3614,7 +3638,6 @@ diff -aurN linux-5.4/net/ipv4/tcp.c mptcp-mptcp_trunk/net/ipv4/tcp.c - tp->snd_cwnd = TCP_INIT_CWND; - tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; -- tp->delivered = 0; - tp->delivered_ce = 0; + + tcp_reset_vars(sk); @@ -3622,22 +3645,17 @@ diff -aurN linux-5.4/net/ipv4/tcp.c mptcp-mptcp_trunk/net/ipv4/tcp.c tcp_set_ca_state(sk, TCP_CA_Open); - tp->is_sack_reneg = 0; - tcp_clear_retrans(tp); -- tp->total_retrans = 0; inet_csk_delack_init(sk); /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 * issue in __tcp_select_window() -@@ -2636,14 +2746,10 @@ +@@ -2636,10 +2746,6 @@ sk->sk_rx_dst = NULL; tcp_saved_syn_free(tp); tp->compressed_ack = 0; - tp->segs_in = 0; - tp->segs_out = 0; - tp->bytes_sent = 0; - tp->bytes_acked = 0; - tp->bytes_received = 0; - tp->bytes_retrans = 0; - tp->data_segs_in = 0; - tp->data_segs_out = 0; tp->duplicate_sack[0].start_seq = 0; tp->duplicate_sack[0].end_seq = 0; tp->dsack_dups = 0; @@ -3969,7 +3987,7 @@ diff -aurN linux-5.4/net/ipv4/tcp.c mptcp-mptcp_trunk/net/ipv4/tcp.c EXPORT_SYMBOL_GPL(tcp_abort); diff -aurN linux-5.4/net/ipv4/tcp_cong.c mptcp-mptcp_trunk/net/ipv4/tcp_cong.c --- linux-5.4/net/ipv4/tcp_cong.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/ipv4/tcp_cong.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/ipv4/tcp_cong.c 2020-06-08 19:41:07.000000000 +0200 @@ -328,13 +328,19 @@ return ret; } @@ -3994,7 +4012,7 @@ diff -aurN linux-5.4/net/ipv4/tcp_cong.c mptcp-mptcp_trunk/net/ipv4/tcp_cong.c const struct tcp_congestion_ops *ca; diff -aurN linux-5.4/net/ipv4/tcp_diag.c mptcp-mptcp_trunk/net/ipv4/tcp_diag.c --- linux-5.4/net/ipv4/tcp_diag.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/ipv4/tcp_diag.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/ipv4/tcp_diag.c 2020-06-08 19:41:07.000000000 +0200 @@ -31,7 +31,7 @@ r->idiag_wqueue = READ_ONCE(tp->write_seq) - tp->snd_una; } @@ -4006,7 +4024,7 @@ diff -aurN linux-5.4/net/ipv4/tcp_diag.c mptcp-mptcp_trunk/net/ipv4/tcp_diag.c #ifdef CONFIG_TCP_MD5SIG diff -aurN linux-5.4/net/ipv4/tcp_fastopen.c mptcp-mptcp_trunk/net/ipv4/tcp_fastopen.c --- linux-5.4/net/ipv4/tcp_fastopen.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/ipv4/tcp_fastopen.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/ipv4/tcp_fastopen.c 2020-06-08 19:41:07.000000000 +0200 
@@ -9,6 +9,7 @@ #include #include @@ -4067,7 +4085,7 @@ diff -aurN linux-5.4/net/ipv4/tcp_fastopen.c mptcp-mptcp_trunk/net/ipv4/tcp_fast */ diff -aurN linux-5.4/net/ipv4/tcp_input.c mptcp-mptcp_trunk/net/ipv4/tcp_input.c --- linux-5.4/net/ipv4/tcp_input.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/ipv4/tcp_input.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/ipv4/tcp_input.c 2020-06-08 19:41:07.000000000 +0200 @@ -76,35 +76,15 @@ #include #include @@ -4466,13 +4484,12 @@ diff -aurN linux-5.4/net/ipv4/tcp_input.c mptcp-mptcp_trunk/net/ipv4/tcp_input.c { int eaten; struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue); -@@ -4746,8 +4811,8 @@ +@@ -4746,7 +4811,7 @@ const struct tcp_sock *tp = tcp_sk(sk); int avail = tp->rcv_nxt - tp->copied_seq; -- if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) && -+ if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) && !mptcp(tp) && - !sock_flag(sk, SOCK_DONE)) +- if (avail < sk->sk_rcvlowat && !sock_flag(sk, SOCK_DONE)) ++ if (avail < sk->sk_rcvlowat && !sock_flag(sk, SOCK_DONE) && !mptcp(tp)) return; sk->sk_data_ready(sk); @@ -4952,7 +4969,7 @@ diff -aurN linux-5.4/net/ipv4/tcp_input.c mptcp-mptcp_trunk/net/ipv4/tcp_input.c tcp_rsk(req)->tfo_listener = false; diff -aurN linux-5.4/net/ipv4/tcp_ipv4.c mptcp-mptcp_trunk/net/ipv4/tcp_ipv4.c --- linux-5.4/net/ipv4/tcp_ipv4.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/ipv4/tcp_ipv4.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/ipv4/tcp_ipv4.c 2020-06-08 19:41:07.000000000 +0200 @@ -62,6 +62,8 @@ #include #include @@ -5417,7 +5434,7 @@ diff -aurN linux-5.4/net/ipv4/tcp_ipv4.c mptcp-mptcp_trunk/net/ipv4/tcp_ipv4.c diff -aurN linux-5.4/net/ipv4/tcp_minisocks.c mptcp-mptcp_trunk/net/ipv4/tcp_minisocks.c --- linux-5.4/net/ipv4/tcp_minisocks.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/ipv4/tcp_minisocks.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/ipv4/tcp_minisocks.c 2020-06-08 19:41:07.000000000 +0200 @@ -19,11 +19,13 @@ * Jorge Cwik, */ @@ -5659,7 +5676,7 @@ diff -aurN linux-5.4/net/ipv4/tcp_minisocks.c mptcp-mptcp_trunk/net/ipv4/tcp_min } diff -aurN linux-5.4/net/ipv4/tcp_output.c mptcp-mptcp_trunk/net/ipv4/tcp_output.c --- linux-5.4/net/ipv4/tcp_output.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/ipv4/tcp_output.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/ipv4/tcp_output.c 2020-06-08 19:41:07.000000000 +0200 @@ -37,6 +37,12 @@ #define pr_fmt(fmt) "TCP: " fmt @@ -5796,7 +5813,7 @@ diff -aurN linux-5.4/net/ipv4/tcp_output.c mptcp-mptcp_trunk/net/ipv4/tcp_output return MAX_TCP_OPTION_SPACE - remaining; } -@@ -747,17 +753,22 @@ +@@ -747,16 +753,22 @@ opts->tsecr = tp->rx_opt.ts_recent; size += TCPOLEN_TSTAMP_ALIGNED; } @@ -5810,9 +5827,8 @@ diff -aurN linux-5.4/net/ipv4/tcp_output.c mptcp-mptcp_trunk/net/ipv4/tcp_output - min_t(unsigned int, eff_sacks, - (remaining - TCPOLEN_SACK_BASE_ALIGNED) / - TCPOLEN_SACK_PERBLOCK); -- if (likely(opts->num_sack_blocks)) -- size += TCPOLEN_SACK_BASE_ALIGNED + -- opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; +- size += TCPOLEN_SACK_BASE_ALIGNED + +- opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; + const unsigned remaining = MAX_TCP_OPTION_SPACE - size; + if (remaining < TCPOLEN_SACK_BASE_ALIGNED) + opts->num_sack_blocks = 0; @@ -5821,7 +5837,7 @@ diff -aurN linux-5.4/net/ipv4/tcp_output.c mptcp-mptcp_trunk/net/ipv4/tcp_output + min_t(unsigned int, eff_sacks, + (remaining - TCPOLEN_SACK_BASE_ALIGNED) / + 
TCPOLEN_SACK_PERBLOCK); -+ if (likely(opts->num_sack_blocks)) ++ if (opts->num_sack_blocks) + size += TCPOLEN_SACK_BASE_ALIGNED + + opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; } @@ -6251,7 +6267,7 @@ diff -aurN linux-5.4/net/ipv4/tcp_output.c mptcp-mptcp_trunk/net/ipv4/tcp_output /* Cancel probe timer, if it is not required. */ diff -aurN linux-5.4/net/ipv4/tcp_timer.c mptcp-mptcp_trunk/net/ipv4/tcp_timer.c --- linux-5.4/net/ipv4/tcp_timer.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/ipv4/tcp_timer.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/ipv4/tcp_timer.c 2020-06-08 19:41:07.000000000 +0200 @@ -21,6 +21,7 @@ #include @@ -6465,7 +6481,7 @@ diff -aurN linux-5.4/net/ipv4/tcp_timer.c mptcp-mptcp_trunk/net/ipv4/tcp_timer.c diff -aurN linux-5.4/net/ipv6/addrconf.c mptcp-mptcp_trunk/net/ipv6/addrconf.c --- linux-5.4/net/ipv6/addrconf.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/ipv6/addrconf.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/ipv6/addrconf.c 2020-06-08 19:41:07.000000000 +0200 @@ -967,6 +967,7 @@ kfree_rcu(ifp, rcu); @@ -6476,7 +6492,7 @@ diff -aurN linux-5.4/net/ipv6/addrconf.c mptcp-mptcp_trunk/net/ipv6/addrconf.c ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) diff -aurN linux-5.4/net/ipv6/af_inet6.c mptcp-mptcp_trunk/net/ipv6/af_inet6.c --- linux-5.4/net/ipv6/af_inet6.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/ipv6/af_inet6.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/ipv6/af_inet6.c 2020-06-08 19:41:07.000000000 +0200 @@ -104,8 +104,7 @@ return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } @@ -6489,7 +6505,7 @@ diff -aurN linux-5.4/net/ipv6/af_inet6.c mptcp-mptcp_trunk/net/ipv6/af_inet6.c struct ipv6_pinfo *np; diff -aurN linux-5.4/net/ipv6/ipv6_sockglue.c mptcp-mptcp_trunk/net/ipv6/ipv6_sockglue.c --- linux-5.4/net/ipv6/ipv6_sockglue.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/ipv6/ipv6_sockglue.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/ipv6/ipv6_sockglue.c 2020-06-08 19:41:07.000000000 +0200 @@ -44,6 +44,8 @@ #include #include @@ -6515,7 +6531,7 @@ diff -aurN linux-5.4/net/ipv6/ipv6_sockglue.c mptcp-mptcp_trunk/net/ipv6/ipv6_so tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); diff -aurN linux-5.4/net/ipv6/syncookies.c mptcp-mptcp_trunk/net/ipv6/syncookies.c --- linux-5.4/net/ipv6/syncookies.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/ipv6/syncookies.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/ipv6/syncookies.c 2020-06-08 19:41:07.000000000 +0200 @@ -15,6 +15,8 @@ #include #include @@ -6605,7 +6621,7 @@ diff -aurN linux-5.4/net/ipv6/syncookies.c mptcp-mptcp_trunk/net/ipv6/syncookies out_free: diff -aurN linux-5.4/net/ipv6/tcp_ipv6.c mptcp-mptcp_trunk/net/ipv6/tcp_ipv6.c --- linux-5.4/net/ipv6/tcp_ipv6.c 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/ipv6/tcp_ipv6.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/ipv6/tcp_ipv6.c 2020-06-08 19:41:07.000000000 +0200 @@ -58,6 +58,8 @@ #include #include @@ -7199,7 +7215,7 @@ diff -aurN linux-5.4/net/ipv6/tcp_ipv6.c mptcp-mptcp_trunk/net/ipv6/tcp_ipv6.c /* thinking of making this const? Don't. 
diff -aurN linux-5.4/net/Kconfig mptcp-mptcp_trunk/net/Kconfig --- linux-5.4/net/Kconfig 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/Kconfig 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/Kconfig 2020-06-08 19:41:07.000000000 +0200 @@ -91,6 +91,7 @@ source "net/ipv4/Kconfig" source "net/ipv6/Kconfig" @@ -7210,7 +7226,7 @@ diff -aurN linux-5.4/net/Kconfig mptcp-mptcp_trunk/net/Kconfig diff -aurN linux-5.4/net/Makefile mptcp-mptcp_trunk/net/Makefile --- linux-5.4/net/Makefile 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/net/Makefile 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/Makefile 2020-06-08 19:41:07.000000000 +0200 @@ -20,6 +20,7 @@ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX_SCM) += unix/ @@ -7221,8 +7237,8 @@ diff -aurN linux-5.4/net/Makefile mptcp-mptcp_trunk/net/Makefile obj-$(CONFIG_NET_KEY) += key/ diff -aurN linux-5.4/net/mptcp/Kconfig mptcp-mptcp_trunk/net/mptcp/Kconfig --- linux-5.4/net/mptcp/Kconfig 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/Kconfig 2020-05-16 10:39:52.000000000 +0200 -@@ -0,0 +1,146 @@ ++++ mptcp-mptcp_trunk/net/mptcp/Kconfig 2020-06-08 19:41:07.000000000 +0200 +@@ -0,0 +1,152 @@ +# +# MPTCP configuration +# @@ -7334,6 +7350,12 @@ diff -aurN linux-5.4/net/mptcp/Kconfig mptcp-mptcp_trunk/net/mptcp/Kconfig + This scheduler sends all packets redundantly over all subflows to decreases + latency and jitter on the cost of lower throughput. + ++config MPTCP_ECF ++ tristate "MPTCP ECF" ++ depends on (MPTCP=y) ++ ---help--- ++ This is an experimental Earliest Completion First (ECF) scheduler. ++ +choice + prompt "Default MPTCP Scheduler" + default DEFAULT_SCHEDULER @@ -7371,8 +7393,8 @@ diff -aurN linux-5.4/net/mptcp/Kconfig mptcp-mptcp_trunk/net/mptcp/Kconfig + diff -aurN linux-5.4/net/mptcp/Makefile mptcp-mptcp_trunk/net/mptcp/Makefile --- linux-5.4/net/mptcp/Makefile 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/Makefile 2020-05-16 10:39:52.000000000 +0200 -@@ -0,0 +1,24 @@ ++++ mptcp-mptcp_trunk/net/mptcp/Makefile 2020-06-08 19:41:07.000000000 +0200 +@@ -0,0 +1,25 @@ +# +## Makefile for MultiPath TCP support code. 
+# @@ -7395,11 +7417,12 @@ diff -aurN linux-5.4/net/mptcp/Makefile mptcp-mptcp_trunk/net/mptcp/Makefile +obj-$(CONFIG_MPTCP_ROUNDROBIN) += mptcp_rr.o +obj-$(CONFIG_MPTCP_REDUNDANT) += mptcp_redundant.o +obj-$(CONFIG_MPTCP_BLEST) += mptcp_blest.o ++obj-$(CONFIG_MPTCP_ECF) += mptcp_ecf.o + +mptcp-$(subst m,y,$(CONFIG_IPV6)) += mptcp_ipv6.o diff -aurN linux-5.4/net/mptcp/mctcp_desync.c mptcp-mptcp_trunk/net/mptcp/mctcp_desync.c --- linux-5.4/net/mptcp/mctcp_desync.c 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/mctcp_desync.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/mptcp/mctcp_desync.c 2020-06-08 19:41:07.000000000 +0200 @@ -0,0 +1,193 @@ +/* + * Desynchronized Multi-Channel TCP Congestion Control Algorithm @@ -7596,7 +7619,7 @@ diff -aurN linux-5.4/net/mptcp/mctcp_desync.c mptcp-mptcp_trunk/net/mptcp/mctcp_ +MODULE_VERSION("1.0"); diff -aurN linux-5.4/net/mptcp/mptcp_balia.c mptcp-mptcp_trunk/net/mptcp/mptcp_balia.c --- linux-5.4/net/mptcp/mptcp_balia.c 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/mptcp_balia.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/mptcp/mptcp_balia.c 2020-06-08 19:41:07.000000000 +0200 @@ -0,0 +1,261 @@ +/* + * MPTCP implementation - Balia Congestion Control @@ -7861,7 +7884,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_balia.c mptcp-mptcp_trunk/net/mptcp/mptcp_b +MODULE_VERSION("0.1"); diff -aurN linux-5.4/net/mptcp/mptcp_binder.c mptcp-mptcp_trunk/net/mptcp/mptcp_binder.c --- linux-5.4/net/mptcp/mptcp_binder.c 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/mptcp_binder.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/mptcp/mptcp_binder.c 2020-06-08 19:41:07.000000000 +0200 @@ -0,0 +1,494 @@ +#include + @@ -8359,8 +8382,8 @@ diff -aurN linux-5.4/net/mptcp/mptcp_binder.c mptcp-mptcp_trunk/net/mptcp/mptcp_ +MODULE_VERSION("0.1"); diff -aurN linux-5.4/net/mptcp/mptcp_blest.c mptcp-mptcp_trunk/net/mptcp/mptcp_blest.c --- linux-5.4/net/mptcp/mptcp_blest.c 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/mptcp_blest.c 2020-05-16 10:39:52.000000000 +0200 -@@ -0,0 +1,481 @@ ++++ mptcp-mptcp_trunk/net/mptcp/mptcp_blest.c 2020-06-08 19:41:07.000000000 +0200 +@@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0 +/* MPTCP Scheduler to reduce HoL-blocking and spurious retransmissions. 
+ * @@ -8384,7 +8407,6 @@ diff -aurN linux-5.4/net/mptcp/mptcp_blest.c mptcp-mptcp_trunk/net/mptcp/mptcp_b + +#include +#include -+#include + +static unsigned char lambda __read_mostly = 12; +module_param(lambda, byte, 0644); @@ -8413,7 +8435,6 @@ diff -aurN linux-5.4/net/mptcp/mptcp_blest.c mptcp-mptcp_trunk/net/mptcp/mptcp_b +}; + +struct blestsched_cb { -+ bool retrans_flag; + s16 lambda_1000; /* values range from min_lambda * 100 to max_lambda * 100 */ + u32 last_lambda_update; +}; @@ -8440,14 +8461,13 @@ diff -aurN linux-5.4/net/mptcp/mptcp_blest.c mptcp-mptcp_trunk/net/mptcp/mptcp_b + * during the slow flows last RTT => increase lambda + * otherwise decrease + */ -+ if (blest_cb->retrans_flag) { ++ if (tcp_sk(meta_sk)->retrans_stamp) { + /* need to slow down on the slow flow */ + blest_cb->lambda_1000 += dyn_lambda_bad; + } else { + /* use the slow flow more */ + blest_cb->lambda_1000 -= dyn_lambda_good; + } -+ blest_cb->retrans_flag = false; + + /* cap lambda_1000 to its value range */ + blest_cb->lambda_1000 = min_t(s16, blest_cb->lambda_1000, max_lambda * 100); @@ -8603,199 +8623,6 @@ diff -aurN linux-5.4/net/mptcp/mptcp_blest.c mptcp-mptcp_trunk/net/mptcp/mptcp_b + return bestsk; +} + -+/* copy from mptcp_sched.c: mptcp_rcv_buf_optimization */ -+static struct sk_buff *mptcp_blest_rcv_buf_optimization(struct sock *sk, int penal) -+{ -+ struct sock *meta_sk; -+ const struct tcp_sock *tp = tcp_sk(sk); -+ struct mptcp_tcp_sock *mptcp; -+ struct sk_buff *skb_head; -+ struct blestsched_priv *blest_p = blestsched_get_priv(tp); -+ struct blestsched_cb *blest_cb; -+ -+ meta_sk = mptcp_meta_sk(sk); -+ skb_head = tcp_rtx_queue_head(meta_sk); -+ -+ if (!skb_head) -+ return NULL; -+ -+ /* If penalization is optional (coming from mptcp_next_segment() and -+ * We are not send-buffer-limited we do not penalize. The retransmission -+ * is just an optimization to fix the idle-time due to the delay before -+ * we wake up the application. -+ */ -+ if (!penal && sk_stream_memory_free(meta_sk)) -+ goto retrans; -+ -+ /* Record the occurrence of a retransmission to update the lambda value */ -+ blest_cb = blestsched_get_cb(tcp_sk(meta_sk)); -+ blest_cb->retrans_flag = true; -+ -+ /* Only penalize again after an RTT has elapsed */ -+ if (tcp_jiffies32 - blest_p->last_rbuf_opti < usecs_to_jiffies(tp->srtt_us >> 3)) -+ goto retrans; -+ -+ /* Half the cwnd of the slow flows */ -+ mptcp_for_each_sub(tp->mpcb, mptcp) { -+ struct tcp_sock *tp_it = mptcp->tp; -+ -+ if (tp_it != tp && -+ TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) { -+ if (tp->srtt_us < tp_it->srtt_us && inet_csk((struct sock *)tp_it)->icsk_ca_state == TCP_CA_Open) { -+ u32 prior_cwnd = tp_it->snd_cwnd; -+ -+ tp_it->snd_cwnd = max(tp_it->snd_cwnd >> 1U, 1U); -+ -+ /* If in slow start, do not reduce the ssthresh */ -+ if (prior_cwnd >= tp_it->snd_ssthresh) -+ tp_it->snd_ssthresh = max(tp_it->snd_ssthresh >> 1U, 2U); -+ -+ blest_p->last_rbuf_opti = tcp_jiffies32; -+ } -+ } -+ } -+ -+retrans: -+ -+ /* Segment not yet injected into this path? Take it!!! 
*/ -+ if (!(TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index))) { -+ bool do_retrans = false; -+ mptcp_for_each_sub(tp->mpcb, mptcp) { -+ struct tcp_sock *tp_it = mptcp->tp; -+ -+ if (tp_it != tp && -+ TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) { -+ if (tp_it->snd_cwnd <= 4) { -+ do_retrans = true; -+ break; -+ } -+ -+ if (4 * tp->srtt_us >= tp_it->srtt_us) { -+ do_retrans = false; -+ break; -+ } else { -+ do_retrans = true; -+ } -+ } -+ } -+ -+ if (do_retrans && mptcp_is_available(sk, skb_head, false)) { -+ trace_mptcp_retransmit(sk, skb_head); -+ return skb_head; -+ } -+ } -+ return NULL; -+} -+ -+/* copy from mptcp_sched.c: __mptcp_next_segment */ -+/* Returns the next segment to be sent from the mptcp meta-queue. -+ * (chooses the reinject queue if any segment is waiting in it, otherwise, -+ * chooses the normal write queue). -+ * Sets *@reinject to 1 if the returned segment comes from the -+ * reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk, -+ * and sets it to -1 if it is a meta-level retransmission to optimize the -+ * receive-buffer. -+ */ -+static struct sk_buff *__mptcp_blest_next_segment(struct sock *meta_sk, int *reinject) -+{ -+ const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct sk_buff *skb = NULL; -+ -+ *reinject = 0; -+ -+ /* If we are in fallback-mode, just take from the meta-send-queue */ -+ if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping) -+ return tcp_send_head(meta_sk); -+ -+ skb = skb_peek(&mpcb->reinject_queue); -+ -+ if (skb) { -+ *reinject = 1; -+ } else { -+ skb = tcp_send_head(meta_sk); -+ -+ if (!skb && meta_sk->sk_socket && -+ test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) && -+ sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) { -+ struct sock *subsk = blest_get_available_subflow(meta_sk, NULL, -+ false); -+ if (!subsk) -+ return NULL; -+ -+ skb = mptcp_blest_rcv_buf_optimization(subsk, 0); -+ if (skb) -+ *reinject = -1; -+ } -+ } -+ return skb; -+} -+ -+/* copy from mptcp_sched.c: mptcp_next_segment */ -+static struct sk_buff *mptcp_blest_next_segment(struct sock *meta_sk, -+ int *reinject, -+ struct sock **subsk, -+ unsigned int *limit) -+{ -+ struct sk_buff *skb = __mptcp_blest_next_segment(meta_sk, reinject); -+ unsigned int mss_now; -+ struct tcp_sock *subtp; -+ u16 gso_max_segs; -+ u32 max_len, max_segs, window, needed; -+ -+ /* As we set it, we have to reset it as well. */ -+ *limit = 0; -+ -+ if (!skb) -+ return NULL; -+ -+ *subsk = blest_get_available_subflow(meta_sk, skb, false); -+ if (!*subsk) -+ return NULL; -+ -+ subtp = tcp_sk(*subsk); -+ mss_now = tcp_current_mss(*subsk); -+ -+ if (!*reinject && unlikely(!tcp_snd_wnd_test(tcp_sk(meta_sk), skb, mss_now))) { -+ skb = mptcp_blest_rcv_buf_optimization(*subsk, 1); -+ if (skb) -+ *reinject = -1; -+ else -+ return NULL; -+ } -+ -+ /* No splitting required, as we will only send one single segment */ -+ if (skb->len <= mss_now) -+ return skb; -+ -+ /* The following is similar to tcp_mss_split_point, but -+ * we do not care about nagle, because we will anyways -+ * use TCP_NAGLE_PUSH, which overrides this. -+ * -+ * So, we first limit according to the cwnd/gso-size and then according -+ * to the subflow's window. 
-+ */ -+ -+ gso_max_segs = (*subsk)->sk_gso_max_segs; -+ if (!gso_max_segs) /* No gso supported on the subflow's NIC */ -+ gso_max_segs = 1; -+ max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs); -+ if (!max_segs) -+ return NULL; -+ -+ max_len = mss_now * max_segs; -+ window = tcp_wnd_end(subtp) - subtp->write_seq; -+ -+ needed = min(skb->len, window); -+ if (max_len <= skb->len) -+ /* Take max_win, which is actually the cwnd/gso-size */ -+ *limit = max_len; -+ else -+ /* Or, take the window */ -+ *limit = needed; -+ -+ return skb; -+} -+ +static void blestsched_init(struct sock *sk) +{ + struct blestsched_priv *blest_p = blestsched_get_priv(tcp_sk(sk)); @@ -8813,7 +8640,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_blest.c mptcp-mptcp_trunk/net/mptcp/mptcp_b + +static struct mptcp_sched_ops mptcp_sched_blest = { + .get_subflow = blest_get_available_subflow, -+ .next_segment = mptcp_blest_next_segment, ++ .next_segment = mptcp_next_segment, + .init = blestsched_init, + .name = "blest", + .owner = THIS_MODULE, @@ -8844,7 +8671,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_blest.c mptcp-mptcp_trunk/net/mptcp/mptcp_b +MODULE_VERSION("0.95"); diff -aurN linux-5.4/net/mptcp/mptcp_coupled.c mptcp-mptcp_trunk/net/mptcp/mptcp_coupled.c --- linux-5.4/net/mptcp/mptcp_coupled.c 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/mptcp_coupled.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/mptcp/mptcp_coupled.c 2020-06-08 19:41:07.000000000 +0200 @@ -0,0 +1,262 @@ +/* + * MPTCP implementation - Linked Increase congestion control Algorithm (LIA) @@ -9110,8 +8937,8 @@ diff -aurN linux-5.4/net/mptcp/mptcp_coupled.c mptcp-mptcp_trunk/net/mptcp/mptcp +MODULE_VERSION("0.1"); diff -aurN linux-5.4/net/mptcp/mptcp_ctrl.c mptcp-mptcp_trunk/net/mptcp/mptcp_ctrl.c --- linux-5.4/net/mptcp/mptcp_ctrl.c 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/mptcp_ctrl.c 2020-05-16 10:39:52.000000000 +0200 -@@ -0,0 +1,3254 @@ ++++ mptcp-mptcp_trunk/net/mptcp/mptcp_ctrl.c 2020-06-08 19:41:07.000000000 +0200 +@@ -0,0 +1,3250 @@ +/* + * MPTCP implementation - MPTCP-control + * @@ -9974,11 +9801,10 @@ diff -aurN linux-5.4/net/mptcp/mptcp_ctrl.c mptcp-mptcp_trunk/net/mptcp/mptcp_ct + *idsn = ntohll(*((__be64 *)&mptcp_hashed_key[6])); +} + -+static void mptcp_hmac_sha256(const u8 *key_1, const u8 *key_2, u32 *hash_out, ++static void mptcp_hmac_sha256(const u8 *key_1, const u8 *key_2, u8 *hash_out, + int arg_num, va_list list) +{ + u8 input[SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE]; -+ __be32 output[SHA256_DIGEST_WORDS]; + struct sha256_state state; + int index, msg_length; + int length = 0; @@ -10016,10 +9842,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_ctrl.c mptcp-mptcp_trunk/net/mptcp/mptcp_ct + + sha256_init(&state); + sha256_update(&state, input, sizeof(input)); -+ sha256_final(&state, (u8 *)output); -+ -+ for (i = 0; i < 5; i++) -+ hash_out[i] = output[i]; ++ sha256_final(&state, hash_out); +} + +static void mptcp_key_sha1(u64 key, u32 *token, u64 *idsn) @@ -10129,14 +9952,14 @@ diff -aurN linux-5.4/net/mptcp/mptcp_ctrl.c mptcp-mptcp_trunk/net/mptcp/mptcp_ct + hash_out[i] = (__force u32)cpu_to_be32(hash_out[i]); +} + -+void mptcp_hmac(u8 ver, const u8 *key_1, const u8 *key_2, u32 *hash_out, ++void mptcp_hmac(u8 ver, const u8 *key_1, const u8 *key_2, u8 *hash_out, + int arg_num, ...) 
+{ + va_list args; + + va_start(args, arg_num); + if (ver == MPTCP_VERSION_0) -+ mptcp_hmac_sha1(key_1, key_2, hash_out, arg_num, args); ++ mptcp_hmac_sha1(key_1, key_2, (u32 *)hash_out, arg_num, args); + else if (ver >= MPTCP_VERSION_1) + mptcp_hmac_sha256(key_1, key_2, hash_out, arg_num, args); + va_end(args); @@ -11500,7 +11323,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_ctrl.c mptcp-mptcp_trunk/net/mptcp/mptcp_ct + const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; + struct mptcp_request_sock *mtreq = mptcp_rsk(req); + struct tcp_sock *child_tp = tcp_sk(child); -+ u8 hash_mac_check[20]; ++ u8 hash_mac_check[SHA256_DIGEST_SIZE]; + + if (!mopt->join_ack) { + MPTCP_INC_STATS(sock_net(meta_sk), MPTCP_MIB_JOINACKFAIL); @@ -11508,7 +11331,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_ctrl.c mptcp-mptcp_trunk/net/mptcp/mptcp_ct + } + + mptcp_hmac(mpcb->mptcp_ver, (u8 *)&mpcb->mptcp_rem_key, -+ (u8 *)&mpcb->mptcp_loc_key, (u32 *)hash_mac_check, 2, ++ (u8 *)&mpcb->mptcp_loc_key, hash_mac_check, 2, + 4, (u8 *)&mtreq->mptcp_rem_nonce, + 4, (u8 *)&mtreq->mptcp_loc_nonce); + @@ -11761,8 +11584,8 @@ diff -aurN linux-5.4/net/mptcp/mptcp_ctrl.c mptcp-mptcp_trunk/net/mptcp/mptcp_ct + struct sk_buff *skb) +{ + struct mptcp_request_sock *mtreq = mptcp_rsk(req); ++ u8 mptcp_hash_mac[SHA256_DIGEST_SIZE]; + struct mptcp_options_received mopt; -+ u8 mptcp_hash_mac[20]; + + mptcp_init_mp_opt(&mopt); + tcp_parse_mptcp_options(skb, &mopt); @@ -11773,7 +11596,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_ctrl.c mptcp-mptcp_trunk/net/mptcp/mptcp_ct + mtreq->mptcp_rem_nonce = mopt.mptcp_recv_nonce; + + mptcp_hmac(mpcb->mptcp_ver, (u8 *)&mpcb->mptcp_loc_key, -+ (u8 *)&mpcb->mptcp_rem_key, (u32 *)mptcp_hash_mac, 2, ++ (u8 *)&mpcb->mptcp_rem_key, mptcp_hash_mac, 2, + 4, (u8 *)&mtreq->mptcp_loc_nonce, + 4, (u8 *)&mtreq->mptcp_rem_nonce); + mtreq->mptcp_hash_tmac = *(u64 *)mptcp_hash_mac; @@ -12366,10 +12189,209 @@ diff -aurN linux-5.4/net/mptcp/mptcp_ctrl.c mptcp-mptcp_trunk/net/mptcp/mptcp_ct +mptcp_sock_cache_failed: + mptcp_init_failed = true; +} +diff -aurN linux-5.4/net/mptcp/mptcp_ecf.c mptcp-mptcp_trunk/net/mptcp/mptcp_ecf.c +--- linux-5.4/net/mptcp/mptcp_ecf.c 1970-01-01 01:00:00.000000000 +0100 ++++ mptcp-mptcp_trunk/net/mptcp/mptcp_ecf.c 2020-06-08 19:41:07.000000000 +0200 +@@ -0,0 +1,195 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* MPTCP ECF Scheduler ++ * ++ * Algorithm Design: ++ * Yeon-sup Lim ++ * Don Towsley ++ * Erich M. Nahum ++ * Richard J. Gibbens ++ * ++ * Initial Implementation: ++ * Yeon-sup Lim ++ * ++ * Additional Authors: ++ * Daniel Weber ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ */ ++ ++#include ++#include ++ ++static unsigned int mptcp_ecf_r_beta __read_mostly = 4; /* beta = 1/r_beta = 0.25 */ ++module_param(mptcp_ecf_r_beta, int, 0644); ++MODULE_PARM_DESC(mptcp_ecf_r_beta, "beta for ECF"); ++ ++struct ecfsched_priv { ++ u32 last_rbuf_opti; ++}; ++ ++struct ecfsched_cb { ++ u32 switching_margin; /* this is "waiting" in algorithm description */ ++}; ++ ++static struct ecfsched_priv *ecfsched_get_priv(const struct tcp_sock *tp) ++{ ++ return (struct ecfsched_priv *)&tp->mptcp->mptcp_sched[0]; ++} ++ ++static struct ecfsched_cb *ecfsched_get_cb(const struct tcp_sock *tp) ++{ ++ return (struct ecfsched_cb *)&tp->mpcb->mptcp_sched[0]; ++} ++ ++/* This is the ECF scheduler. 
This function decides on which flow to send ++ * a given MSS. If all subflows are found to be busy or the currently best ++ * subflow is estimated to be slower than waiting for minsk, NULL is returned. ++ */ ++static struct sock *ecf_get_available_subflow(struct sock *meta_sk, ++ struct sk_buff *skb, ++ bool zero_wnd_test) ++{ ++ struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; ++ struct sock *bestsk, *minsk = NULL; ++ struct tcp_sock *besttp; ++ struct mptcp_tcp_sock *mptcp; ++ struct ecfsched_cb *ecf_cb = ecfsched_get_cb(tcp_sk(meta_sk)); ++ u32 min_srtt = U32_MAX; ++ u32 sub_sndbuf = 0; ++ u32 sub_packets_out = 0; ++ ++ /* Answer data_fin on same subflow!!! */ ++ if (meta_sk->sk_shutdown & RCV_SHUTDOWN && ++ skb && mptcp_is_data_fin(skb)) { ++ mptcp_for_each_sub(mpcb, mptcp) { ++ bestsk = mptcp_to_sock(mptcp); ++ ++ if (tcp_sk(bestsk)->mptcp->path_index == mpcb->dfin_path_index && ++ mptcp_is_available(bestsk, skb, zero_wnd_test)) ++ return bestsk; ++ } ++ } ++ ++ /* First, find the overall best (fastest) subflow */ ++ mptcp_for_each_sub(mpcb, mptcp) { ++ bestsk = mptcp_to_sock(mptcp); ++ besttp = tcp_sk(bestsk); ++ ++ /* Set of states for which we are allowed to send data */ ++ if (!mptcp_sk_can_send(bestsk)) ++ continue; ++ ++ /* We do not send data on this subflow unless it is ++ * fully established, i.e. the 4th ack has been received. ++ */ ++ if (besttp->mptcp->pre_established) ++ continue; ++ ++ sub_sndbuf += bestsk->sk_wmem_queued; ++ sub_packets_out += besttp->packets_out; ++ ++ /* record minimal rtt */ ++ if (besttp->srtt_us < min_srtt) { ++ min_srtt = besttp->srtt_us; ++ minsk = bestsk; ++ } ++ } ++ ++ /* find the current best subflow according to the default scheduler */ ++ bestsk = get_available_subflow(meta_sk, skb, zero_wnd_test); ++ ++ /* if we decided to use a slower flow, we have the option of not using it at all */ ++ if (bestsk && minsk && bestsk != minsk) { ++ u32 mss = tcp_current_mss(bestsk); /* assuming equal MSS */ ++ u32 sndbuf_meta = meta_sk->sk_wmem_queued; ++ u32 sndbuf_minus = sub_sndbuf; ++ u32 sndbuf = 0; ++ ++ u32 cwnd_f = tcp_sk(minsk)->snd_cwnd; ++ u32 srtt_f = tcp_sk(minsk)->srtt_us >> 3; ++ u32 rttvar_f = tcp_sk(minsk)->rttvar_us >> 1; ++ ++ u32 cwnd_s = tcp_sk(bestsk)->snd_cwnd; ++ u32 srtt_s = tcp_sk(bestsk)->srtt_us >> 3; ++ u32 rttvar_s = tcp_sk(bestsk)->rttvar_us >> 1; ++ ++ u32 delta = max(rttvar_f, rttvar_s); ++ ++ u32 x_f; ++ u64 lhs, rhs; /* to avoid overflow, using u64 */ ++ ++ if (tcp_sk(meta_sk)->packets_out > sub_packets_out) ++ sndbuf_minus += (tcp_sk(meta_sk)->packets_out - sub_packets_out) * mss; ++ ++ if (sndbuf_meta > sndbuf_minus) ++ sndbuf = sndbuf_meta - sndbuf_minus; ++ ++ /* we have something to send. ++ * at least one time tx over fastest subflow is required ++ */ ++ x_f = sndbuf > cwnd_f * mss ? sndbuf : cwnd_f * mss; ++ lhs = srtt_f * (x_f + cwnd_f * mss); ++ rhs = cwnd_f * mss * (srtt_s + delta); ++ ++ if (mptcp_ecf_r_beta * lhs < mptcp_ecf_r_beta * rhs + ecf_cb->switching_margin * rhs) { ++ u32 x_s = sndbuf > cwnd_s * mss ? 
sndbuf : cwnd_s * mss; ++ u64 lhs_s = srtt_s * x_s; ++ u64 rhs_s = cwnd_s * mss * (2 * srtt_f + delta); ++ ++ if (lhs_s >= rhs_s) { ++ /* too slower than fastest */ ++ ecf_cb->switching_margin = 1; ++ return NULL; ++ } ++ } else { ++ /* use slower one */ ++ ecf_cb->switching_margin = 0; ++ } ++ } ++ ++ return bestsk; ++} ++ ++static void ecfsched_init(struct sock *sk) ++{ ++ struct ecfsched_priv *ecf_p = ecfsched_get_priv(tcp_sk(sk)); ++ struct ecfsched_cb *ecf_cb = ecfsched_get_cb(tcp_sk(mptcp_meta_sk(sk))); ++ ++ ecf_p->last_rbuf_opti = tcp_jiffies32; ++ ecf_cb->switching_margin = 0; ++} ++ ++struct mptcp_sched_ops mptcp_sched_ecf = { ++ .get_subflow = ecf_get_available_subflow, ++ .next_segment = mptcp_next_segment, ++ .init = ecfsched_init, ++ .name = "ecf", ++ .owner = THIS_MODULE, ++}; ++ ++static int __init ecf_register(void) ++{ ++ BUILD_BUG_ON(sizeof(struct ecfsched_priv) > MPTCP_SCHED_SIZE); ++ BUILD_BUG_ON(sizeof(struct ecfsched_cb) > MPTCP_SCHED_DATA_SIZE); ++ ++ if (mptcp_register_scheduler(&mptcp_sched_ecf)) ++ return -1; ++ ++ return 0; ++} ++ ++static void ecf_unregister(void) ++{ ++ mptcp_unregister_scheduler(&mptcp_sched_ecf); ++} ++ ++module_init(ecf_register); ++module_exit(ecf_unregister); ++ ++MODULE_AUTHOR("Yeon-sup Lim, Daniel Weber"); ++MODULE_LICENSE("GPL"); ++MODULE_DESCRIPTION("ECF (Earliest Completion First) scheduler for MPTCP, based on default minimum RTT scheduler"); ++MODULE_VERSION("0.95"); diff -aurN linux-5.4/net/mptcp/mptcp_fullmesh.c mptcp-mptcp_trunk/net/mptcp/mptcp_fullmesh.c --- linux-5.4/net/mptcp/mptcp_fullmesh.c 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/mptcp_fullmesh.c 2020-05-16 10:39:52.000000000 +0200 -@@ -0,0 +1,1941 @@ ++++ mptcp-mptcp_trunk/net/mptcp/mptcp_fullmesh.c 2020-06-08 19:41:07.000000000 +0200 +@@ -0,0 +1,1937 @@ +#include +#include + @@ -13964,15 +13986,13 @@ diff -aurN linux-5.4/net/mptcp/mptcp_fullmesh.c mptcp-mptcp_trunk/net/mptcp/mptc + opts->add_addr4.addr = mptcp_local->locaddr4[ind].addr; + opts->add_addr_v4 = 1; + if (mpcb->mptcp_ver >= MPTCP_VERSION_1) { -+ u8 mptcp_hash_mac[20]; -+ u8 no_key[8]; ++ u8 mptcp_hash_mac[SHA256_DIGEST_SIZE]; + -+ *(u64 *)no_key = 0; + mptcp_hmac(mpcb->mptcp_ver, (u8 *)&mpcb->mptcp_loc_key, -+ (u8 *)no_key, (u32 *)mptcp_hash_mac, 2, ++ (u8 *)&mpcb->mptcp_rem_key, mptcp_hash_mac, 2, + 1, (u8 *)&mptcp_local->locaddr4[ind].loc4_id, + 4, (u8 *)&opts->add_addr4.addr.s_addr); -+ opts->add_addr4.trunc_mac = *(u64 *)mptcp_hash_mac; ++ opts->add_addr4.trunc_mac = *(u64 *)&mptcp_hash_mac[SHA256_DIGEST_SIZE - sizeof(u64)]; + } + + if (skb) { @@ -14006,15 +14026,13 @@ diff -aurN linux-5.4/net/mptcp/mptcp_fullmesh.c mptcp-mptcp_trunk/net/mptcp/mptc + opts->add_addr6.addr = mptcp_local->locaddr6[ind].addr; + opts->add_addr_v6 = 1; + if (mpcb->mptcp_ver >= MPTCP_VERSION_1) { -+ u8 mptcp_hash_mac[20]; -+ u8 no_key[8]; ++ u8 mptcp_hash_mac[SHA256_DIGEST_SIZE]; + -+ *(u64 *)no_key = 0; + mptcp_hmac(mpcb->mptcp_ver, (u8 *)&mpcb->mptcp_loc_key, -+ (u8 *)no_key, (u32 *)mptcp_hash_mac, 2, ++ (u8 *)&mpcb->mptcp_rem_key, mptcp_hash_mac, 2, + 1, (u8 *)&mptcp_local->locaddr6[ind].loc6_id, + 16, (u8 *)&opts->add_addr6.addr.s6_addr); -+ opts->add_addr6.trunc_mac = *(u64 *)mptcp_hash_mac; ++ opts->add_addr6.trunc_mac = *(u64 *)&mptcp_hash_mac[SHA256_DIGEST_SIZE - sizeof(u64)]; + } + + if (skb) { @@ -14313,8 +14331,8 @@ diff -aurN linux-5.4/net/mptcp/mptcp_fullmesh.c mptcp-mptcp_trunk/net/mptcp/mptc +MODULE_VERSION("0.88"); diff -aurN linux-5.4/net/mptcp/mptcp_input.c 
mptcp-mptcp_trunk/net/mptcp/mptcp_input.c --- linux-5.4/net/mptcp/mptcp_input.c 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/mptcp_input.c 2020-05-16 10:39:52.000000000 +0200 -@@ -0,0 +1,2523 @@ ++++ mptcp-mptcp_trunk/net/mptcp/mptcp_input.c 2020-06-08 19:41:07.000000000 +0200 +@@ -0,0 +1,2529 @@ +/* + * MPTCP implementation - Sending side + * @@ -15931,19 +15949,21 @@ diff -aurN linux-5.4/net/mptcp/mptcp_input.c mptcp-mptcp_trunk/net/mptcp/mptcp_i + int opsize) +{ +#if IS_ENABLED(CONFIG_IPV6) -+ if (mptcp_ver < MPTCP_VERSION_1 && mpadd->ipver == 6) { ++ if (mptcp_ver < MPTCP_VERSION_1 && mpadd->u_bit.v0.ipver == 6) { + return opsize == MPTCP_SUB_LEN_ADD_ADDR6 || + opsize == MPTCP_SUB_LEN_ADD_ADDR6 + 2; + } -+ if (mptcp_ver >= MPTCP_VERSION_1 && mpadd->ipver == 6) ++ if (mptcp_ver >= MPTCP_VERSION_1) + return opsize == MPTCP_SUB_LEN_ADD_ADDR6_VER1 || -+ opsize == MPTCP_SUB_LEN_ADD_ADDR6_VER1 + 2; ++ opsize == MPTCP_SUB_LEN_ADD_ADDR6_VER1 + 2 || ++ opsize == MPTCP_SUB_LEN_ADD_ADDR4_VER1 || ++ opsize == MPTCP_SUB_LEN_ADD_ADDR4_VER1 + 2; +#endif -+ if (mptcp_ver < MPTCP_VERSION_1 && mpadd->ipver == 4) { ++ if (mptcp_ver < MPTCP_VERSION_1 && mpadd->u_bit.v0.ipver == 4) { + return opsize == MPTCP_SUB_LEN_ADD_ADDR4 || + opsize == MPTCP_SUB_LEN_ADD_ADDR4 + 2; + } -+ if (mptcp_ver >= MPTCP_VERSION_1 && mpadd->ipver == 4) { ++ if (mptcp_ver >= MPTCP_VERSION_1) { + return opsize == MPTCP_SUB_LEN_ADD_ADDR4_VER1 || + opsize == MPTCP_SUB_LEN_ADD_ADDR4_VER1 + 2; + } @@ -16289,33 +16309,42 @@ diff -aurN linux-5.4/net/mptcp/mptcp_input.c mptcp-mptcp_trunk/net/mptcp/mptcp_i +{ + struct mp_add_addr *mpadd = (struct mp_add_addr *)ptr; + struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb; -+ __be16 port = 0; + union inet_addr addr; + sa_family_t family; ++ __be16 port = 0; ++ bool is_v4; + -+ if (mpadd->ipver == 4) { ++ if (mpcb->mptcp_ver < MPTCP_VERSION_1) { ++ is_v4 = mpadd->u_bit.v0.ipver == 4; ++ } else { ++ is_v4 = mpadd->len == MPTCP_SUB_LEN_ADD_ADDR4_VER1 || ++ mpadd->len == MPTCP_SUB_LEN_ADD_ADDR4_VER1 + 2; ++ ++ /* TODO: support ADD_ADDRv1 retransmissions */ ++ if (mpadd->u_bit.v1.echo) ++ return; ++ } ++ ++ if (is_v4) { ++ u8 hash_mac_check[SHA256_DIGEST_SIZE]; ++ __be16 hmacport = 0; + char *recv_hmac; -+ u8 hash_mac_check[20]; -+ u8 no_key[8]; -+ int msg_parts = 0; + + if (mpcb->mptcp_ver < MPTCP_VERSION_1) + goto skip_hmac_v4; + -+ *(u64 *)no_key = 0; + recv_hmac = (char *)mpadd->u.v4.mac; + if (mpadd->len == MPTCP_SUB_LEN_ADD_ADDR4_VER1) { + recv_hmac -= sizeof(mpadd->u.v4.port); -+ msg_parts = 2; + } else if (mpadd->len == MPTCP_SUB_LEN_ADD_ADDR4_VER1 + 2) { -+ msg_parts = 3; ++ hmacport = mpadd->u.v4.port; + } + mptcp_hmac(mpcb->mptcp_ver, (u8 *)&mpcb->mptcp_rem_key, -+ (u8 *)no_key, (u32 *)hash_mac_check, msg_parts, ++ (u8 *)&mpcb->mptcp_loc_key, hash_mac_check, 3, + 1, (u8 *)&mpadd->addr_id, + 4, (u8 *)&mpadd->u.v4.addr.s_addr, -+ 2, (u8 *)&mpadd->u.v4.port); -+ if (memcmp(hash_mac_check, recv_hmac, 8) != 0) ++ 2, (u8 *)&hmacport); ++ if (memcmp(&hash_mac_check[SHA256_DIGEST_SIZE - sizeof(u64)], recv_hmac, 8) != 0) + /* ADD_ADDR2 discarded */ + return; +skip_hmac_v4: @@ -16327,29 +16356,26 @@ diff -aurN linux-5.4/net/mptcp/mptcp_input.c mptcp-mptcp_trunk/net/mptcp/mptcp_i + family = AF_INET; + addr.in = mpadd->u.v4.addr; +#if IS_ENABLED(CONFIG_IPV6) -+ } else if (mpadd->ipver == 6) { ++ } else { ++ u8 hash_mac_check[SHA256_DIGEST_SIZE]; ++ __be16 hmacport = 0; + char *recv_hmac; -+ u8 hash_mac_check[20]; -+ u8 no_key[8]; -+ int msg_parts = 0; + + if (mpcb->mptcp_ver < 
MPTCP_VERSION_1) + goto skip_hmac_v6; + -+ *(u64 *)no_key = 0; + recv_hmac = (char *)mpadd->u.v6.mac; + if (mpadd->len == MPTCP_SUB_LEN_ADD_ADDR6_VER1) { + recv_hmac -= sizeof(mpadd->u.v6.port); -+ msg_parts = 2; + } else if (mpadd->len == MPTCP_SUB_LEN_ADD_ADDR6_VER1 + 2) { -+ msg_parts = 3; ++ hmacport = mpadd->u.v6.port; + } + mptcp_hmac(mpcb->mptcp_ver, (u8 *)&mpcb->mptcp_rem_key, -+ (u8 *)no_key, (u32 *)hash_mac_check, msg_parts, ++ (u8 *)&mpcb->mptcp_loc_key, hash_mac_check, 3, + 1, (u8 *)&mpadd->addr_id, + 16, (u8 *)&mpadd->u.v6.addr.s6_addr, -+ 2, (u8 *)&mpadd->u.v6.port); -+ if (memcmp(hash_mac_check, recv_hmac, 8) != 0) ++ 2, (u8 *)&hmacport); ++ if (memcmp(&hash_mac_check[SHA256_DIGEST_SIZE - sizeof(u64)], recv_hmac, 8) != 0) + /* ADD_ADDR2 discarded */ + return; +skip_hmac_v6: @@ -16361,8 +16387,6 @@ diff -aurN linux-5.4/net/mptcp/mptcp_input.c mptcp-mptcp_trunk/net/mptcp/mptcp_i + family = AF_INET6; + addr.in6 = mpadd->u.v6.addr; +#endif /* CONFIG_IPV6 */ -+ } else { -+ return; + } + + if (mpcb->pm_ops->add_raddr) @@ -16691,11 +16715,11 @@ diff -aurN linux-5.4/net/mptcp/mptcp_input.c mptcp-mptcp_trunk/net/mptcp/mptcp_i + struct tcp_sock *tp = tcp_sk(sk); + + if (mptcp(tp)) { -+ u8 hash_mac_check[20]; ++ u8 hash_mac_check[SHA256_DIGEST_SIZE]; + struct mptcp_cb *mpcb = tp->mpcb; + + mptcp_hmac(mpcb->mptcp_ver, (u8 *)&mpcb->mptcp_rem_key, -+ (u8 *)&mpcb->mptcp_loc_key, (u32 *)hash_mac_check, 2, ++ (u8 *)&mpcb->mptcp_loc_key, hash_mac_check, 2, + 4, (u8 *)&tp->mptcp->rx_opt.mptcp_recv_nonce, + 4, (u8 *)&tp->mptcp->mptcp_loc_nonce); + if (memcmp(hash_mac_check, @@ -16713,7 +16737,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_input.c mptcp-mptcp_trunk/net/mptcp/mptcp_i + + mptcp_hmac(mpcb->mptcp_ver, (u8 *)&mpcb->mptcp_loc_key, + (u8 *)&mpcb->mptcp_rem_key, -+ (u32 *)&tp->mptcp->sender_mac[0], 2, ++ tp->mptcp->sender_mac, 2, + 4, (u8 *)&tp->mptcp->mptcp_loc_nonce, + 4, (u8 *)&tp->mptcp->rx_opt.mptcp_recv_nonce); + @@ -16840,7 +16864,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_input.c mptcp-mptcp_trunk/net/mptcp/mptcp_i +} diff -aurN linux-5.4/net/mptcp/mptcp_ipv4.c mptcp-mptcp_trunk/net/mptcp/mptcp_ipv4.c --- linux-5.4/net/mptcp/mptcp_ipv4.c 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/mptcp_ipv4.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/mptcp/mptcp_ipv4.c 2020-06-08 19:41:07.000000000 +0200 @@ -0,0 +1,430 @@ +/* + * MPTCP implementation - IPv4-specific functions @@ -17274,7 +17298,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_ipv4.c mptcp-mptcp_trunk/net/mptcp/mptcp_ip +} diff -aurN linux-5.4/net/mptcp/mptcp_ipv6.c mptcp-mptcp_trunk/net/mptcp/mptcp_ipv6.c --- linux-5.4/net/mptcp/mptcp_ipv6.c 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/mptcp_ipv6.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/mptcp/mptcp_ipv6.c 2020-06-08 19:41:07.000000000 +0200 @@ -0,0 +1,478 @@ +/* + * MPTCP implementation - IPv6-specific functions @@ -17756,7 +17780,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_ipv6.c mptcp-mptcp_trunk/net/mptcp/mptcp_ip +} diff -aurN linux-5.4/net/mptcp/mptcp_ndiffports.c mptcp-mptcp_trunk/net/mptcp/mptcp_ndiffports.c --- linux-5.4/net/mptcp/mptcp_ndiffports.c 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/mptcp_ndiffports.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/mptcp/mptcp_ndiffports.c 2020-06-08 19:41:07.000000000 +0200 @@ -0,0 +1,174 @@ +#include + @@ -17934,7 +17958,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_ndiffports.c mptcp-mptcp_trunk/net/mptcp/mp 
+MODULE_VERSION("0.88"); diff -aurN linux-5.4/net/mptcp/mptcp_netlink.c mptcp-mptcp_trunk/net/mptcp/mptcp_netlink.c --- linux-5.4/net/mptcp/mptcp_netlink.c 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/mptcp_netlink.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/mptcp/mptcp_netlink.c 2020-06-08 19:41:07.000000000 +0200 @@ -0,0 +1,1271 @@ +// SPDX-License-Identifier: GPL-2.0 +/* MPTCP implementation - Netlink Path Manager @@ -19209,7 +19233,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_netlink.c mptcp-mptcp_trunk/net/mptcp/mptcp +MODULE_ALIAS_GENL_FAMILY(MPTCP_GENL_NAME); diff -aurN linux-5.4/net/mptcp/mptcp_olia.c mptcp-mptcp_trunk/net/mptcp/mptcp_olia.c --- linux-5.4/net/mptcp/mptcp_olia.c 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/mptcp_olia.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/mptcp/mptcp_olia.c 2020-06-08 19:41:07.000000000 +0200 @@ -0,0 +1,318 @@ +/* + * MPTCP implementation - OPPORTUNISTIC LINKED INCREASES CONGESTION CONTROL: @@ -19531,8 +19555,8 @@ diff -aurN linux-5.4/net/mptcp/mptcp_olia.c mptcp-mptcp_trunk/net/mptcp/mptcp_ol +MODULE_VERSION("0.1"); diff -aurN linux-5.4/net/mptcp/mptcp_output.c mptcp-mptcp_trunk/net/mptcp/mptcp_output.c --- linux-5.4/net/mptcp/mptcp_output.c 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/mptcp_output.c 2020-05-16 10:39:52.000000000 +0200 -@@ -0,0 +1,1988 @@ ++++ mptcp-mptcp_trunk/net/mptcp/mptcp_output.c 2020-06-08 19:41:07.000000000 +0200 +@@ -0,0 +1,1994 @@ +/* + * MPTCP implementation - Sending side + * @@ -20847,29 +20871,35 @@ diff -aurN linux-5.4/net/mptcp/mptcp_output.c mptcp-mptcp_trunk/net/mptcp/mptcp_ + + mpadd->kind = TCPOPT_MPTCP; + if (opts->add_addr_v4) { -+ mpadd->sub = MPTCP_SUB_ADD_ADDR; -+ mpadd->ipver = 4; + mpadd->addr_id = opts->add_addr4.addr_id; + mpadd->u.v4.addr = opts->add_addr4.addr; + if (mpcb->mptcp_ver < MPTCP_VERSION_1) { ++ mpadd->u_bit.v0.sub = MPTCP_SUB_ADD_ADDR; ++ mpadd->u_bit.v0.ipver = 4; + mpadd->len = MPTCP_SUB_LEN_ADD_ADDR4; + ptr += MPTCP_SUB_LEN_ADD_ADDR4_ALIGN >> 2; + } else { ++ mpadd->u_bit.v1.sub = MPTCP_SUB_ADD_ADDR; ++ mpadd->u_bit.v1.rsv = 0; ++ mpadd->u_bit.v1.echo = 0; + memcpy((char *)mpadd->u.v4.mac - 2, + (char *)&opts->add_addr4.trunc_mac, 8); + mpadd->len = MPTCP_SUB_LEN_ADD_ADDR4_VER1; + ptr += MPTCP_SUB_LEN_ADD_ADDR4_ALIGN_VER1 >> 2; + } + } else if (opts->add_addr_v6) { -+ mpadd->sub = MPTCP_SUB_ADD_ADDR; -+ mpadd->ipver = 6; + mpadd->addr_id = opts->add_addr6.addr_id; + memcpy(&mpadd->u.v6.addr, &opts->add_addr6.addr, + sizeof(mpadd->u.v6.addr)); + if (mpcb->mptcp_ver < MPTCP_VERSION_1) { ++ mpadd->u_bit.v0.sub = MPTCP_SUB_ADD_ADDR; ++ mpadd->u_bit.v0.ipver = 6; + mpadd->len = MPTCP_SUB_LEN_ADD_ADDR6; + ptr += MPTCP_SUB_LEN_ADD_ADDR6_ALIGN >> 2; + } else { ++ mpadd->u_bit.v1.sub = MPTCP_SUB_ADD_ADDR; ++ mpadd->u_bit.v1.rsv = 0; ++ mpadd->u_bit.v1.echo = 0; + memcpy((char *)mpadd->u.v6.mac - 2, + (char *)&opts->add_addr6.trunc_mac, 8); + mpadd->len = MPTCP_SUB_LEN_ADD_ADDR6_VER1; @@ -21523,7 +21553,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_output.c mptcp-mptcp_trunk/net/mptcp/mptcp_ + diff -aurN linux-5.4/net/mptcp/mptcp_pm.c mptcp-mptcp_trunk/net/mptcp/mptcp_pm.c --- linux-5.4/net/mptcp/mptcp_pm.c 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/mptcp_pm.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/mptcp/mptcp_pm.c 2020-06-08 19:41:07.000000000 +0200 @@ -0,0 +1,226 @@ +/* + * MPTCP implementation - MPTCP-subflow-management @@ -21753,7 
+21783,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_pm.c mptcp-mptcp_trunk/net/mptcp/mptcp_pm.c +late_initcall(mptcp_path_manager_default); diff -aurN linux-5.4/net/mptcp/mptcp_redundant.c mptcp-mptcp_trunk/net/mptcp/mptcp_redundant.c --- linux-5.4/net/mptcp/mptcp_redundant.c 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/mptcp_redundant.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/mptcp/mptcp_redundant.c 2020-06-08 19:41:07.000000000 +0200 @@ -0,0 +1,392 @@ +/* + * MPTCP Scheduler to reduce latency and jitter. @@ -22149,7 +22179,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_redundant.c mptcp-mptcp_trunk/net/mptcp/mpt +MODULE_VERSION("0.90"); diff -aurN linux-5.4/net/mptcp/mptcp_rr.c mptcp-mptcp_trunk/net/mptcp/mptcp_rr.c --- linux-5.4/net/mptcp/mptcp_rr.c 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/mptcp_rr.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/mptcp/mptcp_rr.c 2020-06-08 19:41:07.000000000 +0200 @@ -0,0 +1,309 @@ +/* MPTCP Scheduler module selector. Highly inspired by tcp_cong.c */ + @@ -22462,8 +22492,8 @@ diff -aurN linux-5.4/net/mptcp/mptcp_rr.c mptcp-mptcp_trunk/net/mptcp/mptcp_rr.c +MODULE_VERSION("0.89"); diff -aurN linux-5.4/net/mptcp/mptcp_sched.c mptcp-mptcp_trunk/net/mptcp/mptcp_sched.c --- linux-5.4/net/mptcp/mptcp_sched.c 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/mptcp_sched.c 2020-05-16 10:39:52.000000000 +0200 -@@ -0,0 +1,646 @@ ++++ mptcp-mptcp_trunk/net/mptcp/mptcp_sched.c 2020-06-08 19:41:07.000000000 +0200 +@@ -0,0 +1,647 @@ +/* MPTCP Scheduler module selector. Highly inspired by tcp_cong.c */ + +#include @@ -22838,8 +22868,8 @@ diff -aurN linux-5.4/net/mptcp/mptcp_sched.c mptcp-mptcp_trunk/net/mptcp/mptcp_s + if (!skb && meta_sk->sk_socket && + test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) && + sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) { -+ struct sock *subsk = get_available_subflow(meta_sk, NULL, -+ false); ++ struct sock *subsk = mpcb->sched_ops->get_subflow(meta_sk, NULL, ++ false); + if (!subsk) + return NULL; + @@ -22851,7 +22881,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_sched.c mptcp-mptcp_trunk/net/mptcp/mptcp_s + return skb; +} + -+static struct sk_buff *mptcp_next_segment(struct sock *meta_sk, ++struct sk_buff *mptcp_next_segment(struct sock *meta_sk, + int *reinject, + struct sock **subsk, + unsigned int *limit) @@ -22869,7 +22899,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_sched.c mptcp-mptcp_trunk/net/mptcp/mptcp_s + if (!skb) + return NULL; + -+ *subsk = get_available_subflow(meta_sk, skb, false); ++ *subsk = tcp_sk(meta_sk)->mpcb->sched_ops->get_subflow(meta_sk, skb, false); + if (!*subsk) + return NULL; + @@ -22927,6 +22957,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_sched.c mptcp-mptcp_trunk/net/mptcp/mptcp_s + + return skb; +} ++EXPORT_SYMBOL_GPL(mptcp_next_segment); + +static void defsched_init(struct sock *sk) +{ @@ -23112,7 +23143,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_sched.c mptcp-mptcp_trunk/net/mptcp/mptcp_s +late_initcall(mptcp_scheduler_default); diff -aurN linux-5.4/net/mptcp/mptcp_wvegas.c mptcp-mptcp_trunk/net/mptcp/mptcp_wvegas.c --- linux-5.4/net/mptcp/mptcp_wvegas.c 1970-01-01 01:00:00.000000000 +0100 -+++ mptcp-mptcp_trunk/net/mptcp/mptcp_wvegas.c 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/net/mptcp/mptcp_wvegas.c 2020-06-08 19:41:07.000000000 +0200 @@ -0,0 +1,271 @@ +/* + * MPTCP implementation - WEIGHTED VEGAS @@ -23387,7 +23418,7 @@ diff -aurN linux-5.4/net/mptcp/mptcp_wvegas.c 
mptcp-mptcp_trunk/net/mptcp/mptcp_ +MODULE_VERSION("0.1"); diff -aurN linux-5.4/tools/include/uapi/linux/bpf.h mptcp-mptcp_trunk/tools/include/uapi/linux/bpf.h --- linux-5.4/tools/include/uapi/linux/bpf.h 2019-11-25 01:32:01.000000000 +0100 -+++ mptcp-mptcp_trunk/tools/include/uapi/linux/bpf.h 2020-05-16 10:39:52.000000000 +0200 ++++ mptcp-mptcp_trunk/tools/include/uapi/linux/bpf.h 2020-06-08 19:41:07.000000000 +0200 @@ -3438,6 +3438,7 @@ BPF_TCP_LISTEN, BPF_TCP_CLOSING, /* Now a valid state */ diff --git a/root/target/linux/generic/hack-5.4/691-mptcp_ecf.patch b/root/target/linux/generic/hack-5.4/691-mptcp_ecf.patch deleted file mode 100644 index 4925612c..00000000 --- a/root/target/linux/generic/hack-5.4/691-mptcp_ecf.patch +++ /dev/null @@ -1,988 +0,0 @@ -From 025619486cf04c0beb9f395609d7711726fd63c6 Mon Sep 17 00:00:00 2001 -From: Daniel Weber -Date: Mon, 5 Aug 2019 14:02:30 +0200 -Subject: [PATCH 1/3] mptcp: Earliest Completion First (ECF) Scheduler - -This scheduler works much like the default MPTCP scheduler. It always -prefers the available subflow with the smallest round-trip time. - -Signed-off-by: Daniel Weber ---- - net/mptcp/Kconfig | 6 + - net/mptcp/Makefile | 1 + - net/mptcp/mptcp_ecf.c | 384 ++++++++++++++++++++++++++++++++++++++++++ - 3 files changed, 391 insertions(+) - create mode 100644 net/mptcp/mptcp_ecf.c - -diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig -index 37f3af3db2a6..829ea084cf70 100644 ---- a/net/mptcp/Kconfig -+++ b/net/mptcp/Kconfig -@@ -109,6 +109,12 @@ config MPTCP_REDUNDANT - This scheduler sends all packets redundantly over all subflows to decreases - latency and jitter on the cost of lower throughput. - -+config MPTCP_ECF -+ tristate "MPTCP ECF" -+ depends on (MPTCP=y) -+ ---help--- -+ This is an experimental Earliest Completion First (ECF) scheduler. -+ - choice - prompt "Default MPTCP Scheduler" - default DEFAULT_SCHEDULER -diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile -index 82a2d4d945ae..369248a2f68e 100644 ---- a/net/mptcp/Makefile -+++ b/net/mptcp/Makefile -@@ -20,5 +20,6 @@ obj-$(CONFIG_MPTCP_NETLINK) += mptcp_netlink.o - obj-$(CONFIG_MPTCP_ROUNDROBIN) += mptcp_rr.o - obj-$(CONFIG_MPTCP_REDUNDANT) += mptcp_redundant.o - obj-$(CONFIG_MPTCP_BLEST) += mptcp_blest.o -+obj-$(CONFIG_MPTCP_ECF) += mptcp_ecf.o - - mptcp-$(subst m,y,$(CONFIG_IPV6)) += mptcp_ipv6.o -diff --git a/net/mptcp/mptcp_ecf.c b/net/mptcp/mptcp_ecf.c -new file mode 100644 -index 000000000000..e0bd430a8943 ---- /dev/null -+++ b/net/mptcp/mptcp_ecf.c -@@ -0,0 +1,384 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* MPTCP ECF Scheduler -+ * -+ * Algorithm Design: -+ * Yeon-sup Lim -+ * Don Towsley -+ * Erich M. Nahum -+ * Richard J. Gibbens -+ * -+ * Initial Implementation: -+ * Yeon-sup Lim -+ * -+ * Additional Authors: -+ * Daniel Weber -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. 
-+ */ -+ -+#include -+#include -+#include -+ -+static unsigned int mptcp_ecf_r_beta __read_mostly = 4; /* beta = 1/r_beta = 0.25 */ -+module_param(mptcp_ecf_r_beta, int, 0644); -+MODULE_PARM_DESC(mptcp_ecf_r_beta, "beta for ECF"); -+ -+struct ecfsched_priv { -+ u32 last_rbuf_opti; -+}; -+ -+struct ecfsched_cb { -+ u32 switching_margin; /* this is "waiting" in algorithm description */ -+}; -+ -+static struct ecfsched_priv *ecfsched_get_priv(const struct tcp_sock *tp) -+{ -+ return (struct ecfsched_priv *)&tp->mptcp->mptcp_sched[0]; -+} -+ -+static struct ecfsched_cb *ecfsched_get_cb(const struct tcp_sock *tp) -+{ -+ return (struct ecfsched_cb *)&tp->mpcb->mptcp_sched[0]; -+} -+ -+/* This is the ECF scheduler. This function decides on which flow to send -+ * a given MSS. If all subflows are found to be busy or the currently best -+ * subflow is estimated to be slower than waiting for minsk, NULL is returned. -+ */ -+static struct sock *ecf_get_available_subflow(struct sock *meta_sk, -+ struct sk_buff *skb, -+ bool zero_wnd_test) -+{ -+ struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct sock *bestsk, *minsk = NULL; -+ struct tcp_sock *besttp; -+ struct mptcp_tcp_sock *mptcp; -+ struct ecfsched_cb *ecf_cb = ecfsched_get_cb(tcp_sk(meta_sk)); -+ u32 min_srtt = U32_MAX; -+ u32 sub_sndbuf = 0; -+ u32 sub_packets_out = 0; -+ -+ /* Answer data_fin on same subflow!!! */ -+ if (meta_sk->sk_shutdown & RCV_SHUTDOWN && -+ skb && mptcp_is_data_fin(skb)) { -+ mptcp_for_each_sub(mpcb, mptcp) { -+ bestsk = mptcp_to_sock(mptcp); -+ -+ if (tcp_sk(bestsk)->mptcp->path_index == mpcb->dfin_path_index && -+ mptcp_is_available(bestsk, skb, zero_wnd_test)) -+ return bestsk; -+ } -+ } -+ -+ /* First, find the overall best (fastest) subflow */ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ bestsk = mptcp_to_sock(mptcp); -+ besttp = tcp_sk(bestsk); -+ -+ /* Set of states for which we are allowed to send data */ -+ if (!mptcp_sk_can_send(bestsk)) -+ continue; -+ -+ /* We do not send data on this subflow unless it is -+ * fully established, i.e. the 4th ack has been received. -+ */ -+ if (besttp->mptcp->pre_established) -+ continue; -+ -+ sub_sndbuf += bestsk->sk_wmem_queued; -+ sub_packets_out += besttp->packets_out; -+ -+ /* record minimal rtt */ -+ if (besttp->srtt_us < min_srtt) { -+ min_srtt = besttp->srtt_us; -+ minsk = bestsk; -+ } -+ } -+ -+ /* find the current best subflow according to the default scheduler */ -+ bestsk = get_available_subflow(meta_sk, skb, zero_wnd_test); -+ -+ /* if we decided to use a slower flow, we have the option of not using it at all */ -+ if (bestsk && minsk && bestsk != minsk) { -+ u32 mss = tcp_current_mss(bestsk); /* assuming equal MSS */ -+ u32 sndbuf_meta = meta_sk->sk_wmem_queued; -+ u32 sndbuf_minus = sub_sndbuf; -+ u32 sndbuf = 0; -+ -+ u32 cwnd_f = tcp_sk(minsk)->snd_cwnd; -+ u32 srtt_f = tcp_sk(minsk)->srtt_us >> 3; -+ u32 rttvar_f = tcp_sk(minsk)->rttvar_us >> 1; -+ -+ u32 cwnd_s = tcp_sk(bestsk)->snd_cwnd; -+ u32 srtt_s = tcp_sk(bestsk)->srtt_us >> 3; -+ u32 rttvar_s = tcp_sk(bestsk)->rttvar_us >> 1; -+ -+ u32 delta = max(rttvar_f, rttvar_s); -+ -+ u32 x_f; -+ u64 lhs, rhs; /* to avoid overflow, using u64 */ -+ -+ if (tcp_sk(meta_sk)->packets_out > sub_packets_out) -+ sndbuf_minus += (tcp_sk(meta_sk)->packets_out - sub_packets_out) * mss; -+ -+ if (sndbuf_meta > sndbuf_minus) -+ sndbuf = sndbuf_meta - sndbuf_minus; -+ -+ /* we have something to send. -+ * at least one time tx over fastest subflow is required -+ */ -+ x_f = sndbuf > cwnd_f * mss ? 
sndbuf : cwnd_f * mss; -+ lhs = srtt_f * (x_f + cwnd_f * mss); -+ rhs = cwnd_f * mss * (srtt_s + delta); -+ -+ if (mptcp_ecf_r_beta * lhs < mptcp_ecf_r_beta * rhs + ecf_cb->switching_margin * rhs) { -+ u32 x_s = sndbuf > cwnd_s * mss ? sndbuf : cwnd_s * mss; -+ u64 lhs_s = srtt_s * x_s; -+ u64 rhs_s = cwnd_s * mss * (2 * srtt_f + delta); -+ -+ if (lhs_s >= rhs_s) { -+ /* too slower than fastest */ -+ ecf_cb->switching_margin = 1; -+ return NULL; -+ } -+ } else { -+ /* use slower one */ -+ ecf_cb->switching_margin = 0; -+ } -+ } -+ -+ return bestsk; -+} -+ -+/* copy from mptcp_sched.c: mptcp_rcv_buf_optimization */ -+static struct sk_buff *mptcp_ecf_rcv_buf_optimization(struct sock *sk, int penal) -+{ -+ struct sock *meta_sk; -+ const struct tcp_sock *tp = tcp_sk(sk); -+ struct mptcp_tcp_sock *mptcp; -+ struct sk_buff *skb_head; -+ struct ecfsched_priv *ecf_p = ecfsched_get_priv(tp); -+ -+ meta_sk = mptcp_meta_sk(sk); -+ skb_head = tcp_rtx_queue_head(meta_sk); -+ -+ if (!skb_head) -+ return NULL; -+ -+ /* If penalization is optional (coming from mptcp_next_segment() and -+ * We are not send-buffer-limited we do not penalize. The retransmission -+ * is just an optimization to fix the idle-time due to the delay before -+ * we wake up the application. -+ */ -+ if (!penal && sk_stream_memory_free(meta_sk)) -+ goto retrans; -+ -+ /* Only penalize again after an RTT has elapsed */ -+ if (tcp_jiffies32 - ecf_p->last_rbuf_opti < usecs_to_jiffies(tp->srtt_us >> 3)) -+ goto retrans; -+ -+ /* Half the cwnd of the slow flows */ -+ mptcp_for_each_sub(tp->mpcb, mptcp) { -+ struct tcp_sock *tp_it = mptcp->tp; -+ -+ if (tp_it != tp && -+ TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) { -+ if (tp->srtt_us < tp_it->srtt_us && inet_csk((struct sock *)tp_it)->icsk_ca_state == TCP_CA_Open) { -+ u32 prior_cwnd = tp_it->snd_cwnd; -+ -+ tp_it->snd_cwnd = max(tp_it->snd_cwnd >> 1U, 1U); -+ -+ /* If in slow start, do not reduce the ssthresh */ -+ if (prior_cwnd >= tp_it->snd_ssthresh) -+ tp_it->snd_ssthresh = max(tp_it->snd_ssthresh >> 1U, 2U); -+ -+ ecf_p->last_rbuf_opti = tcp_jiffies32; -+ } -+ } -+ } -+ -+retrans: -+ -+ /* Segment not yet injected into this path? Take it!!! */ -+ if (!(TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index))) { -+ bool do_retrans = false; -+ mptcp_for_each_sub(tp->mpcb, mptcp) { -+ struct tcp_sock *tp_it = mptcp->tp; -+ -+ if (tp_it != tp && -+ TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) { -+ if (tp_it->snd_cwnd <= 4) { -+ do_retrans = true; -+ break; -+ } -+ -+ if (4 * tp->srtt_us >= tp_it->srtt_us) { -+ do_retrans = false; -+ break; -+ } else { -+ do_retrans = true; -+ } -+ } -+ } -+ -+ if (do_retrans && mptcp_is_available(sk, skb_head, false)) { -+ trace_mptcp_retransmit(sk, skb_head); -+ return skb_head; -+ } -+ } -+ return NULL; -+} -+ -+/* copy from mptcp_sched.c: __mptcp_next_segment */ -+/* Returns the next segment to be sent from the mptcp meta-queue. -+ * (chooses the reinject queue if any segment is waiting in it, otherwise, -+ * chooses the normal write queue). -+ * Sets *@reinject to 1 if the returned segment comes from the -+ * reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk, -+ * and sets it to -1 if it is a meta-level retransmission to optimize the -+ * receive-buffer. 
-+ */ -+static struct sk_buff *__mptcp_ecf_next_segment(struct sock *meta_sk, int *reinject) -+{ -+ const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct sk_buff *skb = NULL; -+ -+ *reinject = 0; -+ -+ /* If we are in fallback-mode, just take from the meta-send-queue */ -+ if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping) -+ return tcp_send_head(meta_sk); -+ -+ skb = skb_peek(&mpcb->reinject_queue); -+ -+ if (skb) { -+ *reinject = 1; -+ } else { -+ skb = tcp_send_head(meta_sk); -+ -+ if (!skb && meta_sk->sk_socket && -+ test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) && -+ sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) { -+ struct sock *subsk = ecf_get_available_subflow(meta_sk, NULL, -+ false); -+ if (!subsk) -+ return NULL; -+ -+ skb = mptcp_ecf_rcv_buf_optimization(subsk, 0); -+ if (skb) -+ *reinject = -1; -+ } -+ } -+ return skb; -+} -+ -+/* copy from mptcp_sched.c: mptcp_next_segment */ -+static struct sk_buff *mptcp_ecf_next_segment(struct sock *meta_sk, -+ int *reinject, -+ struct sock **subsk, -+ unsigned int *limit) -+{ -+ struct sk_buff *skb = __mptcp_ecf_next_segment(meta_sk, reinject); -+ unsigned int mss_now; -+ struct tcp_sock *subtp; -+ u16 gso_max_segs; -+ u32 max_len, max_segs, window, needed; -+ -+ /* As we set it, we have to reset it as well. */ -+ *limit = 0; -+ -+ if (!skb) -+ return NULL; -+ -+ *subsk = ecf_get_available_subflow(meta_sk, skb, false); -+ if (!*subsk) -+ return NULL; -+ -+ subtp = tcp_sk(*subsk); -+ mss_now = tcp_current_mss(*subsk); -+ -+ if (!*reinject && unlikely(!tcp_snd_wnd_test(tcp_sk(meta_sk), skb, mss_now))) { -+ skb = mptcp_ecf_rcv_buf_optimization(*subsk, 1); -+ if (skb) -+ *reinject = -1; -+ else -+ return NULL; -+ } -+ -+ /* No splitting required, as we will only send one single segment */ -+ if (skb->len <= mss_now) -+ return skb; -+ -+ /* The following is similar to tcp_mss_split_point, but -+ * we do not care about nagle, because we will anyways -+ * use TCP_NAGLE_PUSH, which overrides this. -+ * -+ * So, we first limit according to the cwnd/gso-size and then according -+ * to the subflow's window. 
-+ */ -+ -+ gso_max_segs = (*subsk)->sk_gso_max_segs; -+ if (!gso_max_segs) /* No gso supported on the subflow's NIC */ -+ gso_max_segs = 1; -+ max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs); -+ if (!max_segs) -+ return NULL; -+ -+ max_len = mss_now * max_segs; -+ window = tcp_wnd_end(subtp) - subtp->write_seq; -+ -+ needed = min(skb->len, window); -+ if (max_len <= skb->len) -+ /* Take max_win, which is actually the cwnd/gso-size */ -+ *limit = max_len; -+ else -+ /* Or, take the window */ -+ *limit = needed; -+ -+ return skb; -+} -+ -+static void ecfsched_init(struct sock *sk) -+{ -+ struct ecfsched_priv *ecf_p = ecfsched_get_priv(tcp_sk(sk)); -+ struct ecfsched_cb *ecf_cb = ecfsched_get_cb(tcp_sk(mptcp_meta_sk(sk))); -+ -+ ecf_p->last_rbuf_opti = tcp_jiffies32; -+ ecf_cb->switching_margin = 0; -+} -+ -+struct mptcp_sched_ops mptcp_sched_ecf = { -+ .get_subflow = ecf_get_available_subflow, -+ .next_segment = mptcp_ecf_next_segment, -+ .init = ecfsched_init, -+ .name = "ecf", -+ .owner = THIS_MODULE, -+}; -+ -+static int __init ecf_register(void) -+{ -+ BUILD_BUG_ON(sizeof(struct ecfsched_priv) > MPTCP_SCHED_SIZE); -+ BUILD_BUG_ON(sizeof(struct ecfsched_cb) > MPTCP_SCHED_DATA_SIZE); -+ -+ if (mptcp_register_scheduler(&mptcp_sched_ecf)) -+ return -1; -+ -+ return 0; -+} -+ -+static void ecf_unregister(void) -+{ -+ mptcp_unregister_scheduler(&mptcp_sched_ecf); -+} -+ -+module_init(ecf_register); -+module_exit(ecf_unregister); -+ -+MODULE_AUTHOR("Yeon-sup Lim, Daniel Weber"); -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("ECF (Earliest Completion First) scheduler for MPTCP, based on default minimum RTT scheduler"); -+MODULE_VERSION("0.95"); - -From 5a9641c84cbb5a49749d7533c20035631985dbe7 Mon Sep 17 00:00:00 2001 -From: Daniel Weber -Date: Mon, 9 Mar 2020 11:00:23 +0100 -Subject: [PATCH 2/3] mptcp: Reduce code-duplication for other schedulers - -'mptcp_next_segment' now honors the function pointer to the actual part -that makes the scheduling decision in 'sched_ops->get_subflow'. This -allows for better reuse by other schedulers. - -The BLEST scheduler needs to adapt the direction of lambda value change -depending on the occurrence of a retransmission. In order to remove the -copied 'mptcp_rcv_buf_optimization' as well, the scheduler now checks the -TCP 'retrans_stamp' of the meta socket. 
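The reuse this commit message describes is plain C function-pointer dispatch through an ops structure: the shared mptcp_next_segment() never names a concrete policy, it calls whatever get_subflow hook the active scheduler registered in its mptcp_sched_ops. The standalone sketch below illustrates the pattern outside the kernel; the subflow and sched_ops types, the min_rtt_subflow policy, and the sample RTT values are simplified assumptions for illustration, not kernel API.

#include <stdio.h>

/* Simplified stand-ins for the kernel structures; illustrative only. */
struct subflow {
	const char *name;
	unsigned int srtt_us;	/* smoothed RTT in microseconds */
};

struct sched_ops {
	/* Each scheduler contributes only its selection policy. */
	struct subflow *(*get_subflow)(struct subflow *flows, int n);
};

/* Shared dispatch logic, in the role of mptcp_next_segment(): it never
 * refers to a concrete scheduler, it calls through the registered hook. */
static struct subflow *next_segment(const struct sched_ops *ops,
				    struct subflow *flows, int n)
{
	return ops->get_subflow(flows, n);
}

/* A default-style policy: pick the subflow with the minimum smoothed RTT. */
static struct subflow *min_rtt_subflow(struct subflow *flows, int n)
{
	struct subflow *best = NULL;
	int i;

	for (i = 0; i < n; i++)
		if (!best || flows[i].srtt_us < best->srtt_us)
			best = &flows[i];
	return best;
}

int main(void)
{
	struct subflow flows[] = { { "wan0", 42000 }, { "lte0", 91000 } };
	const struct sched_ops ops = { .get_subflow = min_rtt_subflow };

	printf("chosen subflow: %s\n", next_segment(&ops, flows, 2)->name);
	return 0;
}

With this shape, a scheduler such as BLEST or ECF only has to supply its own get_subflow; the shared segment handling comes along for free, which is what makes the copied next_segment variants in the hunks below removable.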
- -Signed-off-by: Daniel Weber ---- - include/net/mptcp.h | 4 + - net/mptcp/mptcp_blest.c | 200 +--------------------------------------- - net/mptcp/mptcp_sched.c | 9 +- - 3 files changed, 11 insertions(+), 202 deletions(-) - -diff --git a/include/net/mptcp.h b/include/net/mptcp.h -index 02312c9ea3a3..82f66ce206cc 100644 ---- a/include/net/mptcp.h -+++ b/include/net/mptcp.h -@@ -902,6 +902,10 @@ bool subflow_is_active(const struct tcp_sock *tp); - bool subflow_is_backup(const struct tcp_sock *tp); - struct sock *get_available_subflow(struct sock *meta_sk, struct sk_buff *skb, - bool zero_wnd_test); -+struct sk_buff *mptcp_next_segment(struct sock *meta_sk, -+ int *reinject, -+ struct sock **subsk, -+ unsigned int *limit); - extern struct mptcp_sched_ops mptcp_sched_default; - - /* Initializes function-pointers and MPTCP-flags */ -diff --git a/net/mptcp/mptcp_blest.c b/net/mptcp/mptcp_blest.c -index 40905a0d1fe5..22e25dd0d44e 100644 ---- a/net/mptcp/mptcp_blest.c -+++ b/net/mptcp/mptcp_blest.c -@@ -21,7 +21,6 @@ - - #include - #include --#include - - static unsigned char lambda __read_mostly = 12; - module_param(lambda, byte, 0644); -@@ -50,7 +49,6 @@ struct blestsched_priv { - }; - - struct blestsched_cb { -- bool retrans_flag; - s16 lambda_1000; /* values range from min_lambda * 100 to max_lambda * 100 */ - u32 last_lambda_update; - }; -@@ -77,14 +75,13 @@ static void blestsched_update_lambda(struct sock *meta_sk, struct sock *sk) - * during the slow flows last RTT => increase lambda - * otherwise decrease - */ -- if (blest_cb->retrans_flag) { -+ if (tcp_sk(meta_sk)->retrans_stamp) { - /* need to slow down on the slow flow */ - blest_cb->lambda_1000 += dyn_lambda_bad; - } else { - /* use the slow flow more */ - blest_cb->lambda_1000 -= dyn_lambda_good; - } -- blest_cb->retrans_flag = false; - - /* cap lambda_1000 to its value range */ - blest_cb->lambda_1000 = min_t(s16, blest_cb->lambda_1000, max_lambda * 100); -@@ -240,199 +237,6 @@ struct sock *blest_get_available_subflow(struct sock *meta_sk, struct sk_buff *s - return bestsk; - } - --/* copy from mptcp_sched.c: mptcp_rcv_buf_optimization */ --static struct sk_buff *mptcp_blest_rcv_buf_optimization(struct sock *sk, int penal) --{ -- struct sock *meta_sk; -- const struct tcp_sock *tp = tcp_sk(sk); -- struct mptcp_tcp_sock *mptcp; -- struct sk_buff *skb_head; -- struct blestsched_priv *blest_p = blestsched_get_priv(tp); -- struct blestsched_cb *blest_cb; -- -- meta_sk = mptcp_meta_sk(sk); -- skb_head = tcp_rtx_queue_head(meta_sk); -- -- if (!skb_head) -- return NULL; -- -- /* If penalization is optional (coming from mptcp_next_segment() and -- * We are not send-buffer-limited we do not penalize. The retransmission -- * is just an optimization to fix the idle-time due to the delay before -- * we wake up the application. 
-- */ -- if (!penal && sk_stream_memory_free(meta_sk)) -- goto retrans; -- -- /* Record the occurrence of a retransmission to update the lambda value */ -- blest_cb = blestsched_get_cb(tcp_sk(meta_sk)); -- blest_cb->retrans_flag = true; -- -- /* Only penalize again after an RTT has elapsed */ -- if (tcp_jiffies32 - blest_p->last_rbuf_opti < usecs_to_jiffies(tp->srtt_us >> 3)) -- goto retrans; -- -- /* Half the cwnd of the slow flows */ -- mptcp_for_each_sub(tp->mpcb, mptcp) { -- struct tcp_sock *tp_it = mptcp->tp; -- -- if (tp_it != tp && -- TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) { -- if (tp->srtt_us < tp_it->srtt_us && inet_csk((struct sock *)tp_it)->icsk_ca_state == TCP_CA_Open) { -- u32 prior_cwnd = tp_it->snd_cwnd; -- -- tp_it->snd_cwnd = max(tp_it->snd_cwnd >> 1U, 1U); -- -- /* If in slow start, do not reduce the ssthresh */ -- if (prior_cwnd >= tp_it->snd_ssthresh) -- tp_it->snd_ssthresh = max(tp_it->snd_ssthresh >> 1U, 2U); -- -- blest_p->last_rbuf_opti = tcp_jiffies32; -- } -- } -- } -- --retrans: -- -- /* Segment not yet injected into this path? Take it!!! */ -- if (!(TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index))) { -- bool do_retrans = false; -- mptcp_for_each_sub(tp->mpcb, mptcp) { -- struct tcp_sock *tp_it = mptcp->tp; -- -- if (tp_it != tp && -- TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) { -- if (tp_it->snd_cwnd <= 4) { -- do_retrans = true; -- break; -- } -- -- if (4 * tp->srtt_us >= tp_it->srtt_us) { -- do_retrans = false; -- break; -- } else { -- do_retrans = true; -- } -- } -- } -- -- if (do_retrans && mptcp_is_available(sk, skb_head, false)) { -- trace_mptcp_retransmit(sk, skb_head); -- return skb_head; -- } -- } -- return NULL; --} -- --/* copy from mptcp_sched.c: __mptcp_next_segment */ --/* Returns the next segment to be sent from the mptcp meta-queue. -- * (chooses the reinject queue if any segment is waiting in it, otherwise, -- * chooses the normal write queue). -- * Sets *@reinject to 1 if the returned segment comes from the -- * reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk, -- * and sets it to -1 if it is a meta-level retransmission to optimize the -- * receive-buffer. 
-- */ --static struct sk_buff *__mptcp_blest_next_segment(struct sock *meta_sk, int *reinject) --{ -- const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -- struct sk_buff *skb = NULL; -- -- *reinject = 0; -- -- /* If we are in fallback-mode, just take from the meta-send-queue */ -- if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping) -- return tcp_send_head(meta_sk); -- -- skb = skb_peek(&mpcb->reinject_queue); -- -- if (skb) { -- *reinject = 1; -- } else { -- skb = tcp_send_head(meta_sk); -- -- if (!skb && meta_sk->sk_socket && -- test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) && -- sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) { -- struct sock *subsk = blest_get_available_subflow(meta_sk, NULL, -- false); -- if (!subsk) -- return NULL; -- -- skb = mptcp_blest_rcv_buf_optimization(subsk, 0); -- if (skb) -- *reinject = -1; -- } -- } -- return skb; --} -- --/* copy from mptcp_sched.c: mptcp_next_segment */ --static struct sk_buff *mptcp_blest_next_segment(struct sock *meta_sk, -- int *reinject, -- struct sock **subsk, -- unsigned int *limit) --{ -- struct sk_buff *skb = __mptcp_blest_next_segment(meta_sk, reinject); -- unsigned int mss_now; -- struct tcp_sock *subtp; -- u16 gso_max_segs; -- u32 max_len, max_segs, window, needed; -- -- /* As we set it, we have to reset it as well. */ -- *limit = 0; -- -- if (!skb) -- return NULL; -- -- *subsk = blest_get_available_subflow(meta_sk, skb, false); -- if (!*subsk) -- return NULL; -- -- subtp = tcp_sk(*subsk); -- mss_now = tcp_current_mss(*subsk); -- -- if (!*reinject && unlikely(!tcp_snd_wnd_test(tcp_sk(meta_sk), skb, mss_now))) { -- skb = mptcp_blest_rcv_buf_optimization(*subsk, 1); -- if (skb) -- *reinject = -1; -- else -- return NULL; -- } -- -- /* No splitting required, as we will only send one single segment */ -- if (skb->len <= mss_now) -- return skb; -- -- /* The following is similar to tcp_mss_split_point, but -- * we do not care about nagle, because we will anyways -- * use TCP_NAGLE_PUSH, which overrides this. -- * -- * So, we first limit according to the cwnd/gso-size and then according -- * to the subflow's window. 
-- */ -- -- gso_max_segs = (*subsk)->sk_gso_max_segs; -- if (!gso_max_segs) /* No gso supported on the subflow's NIC */ -- gso_max_segs = 1; -- max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs); -- if (!max_segs) -- return NULL; -- -- max_len = mss_now * max_segs; -- window = tcp_wnd_end(subtp) - subtp->write_seq; -- -- needed = min(skb->len, window); -- if (max_len <= skb->len) -- /* Take max_win, which is actually the cwnd/gso-size */ -- *limit = max_len; -- else -- /* Or, take the window */ -- *limit = needed; -- -- return skb; --} -- - static void blestsched_init(struct sock *sk) - { - struct blestsched_priv *blest_p = blestsched_get_priv(tcp_sk(sk)); -@@ -450,7 +254,7 @@ static void blestsched_init(struct sock *sk) - - static struct mptcp_sched_ops mptcp_sched_blest = { - .get_subflow = blest_get_available_subflow, -- .next_segment = mptcp_blest_next_segment, -+ .next_segment = mptcp_next_segment, - .init = blestsched_init, - .name = "blest", - .owner = THIS_MODULE, -diff --git a/net/mptcp/mptcp_sched.c b/net/mptcp/mptcp_sched.c -index 18c3559b0d48..5bf2946a5caf 100644 ---- a/net/mptcp/mptcp_sched.c -+++ b/net/mptcp/mptcp_sched.c -@@ -372,8 +372,8 @@ static struct sk_buff *__mptcp_next_segment(struct sock *meta_sk, int *reinject) - if (!skb && meta_sk->sk_socket && - test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) && - sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) { -- struct sock *subsk = get_available_subflow(meta_sk, NULL, -- false); -+ struct sock *subsk = mpcb->sched_ops->get_subflow(meta_sk, NULL, -+ false); - if (!subsk) - return NULL; - -@@ -385,7 +385,7 @@ static struct sk_buff *__mptcp_next_segment(struct sock *meta_sk, int *reinject) - return skb; - } - --static struct sk_buff *mptcp_next_segment(struct sock *meta_sk, -+struct sk_buff *mptcp_next_segment(struct sock *meta_sk, - int *reinject, - struct sock **subsk, - unsigned int *limit) -@@ -402,7 +402,7 @@ static struct sk_buff *mptcp_next_segment(struct sock *meta_sk, - if (!skb) - return NULL; - -- *subsk = get_available_subflow(meta_sk, skb, false); -+ *subsk = tcp_sk(meta_sk)->mpcb->sched_ops->get_subflow(meta_sk, skb, false); - if (!*subsk) - return NULL; - -@@ -449,6 +449,7 @@ static struct sk_buff *mptcp_next_segment(struct sock *meta_sk, - - return skb; - } -+EXPORT_SYMBOL_GPL(mptcp_next_segment); - - static void defsched_init(struct sock *sk) - { - -From 5e8425e43b38e7e0fe566ffd50e197c07807ebdf Mon Sep 17 00:00:00 2001 -From: Daniel Weber -Date: Mon, 9 Mar 2020 11:09:27 +0100 -Subject: [PATCH 3/3] mptcp: Remove code-duplication from ECF scheduler - -The ECF scheduler relies on large parts of the default scheduler. This -commit removes the copied blocks and reuses 'mptcp_next_segment' and -'mptcp_rcv_buf_optimization' directly from it via function pointers. 
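After this cleanup the only ECF-specific logic left is the selection policy in ecf_get_available_subflow() above: the "send on the slower subflow or wait for the fastest one" test introduced by PATCH 1/3. As a reading aid, the sketch below restates that test as a standalone function. The parameter names mirror the variables in the patch (values assumed pre-scaled as in the kernel code, srtt_us >> 3 and rttvar_us >> 1), but the function itself, its signature, the sample inputs, and the omission of the switching_margin update are simplifications for illustration, not the patch's literal interface.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Restatement of the ECF core test from ecf_get_available_subflow():
 * f = fastest (minimum-RTT) subflow, s = the slower candidate.
 * Returns true when ECF would rather wait for the fast subflow than
 * schedule the segment on the slower one. All math is done in 64 bits,
 * as in the original, to avoid overflow. */
static bool ecf_prefers_waiting(uint64_t sndbuf, uint64_t mss,
				uint64_t cwnd_f, uint64_t srtt_f,
				uint64_t rttvar_f,
				uint64_t cwnd_s, uint64_t srtt_s,
				uint64_t rttvar_s,
				uint64_t r_beta, uint64_t switching_margin)
{
	uint64_t delta = rttvar_f > rttvar_s ? rttvar_f : rttvar_s;
	/* At least one transmission over the fastest subflow is required. */
	uint64_t x_f = sndbuf > cwnd_f * mss ? sndbuf : cwnd_f * mss;
	uint64_t lhs = srtt_f * (x_f + cwnd_f * mss);
	uint64_t rhs = cwnd_f * mss * (srtt_s + delta);

	if (r_beta * lhs < r_beta * rhs + switching_margin * rhs) {
		uint64_t x_s = sndbuf > cwnd_s * mss ? sndbuf : cwnd_s * mss;

		/* The slow subflow is only worth using if it can finish
		 * within roughly two fast-subflow RTTs plus the variance
		 * allowance; otherwise wait for the fast one. */
		return srtt_s * x_s >= cwnd_s * mss * (2 * srtt_f + delta);
	}
	return false;	/* beta-weighted test failed: use the slower flow */
}

int main(void)
{
	/* Illustrative numbers: 40 ms fast path vs. 90 ms slow path. */
	bool wait = ecf_prefers_waiting(64000, 1400,
					10, 40000, 5000,
					10, 90000, 8000,
					4, 0);

	printf("%s\n", wait ? "wait for fast subflow" : "use slower subflow");
	return 0;
}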
- -Signed-off-by: Daniel Weber ---- - net/mptcp/mptcp_ecf.c | 191 +----------------------------------------- - 1 file changed, 1 insertion(+), 190 deletions(-) - -diff --git a/net/mptcp/mptcp_ecf.c b/net/mptcp/mptcp_ecf.c -index e0bd430a8943..6b976b2b0c72 100644 ---- a/net/mptcp/mptcp_ecf.c -+++ b/net/mptcp/mptcp_ecf.c -@@ -21,7 +21,6 @@ - - #include - #include --#include - - static unsigned int mptcp_ecf_r_beta __read_mostly = 4; /* beta = 1/r_beta = 0.25 */ - module_param(mptcp_ecf_r_beta, int, 0644); -@@ -154,194 +153,6 @@ static struct sock *ecf_get_available_subflow(struct sock *meta_sk, - return bestsk; - } - --/* copy from mptcp_sched.c: mptcp_rcv_buf_optimization */ --static struct sk_buff *mptcp_ecf_rcv_buf_optimization(struct sock *sk, int penal) --{ -- struct sock *meta_sk; -- const struct tcp_sock *tp = tcp_sk(sk); -- struct mptcp_tcp_sock *mptcp; -- struct sk_buff *skb_head; -- struct ecfsched_priv *ecf_p = ecfsched_get_priv(tp); -- -- meta_sk = mptcp_meta_sk(sk); -- skb_head = tcp_rtx_queue_head(meta_sk); -- -- if (!skb_head) -- return NULL; -- -- /* If penalization is optional (coming from mptcp_next_segment() and -- * We are not send-buffer-limited we do not penalize. The retransmission -- * is just an optimization to fix the idle-time due to the delay before -- * we wake up the application. -- */ -- if (!penal && sk_stream_memory_free(meta_sk)) -- goto retrans; -- -- /* Only penalize again after an RTT has elapsed */ -- if (tcp_jiffies32 - ecf_p->last_rbuf_opti < usecs_to_jiffies(tp->srtt_us >> 3)) -- goto retrans; -- -- /* Half the cwnd of the slow flows */ -- mptcp_for_each_sub(tp->mpcb, mptcp) { -- struct tcp_sock *tp_it = mptcp->tp; -- -- if (tp_it != tp && -- TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) { -- if (tp->srtt_us < tp_it->srtt_us && inet_csk((struct sock *)tp_it)->icsk_ca_state == TCP_CA_Open) { -- u32 prior_cwnd = tp_it->snd_cwnd; -- -- tp_it->snd_cwnd = max(tp_it->snd_cwnd >> 1U, 1U); -- -- /* If in slow start, do not reduce the ssthresh */ -- if (prior_cwnd >= tp_it->snd_ssthresh) -- tp_it->snd_ssthresh = max(tp_it->snd_ssthresh >> 1U, 2U); -- -- ecf_p->last_rbuf_opti = tcp_jiffies32; -- } -- } -- } -- --retrans: -- -- /* Segment not yet injected into this path? Take it!!! */ -- if (!(TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index))) { -- bool do_retrans = false; -- mptcp_for_each_sub(tp->mpcb, mptcp) { -- struct tcp_sock *tp_it = mptcp->tp; -- -- if (tp_it != tp && -- TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) { -- if (tp_it->snd_cwnd <= 4) { -- do_retrans = true; -- break; -- } -- -- if (4 * tp->srtt_us >= tp_it->srtt_us) { -- do_retrans = false; -- break; -- } else { -- do_retrans = true; -- } -- } -- } -- -- if (do_retrans && mptcp_is_available(sk, skb_head, false)) { -- trace_mptcp_retransmit(sk, skb_head); -- return skb_head; -- } -- } -- return NULL; --} -- --/* copy from mptcp_sched.c: __mptcp_next_segment */ --/* Returns the next segment to be sent from the mptcp meta-queue. -- * (chooses the reinject queue if any segment is waiting in it, otherwise, -- * chooses the normal write queue). -- * Sets *@reinject to 1 if the returned segment comes from the -- * reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk, -- * and sets it to -1 if it is a meta-level retransmission to optimize the -- * receive-buffer. 
-- */ --static struct sk_buff *__mptcp_ecf_next_segment(struct sock *meta_sk, int *reinject) --{ -- const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -- struct sk_buff *skb = NULL; -- -- *reinject = 0; -- -- /* If we are in fallback-mode, just take from the meta-send-queue */ -- if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping) -- return tcp_send_head(meta_sk); -- -- skb = skb_peek(&mpcb->reinject_queue); -- -- if (skb) { -- *reinject = 1; -- } else { -- skb = tcp_send_head(meta_sk); -- -- if (!skb && meta_sk->sk_socket && -- test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) && -- sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) { -- struct sock *subsk = ecf_get_available_subflow(meta_sk, NULL, -- false); -- if (!subsk) -- return NULL; -- -- skb = mptcp_ecf_rcv_buf_optimization(subsk, 0); -- if (skb) -- *reinject = -1; -- } -- } -- return skb; --} -- --/* copy from mptcp_sched.c: mptcp_next_segment */ --static struct sk_buff *mptcp_ecf_next_segment(struct sock *meta_sk, -- int *reinject, -- struct sock **subsk, -- unsigned int *limit) --{ -- struct sk_buff *skb = __mptcp_ecf_next_segment(meta_sk, reinject); -- unsigned int mss_now; -- struct tcp_sock *subtp; -- u16 gso_max_segs; -- u32 max_len, max_segs, window, needed; -- -- /* As we set it, we have to reset it as well. */ -- *limit = 0; -- -- if (!skb) -- return NULL; -- -- *subsk = ecf_get_available_subflow(meta_sk, skb, false); -- if (!*subsk) -- return NULL; -- -- subtp = tcp_sk(*subsk); -- mss_now = tcp_current_mss(*subsk); -- -- if (!*reinject && unlikely(!tcp_snd_wnd_test(tcp_sk(meta_sk), skb, mss_now))) { -- skb = mptcp_ecf_rcv_buf_optimization(*subsk, 1); -- if (skb) -- *reinject = -1; -- else -- return NULL; -- } -- -- /* No splitting required, as we will only send one single segment */ -- if (skb->len <= mss_now) -- return skb; -- -- /* The following is similar to tcp_mss_split_point, but -- * we do not care about nagle, because we will anyways -- * use TCP_NAGLE_PUSH, which overrides this. -- * -- * So, we first limit according to the cwnd/gso-size and then according -- * to the subflow's window. -- */ -- -- gso_max_segs = (*subsk)->sk_gso_max_segs; -- if (!gso_max_segs) /* No gso supported on the subflow's NIC */ -- gso_max_segs = 1; -- max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs); -- if (!max_segs) -- return NULL; -- -- max_len = mss_now * max_segs; -- window = tcp_wnd_end(subtp) - subtp->write_seq; -- -- needed = min(skb->len, window); -- if (max_len <= skb->len) -- /* Take max_win, which is actually the cwnd/gso-size */ -- *limit = max_len; -- else -- /* Or, take the window */ -- *limit = needed; -- -- return skb; --} -- - static void ecfsched_init(struct sock *sk) - { - struct ecfsched_priv *ecf_p = ecfsched_get_priv(tcp_sk(sk)); -@@ -353,7 +164,7 @@ static void ecfsched_init(struct sock *sk) - - struct mptcp_sched_ops mptcp_sched_ecf = { - .get_subflow = ecf_get_available_subflow, -- .next_segment = mptcp_ecf_next_segment, -+ .next_segment = mptcp_next_segment, - .init = ecfsched_init, - .name = "ecf", - .owner = THIS_MODULE,