From ea07e64ebed59b25edb3d69c23a93d4b5f514f8e Mon Sep 17 00:00:00 2001
From: "Ycarus (Yannick Chabanois)"
Date: Thu, 29 Apr 2021 21:08:35 +0200
Subject: [PATCH] Update MPTCP with patches from tessares

---
 .../generic/hack-5.4/690-mptcp_trunk.patch | 209 ++++++++++++++++++
 1 file changed, 209 insertions(+)

diff --git a/root/target/linux/generic/hack-5.4/690-mptcp_trunk.patch b/root/target/linux/generic/hack-5.4/690-mptcp_trunk.patch
index b422429c..8e3ee022 100644
--- a/root/target/linux/generic/hack-5.4/690-mptcp_trunk.patch
+++ b/root/target/linux/generic/hack-5.4/690-mptcp_trunk.patch
@@ -23987,3 +23987,212 @@ index fc71d41c608d..bdea1a26e3fc 100644
  return new_win;
  }
+diff --git a/net/mptcp/mptcp_sched.c b/net/mptcp/mptcp_sched.c
+index 4b878d14492a..6cb8c5c7d098 100644
+--- a/net/mptcp/mptcp_sched.c
++++ b/net/mptcp/mptcp_sched.c
+@@ -388,25 +388,32 @@ static struct sk_buff *__mptcp_next_segment(struct sock *meta_sk, int *reinject)
+ } else {
+ skb = tcp_send_head(meta_sk);
+
+- if (!skb && meta_sk->sk_socket &&
+- test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) &&
+- sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) {
++ if (!skb) {
+ struct sock *subsk;
+
+- /* meta is send buffer limited */
+- tcp_chrono_start(meta_sk, TCP_CHRONO_SNDBUF_LIMITED);
+-
+ subsk = mpcb->sched_ops->get_subflow(meta_sk,
+ NULL, false);
+ if (!subsk)
+ return NULL;
+
+- skb = mptcp_rcv_buf_optimization(subsk, 0);
+- if (skb)
+- *reinject = -1;
+- else
++ if (meta_sk->sk_socket &&
++ test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) &&
++ sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) {
++ skb = mptcp_rcv_buf_optimization(subsk, 0);
++ if (skb)
++ *reinject = -1;
++ else
++ tcp_chrono_start(subsk,
++ TCP_CHRONO_SNDBUF_LIMITED);
++ }
++
++ if (!skb) {
++ /* meta is send buffer limited */
++ tcp_chrono_start(meta_sk, TCP_CHRONO_SNDBUF_LIMITED);
++
+ tcp_chrono_start(subsk,
+ TCP_CHRONO_SNDBUF_LIMITED);
++ }
+ }
+ }
+ return skb;
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 9d3fa5eb36d9..b6e9d709d1e1 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -350,6 +350,7 @@ int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib);
+ void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb);
+ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
+ gfp_t gfp_mask);
++u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now);
+ unsigned int tcp_mss_split_point(const struct sock *sk,
+ const struct sk_buff *skb,
+ unsigned int mss_now,
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index f72edfe89b4d..86bce63ab841 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1781,7 +1781,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+ /* Return the number of segments we want in the skb we are transmitting.
+ * See if congestion control module wants to decide; otherwise, autosize.
+ */
+-static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
++u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
+ {
+ const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
+ u32 min_tso, tso_segs;
+diff --git a/net/mptcp/mptcp_sched.c b/net/mptcp/mptcp_sched.c
+index a4d8c4a5e52d..4b878d14492a 100644
+--- a/net/mptcp/mptcp_sched.c
++++ b/net/mptcp/mptcp_sched.c
+@@ -1,5 +1,6 @@
+ /* MPTCP Scheduler module selector. Highly inspired by tcp_cong.c */
+
++#include 
+ #include 
+ #include 
+ #include 
+@@ -37,12 +38,38 @@ bool mptcp_is_def_unavailable(struct sock *sk)
+ }
+ EXPORT_SYMBOL_GPL(mptcp_is_def_unavailable);
+
++/* estimate number of segments currently in flight + unsent in
++ * the subflow socket.
++ */
++static int mptcp_subflow_queued(struct sock *sk, u32 max_tso_segs)
++{
++ const struct tcp_sock *tp = tcp_sk(sk);
++ unsigned int queued;
++
++ /* estimate the max number of segments in the write queue
++ * this is an overestimation, avoiding to iterate over the queue
++ * to make a better estimation.
++ * Having only one skb in the queue however might trigger tso deferral,
++ * delaying the sending of a tso segment in the hope that skb_entail
++ * will append more data to the skb soon.
++ * Therefore, in the case only one skb is in the queue, we choose to
++ * potentially underestimate, risking to schedule one skb too many onto
++ * the subflow rather than not enough.
++ */
++ if (sk->sk_write_queue.qlen > 1)
++ queued = sk->sk_write_queue.qlen * max_tso_segs;
++ else
++ queued = sk->sk_write_queue.qlen;
++
++ return queued + tcp_packets_in_flight(tp);
++}
++
+ static bool mptcp_is_temp_unavailable(struct sock *sk,
+ const struct sk_buff *skb,
+ bool zero_wnd_test)
+ {
+ const struct tcp_sock *tp = tcp_sk(sk);
+- unsigned int mss_now, space, in_flight;
++ unsigned int mss_now;
+
+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) {
+ /* If SACK is disabled, and we got a loss, TCP does not exit
+@@ -66,19 +93,11 @@ static bool mptcp_is_temp_unavailable(struct sock *sk,
+ return true;
+ }
+
+- in_flight = tcp_packets_in_flight(tp);
+- /* Not even a single spot in the cwnd */
+- if (in_flight >= tp->snd_cwnd)
+- return true;
+-
+ mss_now = tcp_current_mss(sk);
+
+- /* Now, check if what is queued in the subflow's send-queue
+- * already fills the cwnd.
+- */
+- space = (tp->snd_cwnd - in_flight) * mss_now;
+-
+- if (tp->write_seq - tp->snd_nxt >= space)
++ /* Not even a single spot in the cwnd */
++ if (mptcp_subflow_queued(sk, tcp_tso_segs(sk, tcp_current_mss(sk)))
++ >= tp->snd_cwnd)
+ return true;
+
+ if (zero_wnd_test && !before(tp->write_seq, tcp_wnd_end(tp)))
+@@ -399,11 +418,10 @@ struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
+ unsigned int *limit)
+ {
+ struct sk_buff *skb = __mptcp_next_segment(meta_sk, reinject);
+- unsigned int mss_now, in_flight_space;
+- int remaining_in_flight_space;
+- u32 max_len, max_segs, window;
++ unsigned int mss_now;
++ u32 max_len, gso_max_segs, max_segs, max_tso_segs, window;
+ struct tcp_sock *subtp;
+- u16 gso_max_segs;
++ int queued;
+
+ /* As we set it, we have to reset it as well. */
+ *limit = 0;
+@@ -441,35 +459,29 @@ struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
+ if (skb->len <= mss_now)
+ return skb;
+
+- /* The following is similar to tcp_mss_split_point, but
+- * we do not care about nagle, because we will anyways
+- * use TCP_NAGLE_PUSH, which overrides this.
++ max_tso_segs = tcp_tso_segs(*subsk, tcp_current_mss(*subsk)); ++ queued = mptcp_subflow_queued(*subsk, max_tso_segs); ++ ++ /* this condition should already have been established in ++ * mptcp_is_temp_unavailable when selecting available flows + */ ++ WARN_ONCE(subtp->snd_cwnd <= queued, "Selected subflow no cwnd room"); + + gso_max_segs = (*subsk)->sk_gso_max_segs; + if (!gso_max_segs) /* No gso supported on the subflow's NIC */ + gso_max_segs = 1; +- max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs); ++ ++ max_segs = min_t(unsigned int, subtp->snd_cwnd - queued, gso_max_segs); + if (!max_segs) + return NULL; + +- /* max_len is what would fit in the cwnd (respecting the 2GSO-limit of +- * tcp_cwnd_test), but ignoring whatever was already queued. ++ /* if there is room for a segment, schedule up to a complete TSO ++ * segment to avoid TSO splitting. Even if it is more than allowed by ++ * the congestion window. + */ +- max_len = min(mss_now * max_segs, skb->len); +- +- in_flight_space = (subtp->snd_cwnd - tcp_packets_in_flight(subtp)) * mss_now; +- remaining_in_flight_space = (int)in_flight_space - (subtp->write_seq - subtp->snd_nxt); ++ max_segs = max_t(unsigned int, max_tso_segs, max_segs); + +- if (remaining_in_flight_space <= 0) +- WARN_ONCE(1, "in_flight %u cwnd %u wseq %u snxt %u mss_now %u cache %u", +- tcp_packets_in_flight(subtp), subtp->snd_cwnd, +- subtp->write_seq, subtp->snd_nxt, mss_now, subtp->mss_cache); +- else +- /* max_len now fits exactly in the write-queue, taking into +- * account what was already queued. +- */ +- max_len = min_t(u32, max_len, remaining_in_flight_space); ++ max_len = min(mss_now * max_segs, skb->len); + + window = tcp_wnd_end(subtp) - subtp->write_seq;
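
Illustration (not part of the patch): a minimal userspace sketch of the segment accounting the hunks above introduce. struct subflow_model, subflow_queued() and subflow_has_cwnd_room() are invented names for this sketch; the real code operates on struct sock / struct tcp_sock through mptcp_subflow_queued() and mptcp_is_temp_unavailable() as shown in the diff.

/* Userspace model only: a subflow is considered temporarily unavailable
 * once the segments already queued plus those in flight fill its cwnd.
 */
#include <stdio.h>

struct subflow_model {
	unsigned int write_queue_len;   /* skbs sitting in the subflow write queue */
	unsigned int packets_in_flight; /* segments sent but not yet acked */
	unsigned int snd_cwnd;          /* congestion window, in segments */
};

/* Mirrors mptcp_subflow_queued(): overestimate the queue as
 * qlen * max_tso_segs, except when a single skb is queued, so that
 * TSO deferral is not starved.
 */
static unsigned int subflow_queued(const struct subflow_model *sf,
				   unsigned int max_tso_segs)
{
	unsigned int queued;

	if (sf->write_queue_len > 1)
		queued = sf->write_queue_len * max_tso_segs;
	else
		queued = sf->write_queue_len;

	return queued + sf->packets_in_flight;
}

/* Mirrors the new check in mptcp_is_temp_unavailable(): the subflow is
 * usable only while the estimate leaves room in the cwnd.
 */
static int subflow_has_cwnd_room(const struct subflow_model *sf,
				 unsigned int max_tso_segs)
{
	return subflow_queued(sf, max_tso_segs) < sf->snd_cwnd;
}

int main(void)
{
	struct subflow_model sf = { .write_queue_len = 3,
				    .packets_in_flight = 4,
				    .snd_cwnd = 10 };

	/* 3 skbs * 2 segs + 4 in flight = 10 >= cwnd 10: no room left */
	printf("cwnd room: %d\n", subflow_has_cwnd_room(&sf, 2));
	return 0;
}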
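A second sketch under the same assumptions (next_segment_len() and its flat parameter list are invented for illustration): the new limit computation in mptcp_next_segment() schedules at least one full TSO burst whenever any cwnd room is left, then caps the byte count by the skb length and, as the trailing window computation in the hunk suggests, by the peer's receive window.

#include <stdio.h>

static unsigned int min_u32(unsigned int a, unsigned int b) { return a < b ? a : b; }
static unsigned int max_u32(unsigned int a, unsigned int b) { return a > b ? a : b; }

/* Userspace model of the limit computed for the skb about to be scheduled.
 * The caller has already ensured queued < snd_cwnd (cf. the WARN_ONCE in
 * the patch), so the subtraction below cannot underflow.
 */
static unsigned int next_segment_len(unsigned int mss_now, unsigned int skb_len,
				     unsigned int snd_cwnd, unsigned int queued,
				     unsigned int max_tso_segs,
				     unsigned int gso_max_segs,
				     unsigned int window)
{
	unsigned int max_segs, max_len;

	if (!gso_max_segs)	/* no GSO supported on the subflow's NIC */
		gso_max_segs = 1;

	/* room left in the cwnd, capped by what the NIC can aggregate */
	max_segs = min_u32(snd_cwnd - queued, gso_max_segs);
	if (!max_segs)
		return 0;

	/* if there is any room, send up to a full TSO burst rather than
	 * splitting it, even when that exceeds the remaining cwnd room
	 */
	max_segs = max_u32(max_tso_segs, max_segs);

	max_len = min_u32(mss_now * max_segs, skb_len);

	/* never schedule past the announced receive window */
	return min_u32(max_len, window);
}

int main(void)
{
	/* cwnd room of 2 segments, but a TSO burst of 8 is preferred */
	printf("%u bytes\n", next_segment_len(1448, 64000, 10, 8, 8, 64, 65535));
	return 0;
}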