From ea07e64ebed59b25edb3d69c23a93d4b5f514f8e Mon Sep 17 00:00:00 2001
From: "Ycarus (Yannick Chabanois)"
Date: Thu, 29 Apr 2021 21:08:35 +0200
Subject: [PATCH] Update MPTCP with patches from tessares

---
 .../generic/hack-5.4/690-mptcp_trunk.patch | 209 ++++++++++++++++++
 1 file changed, 209 insertions(+)

diff --git a/root/target/linux/generic/hack-5.4/690-mptcp_trunk.patch b/root/target/linux/generic/hack-5.4/690-mptcp_trunk.patch
index b422429c..8e3ee022 100644
--- a/root/target/linux/generic/hack-5.4/690-mptcp_trunk.patch
+++ b/root/target/linux/generic/hack-5.4/690-mptcp_trunk.patch
@@ -23987,3 +23987,212 @@ index fc71d41c608d..bdea1a26e3fc 100644
  return new_win;
  }
+diff --git a/net/mptcp/mptcp_sched.c b/net/mptcp/mptcp_sched.c
+index 4b878d14492a..6cb8c5c7d098 100644
+--- a/net/mptcp/mptcp_sched.c
++++ b/net/mptcp/mptcp_sched.c
+@@ -388,25 +388,32 @@ static struct sk_buff *__mptcp_next_segment(struct sock *meta_sk, int *reinject)
+ } else {
+ skb = tcp_send_head(meta_sk);
+
+- if (!skb && meta_sk->sk_socket &&
+- test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) &&
+- sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) {
++ if (!skb) {
+ struct sock *subsk;
+
+- /* meta is send buffer limited */
+- tcp_chrono_start(meta_sk, TCP_CHRONO_SNDBUF_LIMITED);
+-
+ subsk = mpcb->sched_ops->get_subflow(meta_sk,
+ NULL, false);
+ if (!subsk)
+ return NULL;
+
+- skb = mptcp_rcv_buf_optimization(subsk, 0);
+- if (skb)
+- *reinject = -1;
+- else
++ if (meta_sk->sk_socket &&
++ test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) &&
++ sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) {
++ skb = mptcp_rcv_buf_optimization(subsk, 0);
++ if (skb)
++ *reinject = -1;
++ else
++ tcp_chrono_start(subsk,
++ TCP_CHRONO_SNDBUF_LIMITED);
++ }
++
++ if (!skb) {
++ /* meta is send buffer limited */
++ tcp_chrono_start(meta_sk, TCP_CHRONO_SNDBUF_LIMITED);
++
+ tcp_chrono_start(subsk,
+ TCP_CHRONO_SNDBUF_LIMITED);
++ }
+ }
+ }
+ return skb;
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 9d3fa5eb36d9..b6e9d709d1e1 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -350,6 +350,7 @@ int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib);
+ void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb);
+ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
+ gfp_t gfp_mask);
++u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now);
+ unsigned int tcp_mss_split_point(const struct sock *sk,
+ const struct sk_buff *skb,
+ unsigned int mss_now,
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index f72edfe89b4d..86bce63ab841 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1781,7 +1781,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+ /* Return the number of segments we want in the skb we are transmitting.
+ * See if congestion control module wants to decide; otherwise, autosize.
+ */
+-static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
++u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
+ {
+ const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
+ u32 min_tso, tso_segs;
+diff --git a/net/mptcp/mptcp_sched.c b/net/mptcp/mptcp_sched.c
+index a4d8c4a5e52d..4b878d14492a 100644
+--- a/net/mptcp/mptcp_sched.c
++++ b/net/mptcp/mptcp_sched.c
+@@ -1,5 +1,6 @@
+ /* MPTCP Scheduler module selector. Highly inspired by tcp_cong.c */
+
++#include 
+ #include 
+ #include 
+ #include 
+@@ -37,12 +38,38 @@ bool mptcp_is_def_unavailable(struct sock *sk)
+ }
+ EXPORT_SYMBOL_GPL(mptcp_is_def_unavailable);
+
++/* estimate number of segments currently in flight + unsent in
++ * the subflow socket.
++ */
++static int mptcp_subflow_queued(struct sock *sk, u32 max_tso_segs)
++{
++ const struct tcp_sock *tp = tcp_sk(sk);
++ unsigned int queued;
++
++ /* estimate the max number of segments in the write queue
++ * this is an overestimation, avoiding to iterate over the queue
++ * to make a better estimation.
++ * Having only one skb in the queue however might trigger tso deferral,
++ * delaying the sending of a tso segment in the hope that skb_entail
++ * will append more data to the skb soon.
++ * Therefore, in the case only one skb is in the queue, we choose to
++ * potentially underestimate, risking to schedule one skb too many onto
++ * the subflow rather than not enough.
++ */
++ if (sk->sk_write_queue.qlen > 1)
++ queued = sk->sk_write_queue.qlen * max_tso_segs;
++ else
++ queued = sk->sk_write_queue.qlen;
++
++ return queued + tcp_packets_in_flight(tp);
++}
++
+ static bool mptcp_is_temp_unavailable(struct sock *sk,
+ const struct sk_buff *skb,
+ bool zero_wnd_test)
+ {
+ const struct tcp_sock *tp = tcp_sk(sk);
+- unsigned int mss_now, space, in_flight;
++ unsigned int mss_now;
+
+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) {
+ /* If SACK is disabled, and we got a loss, TCP does not exit
+@@ -66,19 +93,11 @@ static bool mptcp_is_temp_unavailable(struct sock *sk,
+ return true;
+ }
+
+- in_flight = tcp_packets_in_flight(tp);
+- /* Not even a single spot in the cwnd */
+- if (in_flight >= tp->snd_cwnd)
+- return true;
+-
+ mss_now = tcp_current_mss(sk);
+
+- /* Now, check if what is queued in the subflow's send-queue
+- * already fills the cwnd.
+- */
+- space = (tp->snd_cwnd - in_flight) * mss_now;
+-
+- if (tp->write_seq - tp->snd_nxt >= space)
++ /* Not even a single spot in the cwnd */
++ if (mptcp_subflow_queued(sk, tcp_tso_segs(sk, tcp_current_mss(sk)))
++ >= tp->snd_cwnd)
+ return true;
+
+ if (zero_wnd_test && !before(tp->write_seq, tcp_wnd_end(tp)))
+@@ -399,11 +418,10 @@ struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
+ unsigned int *limit)
+ {
+ struct sk_buff *skb = __mptcp_next_segment(meta_sk, reinject);
+- unsigned int mss_now, in_flight_space;
+- int remaining_in_flight_space;
+- u32 max_len, max_segs, window;
++ unsigned int mss_now;
++ u32 max_len, gso_max_segs, max_segs, max_tso_segs, window;
+ struct tcp_sock *subtp;
+- u16 gso_max_segs;
++ int queued;
+
+ /* As we set it, we have to reset it as well. */
+ *limit = 0;
+@@ -441,35 +459,29 @@ struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
+ if (skb->len <= mss_now)
+ return skb;
+
+- /* The following is similar to tcp_mss_split_point, but
+- * we do not care about nagle, because we will anyways
+- * use TCP_NAGLE_PUSH, which overrides this.
++ max_tso_segs = tcp_tso_segs(*subsk, tcp_current_mss(*subsk)); ++ queued = mptcp_subflow_queued(*subsk, max_tso_segs); ++ ++ /* this condition should already have been established in ++ * mptcp_is_temp_unavailable when selecting available flows + */ ++ WARN_ONCE(subtp->snd_cwnd <= queued, "Selected subflow no cwnd room"); + + gso_max_segs = (*subsk)->sk_gso_max_segs; + if (!gso_max_segs) /* No gso supported on the subflow's NIC */ + gso_max_segs = 1; +- max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs); ++ ++ max_segs = min_t(unsigned int, subtp->snd_cwnd - queued, gso_max_segs); + if (!max_segs) + return NULL; + +- /* max_len is what would fit in the cwnd (respecting the 2GSO-limit of +- * tcp_cwnd_test), but ignoring whatever was already queued. ++ /* if there is room for a segment, schedule up to a complete TSO ++ * segment to avoid TSO splitting. Even if it is more than allowed by ++ * the congestion window. + */ +- max_len = min(mss_now * max_segs, skb->len); +- +- in_flight_space = (subtp->snd_cwnd - tcp_packets_in_flight(subtp)) * mss_now; +- remaining_in_flight_space = (int)in_flight_space - (subtp->write_seq - subtp->snd_nxt); ++ max_segs = max_t(unsigned int, max_tso_segs, max_segs); + +- if (remaining_in_flight_space <= 0) +- WARN_ONCE(1, "in_flight %u cwnd %u wseq %u snxt %u mss_now %u cache %u", +- tcp_packets_in_flight(subtp), subtp->snd_cwnd, +- subtp->write_seq, subtp->snd_nxt, mss_now, subtp->mss_cache); +- else +- /* max_len now fits exactly in the write-queue, taking into +- * account what was already queued. +- */ +- max_len = min_t(u32, max_len, remaining_in_flight_space); ++ max_len = min(mss_now * max_segs, skb->len); + + window = tcp_wnd_end(subtp) - subtp->write_seq;
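
Illustration (not part of the patch): a minimal userspace sketch of the segment accounting the hunks above introduce. struct subflow_model, subflow_queued() and subflow_has_cwnd_room() are invented names for this sketch; the real code operates on struct sock / struct tcp_sock through mptcp_subflow_queued() and mptcp_is_temp_unavailable() as shown in the diff.

/* Userspace model only: a subflow is considered temporarily unavailable
 * once the segments already queued plus those in flight fill its cwnd.
 */
#include <stdio.h>

struct subflow_model {
	unsigned int write_queue_len;   /* skbs sitting in the subflow write queue */
	unsigned int packets_in_flight; /* segments sent but not yet acked */
	unsigned int snd_cwnd;          /* congestion window, in segments */
};

/* Mirrors mptcp_subflow_queued(): overestimate the queue as
 * qlen * max_tso_segs, except when a single skb is queued, so that
 * TSO deferral is not starved.
 */
static unsigned int subflow_queued(const struct subflow_model *sf,
				   unsigned int max_tso_segs)
{
	unsigned int queued;

	if (sf->write_queue_len > 1)
		queued = sf->write_queue_len * max_tso_segs;
	else
		queued = sf->write_queue_len;

	return queued + sf->packets_in_flight;
}

/* Mirrors the new check in mptcp_is_temp_unavailable(): the subflow is
 * usable only while the estimate leaves room in the cwnd.
 */
static int subflow_has_cwnd_room(const struct subflow_model *sf,
				 unsigned int max_tso_segs)
{
	return subflow_queued(sf, max_tso_segs) < sf->snd_cwnd;
}

int main(void)
{
	struct subflow_model sf = { .write_queue_len = 3,
				    .packets_in_flight = 4,
				    .snd_cwnd = 10 };

	/* 3 skbs * 2 segs + 4 in flight = 10 >= cwnd 10: no room left */
	printf("cwnd room: %d\n", subflow_has_cwnd_room(&sf, 2));
	return 0;
}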
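A second sketch under the same assumptions (next_segment_len() and its flat parameter list are invented for illustration): the new limit computation in mptcp_next_segment() schedules at least one full TSO burst whenever any cwnd room is left, then caps the byte count by the skb length and, as the trailing window computation in the hunk suggests, by the peer's receive window.

#include <stdio.h>

static unsigned int min_u32(unsigned int a, unsigned int b) { return a < b ? a : b; }
static unsigned int max_u32(unsigned int a, unsigned int b) { return a > b ? a : b; }

/* Userspace model of the limit computed for the skb about to be scheduled.
 * The caller has already ensured queued < snd_cwnd (cf. the WARN_ONCE in
 * the patch), so the subtraction below cannot underflow.
 */
static unsigned int next_segment_len(unsigned int mss_now, unsigned int skb_len,
				     unsigned int snd_cwnd, unsigned int queued,
				     unsigned int max_tso_segs,
				     unsigned int gso_max_segs,
				     unsigned int window)
{
	unsigned int max_segs, max_len;

	if (!gso_max_segs)	/* no GSO supported on the subflow's NIC */
		gso_max_segs = 1;

	/* room left in the cwnd, capped by what the NIC can aggregate */
	max_segs = min_u32(snd_cwnd - queued, gso_max_segs);
	if (!max_segs)
		return 0;

	/* if there is any room, send up to a full TSO burst rather than
	 * splitting it, even when that exceeds the remaining cwnd room
	 */
	max_segs = max_u32(max_tso_segs, max_segs);

	max_len = min_u32(mss_now * max_segs, skb_len);

	/* never schedule past the announced receive window */
	return min_u32(max_len, window);
}

int main(void)
{
	/* cwnd room of 2 segments, but a TSO burst of 8 is preferred */
	printf("%u bytes\n", next_segment_len(1448, 64000, 10, 8, 8, 64, 65535));
	return 0;
}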