mirror of https://github.com/Ysurac/openmptcprouter.git

Update MPTCP with patches from tessares

Ycarus (Yannick Chabanois) 2021-04-29 21:08:35 +02:00
parent ff44ceeeba
commit ea07e64ebe


@@ -23987,3 +23987,212 @@ index fc71d41c608d..bdea1a26e3fc 100644
return new_win;
}
diff --git a/net/mptcp/mptcp_sched.c b/net/mptcp/mptcp_sched.c
index 4b878d14492a..6cb8c5c7d098 100644
--- a/net/mptcp/mptcp_sched.c
+++ b/net/mptcp/mptcp_sched.c
@@ -388,25 +388,32 @@ static struct sk_buff *__mptcp_next_segment(struct sock *meta_sk, int *reinject)
} else {
skb = tcp_send_head(meta_sk);
- if (!skb && meta_sk->sk_socket &&
- test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) &&
- sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) {
+ if (!skb) {
struct sock *subsk;
- /* meta is send buffer limited */
- tcp_chrono_start(meta_sk, TCP_CHRONO_SNDBUF_LIMITED);
-
subsk = mpcb->sched_ops->get_subflow(meta_sk,
NULL, false);
if (!subsk)
return NULL;
- skb = mptcp_rcv_buf_optimization(subsk, 0);
- if (skb)
- *reinject = -1;
- else
+ if (meta_sk->sk_socket &&
+ test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) &&
+ sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) {
+ skb = mptcp_rcv_buf_optimization(subsk, 0);
+ if (skb)
+ *reinject = -1;
+ else
+ tcp_chrono_start(subsk,
+ TCP_CHRONO_SNDBUF_LIMITED);
+ }
+
+ if (!skb) {
+ /* meta is send buffer limited */
+ tcp_chrono_start(meta_sk, TCP_CHRONO_SNDBUF_LIMITED);
+
tcp_chrono_start(subsk,
TCP_CHRONO_SNDBUF_LIMITED);
+ }
}
}
return skb;
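
For reference, the control flow after this hunk applies reads roughly as follows (a sketch reconstructed from the hunk above, not verified against the full tree); the point of the reordering is that a subflow is now selected before deciding how to account for the send-buffer limitation:

	skb = tcp_send_head(meta_sk);

	if (!skb) {
		struct sock *subsk;

		subsk = mpcb->sched_ops->get_subflow(meta_sk, NULL, false);
		if (!subsk)
			return NULL;

		if (meta_sk->sk_socket &&
		    test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) &&
		    sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) {
			/* an skb produced here is reinjected */
			skb = mptcp_rcv_buf_optimization(subsk, 0);
			if (skb)
				*reinject = -1;
			else
				tcp_chrono_start(subsk, TCP_CHRONO_SNDBUF_LIMITED);
		}

		if (!skb) {
			/* meta is send buffer limited */
			tcp_chrono_start(meta_sk, TCP_CHRONO_SNDBUF_LIMITED);
			tcp_chrono_start(subsk, TCP_CHRONO_SNDBUF_LIMITED);
		}
	}
	return skb;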
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9d3fa5eb36d9..b6e9d709d1e1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -350,6 +350,7 @@ int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib);
void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb);
int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
gfp_t gfp_mask);
+u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now);
unsigned int tcp_mss_split_point(const struct sock *sk,
const struct sk_buff *skb,
unsigned int mss_now,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f72edfe89b4d..86bce63ab841 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1781,7 +1781,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
/* Return the number of segments we want in the skb we are transmitting.
* See if congestion control module wants to decide; otherwise, autosize.
*/
-static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
+u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
{
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
u32 min_tso, tso_segs;
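
Since tcp_tso_segs() is no longer static and is now declared in include/net/tcp.h, the MPTCP scheduler can reuse TCP's TSO autosizing decision instead of duplicating it. A minimal usage sketch (the variable name is illustrative):

	/* how many segments TSO autosizing would put in one skb right now */
	u32 tso_budget = tcp_tso_segs(sk, tcp_current_mss(sk));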
diff --git a/net/mptcp/mptcp_sched.c b/net/mptcp/mptcp_sched.c
index a4d8c4a5e52d..4b878d14492a 100644
--- a/net/mptcp/mptcp_sched.c
+++ b/net/mptcp/mptcp_sched.c
@@ -1,5 +1,6 @@
/* MPTCP Scheduler module selector. Highly inspired by tcp_cong.c */
+#include <linux/bug.h>
#include <linux/module.h>
#include <net/mptcp.h>
#include <trace/events/tcp.h>
@@ -37,12 +38,38 @@ bool mptcp_is_def_unavailable(struct sock *sk)
}
EXPORT_SYMBOL_GPL(mptcp_is_def_unavailable);
+/* Estimate the number of segments currently in flight plus those
+ * still unsent in the subflow socket.
+ */
+static int mptcp_subflow_queued(struct sock *sk, u32 max_tso_segs)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ unsigned int queued;
+
+ /* Estimate the max number of segments in the write queue.
+ * This is an overestimate that avoids iterating over the queue
+ * to compute a more precise value.
+ * Having only one skb in the queue, however, might trigger TSO
+ * deferral, delaying the sending of a TSO segment in the hope that
+ * skb_entail will append more data to the skb soon.
+ * Therefore, when only one skb is in the queue, we choose to
+ * potentially underestimate, risking scheduling one skb too many
+ * onto the subflow rather than too few.
+ */
+ if (sk->sk_write_queue.qlen > 1)
+ queued = sk->sk_write_queue.qlen * max_tso_segs;
+ else
+ queued = sk->sk_write_queue.qlen;
+
+ return queued + tcp_packets_in_flight(tp);
+}
+
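
A worked example with made-up numbers: with qlen = 3, max_tso_segs = 4, and 6 segments in flight, the helper returns 3 * 4 + 6 = 18, assuming every queued skb may expand to a full TSO burst. With qlen = 1 it returns 1 + 6 = 7, counting the lone skb as a single segment so that a TSO-deferred skb does not make the subflow look busier than it is.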
static bool mptcp_is_temp_unavailable(struct sock *sk,
const struct sk_buff *skb,
bool zero_wnd_test)
{
const struct tcp_sock *tp = tcp_sk(sk);
- unsigned int mss_now, space, in_flight;
+ unsigned int mss_now;
if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) {
/* If SACK is disabled, and we got a loss, TCP does not exit
@@ -66,19 +93,11 @@ static bool mptcp_is_temp_unavailable(struct sock *sk,
return true;
}
- in_flight = tcp_packets_in_flight(tp);
- /* Not even a single spot in the cwnd */
- if (in_flight >= tp->snd_cwnd)
- return true;
-
mss_now = tcp_current_mss(sk);
- /* Now, check if what is queued in the subflow's send-queue
- * already fills the cwnd.
- */
- space = (tp->snd_cwnd - in_flight) * mss_now;
-
- if (tp->write_seq - tp->snd_nxt >= space)
+ /* Not even a single spot in the cwnd */
+ if (mptcp_subflow_queued(sk, tcp_tso_segs(sk, tcp_current_mss(sk)))
+ >= tp->snd_cwnd)
return true;
if (zero_wnd_test && !before(tp->write_seq, tcp_wnd_end(tp)))
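
The availability check is now segment-based rather than byte-based. Continuing the made-up numbers above: with tp->snd_cwnd = 10 and mptcp_subflow_queued() returning 18, the subflow is reported temporarily unavailable (18 >= 10), whereas the old byte-based test could still have admitted data as long as the bytes queued in the subflow fit in (snd_cwnd - in_flight) * mss_now.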
@@ -399,11 +418,10 @@ struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
unsigned int *limit)
{
struct sk_buff *skb = __mptcp_next_segment(meta_sk, reinject);
- unsigned int mss_now, in_flight_space;
- int remaining_in_flight_space;
- u32 max_len, max_segs, window;
+ unsigned int mss_now;
+ u32 max_len, gso_max_segs, max_segs, max_tso_segs, window;
struct tcp_sock *subtp;
- u16 gso_max_segs;
+ int queued;
/* As we set it, we have to reset it as well. */
*limit = 0;
@@ -441,35 +459,29 @@ struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
if (skb->len <= mss_now)
return skb;
- /* The following is similar to tcp_mss_split_point, but
- * we do not care about nagle, because we will anyways
- * use TCP_NAGLE_PUSH, which overrides this.
+ max_tso_segs = tcp_tso_segs(*subsk, tcp_current_mss(*subsk));
+ queued = mptcp_subflow_queued(*subsk, max_tso_segs);
+
+ /* This condition should already have been established in
+ * mptcp_is_temp_unavailable when selecting the available flows.
+ */
+ WARN_ONCE(subtp->snd_cwnd <= queued, "Selected subflow no cwnd room");
gso_max_segs = (*subsk)->sk_gso_max_segs;
if (!gso_max_segs) /* No gso supported on the subflow's NIC */
gso_max_segs = 1;
- max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs);
+
+ max_segs = min_t(unsigned int, subtp->snd_cwnd - queued, gso_max_segs);
if (!max_segs)
return NULL;
- /* max_len is what would fit in the cwnd (respecting the 2GSO-limit of
- * tcp_cwnd_test), but ignoring whatever was already queued.
+ /* If there is room for a segment, schedule up to a complete TSO
+ * segment to avoid TSO splitting, even if that exceeds what the
+ * congestion window allows.
*/
- max_len = min(mss_now * max_segs, skb->len);
-
- in_flight_space = (subtp->snd_cwnd - tcp_packets_in_flight(subtp)) * mss_now;
- remaining_in_flight_space = (int)in_flight_space - (subtp->write_seq - subtp->snd_nxt);
+ max_segs = max_t(unsigned int, max_tso_segs, max_segs);
- if (remaining_in_flight_space <= 0)
- WARN_ONCE(1, "in_flight %u cwnd %u wseq %u snxt %u mss_now %u cache %u",
- tcp_packets_in_flight(subtp), subtp->snd_cwnd,
- subtp->write_seq, subtp->snd_nxt, mss_now, subtp->mss_cache);
- else
- /* max_len now fits exactly in the write-queue, taking into
- * account what was already queued.
- */
- max_len = min_t(u32, max_len, remaining_in_flight_space);
+ max_len = min(mss_now * max_segs, skb->len);
window = tcp_wnd_end(subtp) - subtp->write_seq;
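
For reference, the sizing logic at the end of this hunk, reconstructed as a sketch from the added lines above (the window clamp on the last line is unchanged context):

	max_tso_segs = tcp_tso_segs(*subsk, tcp_current_mss(*subsk));
	queued = mptcp_subflow_queued(*subsk, max_tso_segs);

	WARN_ONCE(subtp->snd_cwnd <= queued, "Selected subflow no cwnd room");

	gso_max_segs = (*subsk)->sk_gso_max_segs;
	if (!gso_max_segs) /* No gso supported on the subflow's NIC */
		gso_max_segs = 1;

	/* segments that still fit in the cwnd, capped by the NIC's GSO limit */
	max_segs = min_t(unsigned int, subtp->snd_cwnd - queued, gso_max_segs);
	if (!max_segs)
		return NULL;

	/* allow a complete TSO segment even beyond the cwnd budget,
	 * to avoid TSO splitting
	 */
	max_segs = max_t(unsigned int, max_tso_segs, max_segs);
	max_len = min(mss_now * max_segs, skb->len);

	window = tcp_wnd_end(subtp) - subtp->write_seq;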