Mirror of https://github.com/Ysurac/openmptcprouter.git
Synced 2025-03-09 15:40:20 +00:00
Update MPTCP with patches from Tessares
This commit is contained in:
parent ff44ceeeba
commit ea07e64ebe
1 changed file with 209 additions and 0 deletions
@@ -23987,3 +23987,212 @@ index fc71d41c608d..bdea1a26e3fc 100644
         return new_win;
 }
diff --git a/net/mptcp/mptcp_sched.c b/net/mptcp/mptcp_sched.c
index 4b878d14492a..6cb8c5c7d098 100644
--- a/net/mptcp/mptcp_sched.c
+++ b/net/mptcp/mptcp_sched.c
@@ -388,25 +388,32 @@ static struct sk_buff *__mptcp_next_segment(struct sock *meta_sk, int *reinject)
         } else {
                 skb = tcp_send_head(meta_sk);
 
-                if (!skb && meta_sk->sk_socket &&
-                    test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) &&
-                    sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) {
+                if (!skb) {
                         struct sock *subsk;
 
-                        /* meta is send buffer limited */
-                        tcp_chrono_start(meta_sk, TCP_CHRONO_SNDBUF_LIMITED);
-
                         subsk = mpcb->sched_ops->get_subflow(meta_sk,
                                                              NULL, false);
                         if (!subsk)
                                 return NULL;
 
-                        skb = mptcp_rcv_buf_optimization(subsk, 0);
-                        if (skb)
-                                *reinject = -1;
-                        else
+                        if (meta_sk->sk_socket &&
+                            test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) &&
+                            sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) {
+                                skb = mptcp_rcv_buf_optimization(subsk, 0);
+                                if (skb)
+                                        *reinject = -1;
+                                else
+                                        tcp_chrono_start(subsk,
+                                                         TCP_CHRONO_SNDBUF_LIMITED);
+                        }
+
+                        if (!skb) {
+                                /* meta is send buffer limited */
+                                tcp_chrono_start(meta_sk, TCP_CHRONO_SNDBUF_LIMITED);
+
                                 tcp_chrono_start(subsk,
                                                  TCP_CHRONO_SNDBUF_LIMITED);
+                        }
                 }
         }
         return skb;
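In plain terms, the hunk above reorders the no-data fallback: the scheduler now picks a subflow before testing for send-buffer pressure, tries the receive-buffer optimization only under that pressure, and charges the SNDBUF_LIMITED chrono to both the meta socket and the chosen subflow whenever no skb could be produced. The standalone C sketch below models only that control flow; every identifier in it is a simplified stand-in for the kernel helper with the same role, not kernel API:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct buf { int len; };

/* stand-ins for tcp_send_head(), the SOCK_NOSPACE/wspace test,
 * mptcp_rcv_buf_optimization() and tcp_chrono_start() */
static struct buf *send_head;
static bool buffer_limited = true;
static struct buf *rcv_buf_optimization(void) { return NULL; }
static void chrono_sndbuf_limited(const char *sk)
{
        printf("SNDBUF_LIMITED charged to %s\n", sk);
}

static struct buf *next_segment(int *reinject)
{
        struct buf *skb = send_head;

        if (!skb) {
                /* new order: select the subflow first, so blocked time
                 * can be attributed to it as well */
                const char *subsk = "subflow"; /* sched_ops->get_subflow() */

                if (buffer_limited) {
                        skb = rcv_buf_optimization();
                        if (skb)
                                *reinject = -1;
                        else
                                chrono_sndbuf_limited(subsk);
                }

                if (!skb) {
                        /* meta is send buffer limited */
                        chrono_sndbuf_limited("meta");
                        chrono_sndbuf_limited(subsk);
                }
        }
        return skb;
}

int main(void)
{
        int reinject = 0;

        if (!next_segment(&reinject))
                printf("nothing to send (reinject=%d)\n", reinject);
        return 0;
}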
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9d3fa5eb36d9..b6e9d709d1e1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -350,6 +350,7 @@ int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib);
 void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb);
 int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
                      gfp_t gfp_mask);
+u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now);
 unsigned int tcp_mss_split_point(const struct sock *sk,
                                  const struct sk_buff *skb,
                                  unsigned int mss_now,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f72edfe89b4d..86bce63ab841 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1781,7 +1781,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
 /* Return the number of segments we want in the skb we are transmitting.
  * See if congestion control module wants to decide; otherwise, autosize.
  */
-static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
+u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
 {
         const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
         u32 min_tso, tso_segs;
diff --git a/net/mptcp/mptcp_sched.c b/net/mptcp/mptcp_sched.c
index a4d8c4a5e52d..4b878d14492a 100644
--- a/net/mptcp/mptcp_sched.c
+++ b/net/mptcp/mptcp_sched.c
@@ -1,5 +1,6 @@
 /* MPTCP Scheduler module selector. Highly inspired by tcp_cong.c */
 
+#include <linux/bug.h>
 #include <linux/module.h>
 #include <net/mptcp.h>
 #include <trace/events/tcp.h>
@@ -37,12 +38,38 @@ bool mptcp_is_def_unavailable(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(mptcp_is_def_unavailable);
 
+/* estimate number of segments currently in flight + unsent in
+ * the subflow socket.
+ */
+static int mptcp_subflow_queued(struct sock *sk, u32 max_tso_segs)
+{
+        const struct tcp_sock *tp = tcp_sk(sk);
+        unsigned int queued;
+
+        /* estimate the max number of segments in the write queue
+         * this is an overestimation, avoiding to iterate over the queue
+         * to make a better estimation.
+         * Having only one skb in the queue however might trigger tso deferral,
+         * delaying the sending of a tso segment in the hope that skb_entail
+         * will append more data to the skb soon.
+         * Therefore, in the case only one skb is in the queue, we choose to
+         * potentially underestimate, risking to schedule one skb too many onto
+         * the subflow rather than not enough.
+         */
+        if (sk->sk_write_queue.qlen > 1)
+                queued = sk->sk_write_queue.qlen * max_tso_segs;
+        else
+                queued = sk->sk_write_queue.qlen;
+
+        return queued + tcp_packets_in_flight(tp);
+}
+
 static bool mptcp_is_temp_unavailable(struct sock *sk,
                                       const struct sk_buff *skb,
                                       bool zero_wnd_test)
 {
         const struct tcp_sock *tp = tcp_sk(sk);
-        unsigned int mss_now, space, in_flight;
+        unsigned int mss_now;
 
         if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) {
                 /* If SACK is disabled, and we got a loss, TCP does not exit
@@ -66,19 +93,11 @@ static bool mptcp_is_temp_unavailable(struct sock *sk,
                         return true;
         }
 
-        in_flight = tcp_packets_in_flight(tp);
-        /* Not even a single spot in the cwnd */
-        if (in_flight >= tp->snd_cwnd)
-                return true;
-
         mss_now = tcp_current_mss(sk);
 
-        /* Now, check if what is queued in the subflow's send-queue
-         * already fills the cwnd.
-         */
-        space = (tp->snd_cwnd - in_flight) * mss_now;
-
-        if (tp->write_seq - tp->snd_nxt >= space)
+        /* Not even a single spot in the cwnd */
+        if (mptcp_subflow_queued(sk, tcp_tso_segs(sk, tcp_current_mss(sk)))
+            >= tp->snd_cwnd)
                 return true;
 
         if (zero_wnd_test && !before(tp->write_seq, tcp_wnd_end(tp)))
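The helper added above trades accuracy for cost in a deliberate way: with more than one skb queued it assumes each queued skb could expand to a full TSO burst (qlen * max_tso_segs, an overestimate), while with exactly one skb it counts just a single segment, so TSO deferral cannot make an idle subflow look permanently busy. Below is a userspace model of that arithmetic and of the cwnd-availability test that now consumes it; all names and numbers are illustrative only:

#include <stdio.h>

/* mirrors mptcp_subflow_queued(): segments in flight plus an estimate
 * of what the write queue may expand to */
static unsigned int subflow_queued(unsigned int qlen,
                                   unsigned int max_tso_segs,
                                   unsigned int in_flight)
{
        unsigned int queued;

        if (qlen > 1)
                queued = qlen * max_tso_segs;   /* overestimate */
        else
                queued = qlen;                  /* deliberate underestimate */

        return queued + in_flight;
}

int main(void)
{
        unsigned int snd_cwnd = 10, max_tso_segs = 4, in_flight = 3;

        /* two skbs queued: 2 * 4 + 3 = 11 >= cwnd, so the subflow is
         * reported temporarily unavailable (cf. mptcp_is_temp_unavailable) */
        unsigned int est = subflow_queued(2, max_tso_segs, in_flight);
        printf("qlen=2: estimate=%u -> %s\n", est,
               est >= snd_cwnd ? "unavailable" : "available");

        /* one skb queued: counted as one segment, 1 + 3 = 4 < cwnd, so the
         * subflow stays schedulable even if that skb is a full TSO burst */
        est = subflow_queued(1, max_tso_segs, in_flight);
        printf("qlen=1: estimate=%u -> %s\n", est,
               est >= snd_cwnd ? "unavailable" : "available");
        return 0;
}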
@@ -399,11 +418,10 @@ struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
                                    unsigned int *limit)
 {
         struct sk_buff *skb = __mptcp_next_segment(meta_sk, reinject);
-        unsigned int mss_now, in_flight_space;
-        int remaining_in_flight_space;
-        u32 max_len, max_segs, window;
+        unsigned int mss_now;
+        u32 max_len, gso_max_segs, max_segs, max_tso_segs, window;
         struct tcp_sock *subtp;
-        u16 gso_max_segs;
+        int queued;
 
         /* As we set it, we have to reset it as well. */
         *limit = 0;
@@ -441,35 +459,29 @@ struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
         if (skb->len <= mss_now)
                 return skb;
 
-        /* The following is similar to tcp_mss_split_point, but
-         * we do not care about nagle, because we will anyways
-         * use TCP_NAGLE_PUSH, which overrides this.
+        max_tso_segs = tcp_tso_segs(*subsk, tcp_current_mss(*subsk));
+        queued = mptcp_subflow_queued(*subsk, max_tso_segs);
+
+        /* this condition should already have been established in
+         * mptcp_is_temp_unavailable when selecting available flows
         */
+        WARN_ONCE(subtp->snd_cwnd <= queued, "Selected subflow no cwnd room");
 
         gso_max_segs = (*subsk)->sk_gso_max_segs;
         if (!gso_max_segs) /* No gso supported on the subflow's NIC */
                 gso_max_segs = 1;
-        max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs);
+
+        max_segs = min_t(unsigned int, subtp->snd_cwnd - queued, gso_max_segs);
         if (!max_segs)
                 return NULL;
 
-        /* max_len is what would fit in the cwnd (respecting the 2GSO-limit of
-         * tcp_cwnd_test), but ignoring whatever was already queued.
+        /* if there is room for a segment, schedule up to a complete TSO
+         * segment to avoid TSO splitting. Even if it is more than allowed by
+         * the congestion window.
         */
-        max_len = min(mss_now * max_segs, skb->len);
-
-        in_flight_space = (subtp->snd_cwnd - tcp_packets_in_flight(subtp)) * mss_now;
-        remaining_in_flight_space = (int)in_flight_space - (subtp->write_seq - subtp->snd_nxt);
+        max_segs = max_t(unsigned int, max_tso_segs, max_segs);
 
-        if (remaining_in_flight_space <= 0)
-                WARN_ONCE(1, "in_flight %u cwnd %u wseq %u snxt %u mss_now %u cache %u",
-                          tcp_packets_in_flight(subtp), subtp->snd_cwnd,
-                          subtp->write_seq, subtp->snd_nxt, mss_now, subtp->mss_cache);
-        else
-                /* max_len now fits exactly in the write-queue, taking into
-                 * account what was already queued.
-                 */
-                max_len = min_t(u32, max_len, remaining_in_flight_space);
+        max_len = min(mss_now * max_segs, skb->len);
 
         window = tcp_wnd_end(subtp) - subtp->write_seq;
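The rewritten tail of mptcp_next_segment() drops the byte-level in-flight-space bookkeeping in favour of the same segment-based estimate: max_segs is first capped to the cwnd room left after subtracting the queued estimate (and to the NIC's GSO limit), and if any room remains it is rounded up to a whole TSO burst so that hardware segmentation is never split, even where that briefly overshoots the cwnd. A small userspace model with illustrative numbers:

#include <stdio.h>

static unsigned int min_u(unsigned int a, unsigned int b) { return a < b ? a : b; }
static unsigned int max_u(unsigned int a, unsigned int b) { return a > b ? a : b; }

int main(void)
{
        unsigned int snd_cwnd = 10, queued = 8;      /* 2 cwnd slots left */
        unsigned int gso_max_segs = 64, max_tso_segs = 6;
        unsigned int mss_now = 1400, skb_len = 20000;

        /* cwnd room after the queued estimate, capped by the GSO limit */
        unsigned int max_segs = min_u(snd_cwnd - queued, gso_max_segs);
        if (!max_segs)
                return 0;               /* nothing schedulable here */

        /* round up to a complete TSO burst: max(6, 2) = 6 segments,
         * accepting a small cwnd overshoot to avoid TSO splitting */
        max_segs = max_u(max_tso_segs, max_segs);

        unsigned int max_len = min_u(mss_now * max_segs, skb_len);
        printf("schedule up to %u bytes (%u segments)\n", max_len, max_segs);
        return 0;
}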