diff --git a/root/target/linux/generic/hack-4.19/690-mptcp_v0.95.patch b/root/target/linux/generic/hack-4.19/690-mptcp_v0.95.patch
index f8dacff7..6f32ed2c 100644
--- a/root/target/linux/generic/hack-4.19/690-mptcp_v0.95.patch
+++ b/root/target/linux/generic/hack-4.19/690-mptcp_v0.95.patch
@@ -8654,487 +8654,6 @@ diff -aurN linux-4.19.67/net/mptcp/mptcp_blest.c mptcp-mptcp_v0.95/net/mptcp/mpt
 +MODULE_LICENSE("GPL");
 +MODULE_DESCRIPTION("BLEST scheduler for MPTCP, based on default minimum RTT scheduler");
 +MODULE_VERSION("0.95");
-diff -aurN linux-4.19.67/net/mptcp/mptcp_blest.c.orig mptcp-mptcp_v0.95/net/mptcp/mptcp_blest.c.orig
---- linux-4.19.67/net/mptcp/mptcp_blest.c.orig	1970-01-01 01:00:00.000000000 +0100
-+++ mptcp-mptcp_v0.95/net/mptcp/mptcp_blest.c.orig	2019-08-22 18:59:39.000000000 +0200
-@@ -0,0 +1,477 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/* MPTCP Scheduler to reduce HoL-blocking and spurious retransmissions.
-+ *
-+ * Algorithm Design:
-+ * Simone Ferlin
-+ * Ozgu Alay
-+ * Olivier Mehani
-+ * Roksana Boreli
-+ *
-+ * Initial Implementation:
-+ * Simone Ferlin
-+ *
-+ * Additional Authors:
-+ * Daniel Weber
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License
-+ * as published by the Free Software Foundation; either version
-+ * 2 of the License, or (at your option) any later version.
-+ */
-+
-+#include <linux/module.h>
-+#include <net/mptcp.h>
-+#include <trace/events/tcp.h>
-+
-+static unsigned char lambda __read_mostly = 12;
-+module_param(lambda, byte, 0644);
-+MODULE_PARM_DESC(lambda, "Divided by 10 for scaling factor of fast flow rate estimation");
-+
-+static unsigned char max_lambda __read_mostly = 13;
-+module_param(max_lambda, byte, 0644);
-+MODULE_PARM_DESC(max_lambda, "Divided by 10 for maximum scaling factor of fast flow rate estimation");
-+
-+static unsigned char min_lambda __read_mostly = 10;
-+module_param(min_lambda, byte, 0644);
-+MODULE_PARM_DESC(min_lambda, "Divided by 10 for minimum scaling factor of fast flow rate estimation");
-+
-+static unsigned char dyn_lambda_good = 10; /* 1% */
-+module_param(dyn_lambda_good, byte, 0644);
-+MODULE_PARM_DESC(dyn_lambda_good, "Decrease of lambda in positive case.");
-+
-+static unsigned char dyn_lambda_bad = 40; /* 4% */
-+module_param(dyn_lambda_bad, byte, 0644);
-+MODULE_PARM_DESC(dyn_lambda_bad, "Increase of lambda in negative case.");
-+
-+struct blestsched_priv {
-+	u32 last_rbuf_opti;
-+	u32 min_srtt_us;
-+	u32 max_srtt_us;
-+};
-+
-+struct blestsched_cb {
-+	bool retrans_flag;
-+	s16 lambda_1000; /* values range from min_lambda * 100 to max_lambda * 100 */
-+	u32 last_lambda_update;
-+};
-+
-+static struct blestsched_priv *blestsched_get_priv(const struct tcp_sock *tp)
-+{
-+	return (struct blestsched_priv *)&tp->mptcp->mptcp_sched[0];
-+}
-+
-+static struct blestsched_cb *blestsched_get_cb(const struct tcp_sock *tp)
-+{
-+	return (struct blestsched_cb *)&tp->mpcb->mptcp_sched[0];
-+}
-+
-+static void blestsched_update_lambda(struct sock *meta_sk, struct sock *sk)
-+{
-+	struct blestsched_cb *blest_cb = blestsched_get_cb(tcp_sk(meta_sk));
-+	struct blestsched_priv *blest_p = blestsched_get_priv(tcp_sk(sk));
-+
-+	if (tcp_jiffies32 - blest_cb->last_lambda_update < usecs_to_jiffies(blest_p->min_srtt_us >> 3))
-+		return;
-+
-+	/* if there have been retransmissions of packets of the slow flow
-+	 * during the slow flows last RTT => increase lambda
-+	 * otherwise decrease
-+	 */
-+	if (blest_cb->retrans_flag) {
-+		/* need to slow down on the slow flow */
-+		blest_cb->lambda_1000 += dyn_lambda_bad;
-+	} else {
-+		/* use the slow flow more */
-+		blest_cb->lambda_1000 -= dyn_lambda_good;
-+	}
-+	blest_cb->retrans_flag = false;
-+
-+	/* cap lambda_1000 to its value range */
-+	blest_cb->lambda_1000 = min_t(s16, blest_cb->lambda_1000, max_lambda * 100);
-+	blest_cb->lambda_1000 = max_t(s16, blest_cb->lambda_1000, min_lambda * 100);
-+
-+	blest_cb->last_lambda_update = tcp_jiffies32;
-+}
-+
-+/* how many bytes will sk send during the rtt of another, slower flow? */
-+static u32 blestsched_estimate_bytes(struct sock *sk, u32 time_8)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct blestsched_priv *blest_p = blestsched_get_priv(tp);
-+	struct blestsched_cb *blest_cb = blestsched_get_cb(mptcp_meta_tp(tp));
-+	u32 avg_rtt, num_rtts, ca_cwnd, packets;
-+
-+	avg_rtt = (blest_p->min_srtt_us + blest_p->max_srtt_us) / 2;
-+	if (avg_rtt == 0)
-+		num_rtts = 1; /* sanity */
-+	else
-+		num_rtts = (time_8 / avg_rtt) + 1; /* round up */
-+
-+	/* during num_rtts, how many bytes will be sent on the flow?
-+	 * assumes for simplification that Reno is applied as congestion-control
-+	 */
-+	if (tp->snd_ssthresh == TCP_INFINITE_SSTHRESH) {
-+		/* we are in initial slow start */
-+		if (num_rtts > 16)
-+			num_rtts = 16; /* cap for sanity */
-+		packets = tp->snd_cwnd * ((1 << num_rtts) - 1); /* cwnd + 2*cwnd + 4*cwnd */
-+	} else {
-+		ca_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh + 1); /* assume we jump to CA already */
-+		packets = (ca_cwnd + (num_rtts - 1) / 2) * num_rtts;
-+	}
-+
-+	return div_u64(((u64)packets) * tp->mss_cache * blest_cb->lambda_1000, 1000);
-+}
-+
-+static u32 blestsched_estimate_linger_time(struct sock *sk)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct blestsched_priv *blest_p = blestsched_get_priv(tp);
-+	u32 estimate, slope, inflight, cwnd;
-+
-+	inflight = tcp_packets_in_flight(tp) + 1; /* take into account the new one */
-+	cwnd = tp->snd_cwnd;
-+
-+	if (inflight >= cwnd) {
-+		estimate = blest_p->max_srtt_us;
-+	} else {
-+		slope = blest_p->max_srtt_us - blest_p->min_srtt_us;
-+		if (cwnd == 0)
-+			cwnd = 1; /* sanity */
-+		estimate = blest_p->min_srtt_us + (slope * inflight) / cwnd;
-+	}
-+
-+	return (tp->srtt_us > estimate) ? tp->srtt_us : estimate;
-+}
-+
-+/* This is the BLEST scheduler. This function decides on which flow to send
-+ * a given MSS. If all subflows are found to be busy or the currently best
-+ * subflow is estimated to possibly cause HoL-blocking, NULL is returned.
-+ */
-+struct sock *blest_get_available_subflow(struct sock *meta_sk, struct sk_buff *skb,
-+					 bool zero_wnd_test)
-+{
-+	struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
-+	struct sock *bestsk, *minsk = NULL;
-+	struct tcp_sock *meta_tp, *besttp;
-+	struct mptcp_tcp_sock *mptcp;
-+	struct blestsched_priv *blest_p;
-+	u32 min_srtt = U32_MAX;
-+
-+	/* Answer data_fin on same subflow!!! */
-+	if (meta_sk->sk_shutdown & RCV_SHUTDOWN &&
-+	    skb && mptcp_is_data_fin(skb)) {
-+		mptcp_for_each_sub(mpcb, mptcp) {
-+			bestsk = mptcp_to_sock(mptcp);
-+
-+			if (tcp_sk(bestsk)->mptcp->path_index == mpcb->dfin_path_index &&
-+			    mptcp_is_available(bestsk, skb, zero_wnd_test))
-+				return bestsk;
-+		}
-+	}
-+
-+	/* First, find the overall best subflow */
-+	mptcp_for_each_sub(mpcb, mptcp) {
-+		bestsk = mptcp_to_sock(mptcp);
-+		besttp = tcp_sk(bestsk);
-+		blest_p = blestsched_get_priv(besttp);
-+
-+		/* Set of states for which we are allowed to send data */
-+		if (!mptcp_sk_can_send(bestsk))
-+			continue;
-+
-+		/* We do not send data on this subflow unless it is
-+		 * fully established, i.e. the 4th ack has been received.
-+		 */
-+		if (besttp->mptcp->pre_established)
-+			continue;
-+
-+		blest_p->min_srtt_us = min(blest_p->min_srtt_us, besttp->srtt_us);
-+		blest_p->max_srtt_us = max(blest_p->max_srtt_us, besttp->srtt_us);
-+
-+		/* record minimal rtt */
-+		if (besttp->srtt_us < min_srtt) {
-+			min_srtt = besttp->srtt_us;
-+			minsk = bestsk;
-+		}
-+	}
-+
-+	/* find the current best subflow according to the default scheduler */
-+	bestsk = get_available_subflow(meta_sk, skb, zero_wnd_test);
-+
-+	/* if we decided to use a slower flow, we have the option of not using it at all */
-+	if (bestsk && minsk && bestsk != minsk) {
-+		u32 slow_linger_time, fast_bytes, slow_inflight_bytes, slow_bytes, avail_space;
-+
-+		meta_tp = tcp_sk(meta_sk);
-+		besttp = tcp_sk(bestsk);
-+
-+		blestsched_update_lambda(meta_sk, bestsk);
-+
-+		/* if we send this SKB now, it will be acked in besttp->srtt seconds
-+		 * during this time: how many bytes will we send on the fast flow?
-+		 */
-+		slow_linger_time = blestsched_estimate_linger_time(bestsk);
-+		fast_bytes = blestsched_estimate_bytes(minsk, slow_linger_time);
-+
-+		/* is the required space available in the mptcp meta send window?
-+		 * we assume that all bytes inflight on the slow path will be acked in besttp->srtt seconds
-+		 * (just like the SKB if it was sent now) -> that means that those inflight bytes will
-+		 * keep occupying space in the meta window until then
-+		 */
-+		slow_inflight_bytes = besttp->write_seq - besttp->snd_una;
-+		slow_bytes = skb->len + slow_inflight_bytes; // bytes of this SKB plus those in flight already
-+
-+		avail_space = (slow_bytes < meta_tp->snd_wnd) ? (meta_tp->snd_wnd - slow_bytes) : 0;
-+
-+		if (fast_bytes > avail_space) {
-+			/* sending this SKB on the slow flow means
-+			 * we wouldn't be able to send all the data we'd like to send on the fast flow
-+			 * so don't do that
-+			 */
-+			return NULL;
-+		}
-+	}
-+
-+	return bestsk;
-+}
-+
-+/* copy from mptcp_sched.c: mptcp_rcv_buf_optimization */
-+static struct sk_buff *mptcp_blest_rcv_buf_optimization(struct sock *sk, int penal)
-+{
-+	struct sock *meta_sk;
-+	const struct tcp_sock *tp = tcp_sk(sk);
-+	struct mptcp_tcp_sock *mptcp;
-+	struct sk_buff *skb_head;
-+	struct blestsched_priv *blest_p = blestsched_get_priv(tp);
-+	struct blestsched_cb *blest_cb;
-+
-+	meta_sk = mptcp_meta_sk(sk);
-+	skb_head = tcp_rtx_queue_head(meta_sk);
-+
-+	if (!skb_head)
-+		return NULL;
-+
-+	/* If penalization is optional (coming from mptcp_next_segment() and
-+	 * We are not send-buffer-limited we do not penalize. The retransmission
-+	 * is just an optimization to fix the idle-time due to the delay before
-+	 * we wake up the application.
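-+	 * Otherwise we record the retransmission for the next lambda update
-+	 * and, at most once per RTT, halve the cwnd of slower subflows that
-+	 * this segment was already sent on.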
-+	 */
-+	if (!penal && sk_stream_memory_free(meta_sk))
-+		goto retrans;
-+
-+	/* Record the occurrence of a retransmission to update the lambda value */
-+	blest_cb = blestsched_get_cb(tcp_sk(meta_sk));
-+	blest_cb->retrans_flag = true;
-+
-+	/* Only penalize again after an RTT has elapsed */
-+	if (tcp_jiffies32 - blest_p->last_rbuf_opti < usecs_to_jiffies(tp->srtt_us >> 3))
-+		goto retrans;
-+
-+	/* Half the cwnd of the slow flows */
-+	mptcp_for_each_sub(tp->mpcb, mptcp) {
-+		struct tcp_sock *tp_it = mptcp->tp;
-+
-+		if (tp_it != tp &&
-+		    TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
-+			if (tp->srtt_us < tp_it->srtt_us && inet_csk((struct sock *)tp_it)->icsk_ca_state == TCP_CA_Open) {
-+				u32 prior_cwnd = tp_it->snd_cwnd;
-+
-+				tp_it->snd_cwnd = max(tp_it->snd_cwnd >> 1U, 1U);
-+
-+				/* If in slow start, do not reduce the ssthresh */
-+				if (prior_cwnd >= tp_it->snd_ssthresh)
-+					tp_it->snd_ssthresh = max(tp_it->snd_ssthresh >> 1U, 2U);
-+
-+				blest_p->last_rbuf_opti = tcp_jiffies32;
-+			}
-+		}
-+	}
-+
-+retrans:
-+
-+	/* Segment not yet injected into this path? Take it!!! */
-+	if (!(TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index))) {
-+		bool do_retrans = false;
-+		mptcp_for_each_sub(tp->mpcb, mptcp) {
-+			struct tcp_sock *tp_it = mptcp->tp;
-+
-+			if (tp_it != tp &&
-+			    TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
-+				if (tp_it->snd_cwnd <= 4) {
-+					do_retrans = true;
-+					break;
-+				}
-+
-+				if (4 * tp->srtt_us >= tp_it->srtt_us) {
-+					do_retrans = false;
-+					break;
-+				} else {
-+					do_retrans = true;
-+				}
-+			}
-+		}
-+
-+		if (do_retrans && mptcp_is_available(sk, skb_head, false)) {
-+			trace_mptcp_retransmit(sk, skb_head);
-+			return skb_head;
-+		}
-+	}
-+	return NULL;
-+}
-+
-+/* copy from mptcp_sched.c: __mptcp_next_segment */
-+/* Returns the next segment to be sent from the mptcp meta-queue.
-+ * (chooses the reinject queue if any segment is waiting in it, otherwise,
-+ * chooses the normal write queue).
-+ * Sets *@reinject to 1 if the returned segment comes from the
-+ * reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk,
-+ * and sets it to -1 if it is a meta-level retransmission to optimize the
-+ * receive-buffer.
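-+ * In fallback mode (infinite mapping) the reinject queue is bypassed
-+ * and the segment is taken straight from the meta send queue.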
-+ */
-+static struct sk_buff *__mptcp_blest_next_segment(struct sock *meta_sk, int *reinject)
-+{
-+	const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
-+	struct sk_buff *skb = NULL;
-+
-+	*reinject = 0;
-+
-+	/* If we are in fallback-mode, just take from the meta-send-queue */
-+	if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping)
-+		return tcp_send_head(meta_sk);
-+
-+	skb = skb_peek(&mpcb->reinject_queue);
-+
-+	if (skb) {
-+		*reinject = 1;
-+	} else {
-+		skb = tcp_send_head(meta_sk);
-+
-+		if (!skb && meta_sk->sk_socket &&
-+		    test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) &&
-+		    sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) {
-+			struct sock *subsk = blest_get_available_subflow(meta_sk, NULL,
-+									 false);
-+			if (!subsk)
-+				return NULL;
-+
-+			skb = mptcp_blest_rcv_buf_optimization(subsk, 0);
-+			if (skb)
-+				*reinject = -1;
-+		}
-+	}
-+	return skb;
-+}
-+
-+/* copy from mptcp_sched.c: mptcp_next_segment */
-+static struct sk_buff *mptcp_blest_next_segment(struct sock *meta_sk,
-+						int *reinject,
-+						struct sock **subsk,
-+						unsigned int *limit)
-+{
-+	struct sk_buff *skb = __mptcp_blest_next_segment(meta_sk, reinject);
-+	unsigned int mss_now;
-+	struct tcp_sock *subtp;
-+	u16 gso_max_segs;
-+	u32 max_len, max_segs, window, needed;
-+
-+	/* As we set it, we have to reset it as well. */
-+	*limit = 0;
-+
-+	if (!skb)
-+		return NULL;
-+
-+	*subsk = blest_get_available_subflow(meta_sk, skb, false);
-+	if (!*subsk)
-+		return NULL;
-+
-+	subtp = tcp_sk(*subsk);
-+	mss_now = tcp_current_mss(*subsk);
-+
-+	if (!*reinject && unlikely(!tcp_snd_wnd_test(tcp_sk(meta_sk), skb, mss_now))) {
-+		skb = mptcp_blest_rcv_buf_optimization(*subsk, 1);
-+		if (skb)
-+			*reinject = -1;
-+		else
-+			return NULL;
-+	}
-+
-+	/* No splitting required, as we will only send one single segment */
-+	if (skb->len <= mss_now)
-+		return skb;
-+
-+	/* The following is similar to tcp_mss_split_point, but
-+	 * we do not care about nagle, because we will anyways
-+	 * use TCP_NAGLE_PUSH, which overrides this.
-+	 *
-+	 * So, we first limit according to the cwnd/gso-size and then according
-+	 * to the subflow's window.
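-+	 * Whichever bound applies is handed back to the caller through *limit.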
-+	 */
-+
-+	gso_max_segs = (*subsk)->sk_gso_max_segs;
-+	if (!gso_max_segs) /* No gso supported on the subflow's NIC */
-+		gso_max_segs = 1;
-+	max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs);
-+	if (!max_segs)
-+		return NULL;
-+
-+	max_len = mss_now * max_segs;
-+	window = tcp_wnd_end(subtp) - subtp->write_seq;
-+
-+	needed = min(skb->len, window);
-+	if (max_len <= skb->len)
-+		/* Take max_win, which is actually the cwnd/gso-size */
-+		*limit = max_len;
-+	else
-+		/* Or, take the window */
-+		*limit = needed;
-+
-+	return skb;
-+}
-+
-+static void blestsched_init(struct sock *sk)
-+{
-+	struct blestsched_priv *blest_p = blestsched_get_priv(tcp_sk(sk));
-+	struct blestsched_cb *blest_cb = blestsched_get_cb(tcp_sk(mptcp_meta_sk(sk)));
-+
-+	blest_p->last_rbuf_opti = tcp_jiffies32;
-+	blest_p->min_srtt_us = U32_MAX;
-+	blest_p->max_srtt_us = 0;
-+
-+	if (!blest_cb->lambda_1000) {
-+		blest_cb->lambda_1000 = lambda * 100;
-+		blest_cb->last_lambda_update = tcp_jiffies32;
-+	}
-+}
-+
-+static struct mptcp_sched_ops mptcp_sched_blest = {
-+	.get_subflow = blest_get_available_subflow,
-+	.next_segment = mptcp_blest_next_segment,
-+	.init = blestsched_init,
-+	.name = "blest",
-+	.owner = THIS_MODULE,
-+};
-+
-+static int __init blest_register(void)
-+{
-+	BUILD_BUG_ON(sizeof(struct blestsched_priv) > MPTCP_SCHED_SIZE);
-+	BUILD_BUG_ON(sizeof(struct blestsched_cb) > MPTCP_SCHED_DATA_SIZE);
-+
-+	if (mptcp_register_scheduler(&mptcp_sched_blest))
-+		return -1;
-+
-+	return 0;
-+}
-+
-+static void blest_unregister(void)
-+{
-+	mptcp_unregister_scheduler(&mptcp_sched_blest);
-+}
-+
-+module_init(blest_register);
-+module_exit(blest_unregister);
-+
-+MODULE_AUTHOR("Simone Ferlin, Daniel Weber");
-+MODULE_LICENSE("GPL");
-+MODULE_DESCRIPTION("BLEST scheduler for MPTCP, based on default minimum RTT scheduler");
-+MODULE_VERSION("0.95");
 diff -aurN linux-4.19.67/net/mptcp/mptcp_coupled.c mptcp-mptcp_v0.95/net/mptcp/mptcp_coupled.c
 --- linux-4.19.67/net/mptcp/mptcp_coupled.c	1970-01-01 01:00:00.000000000 +0100
 +++ mptcp-mptcp_v0.95/net/mptcp/mptcp_coupled.c	2019-08-22 18:59:39.000000000 +0200
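The hunk above drops net/mptcp/mptcp_blest.c.orig, a stray backup that duplicates the mptcp_blest.c added earlier in this patch. For reviewers who want to sanity-check the admission test in blest_get_available_subflow() without building a kernel, the following is a minimal userspace model of that check. Everything in it (struct flow, estimate_fast_bytes(), the sample numbers) is an illustrative assumption, not the kernel's API: the real code also models slow-start growth and linger time, which this sketch collapses into a fixed cwnd-per-RTT estimate.

/* Standalone model of the BLEST admission check.
 * Build: cc -o blest_model blest_model.c
 * All types and values are illustrative, not the kernel's.
 */
#include <stdio.h>
#include <stdint.h>

/* hypothetical stand-in for the per-subflow tcp_sock state */
struct flow {
	uint32_t cwnd;           /* packets */
	uint32_t mss;            /* bytes */
	uint32_t srtt_us;        /* smoothed RTT, must be > 0 */
	uint32_t inflight_bytes;
};

/* bytes the fast flow is expected to send while the slow flow lingers,
 * collapsed to "cwnd packets per RTT" and scaled by lambda (x1000) */
static uint64_t estimate_fast_bytes(const struct flow *fast,
				    uint32_t linger_us, uint32_t lambda_1000)
{
	uint32_t rtts = linger_us / fast->srtt_us + 1;   /* round up */
	uint64_t packets = (uint64_t)fast->cwnd * rtts;

	return packets * fast->mss * lambda_1000 / 1000;
}

/* BLEST test: only use the slow flow if doing so leaves enough meta
 * send-window space for everything the fast flow could send meanwhile */
static int blest_may_use_slow(const struct flow *slow, const struct flow *fast,
			      uint32_t skb_len, uint32_t meta_snd_wnd,
			      uint32_t lambda_1000)
{
	uint32_t slow_bytes = skb_len + slow->inflight_bytes;
	uint32_t avail = slow_bytes < meta_snd_wnd ? meta_snd_wnd - slow_bytes : 0;
	uint64_t fast_bytes = estimate_fast_bytes(fast, slow->srtt_us, lambda_1000);

	return fast_bytes <= avail;   /* false => skip the slow flow (return NULL) */
}

int main(void)
{
	struct flow slow = { .cwnd = 10, .mss = 1400, .srtt_us = 300000, .inflight_bytes = 14000 };
	struct flow fast = { .cwnd = 40, .mss = 1400, .srtt_us = 30000,  .inflight_bytes = 0 };

	/* lambda starts at 1.2 (lambda_1000 == 1200), as in the module defaults */
	printf("64KB meta window: %s\n",
	       blest_may_use_slow(&slow, &fast, 1400, 65535, 1200) ? "send on slow" : "wait");
	printf("1MB meta window:  %s\n",
	       blest_may_use_slow(&slow, &fast, 1400, 1048576, 1200) ? "send on slow" : "wait");
	return 0;
}

With these sample numbers the 64 KiB meta window refuses the slow subflow (the fast flow could push roughly 739 KB while the slow one lingers for one 300 ms RTT), while the 1 MiB window accepts it. Raising lambda_1000 makes the test stricter, which is exactly how the dyn_lambda_bad bump reacts to observed retransmissions in blestsched_update_lambda().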