Mirror of https://github.com/Ysurac/openmptcprouter.git
Update MPTCP

commit e131493bf1 (parent 59ce9be78b)
2 changed files with 393 additions and 1350 deletions
										
											
(File diff suppressed because it is too large.)
@@ -1,988 +0,0 @@
From 025619486cf04c0beb9f395609d7711726fd63c6 Mon Sep 17 00:00:00 2001
From: Daniel Weber <weberdaniel@gmx.net>
Date: Mon, 5 Aug 2019 14:02:30 +0200
Subject: [PATCH 1/3] mptcp: Earliest Completion First (ECF) Scheduler

This scheduler works much like the default MPTCP scheduler. It always
prefers the available subflow with the smallest round-trip time, but it
may hold data back rather than send it on a slower subflow when waiting
for the fastest subflow is expected to complete the transfer earlier.

Signed-off-by: Daniel Weber <weberdaniel@gmx.net>
---
 net/mptcp/Kconfig     |   6 +
 net/mptcp/Makefile    |   1 +
 net/mptcp/mptcp_ecf.c | 384 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 391 insertions(+)
 create mode 100644 net/mptcp/mptcp_ecf.c
| diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig
 | ||||
| index 37f3af3db2a6..829ea084cf70 100644
 | ||||
| --- a/net/mptcp/Kconfig
 | ||||
| +++ b/net/mptcp/Kconfig
 | ||||
| @@ -109,6 +109,12 @@ config MPTCP_REDUNDANT
 | ||||
|  	  This scheduler sends all packets redundantly over all subflows to decreases | ||||
|  	  latency and jitter on the cost of lower throughput. | ||||
|   | ||||
| +config MPTCP_ECF
 | ||||
| +	tristate "MPTCP ECF"
 | ||||
| +	depends on (MPTCP=y)
 | ||||
| +	---help---
 | ||||
| +	  This is an experimental Earliest Completion First (ECF) scheduler.
 | ||||
| +
 | ||||
|  choice | ||||
|  	prompt "Default MPTCP Scheduler" | ||||
|  	default DEFAULT_SCHEDULER | ||||
| diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
 | ||||
| index 82a2d4d945ae..369248a2f68e 100644
 | ||||
| --- a/net/mptcp/Makefile
 | ||||
| +++ b/net/mptcp/Makefile
 | ||||
| @@ -20,5 +20,6 @@ obj-$(CONFIG_MPTCP_NETLINK) += mptcp_netlink.o
 | ||||
|  obj-$(CONFIG_MPTCP_ROUNDROBIN) += mptcp_rr.o | ||||
|  obj-$(CONFIG_MPTCP_REDUNDANT) += mptcp_redundant.o | ||||
|  obj-$(CONFIG_MPTCP_BLEST) += mptcp_blest.o | ||||
| +obj-$(CONFIG_MPTCP_ECF) += mptcp_ecf.o
 | ||||
|   | ||||
|  mptcp-$(subst m,y,$(CONFIG_IPV6)) += mptcp_ipv6.o | ||||
| diff --git a/net/mptcp/mptcp_ecf.c b/net/mptcp/mptcp_ecf.c
 | ||||
| new file mode 100644 | ||||
| index 000000000000..e0bd430a8943
 | ||||
| --- /dev/null
 | ||||
| +++ b/net/mptcp/mptcp_ecf.c
 | ||||
| @@ -0,0 +1,384 @@
 | ||||
| +// SPDX-License-Identifier: GPL-2.0
 | ||||
| +/*	MPTCP ECF Scheduler
 | ||||
| + *
 | ||||
| + *	Algorithm Design:
 | ||||
| + *	Yeon-sup Lim <ylim@cs.umass.edu>
 | ||||
| + *	Don Towsley <towsley@cs.umass.edu>
 | ||||
| + *	Erich M. Nahum <nahum@us.ibm.com>
 | ||||
| + *	Richard J. Gibbens <richard.gibbens@cl.cam.ac.uk>
 | ||||
| + *
 | ||||
| + *	Initial Implementation:
 | ||||
| + *	Yeon-sup Lim <ylim@cs.umass.edu>
 | ||||
| + *
 | ||||
| + *	Additional Authors:
 | ||||
| + *	Daniel Weber <weberd@cs.uni-bonn.de>
 | ||||
| + *
 | ||||
| + *	This program is free software; you can redistribute it and/or
 | ||||
| + *	modify it under the terms of the GNU General Public License
 | ||||
| + *	as published by the Free Software Foundation; either version
 | ||||
| + *	2 of the License, or (at your option) any later version.
 | ||||
| + */
 | ||||
| +
 | ||||
| +#include <linux/module.h>
 | ||||
| +#include <net/mptcp.h>
 | ||||
| +#include <trace/events/tcp.h>
 | ||||
| +
 | ||||
| +static unsigned int mptcp_ecf_r_beta __read_mostly = 4; /* beta = 1/r_beta = 0.25 */
 | ||||
| +module_param(mptcp_ecf_r_beta, int, 0644);
 | ||||
| +MODULE_PARM_DESC(mptcp_ecf_r_beta, "beta for ECF");
 | ||||
| +
 | ||||
| +struct ecfsched_priv {
 | ||||
| +	u32 last_rbuf_opti;
 | ||||
| +};
 | ||||
| +
 | ||||
| +struct ecfsched_cb {
 | ||||
| +	u32 switching_margin; /* this is "waiting" in algorithm description */
 | ||||
| +};
 | ||||
| +
 | ||||
| +static struct ecfsched_priv *ecfsched_get_priv(const struct tcp_sock *tp)
 | ||||
| +{
 | ||||
| +	return (struct ecfsched_priv *)&tp->mptcp->mptcp_sched[0];
 | ||||
| +}
 | ||||
| +
 | ||||
| +static struct ecfsched_cb *ecfsched_get_cb(const struct tcp_sock *tp)
 | ||||
| +{
 | ||||
| +	return (struct ecfsched_cb *)&tp->mpcb->mptcp_sched[0];
 | ||||
| +}
 | ||||
| +
 | ||||
| +/* This is the ECF scheduler. This function decides on which flow to send
 | ||||
| + * a given MSS. If all subflows are found to be busy or the currently best
 | ||||
| + * subflow is estimated to be slower than waiting for minsk, NULL is returned.
 | ||||
| + */
 | ||||
| +static struct sock *ecf_get_available_subflow(struct sock *meta_sk,
 | ||||
| +					      struct sk_buff *skb,
 | ||||
| +					      bool zero_wnd_test)
 | ||||
| +{
 | ||||
| +	struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
 | ||||
| +	struct sock *bestsk, *minsk = NULL;
 | ||||
| +	struct tcp_sock *besttp;
 | ||||
| +	struct mptcp_tcp_sock *mptcp;
 | ||||
| +	struct ecfsched_cb *ecf_cb = ecfsched_get_cb(tcp_sk(meta_sk));
 | ||||
| +	u32 min_srtt = U32_MAX;
 | ||||
| +	u32 sub_sndbuf = 0;
 | ||||
| +	u32 sub_packets_out = 0;
 | ||||
| +
 | ||||
| +	/* Answer data_fin on same subflow!!! */
 | ||||
| +	if (meta_sk->sk_shutdown & RCV_SHUTDOWN &&
 | ||||
| +	    skb && mptcp_is_data_fin(skb)) {
 | ||||
| +		mptcp_for_each_sub(mpcb, mptcp) {
 | ||||
| +			bestsk = mptcp_to_sock(mptcp);
 | ||||
| +
 | ||||
| +			if (tcp_sk(bestsk)->mptcp->path_index == mpcb->dfin_path_index &&
 | ||||
| +			    mptcp_is_available(bestsk, skb, zero_wnd_test))
 | ||||
| +				return bestsk;
 | ||||
| +		}
 | ||||
| +	}
 | ||||
| +
 | ||||
| +	/* First, find the overall best (fastest) subflow */
 | ||||
| +	mptcp_for_each_sub(mpcb, mptcp) {
 | ||||
| +		bestsk = mptcp_to_sock(mptcp);
 | ||||
| +		besttp = tcp_sk(bestsk);
 | ||||
| +
 | ||||
| +		/* Set of states for which we are allowed to send data */
 | ||||
| +		if (!mptcp_sk_can_send(bestsk))
 | ||||
| +			continue;
 | ||||
| +
 | ||||
| +		/* We do not send data on this subflow unless it is
 | ||||
| +		 * fully established, i.e. the 4th ack has been received.
 | ||||
| +		 */
 | ||||
| +		if (besttp->mptcp->pre_established)
 | ||||
| +			continue;
 | ||||
| +
 | ||||
| +		sub_sndbuf += bestsk->sk_wmem_queued;
 | ||||
| +		sub_packets_out += besttp->packets_out;
 | ||||
| +
 | ||||
| +		/* record minimal rtt */
 | ||||
| +		if (besttp->srtt_us < min_srtt) {
 | ||||
| +			min_srtt = besttp->srtt_us;
 | ||||
| +			minsk = bestsk;
 | ||||
| +		}
 | ||||
| +	}
 | ||||
| +
 | ||||
| +	/* find the current best subflow according to the default scheduler */
 | ||||
| +	bestsk = get_available_subflow(meta_sk, skb, zero_wnd_test);
 | ||||
| +
 | ||||
| +	/* if we decided to use a slower flow, we have the option of not using it at all */
 | ||||
| +	if (bestsk && minsk && bestsk != minsk) {
 | ||||
| +		u32 mss = tcp_current_mss(bestsk); /* assuming equal MSS */
 | ||||
| +		u32 sndbuf_meta = meta_sk->sk_wmem_queued;
 | ||||
| +		u32 sndbuf_minus = sub_sndbuf;
 | ||||
| +		u32 sndbuf = 0;
 | ||||
| +
 | ||||
| +		u32 cwnd_f = tcp_sk(minsk)->snd_cwnd;
 | ||||
| +		u32 srtt_f = tcp_sk(minsk)->srtt_us >> 3;
 | ||||
| +		u32 rttvar_f = tcp_sk(minsk)->rttvar_us >> 1;
 | ||||
| +
 | ||||
| +		u32 cwnd_s = tcp_sk(bestsk)->snd_cwnd;
 | ||||
| +		u32 srtt_s = tcp_sk(bestsk)->srtt_us >> 3;
 | ||||
| +		u32 rttvar_s = tcp_sk(bestsk)->rttvar_us >> 1;
 | ||||
| +
 | ||||
| +		u32 delta = max(rttvar_f, rttvar_s);
 | ||||
| +
 | ||||
| +		u32 x_f;
 | ||||
| +		u64 lhs, rhs; /* to avoid overflow, using u64 */
 | ||||
| +
 | ||||
| +		if (tcp_sk(meta_sk)->packets_out > sub_packets_out)
 | ||||
| +			sndbuf_minus += (tcp_sk(meta_sk)->packets_out - sub_packets_out) * mss;
 | ||||
| +
 | ||||
| +		if (sndbuf_meta > sndbuf_minus)
 | ||||
| +			sndbuf = sndbuf_meta - sndbuf_minus;
 | ||||
| +
 | ||||
| +		/* we have something to send.
 | ||||
| +		 * at least one time tx over fastest subflow is required
 | ||||
| +		 */
 | ||||
| +		x_f = sndbuf > cwnd_f * mss ? sndbuf : cwnd_f * mss;
 | ||||
| +		lhs = srtt_f * (x_f + cwnd_f * mss);
 | ||||
| +		rhs = cwnd_f * mss * (srtt_s + delta);
 | ||||
| +
 | ||||
| +		if (mptcp_ecf_r_beta * lhs < mptcp_ecf_r_beta * rhs + ecf_cb->switching_margin * rhs) {
 | ||||
| +			u32 x_s = sndbuf > cwnd_s * mss ? sndbuf : cwnd_s * mss;
 | ||||
| +			u64 lhs_s = srtt_s * x_s;
 | ||||
| +			u64 rhs_s = cwnd_s * mss * (2 * srtt_f + delta);
 | ||||
| +
 | ||||
| +			if (lhs_s >= rhs_s) {
 | ||||
| +				/* too slower than fastest */
 | ||||
| +				ecf_cb->switching_margin = 1;
 | ||||
| +				return NULL;
 | ||||
| +			}
 | ||||
| +		} else {
 | ||||
| +			/* use slower one */
 | ||||
| +			ecf_cb->switching_margin = 0;
 | ||||
| +		}
 | ||||
| +	}
 | ||||
| +
 | ||||
| +	return bestsk;
 | ||||
| +}
 | ||||
| +
 | ||||
| +/* copy from mptcp_sched.c: mptcp_rcv_buf_optimization */
 | ||||
| +static struct sk_buff *mptcp_ecf_rcv_buf_optimization(struct sock *sk, int penal)
 | ||||
| +{
 | ||||
| +	struct sock *meta_sk;
 | ||||
| +	const struct tcp_sock *tp = tcp_sk(sk);
 | ||||
| +	struct mptcp_tcp_sock *mptcp;
 | ||||
| +	struct sk_buff *skb_head;
 | ||||
| +	struct ecfsched_priv *ecf_p = ecfsched_get_priv(tp);
 | ||||
| +
 | ||||
| +	meta_sk = mptcp_meta_sk(sk);
 | ||||
| +	skb_head = tcp_rtx_queue_head(meta_sk);
 | ||||
| +
 | ||||
| +	if (!skb_head)
 | ||||
| +		return NULL;
 | ||||
| +
 | ||||
| +	/* If penalization is optional (coming from mptcp_next_segment() and
 | ||||
| +	 * We are not send-buffer-limited we do not penalize. The retransmission
 | ||||
| +	 * is just an optimization to fix the idle-time due to the delay before
 | ||||
| +	 * we wake up the application.
 | ||||
| +	 */
 | ||||
| +	if (!penal && sk_stream_memory_free(meta_sk))
 | ||||
| +		goto retrans;
 | ||||
| +
 | ||||
| +	/* Only penalize again after an RTT has elapsed */
 | ||||
| +	if (tcp_jiffies32 - ecf_p->last_rbuf_opti < usecs_to_jiffies(tp->srtt_us >> 3))
 | ||||
| +		goto retrans;
 | ||||
| +
 | ||||
| +	/* Half the cwnd of the slow flows */
 | ||||
| +	mptcp_for_each_sub(tp->mpcb, mptcp) {
 | ||||
| +		struct tcp_sock *tp_it = mptcp->tp;
 | ||||
| +
 | ||||
| +		if (tp_it != tp &&
 | ||||
| +		    TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
 | ||||
| +			if (tp->srtt_us < tp_it->srtt_us && inet_csk((struct sock *)tp_it)->icsk_ca_state == TCP_CA_Open) {
 | ||||
| +				u32 prior_cwnd = tp_it->snd_cwnd;
 | ||||
| +
 | ||||
| +				tp_it->snd_cwnd = max(tp_it->snd_cwnd >> 1U, 1U);
 | ||||
| +
 | ||||
| +				/* If in slow start, do not reduce the ssthresh */
 | ||||
| +				if (prior_cwnd >= tp_it->snd_ssthresh)
 | ||||
| +					tp_it->snd_ssthresh = max(tp_it->snd_ssthresh >> 1U, 2U);
 | ||||
| +
 | ||||
| +				ecf_p->last_rbuf_opti = tcp_jiffies32;
 | ||||
| +			}
 | ||||
| +		}
 | ||||
| +	}
 | ||||
| +
 | ||||
| +retrans:
 | ||||
| +
 | ||||
| +	/* Segment not yet injected into this path? Take it!!! */
 | ||||
| +	if (!(TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index))) {
 | ||||
| +		bool do_retrans = false;
 | ||||
| +		mptcp_for_each_sub(tp->mpcb, mptcp) {
 | ||||
| +			struct tcp_sock *tp_it = mptcp->tp;
 | ||||
| +
 | ||||
| +			if (tp_it != tp &&
 | ||||
| +			    TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
 | ||||
| +				if (tp_it->snd_cwnd <= 4) {
 | ||||
| +					do_retrans = true;
 | ||||
| +					break;
 | ||||
| +				}
 | ||||
| +
 | ||||
| +				if (4 * tp->srtt_us >= tp_it->srtt_us) {
 | ||||
| +					do_retrans = false;
 | ||||
| +					break;
 | ||||
| +				} else {
 | ||||
| +					do_retrans = true;
 | ||||
| +				}
 | ||||
| +			}
 | ||||
| +		}
 | ||||
| +
 | ||||
| +		if (do_retrans && mptcp_is_available(sk, skb_head, false)) {
 | ||||
| +			trace_mptcp_retransmit(sk, skb_head);
 | ||||
| +			return skb_head;
 | ||||
| +		}
 | ||||
| +	}
 | ||||
| +	return NULL;
 | ||||
| +}
 | ||||
| +
 | ||||
| +/* copy from mptcp_sched.c: __mptcp_next_segment */
 | ||||
| +/* Returns the next segment to be sent from the mptcp meta-queue.
 | ||||
| + * (chooses the reinject queue if any segment is waiting in it, otherwise,
 | ||||
| + * chooses the normal write queue).
 | ||||
| + * Sets *@reinject to 1 if the returned segment comes from the
 | ||||
| + * reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk,
 | ||||
| + * and sets it to -1 if it is a meta-level retransmission to optimize the
 | ||||
| + * receive-buffer.
 | ||||
| + */
 | ||||
| +static struct sk_buff *__mptcp_ecf_next_segment(struct sock *meta_sk, int *reinject)
 | ||||
| +{
 | ||||
| +	const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
 | ||||
| +	struct sk_buff *skb = NULL;
 | ||||
| +
 | ||||
| +	*reinject = 0;
 | ||||
| +
 | ||||
| +	/* If we are in fallback-mode, just take from the meta-send-queue */
 | ||||
| +	if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping)
 | ||||
| +		return tcp_send_head(meta_sk);
 | ||||
| +
 | ||||
| +	skb = skb_peek(&mpcb->reinject_queue);
 | ||||
| +
 | ||||
| +	if (skb) {
 | ||||
| +		*reinject = 1;
 | ||||
| +	} else {
 | ||||
| +		skb = tcp_send_head(meta_sk);
 | ||||
| +
 | ||||
| +		if (!skb && meta_sk->sk_socket &&
 | ||||
| +		    test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) &&
 | ||||
| +		    sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) {
 | ||||
| +			struct sock *subsk = ecf_get_available_subflow(meta_sk, NULL,
 | ||||
| +								   false);
 | ||||
| +			if (!subsk)
 | ||||
| +				return NULL;
 | ||||
| +
 | ||||
| +			skb = mptcp_ecf_rcv_buf_optimization(subsk, 0);
 | ||||
| +			if (skb)
 | ||||
| +				*reinject = -1;
 | ||||
| +		}
 | ||||
| +	}
 | ||||
| +	return skb;
 | ||||
| +}
 | ||||
| +
 | ||||
| +/* copy from mptcp_sched.c: mptcp_next_segment */
 | ||||
| +static struct sk_buff *mptcp_ecf_next_segment(struct sock *meta_sk,
 | ||||
| +					      int *reinject,
 | ||||
| +					      struct sock **subsk,
 | ||||
| +					      unsigned int *limit)
 | ||||
| +{
 | ||||
| +	struct sk_buff *skb = __mptcp_ecf_next_segment(meta_sk, reinject);
 | ||||
| +	unsigned int mss_now;
 | ||||
| +	struct tcp_sock *subtp;
 | ||||
| +	u16 gso_max_segs;
 | ||||
| +	u32 max_len, max_segs, window, needed;
 | ||||
| +
 | ||||
| +	/* As we set it, we have to reset it as well. */
 | ||||
| +	*limit = 0;
 | ||||
| +
 | ||||
| +	if (!skb)
 | ||||
| +		return NULL;
 | ||||
| +
 | ||||
| +	*subsk = ecf_get_available_subflow(meta_sk, skb, false);
 | ||||
| +	if (!*subsk)
 | ||||
| +		return NULL;
 | ||||
| +
 | ||||
| +	subtp = tcp_sk(*subsk);
 | ||||
| +	mss_now = tcp_current_mss(*subsk);
 | ||||
| +
 | ||||
| +	if (!*reinject && unlikely(!tcp_snd_wnd_test(tcp_sk(meta_sk), skb, mss_now))) {
 | ||||
| +		skb = mptcp_ecf_rcv_buf_optimization(*subsk, 1);
 | ||||
| +		if (skb)
 | ||||
| +			*reinject = -1;
 | ||||
| +		else
 | ||||
| +			return NULL;
 | ||||
| +	}
 | ||||
| +
 | ||||
| +	/* No splitting required, as we will only send one single segment */
 | ||||
| +	if (skb->len <= mss_now)
 | ||||
| +		return skb;
 | ||||
| +
 | ||||
| +	/* The following is similar to tcp_mss_split_point, but
 | ||||
| +	 * we do not care about nagle, because we will anyways
 | ||||
| +	 * use TCP_NAGLE_PUSH, which overrides this.
 | ||||
| +	 *
 | ||||
| +	 * So, we first limit according to the cwnd/gso-size and then according
 | ||||
| +	 * to the subflow's window.
 | ||||
| +	 */
 | ||||
| +
 | ||||
| +	gso_max_segs = (*subsk)->sk_gso_max_segs;
 | ||||
| +	if (!gso_max_segs) /* No gso supported on the subflow's NIC */
 | ||||
| +		gso_max_segs = 1;
 | ||||
| +	max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs);
 | ||||
| +	if (!max_segs)
 | ||||
| +		return NULL;
 | ||||
| +
 | ||||
| +	max_len = mss_now * max_segs;
 | ||||
| +	window = tcp_wnd_end(subtp) - subtp->write_seq;
 | ||||
| +
 | ||||
| +	needed = min(skb->len, window);
 | ||||
| +	if (max_len <= skb->len)
 | ||||
| +		/* Take max_win, which is actually the cwnd/gso-size */
 | ||||
| +		*limit = max_len;
 | ||||
| +	else
 | ||||
| +		/* Or, take the window */
 | ||||
| +		*limit = needed;
 | ||||
| +
 | ||||
| +	return skb;
 | ||||
| +}
 | ||||
| +
 | ||||
| +static void ecfsched_init(struct sock *sk)
 | ||||
| +{
 | ||||
| +	struct ecfsched_priv *ecf_p = ecfsched_get_priv(tcp_sk(sk));
 | ||||
| +	struct ecfsched_cb *ecf_cb = ecfsched_get_cb(tcp_sk(mptcp_meta_sk(sk)));
 | ||||
| +
 | ||||
| +	ecf_p->last_rbuf_opti = tcp_jiffies32;
 | ||||
| +	ecf_cb->switching_margin = 0;
 | ||||
| +}
 | ||||
| +
 | ||||
| +struct mptcp_sched_ops mptcp_sched_ecf = {
 | ||||
| +	.get_subflow = ecf_get_available_subflow,
 | ||||
| +	.next_segment = mptcp_ecf_next_segment,
 | ||||
| +	.init = ecfsched_init,
 | ||||
| +	.name = "ecf",
 | ||||
| +	.owner = THIS_MODULE,
 | ||||
| +};
 | ||||
| +
 | ||||
| +static int __init ecf_register(void)
 | ||||
| +{
 | ||||
| +	BUILD_BUG_ON(sizeof(struct ecfsched_priv) > MPTCP_SCHED_SIZE);
 | ||||
| +	BUILD_BUG_ON(sizeof(struct ecfsched_cb) > MPTCP_SCHED_DATA_SIZE);
 | ||||
| +
 | ||||
| +	if (mptcp_register_scheduler(&mptcp_sched_ecf))
 | ||||
| +		return -1;
 | ||||
| +
 | ||||
| +	return 0;
 | ||||
| +}
 | ||||
| +
 | ||||
| +static void ecf_unregister(void)
 | ||||
| +{
 | ||||
| +	mptcp_unregister_scheduler(&mptcp_sched_ecf);
 | ||||
| +}
 | ||||
| +
 | ||||
| +module_init(ecf_register);
 | ||||
| +module_exit(ecf_unregister);
 | ||||
| +
 | ||||
| +MODULE_AUTHOR("Yeon-sup Lim, Daniel Weber");
 | ||||
| +MODULE_LICENSE("GPL");
 | ||||
| +MODULE_DESCRIPTION("ECF (Earliest Completion First) scheduler for MPTCP, based on default minimum RTT scheduler");
 | ||||
| +MODULE_VERSION("0.95");
 | ||||
| 
 | ||||
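For reference, the switching test at the heart of ecf_get_available_subflow() above can be exercised on its own. The following userspace C sketch mirrors that arithmetic; every input value (MSS, queued data, cwnd, srtt, rttvar) is a made-up example rather than anything taken from the patch, and the kernel's RTT descaling (srtt_us >> 3, rttvar_us >> 1) is assumed to have been applied already.

/* Standalone sketch of the ECF switching test; example inputs only. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t mss = 1460;                 /* assumed equal MSS on both subflows */
	uint64_t sndbuf = 8 * 1024;          /* unsent meta-level data, bytes (made up) */

	/* fastest subflow (minsk in the patch), times in microseconds */
	uint64_t cwnd_f = 10, srtt_f = 20000, rttvar_f = 2000;
	/* currently available but slower subflow (bestsk in the patch) */
	uint64_t cwnd_s = 10, srtt_s = 80000, rttvar_s = 8000;

	uint64_t delta = rttvar_f > rttvar_s ? rttvar_f : rttvar_s;
	uint64_t r_beta = 4;                 /* mptcp_ecf_r_beta, beta = 1/4 */
	uint64_t waiting = 0;                /* ecf_cb->switching_margin */

	/* at least one transmission over the fastest subflow is assumed */
	uint64_t x_f = sndbuf > cwnd_f * mss ? sndbuf : cwnd_f * mss;
	uint64_t lhs = srtt_f * (x_f + cwnd_f * mss);
	uint64_t rhs = cwnd_f * mss * (srtt_s + delta);

	if (r_beta * lhs < r_beta * rhs + waiting * rhs) {
		/* waiting for the fast subflow may pay off; check the slow one */
		uint64_t x_s = sndbuf > cwnd_s * mss ? sndbuf : cwnd_s * mss;
		uint64_t lhs_s = srtt_s * x_s;
		uint64_t rhs_s = cwnd_s * mss * (2 * srtt_f + delta);

		if (lhs_s >= rhs_s)
			printf("hold data back and wait for the fast subflow\n");
		else
			printf("send on the slower subflow\n");
	} else {
		printf("send on the slower subflow\n");
	}
	return 0;
}

With these example numbers the inequality favours waiting: the fast subflow is expected to finish first, so the scheduler returns no subflow and sets switching_margin to 1, matching the first branch in the patch.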
From 5a9641c84cbb5a49749d7533c20035631985dbe7 Mon Sep 17 00:00:00 2001
From: Daniel Weber <weberdaniel@gmx.net>
Date: Mon, 9 Mar 2020 11:00:23 +0100
Subject: [PATCH 2/3] mptcp: Reduce code-duplication for other schedulers

'mptcp_next_segment' now honors the function pointer to the actual part
that makes the scheduling decision in 'sched_ops->get_subflow'. This
allows for better reuse by other schedulers.

The BLEST scheduler needs to adapt the direction of the lambda value
change depending on the occurrence of a retransmission. To allow removing
the copied 'mptcp_rcv_buf_optimization' as well, the scheduler now checks
the TCP 'retrans_stamp' of the meta socket instead.

Signed-off-by: Daniel Weber <weberdaniel@gmx.net>
---
 include/net/mptcp.h     |   4 +
 net/mptcp/mptcp_blest.c | 200 +--------------------------------------
 net/mptcp/mptcp_sched.c |   9 +-
 3 files changed, 11 insertions(+), 202 deletions(-)
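As the hunks below show, a scheduler now only has to supply its own get_subflow() and can point next_segment at the exported mptcp_next_segment(). A minimal, hypothetical module built on that could look as follows; the name "mysched" and its trivial policy are illustrative assumptions, not part of the patch.

/* Hypothetical scheduler that reuses mptcp_next_segment() from mptcp_sched.c. */
#include <linux/module.h>
#include <net/mptcp.h>

static struct sock *mysched_get_subflow(struct sock *meta_sk,
					struct sk_buff *skb,
					bool zero_wnd_test)
{
	/* Placeholder policy: defer to the default scheduler's choice. */
	return get_available_subflow(meta_sk, skb, zero_wnd_test);
}

static void mysched_init(struct sock *sk)
{
	/* no per-subflow state needed for this sketch */
}

static struct mptcp_sched_ops mptcp_sched_mysched = {
	.get_subflow	= mysched_get_subflow,
	.next_segment	= mptcp_next_segment,	/* reused, no copied helpers */
	.init		= mysched_init,
	.name		= "mysched",
	.owner		= THIS_MODULE,
};

static int __init mysched_register(void)
{
	if (mptcp_register_scheduler(&mptcp_sched_mysched))
		return -1;
	return 0;
}

static void __exit mysched_unregister(void)
{
	mptcp_unregister_scheduler(&mptcp_sched_mysched);
}

module_init(mysched_register);
module_exit(mysched_unregister);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Sketch of a scheduler reusing mptcp_next_segment");

Everything that previously had to be copied (the receive-buffer optimization, segment selection and splitting) is then taken from the default scheduler's code path.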
| diff --git a/include/net/mptcp.h b/include/net/mptcp.h
 | ||||
| index 02312c9ea3a3..82f66ce206cc 100644
 | ||||
| --- a/include/net/mptcp.h
 | ||||
| +++ b/include/net/mptcp.h
 | ||||
| @@ -902,6 +902,10 @@ bool subflow_is_active(const struct tcp_sock *tp);
 | ||||
|  bool subflow_is_backup(const struct tcp_sock *tp); | ||||
|  struct sock *get_available_subflow(struct sock *meta_sk, struct sk_buff *skb, | ||||
|  				   bool zero_wnd_test); | ||||
| +struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
 | ||||
| +				   int *reinject,
 | ||||
| +				   struct sock **subsk,
 | ||||
| +				   unsigned int *limit);
 | ||||
|  extern struct mptcp_sched_ops mptcp_sched_default; | ||||
|   | ||||
|  /* Initializes function-pointers and MPTCP-flags */ | ||||
| diff --git a/net/mptcp/mptcp_blest.c b/net/mptcp/mptcp_blest.c
 | ||||
| index 40905a0d1fe5..22e25dd0d44e 100644
 | ||||
| --- a/net/mptcp/mptcp_blest.c
 | ||||
| +++ b/net/mptcp/mptcp_blest.c
 | ||||
| @@ -21,7 +21,6 @@
 | ||||
|   | ||||
|  #include <linux/module.h> | ||||
|  #include <net/mptcp.h> | ||||
| -#include <trace/events/tcp.h>
 | ||||
|   | ||||
|  static unsigned char lambda __read_mostly = 12; | ||||
|  module_param(lambda, byte, 0644); | ||||
| @@ -50,7 +49,6 @@ struct blestsched_priv {
 | ||||
|  }; | ||||
|   | ||||
|  struct blestsched_cb { | ||||
| -	bool retrans_flag;
 | ||||
|  	s16 lambda_1000; /* values range from min_lambda * 100 to max_lambda * 100 */ | ||||
|  	u32 last_lambda_update; | ||||
|  }; | ||||
| @@ -77,14 +75,13 @@ static void blestsched_update_lambda(struct sock *meta_sk, struct sock *sk)
 | ||||
|  	 * during the slow flows last RTT => increase lambda | ||||
|  	 * otherwise decrease | ||||
|  	 */ | ||||
| -	if (blest_cb->retrans_flag) {
 | ||||
| +	if (tcp_sk(meta_sk)->retrans_stamp) {
 | ||||
|  		/* need to slow down on the slow flow */ | ||||
|  		blest_cb->lambda_1000 += dyn_lambda_bad; | ||||
|  	} else { | ||||
|  		/* use the slow flow more */ | ||||
|  		blest_cb->lambda_1000 -= dyn_lambda_good; | ||||
|  	} | ||||
| -	blest_cb->retrans_flag = false;
 | ||||
|   | ||||
|  	/* cap lambda_1000 to its value range */ | ||||
|  	blest_cb->lambda_1000 = min_t(s16, blest_cb->lambda_1000, max_lambda * 100); | ||||
| @@ -240,199 +237,6 @@ struct sock *blest_get_available_subflow(struct sock *meta_sk, struct sk_buff *s
 | ||||
|  	return bestsk; | ||||
|  } | ||||
|   | ||||
| -/* copy from mptcp_sched.c: mptcp_rcv_buf_optimization */
 | ||||
| -static struct sk_buff *mptcp_blest_rcv_buf_optimization(struct sock *sk, int penal)
 | ||||
| -{
 | ||||
| -	struct sock *meta_sk;
 | ||||
| -	const struct tcp_sock *tp = tcp_sk(sk);
 | ||||
| -	struct mptcp_tcp_sock *mptcp;
 | ||||
| -	struct sk_buff *skb_head;
 | ||||
| -	struct blestsched_priv *blest_p = blestsched_get_priv(tp);
 | ||||
| -	struct blestsched_cb *blest_cb;
 | ||||
| -
 | ||||
| -	meta_sk = mptcp_meta_sk(sk);
 | ||||
| -	skb_head = tcp_rtx_queue_head(meta_sk);
 | ||||
| -
 | ||||
| -	if (!skb_head)
 | ||||
| -		return NULL;
 | ||||
| -
 | ||||
| -	/* If penalization is optional (coming from mptcp_next_segment() and
 | ||||
| -	 * We are not send-buffer-limited we do not penalize. The retransmission
 | ||||
| -	 * is just an optimization to fix the idle-time due to the delay before
 | ||||
| -	 * we wake up the application.
 | ||||
| -	 */
 | ||||
| -	if (!penal && sk_stream_memory_free(meta_sk))
 | ||||
| -		goto retrans;
 | ||||
| -
 | ||||
| -	/* Record the occurrence of a retransmission to update the lambda value */
 | ||||
| -	blest_cb = blestsched_get_cb(tcp_sk(meta_sk));
 | ||||
| -	blest_cb->retrans_flag = true;
 | ||||
| -
 | ||||
| -	/* Only penalize again after an RTT has elapsed */
 | ||||
| -	if (tcp_jiffies32 - blest_p->last_rbuf_opti < usecs_to_jiffies(tp->srtt_us >> 3))
 | ||||
| -		goto retrans;
 | ||||
| -
 | ||||
| -	/* Half the cwnd of the slow flows */
 | ||||
| -	mptcp_for_each_sub(tp->mpcb, mptcp) {
 | ||||
| -		struct tcp_sock *tp_it = mptcp->tp;
 | ||||
| -
 | ||||
| -		if (tp_it != tp &&
 | ||||
| -		    TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
 | ||||
| -			if (tp->srtt_us < tp_it->srtt_us && inet_csk((struct sock *)tp_it)->icsk_ca_state == TCP_CA_Open) {
 | ||||
| -				u32 prior_cwnd = tp_it->snd_cwnd;
 | ||||
| -
 | ||||
| -				tp_it->snd_cwnd = max(tp_it->snd_cwnd >> 1U, 1U);
 | ||||
| -
 | ||||
| -				/* If in slow start, do not reduce the ssthresh */
 | ||||
| -				if (prior_cwnd >= tp_it->snd_ssthresh)
 | ||||
| -					tp_it->snd_ssthresh = max(tp_it->snd_ssthresh >> 1U, 2U);
 | ||||
| -
 | ||||
| -				blest_p->last_rbuf_opti = tcp_jiffies32;
 | ||||
| -			}
 | ||||
| -		}
 | ||||
| -	}
 | ||||
| -
 | ||||
| -retrans:
 | ||||
| -
 | ||||
| -	/* Segment not yet injected into this path? Take it!!! */
 | ||||
| -	if (!(TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index))) {
 | ||||
| -		bool do_retrans = false;
 | ||||
| -		mptcp_for_each_sub(tp->mpcb, mptcp) {
 | ||||
| -			struct tcp_sock *tp_it = mptcp->tp;
 | ||||
| -
 | ||||
| -			if (tp_it != tp &&
 | ||||
| -			    TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
 | ||||
| -				if (tp_it->snd_cwnd <= 4) {
 | ||||
| -					do_retrans = true;
 | ||||
| -					break;
 | ||||
| -				}
 | ||||
| -
 | ||||
| -				if (4 * tp->srtt_us >= tp_it->srtt_us) {
 | ||||
| -					do_retrans = false;
 | ||||
| -					break;
 | ||||
| -				} else {
 | ||||
| -					do_retrans = true;
 | ||||
| -				}
 | ||||
| -			}
 | ||||
| -		}
 | ||||
| -
 | ||||
| -		if (do_retrans && mptcp_is_available(sk, skb_head, false)) {
 | ||||
| -			trace_mptcp_retransmit(sk, skb_head);
 | ||||
| -			return skb_head;
 | ||||
| -		}
 | ||||
| -	}
 | ||||
| -	return NULL;
 | ||||
| -}
 | ||||
| -
 | ||||
| -/* copy from mptcp_sched.c: __mptcp_next_segment */
 | ||||
| -/* Returns the next segment to be sent from the mptcp meta-queue.
 | ||||
| - * (chooses the reinject queue if any segment is waiting in it, otherwise,
 | ||||
| - * chooses the normal write queue).
 | ||||
| - * Sets *@reinject to 1 if the returned segment comes from the
 | ||||
| - * reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk,
 | ||||
| - * and sets it to -1 if it is a meta-level retransmission to optimize the
 | ||||
| - * receive-buffer.
 | ||||
| - */
 | ||||
| -static struct sk_buff *__mptcp_blest_next_segment(struct sock *meta_sk, int *reinject)
 | ||||
| -{
 | ||||
| -	const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
 | ||||
| -	struct sk_buff *skb = NULL;
 | ||||
| -
 | ||||
| -	*reinject = 0;
 | ||||
| -
 | ||||
| -	/* If we are in fallback-mode, just take from the meta-send-queue */
 | ||||
| -	if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping)
 | ||||
| -		return tcp_send_head(meta_sk);
 | ||||
| -
 | ||||
| -	skb = skb_peek(&mpcb->reinject_queue);
 | ||||
| -
 | ||||
| -	if (skb) {
 | ||||
| -		*reinject = 1;
 | ||||
| -	} else {
 | ||||
| -		skb = tcp_send_head(meta_sk);
 | ||||
| -
 | ||||
| -		if (!skb && meta_sk->sk_socket &&
 | ||||
| -		    test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) &&
 | ||||
| -		    sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) {
 | ||||
| -			struct sock *subsk = blest_get_available_subflow(meta_sk, NULL,
 | ||||
| -									 false);
 | ||||
| -			if (!subsk)
 | ||||
| -				return NULL;
 | ||||
| -
 | ||||
| -			skb = mptcp_blest_rcv_buf_optimization(subsk, 0);
 | ||||
| -			if (skb)
 | ||||
| -				*reinject = -1;
 | ||||
| -		}
 | ||||
| -	}
 | ||||
| -	return skb;
 | ||||
| -}
 | ||||
| -
 | ||||
| -/* copy from mptcp_sched.c: mptcp_next_segment */
 | ||||
| -static struct sk_buff *mptcp_blest_next_segment(struct sock *meta_sk,
 | ||||
| -						int *reinject,
 | ||||
| -						struct sock **subsk,
 | ||||
| -						unsigned int *limit)
 | ||||
| -{
 | ||||
| -	struct sk_buff *skb = __mptcp_blest_next_segment(meta_sk, reinject);
 | ||||
| -	unsigned int mss_now;
 | ||||
| -	struct tcp_sock *subtp;
 | ||||
| -	u16 gso_max_segs;
 | ||||
| -	u32 max_len, max_segs, window, needed;
 | ||||
| -
 | ||||
| -	/* As we set it, we have to reset it as well. */
 | ||||
| -	*limit = 0;
 | ||||
| -
 | ||||
| -	if (!skb)
 | ||||
| -		return NULL;
 | ||||
| -
 | ||||
| -	*subsk = blest_get_available_subflow(meta_sk, skb, false);
 | ||||
| -	if (!*subsk)
 | ||||
| -		return NULL;
 | ||||
| -
 | ||||
| -	subtp = tcp_sk(*subsk);
 | ||||
| -	mss_now = tcp_current_mss(*subsk);
 | ||||
| -
 | ||||
| -	if (!*reinject && unlikely(!tcp_snd_wnd_test(tcp_sk(meta_sk), skb, mss_now))) {
 | ||||
| -		skb = mptcp_blest_rcv_buf_optimization(*subsk, 1);
 | ||||
| -		if (skb)
 | ||||
| -			*reinject = -1;
 | ||||
| -		else
 | ||||
| -			return NULL;
 | ||||
| -	}
 | ||||
| -
 | ||||
| -	/* No splitting required, as we will only send one single segment */
 | ||||
| -	if (skb->len <= mss_now)
 | ||||
| -		return skb;
 | ||||
| -
 | ||||
| -	/* The following is similar to tcp_mss_split_point, but
 | ||||
| -	 * we do not care about nagle, because we will anyways
 | ||||
| -	 * use TCP_NAGLE_PUSH, which overrides this.
 | ||||
| -	 *
 | ||||
| -	 * So, we first limit according to the cwnd/gso-size and then according
 | ||||
| -	 * to the subflow's window.
 | ||||
| -	 */
 | ||||
| -
 | ||||
| -	gso_max_segs = (*subsk)->sk_gso_max_segs;
 | ||||
| -	if (!gso_max_segs) /* No gso supported on the subflow's NIC */
 | ||||
| -		gso_max_segs = 1;
 | ||||
| -	max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs);
 | ||||
| -	if (!max_segs)
 | ||||
| -		return NULL;
 | ||||
| -
 | ||||
| -	max_len = mss_now * max_segs;
 | ||||
| -	window = tcp_wnd_end(subtp) - subtp->write_seq;
 | ||||
| -
 | ||||
| -	needed = min(skb->len, window);
 | ||||
| -	if (max_len <= skb->len)
 | ||||
| -		/* Take max_win, which is actually the cwnd/gso-size */
 | ||||
| -		*limit = max_len;
 | ||||
| -	else
 | ||||
| -		/* Or, take the window */
 | ||||
| -		*limit = needed;
 | ||||
| -
 | ||||
| -	return skb;
 | ||||
| -}
 | ||||
| -
 | ||||
|  static void blestsched_init(struct sock *sk) | ||||
|  { | ||||
|  	struct blestsched_priv *blest_p = blestsched_get_priv(tcp_sk(sk)); | ||||
| @@ -450,7 +254,7 @@ static void blestsched_init(struct sock *sk)
 | ||||
|   | ||||
|  static struct mptcp_sched_ops mptcp_sched_blest = { | ||||
|  	.get_subflow = blest_get_available_subflow, | ||||
| -	.next_segment = mptcp_blest_next_segment,
 | ||||
| +	.next_segment = mptcp_next_segment,
 | ||||
|  	.init = blestsched_init, | ||||
|  	.name = "blest", | ||||
|  	.owner = THIS_MODULE, | ||||
| diff --git a/net/mptcp/mptcp_sched.c b/net/mptcp/mptcp_sched.c
 | ||||
| index 18c3559b0d48..5bf2946a5caf 100644
 | ||||
| --- a/net/mptcp/mptcp_sched.c
 | ||||
| +++ b/net/mptcp/mptcp_sched.c
 | ||||
| @@ -372,8 +372,8 @@ static struct sk_buff *__mptcp_next_segment(struct sock *meta_sk, int *reinject)
 | ||||
|  		if (!skb && meta_sk->sk_socket && | ||||
|  		    test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) && | ||||
|  		    sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) { | ||||
| -			struct sock *subsk = get_available_subflow(meta_sk, NULL,
 | ||||
| -								   false);
 | ||||
| +			struct sock *subsk = mpcb->sched_ops->get_subflow(meta_sk, NULL,
 | ||||
| +									  false);
 | ||||
|  			if (!subsk) | ||||
|  				return NULL; | ||||
|   | ||||
| @@ -385,7 +385,7 @@ static struct sk_buff *__mptcp_next_segment(struct sock *meta_sk, int *reinject)
 | ||||
|  	return skb; | ||||
|  } | ||||
|   | ||||
| -static struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
 | ||||
| +struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
 | ||||
|  					  int *reinject, | ||||
|  					  struct sock **subsk, | ||||
|  					  unsigned int *limit) | ||||
| @@ -402,7 +402,7 @@ static struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
 | ||||
|  	if (!skb) | ||||
|  		return NULL; | ||||
|   | ||||
| -	*subsk = get_available_subflow(meta_sk, skb, false);
 | ||||
| +	*subsk = tcp_sk(meta_sk)->mpcb->sched_ops->get_subflow(meta_sk, skb, false);
 | ||||
|  	if (!*subsk) | ||||
|  		return NULL; | ||||
|   | ||||
| @@ -449,6 +449,7 @@ static struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
 | ||||
|   | ||||
|  	return skb; | ||||
|  } | ||||
| +EXPORT_SYMBOL_GPL(mptcp_next_segment);
 | ||||
|   | ||||
|  static void defsched_init(struct sock *sk) | ||||
|  { | ||||
| 
 | ||||
From 5e8425e43b38e7e0fe566ffd50e197c07807ebdf Mon Sep 17 00:00:00 2001
From: Daniel Weber <weberdaniel@gmx.net>
Date: Mon, 9 Mar 2020 11:09:27 +0100
Subject: [PATCH 3/3] mptcp: Remove code-duplication from ECF scheduler

The ECF scheduler relies on large parts of the default scheduler. This
commit removes the copied blocks and reuses 'mptcp_next_segment' and
'mptcp_rcv_buf_optimization' directly from it via function pointers.

Signed-off-by: Daniel Weber <weberdaniel@gmx.net>
---
 net/mptcp/mptcp_ecf.c | 191 +-----------------------------------------
 1 file changed, 1 insertion(+), 190 deletions(-)
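After this patch, mptcp_ecf.c is essentially reduced to the ECF subflow choice, its init hook and the scheduler registration. On kernels from the out-of-tree MPTCP project the active scheduler is usually selected through the net.mptcp.mptcp_scheduler sysctl; that knob name is an assumption here and should be verified against the target tree. A small userspace sketch for checking which scheduler is active:

/* Userspace sketch: show the active MPTCP scheduler.
 * The sysctl path below is assumed from the out-of-tree MPTCP project
 * (net.mptcp.mptcp_scheduler); verify it exists on the target kernel.
 */
#include <stdio.h>

int main(void)
{
	char name[64] = "";
	FILE *f = fopen("/proc/sys/net/mptcp/mptcp_scheduler", "r");

	if (!f) {
		perror("mptcp_scheduler sysctl not available");
		return 1;
	}
	if (fscanf(f, "%63s", name) == 1)
		printf("active MPTCP scheduler: %s\n", name);
	fclose(f);

	/* Selecting ECF (as root): echo ecf > /proc/sys/net/mptcp/mptcp_scheduler */
	return 0;
}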
| diff --git a/net/mptcp/mptcp_ecf.c b/net/mptcp/mptcp_ecf.c
 | ||||
| index e0bd430a8943..6b976b2b0c72 100644
 | ||||
| --- a/net/mptcp/mptcp_ecf.c
 | ||||
| +++ b/net/mptcp/mptcp_ecf.c
 | ||||
| @@ -21,7 +21,6 @@
 | ||||
|   | ||||
|  #include <linux/module.h> | ||||
|  #include <net/mptcp.h> | ||||
| -#include <trace/events/tcp.h>
 | ||||
|   | ||||
|  static unsigned int mptcp_ecf_r_beta __read_mostly = 4; /* beta = 1/r_beta = 0.25 */ | ||||
|  module_param(mptcp_ecf_r_beta, int, 0644); | ||||
| @@ -154,194 +153,6 @@ static struct sock *ecf_get_available_subflow(struct sock *meta_sk,
 | ||||
|  	return bestsk; | ||||
|  } | ||||
|   | ||||
| -/* copy from mptcp_sched.c: mptcp_rcv_buf_optimization */
 | ||||
| -static struct sk_buff *mptcp_ecf_rcv_buf_optimization(struct sock *sk, int penal)
 | ||||
| -{
 | ||||
| -	struct sock *meta_sk;
 | ||||
| -	const struct tcp_sock *tp = tcp_sk(sk);
 | ||||
| -	struct mptcp_tcp_sock *mptcp;
 | ||||
| -	struct sk_buff *skb_head;
 | ||||
| -	struct ecfsched_priv *ecf_p = ecfsched_get_priv(tp);
 | ||||
| -
 | ||||
| -	meta_sk = mptcp_meta_sk(sk);
 | ||||
| -	skb_head = tcp_rtx_queue_head(meta_sk);
 | ||||
| -
 | ||||
| -	if (!skb_head)
 | ||||
| -		return NULL;
 | ||||
| -
 | ||||
| -	/* If penalization is optional (coming from mptcp_next_segment() and
 | ||||
| -	 * We are not send-buffer-limited we do not penalize. The retransmission
 | ||||
| -	 * is just an optimization to fix the idle-time due to the delay before
 | ||||
| -	 * we wake up the application.
 | ||||
| -	 */
 | ||||
| -	if (!penal && sk_stream_memory_free(meta_sk))
 | ||||
| -		goto retrans;
 | ||||
| -
 | ||||
| -	/* Only penalize again after an RTT has elapsed */
 | ||||
| -	if (tcp_jiffies32 - ecf_p->last_rbuf_opti < usecs_to_jiffies(tp->srtt_us >> 3))
 | ||||
| -		goto retrans;
 | ||||
| -
 | ||||
| -	/* Half the cwnd of the slow flows */
 | ||||
| -	mptcp_for_each_sub(tp->mpcb, mptcp) {
 | ||||
| -		struct tcp_sock *tp_it = mptcp->tp;
 | ||||
| -
 | ||||
| -		if (tp_it != tp &&
 | ||||
| -		    TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
 | ||||
| -			if (tp->srtt_us < tp_it->srtt_us && inet_csk((struct sock *)tp_it)->icsk_ca_state == TCP_CA_Open) {
 | ||||
| -				u32 prior_cwnd = tp_it->snd_cwnd;
 | ||||
| -
 | ||||
| -				tp_it->snd_cwnd = max(tp_it->snd_cwnd >> 1U, 1U);
 | ||||
| -
 | ||||
| -				/* If in slow start, do not reduce the ssthresh */
 | ||||
| -				if (prior_cwnd >= tp_it->snd_ssthresh)
 | ||||
| -					tp_it->snd_ssthresh = max(tp_it->snd_ssthresh >> 1U, 2U);
 | ||||
| -
 | ||||
| -				ecf_p->last_rbuf_opti = tcp_jiffies32;
 | ||||
| -			}
 | ||||
| -		}
 | ||||
| -	}
 | ||||
| -
 | ||||
| -retrans:
 | ||||
| -
 | ||||
| -	/* Segment not yet injected into this path? Take it!!! */
 | ||||
| -	if (!(TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index))) {
 | ||||
| -		bool do_retrans = false;
 | ||||
| -		mptcp_for_each_sub(tp->mpcb, mptcp) {
 | ||||
| -			struct tcp_sock *tp_it = mptcp->tp;
 | ||||
| -
 | ||||
| -			if (tp_it != tp &&
 | ||||
| -			    TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) {
 | ||||
| -				if (tp_it->snd_cwnd <= 4) {
 | ||||
| -					do_retrans = true;
 | ||||
| -					break;
 | ||||
| -				}
 | ||||
| -
 | ||||
| -				if (4 * tp->srtt_us >= tp_it->srtt_us) {
 | ||||
| -					do_retrans = false;
 | ||||
| -					break;
 | ||||
| -				} else {
 | ||||
| -					do_retrans = true;
 | ||||
| -				}
 | ||||
| -			}
 | ||||
| -		}
 | ||||
| -
 | ||||
| -		if (do_retrans && mptcp_is_available(sk, skb_head, false)) {
 | ||||
| -			trace_mptcp_retransmit(sk, skb_head);
 | ||||
| -			return skb_head;
 | ||||
| -		}
 | ||||
| -	}
 | ||||
| -	return NULL;
 | ||||
| -}
 | ||||
| -
 | ||||
| -/* copy from mptcp_sched.c: __mptcp_next_segment */
 | ||||
| -/* Returns the next segment to be sent from the mptcp meta-queue.
 | ||||
| - * (chooses the reinject queue if any segment is waiting in it, otherwise,
 | ||||
| - * chooses the normal write queue).
 | ||||
| - * Sets *@reinject to 1 if the returned segment comes from the
 | ||||
| - * reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk,
 | ||||
| - * and sets it to -1 if it is a meta-level retransmission to optimize the
 | ||||
| - * receive-buffer.
 | ||||
| - */
 | ||||
| -static struct sk_buff *__mptcp_ecf_next_segment(struct sock *meta_sk, int *reinject)
 | ||||
| -{
 | ||||
| -	const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
 | ||||
| -	struct sk_buff *skb = NULL;
 | ||||
| -
 | ||||
| -	*reinject = 0;
 | ||||
| -
 | ||||
| -	/* If we are in fallback-mode, just take from the meta-send-queue */
 | ||||
| -	if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping)
 | ||||
| -		return tcp_send_head(meta_sk);
 | ||||
| -
 | ||||
| -	skb = skb_peek(&mpcb->reinject_queue);
 | ||||
| -
 | ||||
| -	if (skb) {
 | ||||
| -		*reinject = 1;
 | ||||
| -	} else {
 | ||||
| -		skb = tcp_send_head(meta_sk);
 | ||||
| -
 | ||||
| -		if (!skb && meta_sk->sk_socket &&
 | ||||
| -		    test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) &&
 | ||||
| -		    sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) {
 | ||||
| -			struct sock *subsk = ecf_get_available_subflow(meta_sk, NULL,
 | ||||
| -								   false);
 | ||||
| -			if (!subsk)
 | ||||
| -				return NULL;
 | ||||
| -
 | ||||
| -			skb = mptcp_ecf_rcv_buf_optimization(subsk, 0);
 | ||||
| -			if (skb)
 | ||||
| -				*reinject = -1;
 | ||||
| -		}
 | ||||
| -	}
 | ||||
| -	return skb;
 | ||||
| -}
 | ||||
| -
 | ||||
| -/* copy from mptcp_sched.c: mptcp_next_segment */
 | ||||
| -static struct sk_buff *mptcp_ecf_next_segment(struct sock *meta_sk,
 | ||||
| -					      int *reinject,
 | ||||
| -					      struct sock **subsk,
 | ||||
| -					      unsigned int *limit)
 | ||||
| -{
 | ||||
| -	struct sk_buff *skb = __mptcp_ecf_next_segment(meta_sk, reinject);
 | ||||
| -	unsigned int mss_now;
 | ||||
| -	struct tcp_sock *subtp;
 | ||||
| -	u16 gso_max_segs;
 | ||||
| -	u32 max_len, max_segs, window, needed;
 | ||||
| -
 | ||||
| -	/* As we set it, we have to reset it as well. */
 | ||||
| -	*limit = 0;
 | ||||
| -
 | ||||
| -	if (!skb)
 | ||||
| -		return NULL;
 | ||||
| -
 | ||||
| -	*subsk = ecf_get_available_subflow(meta_sk, skb, false);
 | ||||
| -	if (!*subsk)
 | ||||
| -		return NULL;
 | ||||
| -
 | ||||
| -	subtp = tcp_sk(*subsk);
 | ||||
| -	mss_now = tcp_current_mss(*subsk);
 | ||||
| -
 | ||||
| -	if (!*reinject && unlikely(!tcp_snd_wnd_test(tcp_sk(meta_sk), skb, mss_now))) {
 | ||||
| -		skb = mptcp_ecf_rcv_buf_optimization(*subsk, 1);
 | ||||
| -		if (skb)
 | ||||
| -			*reinject = -1;
 | ||||
| -		else
 | ||||
| -			return NULL;
 | ||||
| -	}
 | ||||
| -
 | ||||
| -	/* No splitting required, as we will only send one single segment */
 | ||||
| -	if (skb->len <= mss_now)
 | ||||
| -		return skb;
 | ||||
| -
 | ||||
| -	/* The following is similar to tcp_mss_split_point, but
 | ||||
| -	 * we do not care about nagle, because we will anyways
 | ||||
| -	 * use TCP_NAGLE_PUSH, which overrides this.
 | ||||
| -	 *
 | ||||
| -	 * So, we first limit according to the cwnd/gso-size and then according
 | ||||
| -	 * to the subflow's window.
 | ||||
| -	 */
 | ||||
| -
 | ||||
| -	gso_max_segs = (*subsk)->sk_gso_max_segs;
 | ||||
| -	if (!gso_max_segs) /* No gso supported on the subflow's NIC */
 | ||||
| -		gso_max_segs = 1;
 | ||||
| -	max_segs = min_t(unsigned int, tcp_cwnd_test(subtp, skb), gso_max_segs);
 | ||||
| -	if (!max_segs)
 | ||||
| -		return NULL;
 | ||||
| -
 | ||||
| -	max_len = mss_now * max_segs;
 | ||||
| -	window = tcp_wnd_end(subtp) - subtp->write_seq;
 | ||||
| -
 | ||||
| -	needed = min(skb->len, window);
 | ||||
| -	if (max_len <= skb->len)
 | ||||
| -		/* Take max_win, which is actually the cwnd/gso-size */
 | ||||
| -		*limit = max_len;
 | ||||
| -	else
 | ||||
| -		/* Or, take the window */
 | ||||
| -		*limit = needed;
 | ||||
| -
 | ||||
| -	return skb;
 | ||||
| -}
 | ||||
| -
 | ||||
|  static void ecfsched_init(struct sock *sk) | ||||
|  { | ||||
|  	struct ecfsched_priv *ecf_p = ecfsched_get_priv(tcp_sk(sk)); | ||||
| @@ -353,7 +164,7 @@ static void ecfsched_init(struct sock *sk)
 | ||||
|   | ||||
|  struct mptcp_sched_ops mptcp_sched_ecf = { | ||||
|  	.get_subflow = ecf_get_available_subflow, | ||||
| -	.next_segment = mptcp_ecf_next_segment,
 | ||||
| +	.next_segment = mptcp_next_segment,
 | ||||
|  	.init = ecfsched_init, | ||||
|  	.name = "ecf", | ||||
|  	.owner = THIS_MODULE, | ||||
(Remainder of the diff was not loaded.)