diff --git a/root/target/linux/generic/hack-5.4/690-mptcp_v0.96.patch b/root/target/linux/generic/hack-5.4/690-mptcp_v0.96.patch index 03490aaa..e5b44dd0 100644 --- a/root/target/linux/generic/hack-5.4/690-mptcp_v0.96.patch +++ b/root/target/linux/generic/hack-5.4/690-mptcp_v0.96.patch @@ -14,7 +14,7 @@ index db9d53b879f8..3d859ac99b73 100644 Format: ,,, diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt -index 8af3771a3ebf..e8fecb8f6370 100644 +index 5cf601c94e35..e8ff86e5078b 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -818,6 +818,18 @@ tcp_rx_skb_cache - BOOLEAN @@ -264,7 +264,7 @@ index 13792c0ef46e..e99cc510610f 100644 /* * Pointers to address related TCP functions diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h -index 34c4436fd18f..828f79528b32 100644 +index 58db7c69c146..1acaa5e45f15 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -79,7 +79,7 @@ struct inet_request_sock { @@ -287,10 +287,10 @@ index 34c4436fd18f..828f79528b32 100644 union { diff --git a/include/net/mptcp.h b/include/net/mptcp.h new file mode 100644 -index 000000000000..630977f67614 +index 000000000000..f2efa46027d0 --- /dev/null +++ b/include/net/mptcp.h -@@ -0,0 +1,1548 @@ +@@ -0,0 +1,1549 @@ +/* + * MPTCP implementation + * @@ -1025,6 +1025,7 @@ index 000000000000..630977f67614 + +#define MPTCP_INC_STATS(net, field) SNMP_INC_STATS((net)->mptcp.mptcp_statistics, field) +#define MPTCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mptcp.mptcp_statistics, field) ++#define MPTCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mptcp.mptcp_statistics, field, val) + +enum +{ @@ -2097,7 +2098,7 @@ index cb8ced4380a6..0aa0d10af2ce 100644 #define TCP_MIB_MAX __TCP_MIB_MAX struct tcp_mib { diff --git a/include/net/sock.h b/include/net/sock.h -index 7f213cfcb3cc..c1be2daccb54 100644 +index 9d687070d272..4c40e2af33f1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -821,6 +821,7 @@ enum sock_flags { @@ -2116,8 +2117,39 @@ index 7f213cfcb3cc..c1be2daccb54 100644 /* Keeping track of sockets in use */ #ifdef CONFIG_PROC_FS +@@ -1541,12 +1543,26 @@ static inline void lock_sock(struct sock *sk) + void __release_sock(struct sock *sk); + void release_sock(struct sock *sk); + ++#ifdef CONFIG_MPTCP_DEBUG_LOCK ++extern void mptcp_check_lock(struct sock* sk); ++#endif ++static inline void lock_sock_check_mptcp(struct sock* sk) ++{ ++#ifdef CONFIG_MPTCP_DEBUG_LOCK ++ if (sk && sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP) ++ mptcp_check_lock(sk); ++#endif ++} ++ + /* BH context may only use the following locking interface. 
*/ +-#define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock)) +-#define bh_lock_sock_nested(__sk) \ ++#define bh_lock_sock(__sk) do { lock_sock_check_mptcp(__sk); \ ++ spin_lock(&((__sk)->sk_lock.slock)); } while (0) ++#define bh_lock_sock_nested(__sk) do { \ ++ lock_sock_check_mptcp(__sk); \ + spin_lock_nested(&((__sk)->sk_lock.slock), \ +- SINGLE_DEPTH_NESTING) +-#define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) ++ SINGLE_DEPTH_NESTING); } while (0) ++#define bh_unlock_sock(__sk) do { lock_sock_check_mptcp(__sk); \ ++ spin_unlock(&((__sk)->sk_lock.slock)); } while (0) + + bool lock_sock_fast(struct sock *sk); + /** diff --git a/include/net/tcp.h b/include/net/tcp.h -index 65be8bd1f0f4..cf89f928640e 100644 +index 8459145497b7..c1b90862e3cc 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -182,6 +182,7 @@ @@ -2160,7 +2192,7 @@ index 65be8bd1f0f4..cf89f928640e 100644 /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; -@@ -310,6 +336,98 @@ static inline bool tcp_too_many_orphans(struct sock *sk, int shift) +@@ -310,6 +336,96 @@ static inline bool tcp_too_many_orphans(struct sock *sk, int shift) #define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field) #define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val) @@ -2237,8 +2269,6 @@ index 65be8bd1f0f4..cf89f928640e 100644 +u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb); +void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, u32 prior_snd_una); + -+/* void skb_clone_fraglist(struct sk_buff *skb); */ -+ +void inet_twsk_free(struct inet_timewait_sock *tw); +int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb); +/* These states need RST on ABORT according to RFC793 */ @@ -2259,7 +2289,7 @@ index 65be8bd1f0f4..cf89f928640e 100644 void tcp_tasklet_init(void); int tcp_v4_err(struct sk_buff *skb, u32); -@@ -411,7 +529,9 @@ int tcp_mmap(struct file *file, struct socket *sock, +@@ -411,7 +527,9 @@ int tcp_mmap(struct file *file, struct socket *sock, #endif void tcp_parse_options(const struct net *net, const struct sk_buff *skb, struct tcp_options_received *opt_rx, @@ -2270,7 +2300,7 @@ index 65be8bd1f0f4..cf89f928640e 100644 const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); /* -@@ -430,6 +550,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, +@@ -430,6 +548,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb); void tcp_v4_mtu_reduced(struct sock *sk); @@ -2278,7 +2308,7 @@ index 65be8bd1f0f4..cf89f928640e 100644 void tcp_req_err(struct sock *sk, u32 seq, bool abort); int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); struct sock *tcp_create_openreq_child(const struct sock *sk, -@@ -453,6 +574,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, +@@ -453,6 +572,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, struct request_sock *req, struct tcp_fastopen_cookie *foc, enum tcp_synack_type synack_type); @@ -2286,7 +2316,7 @@ index 65be8bd1f0f4..cf89f928640e 100644 int tcp_disconnect(struct sock *sk, int flags); void tcp_finish_connect(struct sock *sk, struct sk_buff *skb); -@@ -462,6 +584,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, +@@ -462,6 +582,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, /* From syncookies.c */ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, 
struct request_sock *req, @@ -2294,7 +2324,7 @@ index 65be8bd1f0f4..cf89f928640e 100644 struct dst_entry *dst, u32 tsoff); int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, u32 cookie); -@@ -547,7 +670,8 @@ static inline u32 tcp_cookie_time(void) +@@ -547,7 +668,8 @@ static inline u32 tcp_cookie_time(void) u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, u16 *mssp); @@ -2304,7 +2334,7 @@ index 65be8bd1f0f4..cf89f928640e 100644 u64 cookie_init_timestamp(struct request_sock *req); bool cookie_timestamp_decode(const struct net *net, struct tcp_options_received *opt); -@@ -561,7 +685,8 @@ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th, +@@ -561,7 +683,8 @@ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th, u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, const struct tcphdr *th, u16 *mssp); @@ -2314,7 +2344,7 @@ index 65be8bd1f0f4..cf89f928640e 100644 #endif /* tcp_output.c */ -@@ -597,10 +722,16 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, +@@ -597,10 +720,16 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, void tcp_skb_collapse_tstamp(struct sk_buff *skb, const struct sk_buff *next_skb); @@ -2331,7 +2361,7 @@ index 65be8bd1f0f4..cf89f928640e 100644 void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb); void tcp_fin(struct sock *sk); void tcp_check_space(struct sock *sk); -@@ -646,7 +777,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) +@@ -646,7 +775,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) } /* tcp.c */ @@ -2340,7 +2370,7 @@ index 65be8bd1f0f4..cf89f928640e 100644 /* Read 'sendfile()'-style from a TCP socket */ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, -@@ -724,7 +855,7 @@ static inline u32 tcp_min_rtt(const struct tcp_sock *tp) +@@ -724,7 +853,7 @@ static inline u32 tcp_min_rtt(const struct tcp_sock *tp) * Rcv_nxt can be after the window if our peer push more data * than the offered window. */ @@ -2349,7 +2379,7 @@ index 65be8bd1f0f4..cf89f928640e 100644 { s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt; -@@ -733,6 +864,32 @@ static inline u32 tcp_receive_window(const struct tcp_sock *tp) +@@ -733,6 +862,32 @@ static inline u32 tcp_receive_window(const struct tcp_sock *tp) return (u32) win; } @@ -2382,7 +2412,7 @@ index 65be8bd1f0f4..cf89f928640e 100644 /* Choose a new window, without checks for shrinking, and without * scaling applied to the result. The caller does these things * if necessary. This is a "raw" window selection. -@@ -830,6 +987,12 @@ struct tcp_skb_cb { +@@ -830,6 +985,12 @@ struct tcp_skb_cb { u16 tcp_gso_size; }; }; @@ -2395,7 +2425,7 @@ index 65be8bd1f0f4..cf89f928640e 100644 __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ __u8 sacked; /* State flags for SACK. 
*/
-@@ -848,6 +1011,14 @@ struct tcp_skb_cb {
+@@ -848,6 +1009,14 @@ struct tcp_skb_cb {
 			has_rxtstamp:1,	/* SKB has a RX timestamp	*/
 			unused:5;
 	__u32		ack_seq;	/* Sequence number ACK'd	*/
@@ -2410,7 +2440,7 @@ index 65be8bd1f0f4..cf89f928640e 100644
 	union {
 		struct {
 			/* There is space for up to 24 bytes */
-@@ -1090,6 +1261,8 @@ struct tcp_congestion_ops {
+@@ -1090,6 +1259,8 @@ struct tcp_congestion_ops {
 int tcp_set_allowed_congestion_control(char *allowed);
 int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
 			       bool reinit, bool cap_net_admin);
@@ -2419,7 +2449,7 @@ index 65be8bd1f0f4..cf89f928640e 100644
 u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
 void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
 
-@@ -1396,6 +1569,19 @@ static inline int tcp_win_from_space(const struct sock *sk, int space)
+@@ -1396,6 +1567,19 @@ static inline int tcp_win_from_space(const struct sock *sk, int space)
 	space - (space>>tcp_adv_win_scale);
 }
 
@@ -2439,7 +2469,7 @@ index 65be8bd1f0f4..cf89f928640e 100644
 /* Note: caller must be prepared to deal with negative returns */
 static inline int tcp_space(const struct sock *sk)
 {
-@@ -1988,6 +2174,30 @@ struct tcp_sock_af_ops {
+@@ -1989,6 +2173,30 @@ struct tcp_sock_af_ops {
 #endif
 };
 
@@ -2470,7 +2500,7 @@ index 65be8bd1f0f4..cf89f928640e 100644
 struct tcp_request_sock_ops {
 	u16 mss_clamp;
 #ifdef CONFIG_TCP_MD5SIG
-@@ -1998,12 +2208,13 @@ struct tcp_request_sock_ops {
+@@ -1999,12 +2207,13 @@ struct tcp_request_sock_ops {
 						  const struct sock *sk,
 						  const struct sk_buff *skb);
 #endif
@@ -2489,7 +2519,7 @@ index 65be8bd1f0f4..cf89f928640e 100644
 #endif
 	struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl,
 				       const struct request_sock *req);
-@@ -2022,15 +2233,17 @@ struct tcp_request_sock_ops {
+@@ -2023,15 +2232,17 @@ struct tcp_request_sock_ops {
 
 #ifdef CONFIG_SYN_COOKIES
 static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
@@ -2916,7 +2946,7 @@ index a03036456221..aebb337662c3 100644
 						   IFF_ALLMULTI));
 
 diff --git a/net/core/filter.c b/net/core/filter.c
-index eba96343c7af..c84249eec838 100644
+index 72bf78032f45..b96c3c126b7c 100644
 --- a/net/core/filter.c
 +++ b/net/core/filter.c
 @@ -73,6 +73,7 @@
@@ -3173,7 +3203,7 @@ index a926de2e42b5..6d73dc6e2586 100644
 	default "dctcp" if DEFAULT_DCTCP
 	default "cdg" if DEFAULT_CDG
 diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
-index a7a6b1adb698..e1ccbe866a90 100644
+index d61ca7be6eda..8e474fe9dfd8 100644
 --- a/net/ipv4/af_inet.c
 +++ b/net/ipv4/af_inet.c
 @@ -100,6 +100,7 @@
@@ -3239,7 +3269,7 @@ index a7a6b1adb698..e1ccbe866a90 100644
 
 	tcp_init();
 diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
-index 6cbf0db57ad0..1d70d8f22075 100644
+index 6cbf0db57ad0..be95a4b0ffa2 100644
 --- a/net/ipv4/inet_connection_sock.c
 +++ b/net/ipv4/inet_connection_sock.c
 @@ -19,6 +19,7 @@
@@ -3273,7 +3303,22 @@ index 6cbf0db57ad0..1d70d8f22075 100644
 
 	if (newsk) {
 		struct inet_connection_sock *newicsk = inet_csk(newsk);
-@@ -1019,7 +1025,14 @@ void inet_csk_listen_stop(struct sock *sk)
+@@ -966,6 +972,14 @@ struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
+ 
+ 	spin_lock(&queue->rskq_lock);
+ 	if (unlikely(sk->sk_state != TCP_LISTEN)) {
++		struct tcp_sock *tp = tcp_sk(sk);
++
++		/* in case of mptcp, two locks may have been taken, one
++		 * on the meta, the other on master_sk
++		 */
++		if (mptcp(tp) && tp->mpcb && tp->mpcb->master_sk)
++			bh_unlock_sock(tp->mpcb->master_sk);
++
+ 		inet_child_forget(sk, req, child);
+ 		child = NULL;
+ 	} else {
+@@ 
-1019,7 +1033,14 @@ void inet_csk_listen_stop(struct sock *sk) */ while ((req = reqsk_queue_remove(queue, sk)) != NULL) { struct sock *child = req->sk; @@ -3288,7 +3333,7 @@ index 6cbf0db57ad0..1d70d8f22075 100644 local_bh_disable(); bh_lock_sock(child); WARN_ON(sock_owned_by_user(child)); -@@ -1029,6 +1042,10 @@ void inet_csk_listen_stop(struct sock *sk) +@@ -1029,6 +1050,10 @@ void inet_csk_listen_stop(struct sock *sk) reqsk_put(req); bh_unlock_sock(child); local_bh_enable(); @@ -3343,7 +3388,7 @@ index aa3fd61818c4..8b3e955ec165 100644 break; case IP_TTL: diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c -index 6811174ad518..1de2196515ea 100644 +index 3f6c9514c7a9..9dc0cff27ad8 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -12,6 +12,8 @@ @@ -3409,7 +3454,7 @@ index 6811174ad518..1de2196515ea 100644 struct inet_request_sock *ireq; struct tcp_request_sock *treq; struct tcp_sock *tp = tcp_sk(sk); -@@ -313,7 +338,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) +@@ -314,7 +339,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); @@ -3419,7 +3464,7 @@ index 6811174ad518..1de2196515ea 100644 if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { tsoff = secure_tcp_ts_off(sock_net(sk), -@@ -326,7 +352,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) +@@ -327,7 +353,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; @@ -3433,7 +3478,7 @@ index 6811174ad518..1de2196515ea 100644 if (!req) goto out; -@@ -347,6 +378,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) +@@ -348,6 +379,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->sack_ok = tcp_opt.sack_ok; ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; @@ -3442,7 +3487,7 @@ index 6811174ad518..1de2196515ea 100644 req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->snt_synack = 0; treq->tfo_listener = false; -@@ -355,6 +388,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) +@@ -356,6 +389,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->ir_iif = inet_request_bound_dev_if(sk, skb); @@ -3452,7 +3497,7 @@ index 6811174ad518..1de2196515ea 100644 /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) */ -@@ -393,15 +429,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) +@@ -394,15 +430,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) req->rsk_window_clamp = full_space; @@ -3474,7 +3519,7 @@ index 6811174ad518..1de2196515ea 100644 * Normal sockets get it right from inet_csk_route_child_sock() */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c -index 4815cf72569e..61469ec77734 100644 +index 0a570d5d0b38..99ca3c55eb19 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -270,6 +270,7 @@ @@ -3627,7 +3672,7 @@ index 4815cf72569e..61469ec77734 100644 return sock_no_sendpage_locked(sk, page, offset, size, flags); tcp_rate_check_app_limited(sk); /* is sending application-limited? */ -@@ -1231,12 +1292,21 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) +@@ -1232,12 +1293,21 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) * is fully established. 
*/ if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && @@ -3650,7 +3695,7 @@ index 4815cf72569e..61469ec77734 100644 if (unlikely(tp->repair)) { if (tp->repair_queue == TCP_RECV_QUEUE) { copied = tcp_send_rcvq(sk, msg, size); -@@ -1529,7 +1599,7 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len) +@@ -1530,7 +1600,7 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len) * calculation of whether or not we must ACK for the sake of * a window update. */ @@ -3659,7 +3704,7 @@ index 4815cf72569e..61469ec77734 100644 { struct tcp_sock *tp = tcp_sk(sk); bool time_to_ack = false; -@@ -1568,11 +1638,11 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied) +@@ -1569,11 +1639,11 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied) * in states, where we will not receive more. It is useless. */ if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) { @@ -3673,7 +3718,7 @@ index 4815cf72569e..61469ec77734 100644 /* Send ACK now, if this read freed lots of space * in our buffer. Certainly, new_window is new window. -@@ -1690,7 +1760,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, +@@ -1691,7 +1761,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, /* Clean up data we have read: This will do ACK frames. */ if (copied > 0) { tcp_recv_skb(sk, seq, &offset); @@ -3682,7 +3727,7 @@ index 4815cf72569e..61469ec77734 100644 } return copied; } -@@ -1981,6 +2051,16 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, +@@ -1982,6 +2052,16 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, lock_sock(sk); @@ -3699,7 +3744,7 @@ index 4815cf72569e..61469ec77734 100644 err = -ENOTCONN; if (sk->sk_state == TCP_LISTEN) goto out; -@@ -2099,7 +2179,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, +@@ -2100,7 +2180,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, } } @@ -3708,7 +3753,7 @@ index 4815cf72569e..61469ec77734 100644 if (copied >= target) { /* Do not sleep, just process backlog. */ -@@ -2191,7 +2271,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, +@@ -2192,7 +2272,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, */ /* Clean up data we have read: This will do ACK frames. */ @@ -3717,7 +3762,7 @@ index 4815cf72569e..61469ec77734 100644 release_sock(sk); -@@ -2250,8 +2330,11 @@ void tcp_set_state(struct sock *sk, int state) +@@ -2251,8 +2331,11 @@ void tcp_set_state(struct sock *sk, int state) switch (state) { case TCP_ESTABLISHED: @@ -3730,7 +3775,7 @@ index 4815cf72569e..61469ec77734 100644 break; case TCP_CLOSE: -@@ -2264,8 +2347,11 @@ void tcp_set_state(struct sock *sk, int state) +@@ -2265,8 +2348,11 @@ void tcp_set_state(struct sock *sk, int state) inet_put_port(sk); /* fall through */ default: @@ -3743,7 +3788,7 @@ index 4815cf72569e..61469ec77734 100644 } /* Change state AFTER socket is unhashed to avoid closed -@@ -2297,9 +2383,10 @@ void tcp_set_state(struct sock *sk, int state) +@@ -2298,9 +2384,10 @@ void tcp_set_state(struct sock *sk, int state) [TCP_LISTEN] = TCP_CLOSE, [TCP_CLOSING] = TCP_CLOSING, [TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! 
*/ @@ -3755,7 +3800,7 @@ index 4815cf72569e..61469ec77734 100644 { int next = (int)new_state[sk->sk_state]; int ns = next & TCP_STATE_MASK; -@@ -2329,7 +2416,7 @@ void tcp_shutdown(struct sock *sk, int how) +@@ -2330,7 +2417,7 @@ void tcp_shutdown(struct sock *sk, int how) TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) { /* Clear out any half completed packets. FIN if needed. */ if (tcp_close_state(sk)) @@ -3764,7 +3809,7 @@ index 4815cf72569e..61469ec77734 100644 } } EXPORT_SYMBOL(tcp_shutdown); -@@ -2354,6 +2441,17 @@ void tcp_close(struct sock *sk, long timeout) +@@ -2355,6 +2442,17 @@ void tcp_close(struct sock *sk, long timeout) int data_was_unread = 0; int state; @@ -3782,7 +3827,7 @@ index 4815cf72569e..61469ec77734 100644 lock_sock(sk); sk->sk_shutdown = SHUTDOWN_MASK; -@@ -2398,7 +2496,7 @@ void tcp_close(struct sock *sk, long timeout) +@@ -2399,7 +2497,7 @@ void tcp_close(struct sock *sk, long timeout) /* Unread data was tossed, zap the connection. */ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); tcp_set_state(sk, TCP_CLOSE); @@ -3791,7 +3836,7 @@ index 4815cf72569e..61469ec77734 100644 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); -@@ -2472,7 +2570,7 @@ void tcp_close(struct sock *sk, long timeout) +@@ -2473,7 +2571,7 @@ void tcp_close(struct sock *sk, long timeout) struct tcp_sock *tp = tcp_sk(sk); if (tp->linger2 < 0) { tcp_set_state(sk, TCP_CLOSE); @@ -3800,7 +3845,7 @@ index 4815cf72569e..61469ec77734 100644 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONLINGER); } else { -@@ -2482,7 +2580,8 @@ void tcp_close(struct sock *sk, long timeout) +@@ -2483,7 +2581,8 @@ void tcp_close(struct sock *sk, long timeout) inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); } else { @@ -3810,7 +3855,7 @@ index 4815cf72569e..61469ec77734 100644 goto out; } } -@@ -2491,7 +2590,7 @@ void tcp_close(struct sock *sk, long timeout) +@@ -2492,7 +2591,7 @@ void tcp_close(struct sock *sk, long timeout) sk_mem_reclaim(sk); if (tcp_check_oom(sk, 0)) { tcp_set_state(sk, TCP_CLOSE); @@ -3819,7 +3864,7 @@ index 4815cf72569e..61469ec77734 100644 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); } else if (!check_net(sock_net(sk))) { -@@ -2523,15 +2622,6 @@ void tcp_close(struct sock *sk, long timeout) +@@ -2524,15 +2623,6 @@ void tcp_close(struct sock *sk, long timeout) } EXPORT_SYMBOL(tcp_close); @@ -3835,7 +3880,7 @@ index 4815cf72569e..61469ec77734 100644 static void tcp_rtx_queue_purge(struct sock *sk) { struct rb_node *p = rb_first(&sk->tcp_rtx_queue); -@@ -2553,6 +2643,10 @@ void tcp_write_queue_purge(struct sock *sk) +@@ -2554,6 +2644,10 @@ void tcp_write_queue_purge(struct sock *sk) { struct sk_buff *skb; @@ -3846,7 +3891,7 @@ index 4815cf72569e..61469ec77734 100644 tcp_chrono_stop(sk, TCP_CHRONO_BUSY); while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { tcp_skb_tsorted_anchor_cleanup(skb); -@@ -2571,6 +2665,36 @@ void tcp_write_queue_purge(struct sock *sk) +@@ -2572,6 +2666,36 @@ void tcp_write_queue_purge(struct sock *sk) inet_csk(sk)->icsk_backoff = 0; } @@ -3883,7 +3928,7 @@ index 4815cf72569e..61469ec77734 100644 int tcp_disconnect(struct sock *sk, int flags) { struct inet_sock *inet = inet_sk(sk); -@@ -2593,7 +2717,7 @@ int tcp_disconnect(struct sock *sk, int flags) +@@ -2594,7 +2718,7 @@ int tcp_disconnect(struct sock *sk, int flags) /* The last check adjusts for discrepancy of Linux wrt. 
RFC * states */ @@ -3892,7 +3937,7 @@ index 4815cf72569e..61469ec77734 100644 sk->sk_err = ECONNRESET; } else if (old_state == TCP_SYN_SENT) sk->sk_err = ECONNRESET; -@@ -2615,11 +2739,16 @@ int tcp_disconnect(struct sock *sk, int flags) +@@ -2616,11 +2740,16 @@ int tcp_disconnect(struct sock *sk, int flags) if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); @@ -3912,7 +3957,7 @@ index 4815cf72569e..61469ec77734 100644 seq = tp->write_seq + tp->max_window + 2; if (!seq) -@@ -2629,21 +2758,14 @@ int tcp_disconnect(struct sock *sk, int flags) +@@ -2630,21 +2759,14 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_backoff = 0; tp->snd_cwnd = 2; icsk->icsk_probes_out = 0; @@ -3937,7 +3982,7 @@ index 4815cf72569e..61469ec77734 100644 inet_csk_delack_init(sk); /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 * issue in __tcp_select_window() -@@ -2655,14 +2777,6 @@ int tcp_disconnect(struct sock *sk, int flags) +@@ -2656,14 +2778,6 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_rx_dst = NULL; tcp_saved_syn_free(tp); tp->compressed_ack = 0; @@ -3952,7 +3997,7 @@ index 4815cf72569e..61469ec77734 100644 tp->duplicate_sack[0].start_seq = 0; tp->duplicate_sack[0].end_seq = 0; tp->dsack_dups = 0; -@@ -2671,8 +2785,6 @@ int tcp_disconnect(struct sock *sk, int flags) +@@ -2672,8 +2786,6 @@ int tcp_disconnect(struct sock *sk, int flags) tp->sacked_out = 0; tp->tlp_high_seq = 0; tp->last_oow_ack_time = 0; @@ -3961,7 +4006,7 @@ index 4815cf72569e..61469ec77734 100644 tp->rack.mstamp = 0; tp->rack.advanced = 0; tp->rack.reo_wnd_steps = 1; -@@ -2706,7 +2818,7 @@ int tcp_disconnect(struct sock *sk, int flags) +@@ -2707,7 +2819,7 @@ int tcp_disconnect(struct sock *sk, int flags) static inline bool tcp_can_repair_sock(const struct sock *sk) { return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) && @@ -3970,7 +4015,7 @@ index 4815cf72569e..61469ec77734 100644 } static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len) -@@ -2737,6 +2849,7 @@ static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int l +@@ -2738,6 +2850,7 @@ static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int l tp->rcv_wnd = opt.rcv_wnd; tp->rcv_wup = opt.rcv_wup; @@ -3978,7 +4023,7 @@ index 4815cf72569e..61469ec77734 100644 return 0; } -@@ -2875,6 +2988,61 @@ static int do_tcp_setsockopt(struct sock *sk, int level, +@@ -2876,6 +2989,61 @@ static int do_tcp_setsockopt(struct sock *sk, int level, return tcp_fastopen_reset_cipher(net, sk, key, backup_key); } @@ -4040,7 +4085,7 @@ index 4815cf72569e..61469ec77734 100644 default: /* fallthru */ break; -@@ -3064,6 +3232,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, +@@ -3065,6 +3233,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_DEFER_ACCEPT: @@ -4053,7 +4098,7 @@ index 4815cf72569e..61469ec77734 100644 /* Translate value in seconds to number of retransmits */ icsk->icsk_accept_queue.rskq_defer_accept = secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, -@@ -3091,7 +3265,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, +@@ -3092,7 +3266,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && inet_csk_ack_scheduled(sk)) { icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; @@ -4062,7 +4107,7 @@ index 4815cf72569e..61469ec77734 100644 if (!(val & 1)) inet_csk_enter_pingpong_mode(sk); } -@@ -3101,7 +3275,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level, +@@ -3102,7 +3276,10 @@ 
static int do_tcp_setsockopt(struct sock *sk, int level, #ifdef CONFIG_TCP_MD5SIG case TCP_MD5SIG: case TCP_MD5SIG_EXT: @@ -4074,7 +4119,7 @@ index 4815cf72569e..61469ec77734 100644 break; #endif case TCP_USER_TIMEOUT: -@@ -3157,6 +3334,33 @@ static int do_tcp_setsockopt(struct sock *sk, int level, +@@ -3159,6 +3336,33 @@ static int do_tcp_setsockopt(struct sock *sk, int level, tp->notsent_lowat = val; sk->sk_write_space(sk); break; @@ -4108,7 +4153,7 @@ index 4815cf72569e..61469ec77734 100644 case TCP_INQ: if (val > 1 || val < 0) err = -EINVAL; -@@ -3221,7 +3425,7 @@ static void tcp_get_info_chrono_stats(const struct tcp_sock *tp, +@@ -3223,7 +3427,7 @@ static void tcp_get_info_chrono_stats(const struct tcp_sock *tp, } /* Return information about state of tcp endpoint in API format. */ @@ -4117,7 +4162,7 @@ index 4815cf72569e..61469ec77734 100644 { const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); -@@ -3258,7 +3462,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) +@@ -3260,7 +3464,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) return; } @@ -4127,7 +4172,7 @@ index 4815cf72569e..61469ec77734 100644 info->tcpi_ca_state = icsk->icsk_ca_state; info->tcpi_retransmits = icsk->icsk_retransmits; -@@ -3334,7 +3539,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) +@@ -3336,7 +3541,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_reord_seen = tp->reord_seen; info->tcpi_rcv_ooopack = tp->rcv_ooopack; info->tcpi_snd_wnd = tp->snd_wnd; @@ -4138,7 +4183,7 @@ index 4815cf72569e..61469ec77734 100644 } EXPORT_SYMBOL_GPL(tcp_get_info); -@@ -3481,7 +3688,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, +@@ -3483,7 +3690,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, if (get_user(len, optlen)) return -EFAULT; @@ -4147,7 +4192,7 @@ index 4815cf72569e..61469ec77734 100644 len = min_t(unsigned int, len, sizeof(info)); if (put_user(len, optlen)) -@@ -3670,6 +3877,87 @@ static int do_tcp_getsockopt(struct sock *sk, int level, +@@ -3672,6 +3879,87 @@ static int do_tcp_getsockopt(struct sock *sk, int level, } return 0; } @@ -4235,7 +4280,7 @@ index 4815cf72569e..61469ec77734 100644 #ifdef CONFIG_MMU case TCP_ZEROCOPY_RECEIVE: { struct tcp_zerocopy_receive zc; -@@ -3875,7 +4163,9 @@ void tcp_done(struct sock *sk) +@@ -3877,7 +4165,9 @@ void tcp_done(struct sock *sk) if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); @@ -4245,7 +4290,7 @@ index 4815cf72569e..61469ec77734 100644 tcp_clear_xmit_timers(sk); if (req) reqsk_fastopen_remove(sk, req, false); -@@ -3891,6 +4181,8 @@ void tcp_done(struct sock *sk) +@@ -3893,6 +4183,8 @@ void tcp_done(struct sock *sk) int tcp_abort(struct sock *sk, int err) { @@ -4254,7 +4299,7 @@ index 4815cf72569e..61469ec77734 100644 if (!sk_fullsock(sk)) { if (sk->sk_state == TCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); -@@ -3904,7 +4196,7 @@ int tcp_abort(struct sock *sk, int err) +@@ -3906,7 +4198,7 @@ int tcp_abort(struct sock *sk, int err) } /* Don't race with userspace socket closes such as tcp_close. */ @@ -4263,7 +4308,7 @@ index 4815cf72569e..61469ec77734 100644 if (sk->sk_state == TCP_LISTEN) { tcp_set_state(sk, TCP_CLOSE); -@@ -3913,7 +4205,7 @@ int tcp_abort(struct sock *sk, int err) +@@ -3915,7 +4207,7 @@ int tcp_abort(struct sock *sk, int err) /* Don't race with BH socket closes such as inet_csk_listen_stop. 
*/ local_bh_disable(); @@ -4272,7 +4317,7 @@ index 4815cf72569e..61469ec77734 100644 if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_err = err; -@@ -3921,14 +4213,14 @@ int tcp_abort(struct sock *sk, int err) +@@ -3923,14 +4215,14 @@ int tcp_abort(struct sock *sk, int err) smp_wmb(); sk->sk_error_report(sk); if (tcp_need_reset(sk->sk_state)) @@ -4330,7 +4375,7 @@ index 549506162dde..e5a530e0b1c5 100644 #ifdef CONFIG_TCP_MD5SIG diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c -index a5ec77a5ad6f..f9fb4a268b9b 100644 +index 21705b2ddaff..8b439c148e2c 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -9,6 +9,7 @@ @@ -4393,7 +4438,7 @@ index a5ec77a5ad6f..f9fb4a268b9b 100644 * and queues the child into listener accept queue. */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c -index 0808110451a0..d278b28035ad 100644 +index f4e00ff909da..b31087c02c2d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -76,35 +76,15 @@ @@ -4537,7 +4582,7 @@ index 0808110451a0..d278b28035ad 100644 /* Normally R but no L won't result in plain S */ if (!dup_sack && (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS) -@@ -2965,7 +2972,7 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag, +@@ -2969,7 +2976,7 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag, */ tcp_update_rtt_min(sk, ca_rtt_us, flag); tcp_rtt_estimator(sk, seq_rtt_us); @@ -4546,7 +4591,7 @@ index 0808110451a0..d278b28035ad 100644 /* RFC6298: only reset backoff on valid RTT measurement. */ inet_csk(sk)->icsk_backoff = 0; -@@ -3033,7 +3040,7 @@ static void tcp_set_xmit_timer(struct sock *sk) +@@ -3037,7 +3044,7 @@ static void tcp_set_xmit_timer(struct sock *sk) } /* If we get here, the whole TSO packet has not been acked. */ @@ -4555,7 +4600,7 @@ index 0808110451a0..d278b28035ad 100644 { struct tcp_sock *tp = tcp_sk(sk); u32 packets_acked; -@@ -3053,8 +3060,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) +@@ -3057,8 +3064,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) return packets_acked; } @@ -4565,7 +4610,7 @@ index 0808110451a0..d278b28035ad 100644 { const struct skb_shared_info *shinfo; -@@ -3159,6 +3165,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, +@@ -3163,6 +3169,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, */ if (likely(!(scb->tcp_flags & TCPHDR_SYN))) { flag |= FLAG_DATA_ACKED; @@ -4574,7 +4619,7 @@ index 0808110451a0..d278b28035ad 100644 } else { flag |= FLAG_SYN_ACKED; tp->retrans_stamp = 0; -@@ -3279,7 +3287,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, +@@ -3283,7 +3291,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, return flag; } @@ -4583,7 +4628,7 @@ index 0808110451a0..d278b28035ad 100644 { struct inet_connection_sock *icsk = inet_csk(sk); struct sk_buff *head = tcp_send_head(sk); -@@ -3353,9 +3361,8 @@ static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked, +@@ -3358,9 +3366,8 @@ static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked, /* Check that window update is acceptable. * The function assumes that snd_una<=ack<=snd_next. 
*/ @@ -4595,7 +4640,7 @@ index 0808110451a0..d278b28035ad 100644 { return after(ack, tp->snd_una) || after(ack_seq, tp->snd_wl1) || -@@ -3593,7 +3600,7 @@ static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag) +@@ -3599,7 +3606,7 @@ static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag) } /* This routine deals with incoming acks, but not outgoing ones. */ @@ -4604,7 +4649,7 @@ index 0808110451a0..d278b28035ad 100644 { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); -@@ -3716,6 +3723,14 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) +@@ -3722,6 +3729,14 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_rack_update_reo_wnd(sk, &rs); @@ -4619,7 +4664,7 @@ index 0808110451a0..d278b28035ad 100644 if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); -@@ -3860,8 +3875,10 @@ static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) +@@ -3866,8 +3881,10 @@ static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) */ void tcp_parse_options(const struct net *net, const struct sk_buff *skb, @@ -4632,7 +4677,7 @@ index 0808110451a0..d278b28035ad 100644 { const unsigned char *ptr; const struct tcphdr *th = tcp_hdr(skb); -@@ -3947,6 +3964,10 @@ void tcp_parse_options(const struct net *net, +@@ -3953,6 +3970,10 @@ void tcp_parse_options(const struct net *net, */ break; #endif @@ -4643,7 +4688,7 @@ index 0808110451a0..d278b28035ad 100644 case TCPOPT_FASTOPEN: tcp_parse_fastopen_option( opsize - TCPOLEN_FASTOPEN_BASE, -@@ -4014,7 +4035,9 @@ static bool tcp_fast_parse_options(const struct net *net, +@@ -4020,7 +4041,9 @@ static bool tcp_fast_parse_options(const struct net *net, return true; } @@ -4654,7 +4699,7 @@ index 0808110451a0..d278b28035ad 100644 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; -@@ -4124,7 +4147,7 @@ static inline bool tcp_paws_discard(const struct sock *sk, +@@ -4130,7 +4153,7 @@ static inline bool tcp_paws_discard(const struct sock *sk, static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) { return !before(end_seq, tp->rcv_wup) && @@ -4663,7 +4708,7 @@ index 0808110451a0..d278b28035ad 100644 } /* When we get a reset we do this. */ -@@ -4173,6 +4196,11 @@ void tcp_fin(struct sock *sk) +@@ -4179,6 +4202,11 @@ void tcp_fin(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -4675,7 +4720,7 @@ index 0808110451a0..d278b28035ad 100644 inet_csk_schedule_ack(sk); sk->sk_shutdown |= RCV_SHUTDOWN; -@@ -4183,6 +4211,10 @@ void tcp_fin(struct sock *sk) +@@ -4189,6 +4217,10 @@ void tcp_fin(struct sock *sk) case TCP_ESTABLISHED: /* Move to CLOSE_WAIT */ tcp_set_state(sk, TCP_CLOSE_WAIT); @@ -4686,7 +4731,7 @@ index 0808110451a0..d278b28035ad 100644 inet_csk_enter_pingpong_mode(sk); break; -@@ -4205,9 +4237,16 @@ void tcp_fin(struct sock *sk) +@@ -4211,9 +4243,16 @@ void tcp_fin(struct sock *sk) tcp_set_state(sk, TCP_CLOSING); break; case TCP_FIN_WAIT2: @@ -4704,7 +4749,7 @@ index 0808110451a0..d278b28035ad 100644 break; default: /* Only TCP_LISTEN and TCP_CLOSE are left, in these -@@ -4229,6 +4268,10 @@ void tcp_fin(struct sock *sk) +@@ -4235,6 +4274,10 @@ void tcp_fin(struct sock *sk) if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); @@ -4715,7 +4760,7 @@ index 0808110451a0..d278b28035ad 100644 /* Do not send POLL_HUP for half duplex close. 
*/ if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE) -@@ -4443,6 +4486,9 @@ static bool tcp_try_coalesce(struct sock *sk, +@@ -4449,6 +4492,9 @@ static bool tcp_try_coalesce(struct sock *sk, *fragstolen = false; @@ -4725,7 +4770,7 @@ index 0808110451a0..d278b28035ad 100644 /* Its possible this segment overlaps with prior segment in queue */ if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq) return false; -@@ -4497,7 +4543,7 @@ static void tcp_drop(struct sock *sk, struct sk_buff *skb) +@@ -4503,7 +4549,7 @@ static void tcp_drop(struct sock *sk, struct sk_buff *skb) /* This one checks to see if we can put data from the * out_of_order queue into the receive_queue. */ @@ -4734,7 +4779,7 @@ index 0808110451a0..d278b28035ad 100644 { struct tcp_sock *tp = tcp_sk(sk); __u32 dsack_high = tp->rcv_nxt; -@@ -4520,7 +4566,14 @@ static void tcp_ofo_queue(struct sock *sk) +@@ -4526,7 +4572,14 @@ static void tcp_ofo_queue(struct sock *sk) p = rb_next(p); rb_erase(&skb->rbnode, &tp->out_of_order_queue); @@ -4750,17 +4795,26 @@ index 0808110451a0..d278b28035ad 100644 tcp_drop(sk, skb); continue; } -@@ -4550,6 +4603,9 @@ static void tcp_ofo_queue(struct sock *sk) +@@ -4556,21 +4609,23 @@ static void tcp_ofo_queue(struct sock *sk) static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, unsigned int size) { -+ if (mptcp(tcp_sk(sk))) -+ sk = mptcp_meta_sk(sk); +- if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || ++ struct sock *meta_sk = mptcp(tcp_sk(sk)) ? mptcp_meta_sk(sk) : sk; + - if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || ++ if (atomic_read(&meta_sk->sk_rmem_alloc) > meta_sk->sk_rcvbuf || !sk_rmem_schedule(sk, skb, size)) { -@@ -4564,7 +4620,7 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, +- if (tcp_prune_queue(sk) < 0) ++ if (tcp_prune_queue(meta_sk) < 0) + return -1; + + while (!sk_rmem_schedule(sk, skb, size)) { +- if (!tcp_prune_ofo_queue(sk)) ++ if (!tcp_prune_ofo_queue(meta_sk)) + return -1; + } + } return 0; } @@ -4769,7 +4823,7 @@ index 0808110451a0..d278b28035ad 100644 { struct tcp_sock *tp = tcp_sk(sk); struct rb_node **p, *parent; -@@ -4636,7 +4692,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) +@@ -4642,7 +4697,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) continue; } if (before(seq, TCP_SKB_CB(skb1)->end_seq)) { @@ -4779,7 +4833,7 @@ index 0808110451a0..d278b28035ad 100644 /* All the bits are present. Drop. 
*/ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); -@@ -4683,6 +4740,11 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) +@@ -4689,6 +4745,11 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) end_seq); break; } @@ -4791,7 +4845,7 @@ index 0808110451a0..d278b28035ad 100644 rb_erase(&skb1->rbnode, &tp->out_of_order_queue); tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); -@@ -4694,7 +4756,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) +@@ -4700,7 +4761,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) tp->ooo_last_skb = skb; add_sack: @@ -4800,7 +4854,7 @@ index 0808110451a0..d278b28035ad 100644 tcp_sack_new_ofo_skb(sk, seq, end_seq); end: if (skb) { -@@ -4708,8 +4770,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) +@@ -4714,8 +4775,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) } } @@ -4811,7 +4865,7 @@ index 0808110451a0..d278b28035ad 100644 { int eaten; struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue); -@@ -4784,7 +4846,8 @@ void tcp_data_ready(struct sock *sk) +@@ -4790,7 +4851,8 @@ void tcp_data_ready(struct sock *sk) if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) && !sock_flag(sk, SOCK_DONE) && @@ -4821,7 +4875,7 @@ index 0808110451a0..d278b28035ad 100644 return; sk->sk_data_ready(sk); -@@ -4796,10 +4859,14 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) +@@ -4802,10 +4864,14 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) bool fragstolen; int eaten; @@ -4837,7 +4891,7 @@ index 0808110451a0..d278b28035ad 100644 skb_dst_drop(skb); __skb_pull(skb, tcp_hdr(skb)->doff * 4); -@@ -4810,7 +4877,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) +@@ -4816,7 +4882,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) * Out of sequence packets to the out_of_order_queue. */ if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { @@ -4846,7 +4900,7 @@ index 0808110451a0..d278b28035ad 100644 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP); goto out_of_window; } -@@ -4826,7 +4893,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) +@@ -4832,7 +4898,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) } eaten = tcp_queue_rcv(sk, skb, &fragstolen); @@ -4855,7 +4909,7 @@ index 0808110451a0..d278b28035ad 100644 tcp_event_data_recv(sk, skb); if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) tcp_fin(sk); -@@ -4848,7 +4915,11 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) +@@ -4854,7 +4920,11 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (eaten > 0) kfree_skb_partial(skb, fragstolen); @@ -4868,7 +4922,7 @@ index 0808110451a0..d278b28035ad 100644 tcp_data_ready(sk); return; } -@@ -4868,7 +4939,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) +@@ -4874,7 +4944,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) } /* Out of window. F.e. zero window probe. */ @@ -4878,7 +4932,7 @@ index 0808110451a0..d278b28035ad 100644 goto out_of_window; if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { -@@ -4878,7 +4950,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) +@@ -4884,7 +4955,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) /* If window is closed, drop tail of packet. But after * remembering D-SACK for its head made in previous line. 
*/ @@ -4887,7 +4941,7 @@ index 0808110451a0..d278b28035ad 100644 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP); goto out_of_window; } -@@ -5191,7 +5263,7 @@ static int tcp_prune_queue(struct sock *sk) +@@ -5197,7 +5268,7 @@ static int tcp_prune_queue(struct sock *sk) return -1; } @@ -4896,7 +4950,7 @@ index 0808110451a0..d278b28035ad 100644 { const struct tcp_sock *tp = tcp_sk(sk); -@@ -5226,7 +5298,7 @@ static void tcp_new_space(struct sock *sk) +@@ -5232,7 +5303,7 @@ static void tcp_new_space(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -4905,7 +4959,7 @@ index 0808110451a0..d278b28035ad 100644 tcp_sndbuf_expand(sk); tp->snd_cwnd_stamp = tcp_jiffies32; } -@@ -5250,10 +5322,11 @@ void tcp_check_space(struct sock *sk) +@@ -5256,10 +5327,11 @@ void tcp_check_space(struct sock *sk) sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); /* pairs with tcp_poll() */ smp_mb(); @@ -4920,7 +4974,7 @@ index 0808110451a0..d278b28035ad 100644 tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED); } } -@@ -5272,6 +5345,8 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) +@@ -5278,6 +5350,8 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) { struct tcp_sock *tp = tcp_sk(sk); unsigned long rtt, delay; @@ -4929,7 +4983,7 @@ index 0808110451a0..d278b28035ad 100644 /* More than one full frame received... */ if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && -@@ -5280,8 +5355,8 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) +@@ -5286,8 +5360,8 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) * If application uses SO_RCVLOWAT, we want send ack now if * we have not received enough bytes to satisfy the condition. */ @@ -4940,7 +4994,7 @@ index 0808110451a0..d278b28035ad 100644 /* We ACK each frame or... */ tcp_in_quickack_mode(sk) || /* Protocol state mandates a one-time immediate ACK */ -@@ -5416,6 +5491,10 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t +@@ -5423,6 +5497,10 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t { struct tcp_sock *tp = tcp_sk(sk); @@ -4951,7 +5005,7 @@ index 0808110451a0..d278b28035ad 100644 /* Check if we get a new urgent pointer - normally not. 
*/ if (th->urg) tcp_check_urg(sk, th); -@@ -5558,9 +5637,15 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, +@@ -5565,9 +5643,15 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, goto discard; } @@ -4967,7 +5021,7 @@ index 0808110451a0..d278b28035ad 100644 tcp_drop(sk, skb); return false; } -@@ -5617,6 +5702,10 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) +@@ -5624,6 +5708,10 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) tp->rx_opt.saw_tstamp = 0; @@ -4978,7 +5032,7 @@ index 0808110451a0..d278b28035ad 100644 /* pred_flags is 0xS?10 << 16 + snd_wnd * if header_prediction is to be made * 'S' will always be tp->tcp_header_len >> 2 -@@ -5791,7 +5880,7 @@ void tcp_init_transfer(struct sock *sk, int bpf_op) +@@ -5798,7 +5886,7 @@ void tcp_init_transfer(struct sock *sk, int bpf_op) tcp_call_bpf(sk, bpf_op, 0, NULL); tcp_init_congestion_control(sk); @@ -4987,7 +5041,7 @@ index 0808110451a0..d278b28035ad 100644 } void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) -@@ -5828,17 +5917,24 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, +@@ -5835,17 +5923,24 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, struct tcp_fastopen_cookie *cookie) { struct tcp_sock *tp = tcp_sk(sk); @@ -5014,7 +5068,7 @@ index 0808110451a0..d278b28035ad 100644 mss = opt.mss_clamp; } -@@ -5862,7 +5958,11 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, +@@ -5869,7 +5964,11 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp); @@ -5027,7 +5081,7 @@ index 0808110451a0..d278b28035ad 100644 skb_rbtree_walk_from(data) { if (__tcp_retransmit_skb(sk, data, 1)) break; -@@ -5917,9 +6017,13 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, +@@ -5924,9 +6023,13 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_cookie foc = { .len = -1 }; int saved_clamp = tp->rx_opt.mss_clamp; @@ -5042,7 +5096,7 @@ index 0808110451a0..d278b28035ad 100644 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; -@@ -5980,11 +6084,41 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, +@@ -5987,11 +6090,41 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_try_undo_spurious_syn(sk); tcp_ack(sk, skb, FLAG_SLOWPATH); @@ -5084,7 +5138,7 @@ index 0808110451a0..d278b28035ad 100644 /* RFC1323: The window in SYN & SYN/ACK segments is * never scaled. -@@ -6006,6 +6140,11 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, +@@ -6013,6 +6146,11 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tp->tcp_header_len = sizeof(struct tcphdr); } @@ -5096,7 +5150,7 @@ index 0808110451a0..d278b28035ad 100644 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_initialize_rcv_mss(sk); -@@ -6029,9 +6168,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, +@@ -6036,9 +6174,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, } if (fastopen_fail) return -1; @@ -5111,7 +5165,7 @@ index 0808110451a0..d278b28035ad 100644 /* Save one ACK. Data will be ready after * several ticks, if write_pending is set. 
* -@@ -6070,6 +6212,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, +@@ -6077,6 +6218,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_paws_reject(&tp->rx_opt, 0)) goto discard_and_undo; @@ -5119,7 +5173,7 @@ index 0808110451a0..d278b28035ad 100644 if (th->syn) { /* We see SYN without ACK. It is attempt of * simultaneous connect with crossed SYNs. -@@ -6086,9 +6229,15 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, +@@ -6093,9 +6235,15 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tp->tcp_header_len = sizeof(struct tcphdr); } @@ -5135,7 +5189,7 @@ index 0808110451a0..d278b28035ad 100644 /* RFC1323: The window in SYN & SYN/ACK segments is * never scaled. -@@ -6176,6 +6325,7 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) +@@ -6183,6 +6331,7 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) */ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) @@ -5143,7 +5197,7 @@ index 0808110451a0..d278b28035ad 100644 { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); -@@ -6218,6 +6368,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) +@@ -6225,6 +6374,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tp->rx_opt.saw_tstamp = 0; tcp_mstamp_refresh(tp); queued = tcp_rcv_synsent_state_process(sk, skb, th); @@ -5160,7 +5214,7 @@ index 0808110451a0..d278b28035ad 100644 if (queued >= 0) return queued; -@@ -6290,6 +6450,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) +@@ -6297,6 +6456,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; @@ -5169,7 +5223,7 @@ index 0808110451a0..d278b28035ad 100644 if (!inet_csk(sk)->icsk_ca_ops->cong_control) tcp_update_pacing_rate(sk); -@@ -6299,6 +6461,30 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) +@@ -6306,9 +6467,34 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_initialize_rcv_mss(sk); tcp_fast_path_on(tp); @@ -5200,17 +5254,21 @@ index 0808110451a0..d278b28035ad 100644 break; case TCP_FIN_WAIT1: { -@@ -6339,7 +6525,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) ++ struct sock *meta_sk = mptcp(tcp_sk(sk)) ? mptcp_meta_sk(sk) : sk; + int tmo; + + if (req) +@@ -6346,7 +6532,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); - } else if (th->fin || sock_owned_by_user(sk)) { + } else if (th->fin || mptcp_is_data_fin(skb) || -+ sock_owned_by_user(sk)) { ++ sock_owned_by_user(meta_sk)) { /* Bad case. We could lose such FIN otherwise. * It is not a big problem, but it looks confusing * and not so rare event. 
We still can lose it now, -@@ -6348,7 +6535,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) +@@ -6355,7 +6542,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) */ inet_csk_reset_keepalive_timer(sk, tmo); } else { @@ -5219,7 +5277,7 @@ index 0808110451a0..d278b28035ad 100644 goto discard; } break; -@@ -6356,7 +6543,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) +@@ -6363,7 +6550,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) case TCP_CLOSING: if (tp->snd_una == tp->write_seq) { @@ -5228,7 +5286,7 @@ index 0808110451a0..d278b28035ad 100644 goto discard; } break; -@@ -6368,6 +6555,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) +@@ -6375,6 +6562,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) goto discard; } break; @@ -5238,7 +5296,7 @@ index 0808110451a0..d278b28035ad 100644 } /* step 6: check the URG bit */ -@@ -6389,7 +6579,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) +@@ -6396,7 +6586,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) */ if (sk->sk_shutdown & RCV_SHUTDOWN) { if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && @@ -5248,7 +5306,7 @@ index 0808110451a0..d278b28035ad 100644 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); tcp_reset(sk); return 1; -@@ -6491,6 +6682,8 @@ static void tcp_openreq_init(struct request_sock *req, +@@ -6498,6 +6689,8 @@ static void tcp_openreq_init(struct request_sock *req, ireq->wscale_ok = rx_opt->wscale_ok; ireq->acked = 0; ireq->ecn_ok = 0; @@ -5257,7 +5315,7 @@ index 0808110451a0..d278b28035ad 100644 ireq->ir_rmt_port = tcp_hdr(skb)->source; ireq->ir_num = ntohs(tcp_hdr(skb)->dest); ireq->ir_mark = inet_request_mark(sk, skb); -@@ -6616,12 +6809,17 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, +@@ -6628,12 +6821,17 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, /* TW buckets are converted to open requests without * limitations, they conserve resources and peer is * evidently real one. 
@@ -5276,7 +5334,7 @@ index 0808110451a0..d278b28035ad 100644 } if (sk_acceptq_is_full(sk)) { -@@ -6639,8 +6837,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, +@@ -6650,8 +6849,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = af_ops->mss_clamp; tmp_opt.user_mss = tp->rx_opt.user_mss; @@ -5287,7 +5345,7 @@ index 0808110451a0..d278b28035ad 100644 if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); -@@ -6655,7 +6853,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, +@@ -6666,7 +6865,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, /* Note: tcp_v6_init_req() might override ir_iif for link locals */ inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb); @@ -5297,7 +5355,7 @@ index 0808110451a0..d278b28035ad 100644 if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; -@@ -6691,7 +6890,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, +@@ -6704,7 +6904,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_ecn_create_request(req, skb, sk, dst); if (want_cookie) { @@ -5306,7 +5364,7 @@ index 0808110451a0..d278b28035ad 100644 req->cookie_ts = tmp_opt.tstamp_ok; if (!tmp_opt.tstamp_ok) inet_rsk(req)->ecn_ok = 0; -@@ -6706,17 +6905,25 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, +@@ -6719,17 +6919,31 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst); } if (fastopen_sk) { @@ -5320,21 +5378,28 @@ index 0808110451a0..d278b28035ad 100644 - if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) { + if (!inet_csk_reqsk_queue_add(sk, req, meta_sk)) { reqsk_fastopen_remove(fastopen_sk, req, false); - bh_unlock_sock(fastopen_sk); +- bh_unlock_sock(fastopen_sk); ++ /* in the case of mptcp, on failure, the master subflow ++ * socket (==fastopen_sk) will already have been unlocked ++ * by the failed call to inet_csk_reqsk_queue_add ++ */ ++ bh_unlock_sock(meta_sk); + if (meta_sk != fastopen_sk) -+ bh_unlock_sock(meta_sk); ++ sock_put(meta_sk); sock_put(fastopen_sk); goto drop_and_free; } sk->sk_data_ready(sk); bh_unlock_sock(fastopen_sk); -+ if (meta_sk != fastopen_sk) ++ if (meta_sk != fastopen_sk) { + bh_unlock_sock(meta_sk); ++ sock_put(meta_sk); ++ } sock_put(fastopen_sk); } else { tcp_rsk(req)->tfo_listener = false; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c -index 72fe93ace7d7..6e8b769189f1 100644 +index b95e1a3487c8..c6f13e32fdd0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -62,6 +62,8 @@ @@ -5394,6 +5459,15 @@ index 72fe93ace7d7..6e8b769189f1 100644 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ fastopen = rcu_dereference(tp->fastopen_rsk); snd_una = fastopen ? 
tcp_rsk(fastopen)->snt_isn : tp->snd_una; +@@ -491,7 +500,7 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) + + switch (type) { + case ICMP_REDIRECT: +- if (!sock_owned_by_user(sk)) ++ if (!sock_owned_by_user(meta_sk)) + do_redirect(icmp_skb, sk); + goto out; + case ICMP_SOURCE_QUENCH: @@ -513,11 +522,13 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) goto out; @@ -5800,7 +5874,7 @@ index 72fe93ace7d7..6e8b769189f1 100644 EXPORT_SYMBOL(tcp_prot); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c -index 9b038cb0a43d..84db337f5282 100644 +index 324f43fadb37..9bcd0b8aa0af 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -19,11 +19,13 @@ @@ -6065,7 +6139,7 @@ index 9b038cb0a43d..84db337f5282 100644 return ret; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c -index 739fc69cdcc6..a4fa05e5562d 100644 +index c37028af0db0..08c79424fbc3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -37,6 +37,12 @@ @@ -6368,7 +6442,20 @@ index 739fc69cdcc6..a4fa05e5562d 100644 { struct tcp_sock *tp = tcp_sk(sk); -@@ -1401,7 +1437,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, +@@ -1355,6 +1391,12 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, + TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; + ++#ifdef CONFIG_MPTCP ++ memcpy(TCP_SKB_CB(buff)->dss, TCP_SKB_CB(skb)->dss, ++ sizeof(TCP_SKB_CB(skb)->dss)); ++ TCP_SKB_CB(buff)->mptcp_flags = TCP_SKB_CB(skb)->mptcp_flags; ++#endif ++ + /* PSH and FIN should only be set in the second packet. */ + flags = TCP_SKB_CB(skb)->tcp_flags; + TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); +@@ -1401,7 +1443,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, /* This is similar to __pskb_pull_tail(). The difference is that pulled * data is not copied, but immediately discarded. */ @@ -6377,7 +6464,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 { struct skb_shared_info *shinfo; int i, k, eat; -@@ -1624,6 +1660,7 @@ unsigned int tcp_current_mss(struct sock *sk) +@@ -1625,6 +1667,7 @@ unsigned int tcp_current_mss(struct sock *sk) return mss_now; } @@ -6385,7 +6472,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 /* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. 
* As additional protections, we do not touch cwnd in retransmission phases, -@@ -1683,8 +1720,11 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) +@@ -1684,8 +1727,11 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) * 2) not cwnd limited (this else condition) * 3) no more data to send (tcp_write_queue_empty()) * 4) application is hitting buffer limit (SOCK_NOSPACE) @@ -6397,7 +6484,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) && (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED); -@@ -1706,8 +1746,8 @@ static bool tcp_minshall_check(const struct tcp_sock *tp) +@@ -1707,8 +1753,8 @@ static bool tcp_minshall_check(const struct tcp_sock *tp) * But we can avoid doing the divide again given we already have * skb_pcount = skb->len / mss_now */ @@ -6408,7 +6495,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 { if (skb->len < tcp_skb_pcount(skb) * mss_now) tp->snd_sml = TCP_SKB_CB(skb)->end_seq; -@@ -1753,7 +1793,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, +@@ -1754,7 +1800,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, /* Return the number of segments we want in the skb we are transmitting. * See if congestion control module wants to decide; otherwise, autosize. */ @@ -6417,7 +6504,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; u32 min_tso, tso_segs; -@@ -1767,11 +1807,11 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) +@@ -1768,11 +1814,11 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) } /* Returns the portion of skb which can be sent right away */ @@ -6434,7 +6521,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 { const struct tcp_sock *tp = tcp_sk(sk); u32 partial, needed, window, max_len; -@@ -1801,13 +1841,14 @@ static unsigned int tcp_mss_split_point(const struct sock *sk, +@@ -1802,13 +1848,14 @@ static unsigned int tcp_mss_split_point(const struct sock *sk, /* Can at least one segment of SKB be sent right now, according to the * congestion window rules? If so, return how many segments are allowed. */ @@ -6452,7 +6539,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 tcp_skb_pcount(skb) == 1) return 1; -@@ -1822,12 +1863,13 @@ static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp, +@@ -1823,12 +1870,13 @@ static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp, halfcwnd = max(cwnd >> 1, 1U); return min(halfcwnd, cwnd - in_flight); } @@ -6467,7 +6554,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 { int tso_segs = tcp_skb_pcount(skb); -@@ -1842,8 +1884,8 @@ static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now) +@@ -1843,8 +1891,8 @@ static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now) /* Return true if the Nagle test allows this packet to be * sent now. */ @@ -6478,7 +6565,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 { /* Nagle rule does not apply to frames, which sit in the middle of the * write_queue (they have no chances to get new data). -@@ -1855,7 +1897,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf +@@ -1856,7 +1904,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf return true; /* Don't use the nagle rule for urgent data (or for the final FIN). 
*/ @@ -6488,7 +6575,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 return true; if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle)) -@@ -1865,9 +1908,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf +@@ -1866,9 +1915,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf } /* Does at least the first segment of SKB fit into the send window? */ @@ -6500,7 +6587,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 { u32 end_seq = TCP_SKB_CB(skb)->end_seq; -@@ -1876,6 +1918,7 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp, +@@ -1877,6 +1925,7 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp, return !after(end_seq, tcp_wnd_end(tp)); } @@ -6508,7 +6595,20 @@ index 739fc69cdcc6..a4fa05e5562d 100644 /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet * which is put after SKB on the list. It is very much like -@@ -2034,7 +2077,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, +@@ -1912,6 +1961,12 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, + TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; + TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; + ++#ifdef CONFIG_MPTCP ++ memcpy(TCP_SKB_CB(buff)->dss, TCP_SKB_CB(skb)->dss, ++ sizeof(TCP_SKB_CB(skb)->dss)); ++ TCP_SKB_CB(buff)->mptcp_flags = TCP_SKB_CB(skb)->mptcp_flags; ++#endif ++ + /* PSH and FIN should only be set in the second packet. */ + flags = TCP_SKB_CB(skb)->tcp_flags; + TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); +@@ -2035,7 +2090,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, /* If this packet won't get more data, do not wait. */ if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) || @@ -6518,7 +6618,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 goto send_now; return true; -@@ -2367,7 +2411,7 @@ void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type) +@@ -2368,7 +2424,7 @@ void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type) * Returns true, if no segments are in flight and we have queued segments, * but cannot send anything now because of SWS or another problem. */ @@ -6527,7 +6627,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 int push_one, gfp_t gfp) { struct tcp_sock *tp = tcp_sk(sk); -@@ -2381,7 +2425,12 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, +@@ -2382,7 +2438,12 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, sent_pkts = 0; tcp_mstamp_refresh(tp); @@ -6541,7 +6641,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 /* Do MTU probing. 
*/ result = tcp_mtu_probe(sk); if (!result) { -@@ -2577,7 +2626,7 @@ void tcp_send_loss_probe(struct sock *sk) +@@ -2578,7 +2639,7 @@ void tcp_send_loss_probe(struct sock *sk) skb = tcp_send_head(sk); if (skb && tcp_snd_wnd_test(tp, skb, mss)) { pcount = tp->packets_out; @@ -6550,7 +6650,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 if (tp->packets_out > pcount) goto probe_sent; goto rearm_timer; -@@ -2639,8 +2688,8 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, +@@ -2640,8 +2701,8 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, if (unlikely(sk->sk_state == TCP_CLOSE)) return; @@ -6561,7 +6661,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 tcp_check_probe_timer(sk); } -@@ -2653,7 +2702,8 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) +@@ -2654,7 +2715,8 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) BUG_ON(!skb || skb->len < mss_now); @@ -6571,7 +6671,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 } /* This function returns the amount that we can raise the -@@ -2875,6 +2925,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, +@@ -2876,6 +2938,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) return; @@ -6582,7 +6682,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 skb_rbtree_walk_from_safe(skb, tmp) { if (!tcp_can_collapse(sk, skb)) break; -@@ -3356,7 +3410,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, +@@ -3367,7 +3433,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ th->window = htons(min(req->rsk_rcv_wnd, 65535U)); @@ -6591,7 +6691,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 th->doff = (tcp_header_size >> 2); __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); -@@ -3438,13 +3492,13 @@ static void tcp_connect_init(struct sock *sk) +@@ -3449,13 +3515,13 @@ static void tcp_connect_init(struct sock *sk) if (rcv_wnd == 0) rcv_wnd = dst_metric(dst, RTAX_INITRWND); @@ -6606,13 +6706,13 @@ index 739fc69cdcc6..a4fa05e5562d 100644 + tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), + &tp->rcv_wnd, + &tp->window_clamp, -+ sock_net(sk)->ipv4.sysctl_tcp_window_scaling, ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling), + &rcv_wscale, + rcv_wnd); tp->rx_opt.rcv_wscale = rcv_wscale; tp->rcv_ssthresh = tp->rcv_wnd; -@@ -3464,11 +3518,43 @@ static void tcp_connect_init(struct sock *sk) +@@ -3475,11 +3541,43 @@ static void tcp_connect_init(struct sock *sk) else tp->rcv_tstamp = tcp_jiffies32; tp->rcv_wup = tp->rcv_nxt; @@ -6656,7 +6756,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 } static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) -@@ -3735,6 +3821,7 @@ void tcp_send_ack(struct sock *sk) +@@ -3746,6 +3844,7 @@ void tcp_send_ack(struct sock *sk) { __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt); } @@ -6664,7 +6764,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 /* This routine sends a packet with an out of date sequence * number. It assumes the other end will try to ack it. -@@ -3747,7 +3834,7 @@ void tcp_send_ack(struct sock *sk) +@@ -3758,7 +3857,7 @@ void tcp_send_ack(struct sock *sk) * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is * out-of-date with SND.UNA-1 to probe window. 
*/ @@ -6673,7 +6773,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; -@@ -3834,7 +3921,7 @@ void tcp_send_probe0(struct sock *sk) +@@ -3845,7 +3944,7 @@ void tcp_send_probe0(struct sock *sk) unsigned long timeout; int err; @@ -6683,7 +6783,7 @@ index 739fc69cdcc6..a4fa05e5562d 100644 if (tp->packets_out || tcp_write_queue_empty(sk)) { /* Cancel probe timer, if it is not required. */ diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c -index fa2ae96ecdc4..d2b3e30b8788 100644 +index a0107eb02ae4..7017ee200aec 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -21,6 +21,7 @@ @@ -6712,7 +6812,21 @@ index fa2ae96ecdc4..d2b3e30b8788 100644 tcp_done(sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); return 1; -@@ -206,9 +207,9 @@ static unsigned int tcp_model_timeout(struct sock *sk, +@@ -149,6 +150,13 @@ static int tcp_orphan_retries(struct sock *sk, bool alive) + if (sk->sk_err_soft && !alive) + retries = 0; + ++ /* If the app called close() and we don't have any subflows left, ++ * be aggressive at killing the connection. Otherwise we will linger ++ * around for a very long time. ++ */ ++ if (is_meta_sk(sk) && hlist_empty(&tcp_sk(sk)->mpcb->conn_list)) ++ retries = 1; ++ + /* However, if socket sent something recently, select some safe + * number of retries. 8 corresponds to >100 seconds with minimal + * RTO of 200msec. */ +@@ -206,9 +214,9 @@ static unsigned int tcp_model_timeout(struct sock *sk, * after "boundary" unsuccessful, exponentially backed-off * retransmissions with an initial RTO of TCP_RTO_MIN. */ @@ -6725,7 +6839,7 @@ index fa2ae96ecdc4..d2b3e30b8788 100644 { unsigned int start_ts; -@@ -228,7 +229,7 @@ static bool retransmits_timed_out(struct sock *sk, +@@ -228,7 +236,7 @@ static bool retransmits_timed_out(struct sock *sk, } /* A write timeout has occurred. Process the after effects. */ @@ -6734,7 +6848,7 @@ index fa2ae96ecdc4..d2b3e30b8788 100644 { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); -@@ -243,6 +244,17 @@ static int tcp_write_timeout(struct sock *sk) +@@ -243,6 +251,17 @@ static int tcp_write_timeout(struct sock *sk) sk_rethink_txhash(sk); } retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; @@ -6751,8 +6865,8 @@ index fa2ae96ecdc4..d2b3e30b8788 100644 + expired = icsk->icsk_retransmits >= retry_until; } else { - if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) { -@@ -338,18 +350,22 @@ static void tcp_delack_timer(struct timer_list *t) + if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) { +@@ -338,18 +357,22 @@ static void tcp_delack_timer(struct timer_list *t) struct inet_connection_sock *icsk = from_timer(icsk, t, icsk_delack_timer); struct sock *sk = &icsk->icsk_inet.sk; @@ -6779,7 +6893,7 @@ index fa2ae96ecdc4..d2b3e30b8788 100644 sock_put(sk); } -@@ -393,7 +409,12 @@ static void tcp_probe_timer(struct sock *sk) +@@ -393,7 +416,12 @@ static void tcp_probe_timer(struct sock *sk) } if (icsk->icsk_probes_out >= max_probes) { @@ -6793,7 +6907,7 @@ index fa2ae96ecdc4..d2b3e30b8788 100644 } else { /* Only send another probe if we didn't close things up. 
*/ tcp_send_probe0(sk); -@@ -614,7 +635,7 @@ void tcp_write_timer_handler(struct sock *sk) +@@ -614,7 +642,7 @@ void tcp_write_timer_handler(struct sock *sk) break; case ICSK_TIME_RETRANS: icsk->icsk_pending = 0; @@ -6802,7 +6916,7 @@ index fa2ae96ecdc4..d2b3e30b8788 100644 break; case ICSK_TIME_PROBE0: icsk->icsk_pending = 0; -@@ -631,16 +652,19 @@ static void tcp_write_timer(struct timer_list *t) +@@ -631,16 +659,19 @@ static void tcp_write_timer(struct timer_list *t) struct inet_connection_sock *icsk = from_timer(icsk, t, icsk_retransmit_timer); struct sock *sk = &icsk->icsk_inet.sk; @@ -6825,7 +6939,7 @@ index fa2ae96ecdc4..d2b3e30b8788 100644 sock_put(sk); } -@@ -670,11 +694,12 @@ static void tcp_keepalive_timer (struct timer_list *t) +@@ -670,11 +701,12 @@ static void tcp_keepalive_timer (struct timer_list *t) struct sock *sk = from_timer(sk, t, sk_timer); struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -6840,7 +6954,7 @@ index fa2ae96ecdc4..d2b3e30b8788 100644 /* Try again later. */ inet_csk_reset_keepalive_timer (sk, HZ/20); goto out; -@@ -686,16 +711,31 @@ static void tcp_keepalive_timer (struct timer_list *t) +@@ -686,16 +718,31 @@ static void tcp_keepalive_timer (struct timer_list *t) } tcp_mstamp_refresh(tp); @@ -6874,7 +6988,7 @@ index fa2ae96ecdc4..d2b3e30b8788 100644 goto death; } -@@ -720,11 +760,11 @@ static void tcp_keepalive_timer (struct timer_list *t) +@@ -720,11 +767,11 @@ static void tcp_keepalive_timer (struct timer_list *t) icsk->icsk_probes_out > 0) || (icsk->icsk_user_timeout == 0 && icsk->icsk_probes_out >= keepalive_probes(tp))) { @@ -6888,7 +7002,7 @@ index fa2ae96ecdc4..d2b3e30b8788 100644 icsk->icsk_probes_out++; elapsed = keepalive_intvl_when(tp); } else { -@@ -748,7 +788,7 @@ static void tcp_keepalive_timer (struct timer_list *t) +@@ -748,7 +795,7 @@ static void tcp_keepalive_timer (struct timer_list *t) tcp_done(sk); out: @@ -6897,8 +7011,30 @@ index fa2ae96ecdc4..d2b3e30b8788 100644 sock_put(sk); } +@@ -756,9 +803,10 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer) + { + struct tcp_sock *tp = container_of(timer, struct tcp_sock, compressed_ack_timer); + struct sock *sk = (struct sock *)tp; ++ struct sock *meta_sk = mptcp(tp) ? 
mptcp_meta_sk(sk) : sk; + +- bh_lock_sock(sk); +- if (!sock_owned_by_user(sk)) { ++ bh_lock_sock(meta_sk); ++ if (!sock_owned_by_user(meta_sk)) { + if (tp->compressed_ack > TCP_FASTRETRANS_THRESH) + tcp_send_ack(sk); + } else { +@@ -766,7 +814,7 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer) + &sk->sk_tsq_flags)) + sock_hold(sk); + } +- bh_unlock_sock(sk); ++ bh_unlock_sock(meta_sk); + + sock_put(sk); + diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c -index e29553e4f4ee..a4882b96f59a 100644 +index a0123760fb2c..587cbcfe10e5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -978,6 +978,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) @@ -6910,7 +7046,7 @@ index e29553e4f4ee..a4882b96f59a 100644 static void ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c -index 942da168f18f..e36520f9dcd5 100644 +index 56f396ecc26b..768a8717dc10 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -104,8 +104,7 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) @@ -6969,7 +7105,7 @@ index 5352c7e68c42..534a9d2e4858 100644 break; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c -index 37ab254f7b92..c0b637adf337 100644 +index 7e5550546594..248255d2883a 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -15,6 +15,8 @@ @@ -6999,7 +7135,7 @@ index 37ab254f7b92..c0b637adf337 100644 struct inet_request_sock *ireq; struct tcp_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); -@@ -157,7 +161,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) +@@ -158,7 +162,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); @@ -7009,7 +7145,7 @@ index 37ab254f7b92..c0b637adf337 100644 if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { tsoff = secure_tcpv6_ts_off(sock_net(sk), -@@ -170,15 +175,28 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) +@@ -171,15 +176,28 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; @@ -7039,7 +7175,7 @@ index 37ab254f7b92..c0b637adf337 100644 if (security_inet_conn_request(sk, skb, req)) goto out_free; -@@ -248,15 +266,15 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) +@@ -249,15 +267,15 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) req->rsk_window_clamp = full_space; @@ -7061,7 +7197,7 @@ index 37ab254f7b92..c0b637adf337 100644 return ret; out_free: diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c -index 063898cae3e5..78d91dfc3f06 100644 +index 063898cae3e5..7f3804814657 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -58,6 +58,8 @@ @@ -7187,6 +7323,15 @@ index 063898cae3e5..78d91dfc3f06 100644 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ fastopen = rcu_dereference(tp->fastopen_rsk); snd_una = fastopen ? 
tcp_rsk(fastopen)->snt_isn : tp->snd_una; +@@ -427,7 +437,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + np = tcp_inet6_sk(sk); + + if (type == NDISC_REDIRECT) { +- if (!sock_owned_by_user(sk)) { ++ if (!sock_owned_by_user(meta_sk)) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + + if (dst) @@ -454,11 +464,15 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, WRITE_ONCE(tp->mtu_info, mtu); @@ -7657,10 +7802,10 @@ index 063898cae3e5..78d91dfc3f06 100644 /* thinking of making this const? Don't. diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig new file mode 100644 -index 000000000000..1a7e9bbc766d +index 000000000000..c3d2053a61b2 --- /dev/null +++ b/net/mptcp/Kconfig -@@ -0,0 +1,155 @@ +@@ -0,0 +1,162 @@ +# +# MPTCP configuration +# @@ -7816,6 +7961,13 @@ index 000000000000..1a7e9bbc766d + default "redundant" if DEFAULT_REDUNDANT + default "default" + ++config MPTCP_DEBUG_LOCK ++ bool "MPTCP lock debugging" ++ default n ++ depends on MPTCP ++ ---help--- ++ Check that MPTCP meta locks are properly held when locking subflows. ++ This is a debug option that impacts performances. Say N if unsure. diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile new file mode 100644 index 000000000000..369248a2f68e @@ -9373,10 +9525,10 @@ index 000000000000..9eb7628053f6 +MODULE_VERSION("0.1"); diff --git a/net/mptcp/mptcp_ctrl.c b/net/mptcp/mptcp_ctrl.c new file mode 100644 -index 000000000000..9a1b5a048b70 +index 000000000000..0e8dc7f2aac4 --- /dev/null +++ b/net/mptcp/mptcp_ctrl.c -@@ -0,0 +1,3302 @@ +@@ -0,0 +1,3307 @@ +/* + * MPTCP implementation - MPTCP-control + * @@ -10611,7 +10763,6 @@ index 000000000000..9a1b5a048b70 + /* The below has to be done to allow calling inet_csk_destroy_sock */ + sock_set_flag(sk, SOCK_DEAD); + percpu_counter_inc(sk->sk_prot->orphan_count); -+ /* inet_sk(sk)->inet_num = 0; */ + + tcp_done(sk); + @@ -11152,6 +11303,7 @@ index 000000000000..9a1b5a048b70 + /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ + tcp_init_nondata_skb(skb, tp->write_seq, + TCPHDR_ACK | TCPHDR_FIN); ++ sk_forced_mem_schedule(sk, skb->truesize); + tcp_queue_skb(sk, skb); + } + __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); @@ -11495,9 +11647,6 @@ index 000000000000..9a1b5a048b70 + mptcp_for_each_sub_safe(meta_tp->mpcb, mptcp, tmp) { + struct sock *subsk = mptcp_to_sock(mptcp); + -+ if (spin_is_locked(&subsk->sk_lock.slock)) -+ bh_unlock_sock(subsk); -+ + tcp_sk(subsk)->tcp_disconnect = 1; + + meta_sk->sk_prot->disconnect(subsk, O_NONBLOCK); @@ -11648,9 +11797,6 @@ index 000000000000..9a1b5a048b70 + */ + mptcp_reqsk_remove_tk(req); + -+ /* Hold when creating the meta-sk in tcp_vX_syn_recv_sock. */ -+ sock_put(meta_sk); -+ + return 0; +} + @@ -11727,9 +11873,7 @@ index 000000000000..9a1b5a048b70 + reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); + if (!inet_csk_reqsk_queue_add(sk, req, meta_sk)) { + bh_unlock_sock(meta_sk); -+ /* No sock_put() of the meta needed. The reference has -+ * already been dropped in __mptcp_check_req_master(). -+ */ ++ sock_put(meta_sk); + sock_put(child); + return -1; + } @@ -11739,14 +11883,14 @@ index 000000000000..9a1b5a048b70 + tcp_sk(meta_sk)->tsoffset = tsoff; + if (!inet_csk_reqsk_queue_add(sk, req, meta_sk)) { + bh_unlock_sock(meta_sk); -+ /* No sock_put() of the meta needed. The reference has -+ * already been dropped in __mptcp_check_req_master(). 
-+ */ ++ sock_put(meta_sk); + sock_put(child); + return -1; + } + } + ++ sock_put(meta_sk); ++ + return 0; +} + @@ -11799,6 +11943,7 @@ index 000000000000..9a1b5a048b70 + * some of the fields + */ + child_tp->mptcp->rcv_low_prio = mtreq->rcv_low_prio; ++ child_tp->mptcp->low_prio = mtreq->low_prio; + + /* We should allow proper increase of the snd/rcv-buffers. Thus, we + * use the original values instead of the bloated up ones from the @@ -12679,6 +12824,18 @@ index 000000000000..9a1b5a048b70 +mptcp_sock_cache_failed: + mptcp_init_failed = true; +} ++ ++#ifdef CONFIG_MPTCP_DEBUG_LOCK ++void mptcp_check_lock(struct sock *sk) ++{ ++ struct tcp_sock *tp = tcp_sk(sk); ++ /* make sure the meta lock is held when we hold the sublock */ ++ if (mptcp(tp) && !is_meta_tp(tp) && tp->meta_sk) ++ WARN_ON(!spin_is_locked(&tp->meta_sk->sk_lock.slock) && ++ !sock_owned_by_user(tp->meta_sk)); ++} ++EXPORT_SYMBOL(mptcp_check_lock); ++#endif diff --git a/net/mptcp/mptcp_ecf.c b/net/mptcp/mptcp_ecf.c new file mode 100644 index 000000000000..6b976b2b0c72 @@ -14824,10 +14981,10 @@ index 000000000000..65e2cd9bf630 +MODULE_VERSION("0.88"); diff --git a/net/mptcp/mptcp_input.c b/net/mptcp/mptcp_input.c new file mode 100644 -index 000000000000..7ce97409e1e2 +index 000000000000..01a81e3f7690 --- /dev/null +++ b/net/mptcp/mptcp_input.c -@@ -0,0 +1,2630 @@ +@@ -0,0 +1,2632 @@ +/* + * MPTCP implementation - Sending side + * @@ -15853,6 +16010,7 @@ index 000000000000..7ce97409e1e2 + tp->copied_seq = TCP_SKB_CB(tmp1)->end_seq; + mptcp_prepare_skb(tmp1, sk); + __skb_unlink(tmp1, &sk->sk_receive_queue); ++ sk_forced_mem_schedule(meta_sk, tmp1->truesize); + /* MUST be done here, because fragstolen may be true later. + * Then, kfree_skb_partial will not account the memory. + */ @@ -15884,6 +16042,7 @@ index 000000000000..7ce97409e1e2 + tp->copied_seq = TCP_SKB_CB(tmp1)->end_seq; + mptcp_prepare_skb(tmp1, sk); + __skb_unlink(tmp1, &sk->sk_receive_queue); ++ sk_forced_mem_schedule(meta_sk, tmp1->truesize); + /* MUST be done here, because fragstolen may be true. + * Then, kfree_skb_partial will not account the memory. 
+ */ @@ -17460,7 +17619,7 @@ index 000000000000..7ce97409e1e2 +} diff --git a/net/mptcp/mptcp_ipv4.c b/net/mptcp/mptcp_ipv4.c new file mode 100644 -index 000000000000..c908e02c72e1 +index 000000000000..fbcf47c46783 --- /dev/null +++ b/net/mptcp/mptcp_ipv4.c @@ -0,0 +1,433 @@ @@ -17637,14 +17796,14 @@ index 000000000000..c908e02c72e1 + if (!sk) + goto new_subflow; + -+ if (is_meta_sk(sk)) { -+ WARN("%s Did not find a sub-sk - did found the meta!\n", __func__); -+ sock_put(sk); ++ if (sk->sk_state == TCP_TIME_WAIT) { ++ inet_twsk_put(inet_twsk(sk)); + goto discard; + } + -+ if (sk->sk_state == TCP_TIME_WAIT) { -+ inet_twsk_put(inet_twsk(sk)); ++ if (is_meta_sk(sk)) { ++ WARN("%s Did not find a sub-sk - did found the meta!\n", __func__); ++ sock_put(sk); + goto discard; + } + @@ -17899,7 +18058,7 @@ index 000000000000..c908e02c72e1 +} diff --git a/net/mptcp/mptcp_ipv6.c b/net/mptcp/mptcp_ipv6.c new file mode 100644 -index 000000000000..ebe3f5f97460 +index 000000000000..915dd7892037 --- /dev/null +++ b/net/mptcp/mptcp_ipv6.c @@ -0,0 +1,482 @@ @@ -18105,14 +18264,14 @@ index 000000000000..ebe3f5f97460 + if (!sk) + goto new_subflow; + -+ if (is_meta_sk(sk)) { -+ WARN("%s Did not find a sub-sk - did found the meta!\n", __func__); -+ sock_put(sk); ++ if (sk->sk_state == TCP_TIME_WAIT) { ++ inet_twsk_put(inet_twsk(sk)); + goto discard; + } + -+ if (sk->sk_state == TCP_TIME_WAIT) { -+ inet_twsk_put(inet_twsk(sk)); ++ if (is_meta_sk(sk)) { ++ WARN("%s Did not find a sub-sk - did found the meta!\n", __func__); ++ sock_put(sk); + goto discard; + } + @@ -20167,10 +20326,10 @@ index 000000000000..161a63f336d7 +MODULE_VERSION("0.1"); diff --git a/net/mptcp/mptcp_output.c b/net/mptcp/mptcp_output.c new file mode 100644 -index 000000000000..a8a5787adbf1 +index 000000000000..c26e14a702fd --- /dev/null +++ b/net/mptcp/mptcp_output.c -@@ -0,0 +1,2015 @@ +@@ -0,0 +1,2038 @@ +/* + * MPTCP implementation - Sending side + * @@ -20466,6 +20625,7 @@ index 000000000000..a8a5787adbf1 +void mptcp_reinject_data(struct sock *sk, int clone_it) +{ + struct sock *meta_sk = mptcp_meta_sk(sk); ++ struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb_it, *tmp; + enum tcp_queue tcp_queue; + @@ -20493,6 +20653,10 @@ index 000000000000..a8a5787adbf1 + TCP_FRAG_IN_WRITE_QUEUE); + } + ++ /* We are emptying the rtx-queue. highest_sack is invalid */ ++ if (!clone_it) ++ tp->highest_sack = NULL; ++ + skb_it = tcp_rtx_queue_head(sk); + skb_rbtree_walk_from_safe(skb_it, tmp) { + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb_it); @@ -20525,11 +20689,11 @@ index 000000000000..a8a5787adbf1 + + /* If sk has sent the empty data-fin, we have to reinject it too. */ + if (skb_it && mptcp_is_data_fin(skb_it) && skb_it->len == 0 && -+ TCP_SKB_CB(skb_it)->path_mask & mptcp_pi_to_flag(tcp_sk(sk)->mptcp->path_index)) { ++ TCP_SKB_CB(skb_it)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index)) { + __mptcp_reinject_data(skb_it, meta_sk, NULL, 1, tcp_queue); + } + -+ tcp_sk(sk)->pf = 1; ++ tp->pf = 1; + + mptcp_push_pending_frames(meta_sk); +} @@ -20727,9 +20891,20 @@ index 000000000000..a8a5787adbf1 + struct tcp_skb_cb *tcb; + struct sk_buff *subskb = NULL; + -+ if (!reinject) ++ if (reinject) { ++ /* Make sure to update counters and MIB in case of meta-retrans ++ * AKA reinjections, similar to what is done in ++ * __tcp_retransmit_skb(). 
++ */ ++ int segs = tcp_skb_pcount(skb); ++ ++ MPTCP_ADD_STATS(sock_net(meta_sk), MPTCP_MIB_RETRANSSEGS, segs); ++ tcp_sk(meta_sk)->total_retrans += segs; ++ tcp_sk(meta_sk)->bytes_retrans += skb->len; ++ } else { + TCP_SKB_CB(skb)->mptcp_flags |= (mpcb->snd_hiseq_index ? + MPTCPHDR_SEQ64_INDEX : 0); ++ } + + tcp_skb_tsorted_save(skb) { + subskb = pskb_copy_for_clone(skb, GFP_ATOMIC); @@ -20788,6 +20963,7 @@ index 000000000000..a8a5787adbf1 + + tcp_add_write_queue_tail(sk, subskb); + sk->sk_wmem_queued += subskb->truesize; ++ sk_forced_mem_schedule(sk, subskb->truesize); + sk_mem_charge(sk, subskb->truesize); + } else { + /* Necessary to initialize for tcp_transmit_skb. mss of 1, as @@ -21656,6 +21832,7 @@ index 000000000000..a8a5787adbf1 + tcp_init_nondata_skb(skb, meta_tp->write_seq, TCPHDR_ACK); + TCP_SKB_CB(skb)->end_seq++; + TCP_SKB_CB(skb)->mptcp_flags |= MPTCPHDR_FIN; ++ sk_forced_mem_schedule(meta_sk, skb->truesize); + tcp_queue_skb(meta_sk, skb); + } + __tcp_push_pending_frames(meta_sk, mss_now, TCP_NAGLE_OFF); @@ -21825,7 +22002,9 @@ index 000000000000..a8a5787adbf1 + */ + if (refcount_read(&meta_sk->sk_wmem_alloc) > + min(meta_sk->sk_wmem_queued + (meta_sk->sk_wmem_queued >> 2), meta_sk->sk_sndbuf)) { -+ return -EAGAIN; ++ err = -EAGAIN; ++ ++ goto failed; + } + + /* We need to make sure that the retransmitted segment can be sent on a @@ -21872,9 +22051,6 @@ index 000000000000..a8a5787adbf1 + if (!mptcp_skb_entail(subsk, skb, -1)) + goto failed; + -+ /* Update global TCP statistics. */ -+ MPTCP_INC_STATS(sock_net(meta_sk), MPTCP_MIB_RETRANSSEGS); -+ + /* Diff to tcp_retransmit_skb */ + + /* Save stamp of the first retransmit. */ @@ -21891,6 +22067,12 @@ index 000000000000..a8a5787adbf1 + +failed: + NET_INC_STATS(sock_net(meta_sk), LINUX_MIB_TCPRETRANSFAIL); ++ /* Save stamp of the first attempted retransmit. */ ++ if (!meta_tp->retrans_stamp) { ++ tcp_mstamp_refresh(meta_tp); ++ meta_tp->retrans_stamp = tcp_time_stamp(meta_tp); ++ } ++ + return err; +} + @@ -24132,10 +24314,10 @@ index 94358566c9d1..a26eeeda2b4d 100644 * Now to bump the refcnt of the [loadable] module that owns this * socket at sock_release time we decrement its refcnt. diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h -index 0bfad86ec960..ed7013398991 100644 +index cb0631098f91..b9de598828e9 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h -@@ -3438,6 +3438,7 @@ enum { +@@ -3439,6 +3439,7 @@ enum { BPF_TCP_LISTEN, BPF_TCP_CLOSING, /* Now a valid state */ BPF_TCP_NEW_SYN_RECV, @@ -24143,268 +24325,3 @@ index 0bfad86ec960..ed7013398991 100644 BPF_TCP_MAX_STATES /* Leave at the end! 
*/ }; -diff --git a/include/net/mptcp.h b/include/net/mptcp.h -index 630977f67614..f2efa46027d0 100644 ---- a/include/net/mptcp.h -+++ b/include/net/mptcp.h -@@ -732,6 +732,7 @@ static inline struct sock *mptcp_to_sock(const struct mptcp_tcp_sock *mptcp) - - #define MPTCP_INC_STATS(net, field) SNMP_INC_STATS((net)->mptcp.mptcp_statistics, field) - #define MPTCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mptcp.mptcp_statistics, field) -+#define MPTCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mptcp.mptcp_statistics, field, val) - - enum - { -diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c -index d278b28035ad..c0572253c723 100644 ---- a/net/ipv4/tcp_input.c -+++ b/net/ipv4/tcp_input.c -@@ -4603,17 +4603,16 @@ static int tcp_prune_queue(struct sock *sk); - static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, - unsigned int size) - { -- if (mptcp(tcp_sk(sk))) -- sk = mptcp_meta_sk(sk); -+ struct sock *meta_sk = mptcp(tcp_sk(sk)) ? mptcp_meta_sk(sk) : sk; - -- if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || -+ if (atomic_read(&meta_sk->sk_rmem_alloc) > meta_sk->sk_rcvbuf || - !sk_rmem_schedule(sk, skb, size)) { - -- if (tcp_prune_queue(sk) < 0) -+ if (tcp_prune_queue(meta_sk) < 0) - return -1; - - while (!sk_rmem_schedule(sk, skb, size)) { -- if (!tcp_prune_ofo_queue(sk)) -+ if (!tcp_prune_ofo_queue(meta_sk)) - return -1; - } - } -diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c -index a4fa05e5562d..2cb4c4a0ce4e 100644 ---- a/net/ipv4/tcp_output.c -+++ b/net/ipv4/tcp_output.c -@@ -1391,6 +1391,12 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, - TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; - TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; - -+#ifdef CONFIG_MPTCP -+ memcpy(TCP_SKB_CB(buff)->dss, TCP_SKB_CB(skb)->dss, -+ sizeof(TCP_SKB_CB(skb)->dss)); -+ TCP_SKB_CB(buff)->mptcp_flags = TCP_SKB_CB(skb)->mptcp_flags; -+#endif -+ - /* PSH and FIN should only be set in the second packet. */ - flags = TCP_SKB_CB(skb)->tcp_flags; - TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); -@@ -1954,6 +1960,12 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, - TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; - TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; - -+#ifdef CONFIG_MPTCP -+ memcpy(TCP_SKB_CB(buff)->dss, TCP_SKB_CB(skb)->dss, -+ sizeof(TCP_SKB_CB(skb)->dss)); -+ TCP_SKB_CB(buff)->mptcp_flags = TCP_SKB_CB(skb)->mptcp_flags; -+#endif -+ - /* PSH and FIN should only be set in the second packet. */ - flags = TCP_SKB_CB(skb)->tcp_flags; - TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); -diff --git a/net/mptcp/mptcp_ctrl.c b/net/mptcp/mptcp_ctrl.c -index 9a1b5a048b70..e6cac7e4de31 100644 ---- a/net/mptcp/mptcp_ctrl.c -+++ b/net/mptcp/mptcp_ctrl.c -@@ -1773,6 +1773,7 @@ static int mptcp_sub_send_fin(struct sock *sk) - /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ - tcp_init_nondata_skb(skb, tp->write_seq, - TCPHDR_ACK | TCPHDR_FIN); -+ sk_forced_mem_schedule(sk, skb->truesize); - tcp_queue_skb(sk, skb); - } - __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); -@@ -2420,6 +2421,7 @@ struct sock *mptcp_check_req_child(struct sock *meta_sk, - * some of the fields - */ - child_tp->mptcp->rcv_low_prio = mtreq->rcv_low_prio; -+ child_tp->mptcp->low_prio = mtreq->low_prio; - - /* We should allow proper increase of the snd/rcv-buffers. 
Thus, we - * use the original values instead of the bloated up ones from the -diff --git a/net/mptcp/mptcp_input.c b/net/mptcp/mptcp_input.c -index 7ce97409e1e2..01a81e3f7690 100644 ---- a/net/mptcp/mptcp_input.c -+++ b/net/mptcp/mptcp_input.c -@@ -1023,6 +1023,7 @@ static int mptcp_queue_skb(struct sock *sk) - tp->copied_seq = TCP_SKB_CB(tmp1)->end_seq; - mptcp_prepare_skb(tmp1, sk); - __skb_unlink(tmp1, &sk->sk_receive_queue); -+ sk_forced_mem_schedule(meta_sk, tmp1->truesize); - /* MUST be done here, because fragstolen may be true later. - * Then, kfree_skb_partial will not account the memory. - */ -@@ -1054,6 +1055,7 @@ static int mptcp_queue_skb(struct sock *sk) - tp->copied_seq = TCP_SKB_CB(tmp1)->end_seq; - mptcp_prepare_skb(tmp1, sk); - __skb_unlink(tmp1, &sk->sk_receive_queue); -+ sk_forced_mem_schedule(meta_sk, tmp1->truesize); - /* MUST be done here, because fragstolen may be true. - * Then, kfree_skb_partial will not account the memory. - */ -diff --git a/net/mptcp/mptcp_ipv4.c b/net/mptcp/mptcp_ipv4.c -index c908e02c72e1..fbcf47c46783 100644 ---- a/net/mptcp/mptcp_ipv4.c -+++ b/net/mptcp/mptcp_ipv4.c -@@ -171,14 +171,14 @@ int mptcp_v4_do_rcv(struct sock *meta_sk, struct sk_buff *skb) - if (!sk) - goto new_subflow; - -- if (is_meta_sk(sk)) { -- WARN("%s Did not find a sub-sk - did found the meta!\n", __func__); -- sock_put(sk); -+ if (sk->sk_state == TCP_TIME_WAIT) { -+ inet_twsk_put(inet_twsk(sk)); - goto discard; - } - -- if (sk->sk_state == TCP_TIME_WAIT) { -- inet_twsk_put(inet_twsk(sk)); -+ if (is_meta_sk(sk)) { -+ WARN("%s Did not find a sub-sk - did found the meta!\n", __func__); -+ sock_put(sk); - goto discard; - } - -diff --git a/net/mptcp/mptcp_ipv6.c b/net/mptcp/mptcp_ipv6.c -index ebe3f5f97460..915dd7892037 100644 ---- a/net/mptcp/mptcp_ipv6.c -+++ b/net/mptcp/mptcp_ipv6.c -@@ -200,14 +200,14 @@ int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb) - if (!sk) - goto new_subflow; - -- if (is_meta_sk(sk)) { -- WARN("%s Did not find a sub-sk - did found the meta!\n", __func__); -- sock_put(sk); -+ if (sk->sk_state == TCP_TIME_WAIT) { -+ inet_twsk_put(inet_twsk(sk)); - goto discard; - } - -- if (sk->sk_state == TCP_TIME_WAIT) { -- inet_twsk_put(inet_twsk(sk)); -+ if (is_meta_sk(sk)) { -+ WARN("%s Did not find a sub-sk - did found the meta!\n", __func__); -+ sock_put(sk); - goto discard; - } - -diff --git a/net/mptcp/mptcp_output.c b/net/mptcp/mptcp_output.c -index a8a5787adbf1..226084d11961 100644 ---- a/net/mptcp/mptcp_output.c -+++ b/net/mptcp/mptcp_output.c -@@ -293,6 +293,7 @@ static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk - void mptcp_reinject_data(struct sock *sk, int clone_it) - { - struct sock *meta_sk = mptcp_meta_sk(sk); -+ struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb_it, *tmp; - enum tcp_queue tcp_queue; - -@@ -320,6 +321,10 @@ void mptcp_reinject_data(struct sock *sk, int clone_it) - TCP_FRAG_IN_WRITE_QUEUE); - } - -+ /* We are emptying the rtx-queue. highest_sack is invalid */ -+ if (!clone_it) -+ tp->highest_sack = NULL; -+ - skb_it = tcp_rtx_queue_head(sk); - skb_rbtree_walk_from_safe(skb_it, tmp) { - struct tcp_skb_cb *tcb = TCP_SKB_CB(skb_it); -@@ -352,11 +357,11 @@ void mptcp_reinject_data(struct sock *sk, int clone_it) - - /* If sk has sent the empty data-fin, we have to reinject it too. 
*/ - if (skb_it && mptcp_is_data_fin(skb_it) && skb_it->len == 0 && -- TCP_SKB_CB(skb_it)->path_mask & mptcp_pi_to_flag(tcp_sk(sk)->mptcp->path_index)) { -+ TCP_SKB_CB(skb_it)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index)) { - __mptcp_reinject_data(skb_it, meta_sk, NULL, 1, tcp_queue); - } - -- tcp_sk(sk)->pf = 1; -+ tp->pf = 1; - - mptcp_push_pending_frames(meta_sk); - } -@@ -554,9 +559,20 @@ static bool mptcp_skb_entail(struct sock *sk, struct sk_buff *skb, int reinject) - struct tcp_skb_cb *tcb; - struct sk_buff *subskb = NULL; - -- if (!reinject) -+ if (reinject) { -+ /* Make sure to update counters and MIB in case of meta-retrans -+ * AKA reinjections, similar to what is done in -+ * __tcp_retransmit_skb(). -+ */ -+ int segs = tcp_skb_pcount(skb); -+ -+ MPTCP_ADD_STATS(sock_net(meta_sk), MPTCP_MIB_RETRANSSEGS, segs); -+ tcp_sk(meta_sk)->total_retrans += segs; -+ tcp_sk(meta_sk)->bytes_retrans += skb->len; -+ } else { - TCP_SKB_CB(skb)->mptcp_flags |= (mpcb->snd_hiseq_index ? - MPTCPHDR_SEQ64_INDEX : 0); -+ } - - tcp_skb_tsorted_save(skb) { - subskb = pskb_copy_for_clone(skb, GFP_ATOMIC); -@@ -615,6 +631,7 @@ static bool mptcp_skb_entail(struct sock *sk, struct sk_buff *skb, int reinject) - - tcp_add_write_queue_tail(sk, subskb); - sk->sk_wmem_queued += subskb->truesize; -+ sk_forced_mem_schedule(sk, subskb->truesize); - sk_mem_charge(sk, subskb->truesize); - } else { - /* Necessary to initialize for tcp_transmit_skb. mss of 1, as -@@ -1483,6 +1500,7 @@ void mptcp_send_fin(struct sock *meta_sk) - tcp_init_nondata_skb(skb, meta_tp->write_seq, TCPHDR_ACK); - TCP_SKB_CB(skb)->end_seq++; - TCP_SKB_CB(skb)->mptcp_flags |= MPTCPHDR_FIN; -+ sk_forced_mem_schedule(meta_sk, skb->truesize); - tcp_queue_skb(meta_sk, skb); - } - __tcp_push_pending_frames(meta_sk, mss_now, TCP_NAGLE_OFF); -@@ -1652,7 +1670,9 @@ int mptcp_retransmit_skb(struct sock *meta_sk, struct sk_buff *skb) - */ - if (refcount_read(&meta_sk->sk_wmem_alloc) > - min(meta_sk->sk_wmem_queued + (meta_sk->sk_wmem_queued >> 2), meta_sk->sk_sndbuf)) { -- return -EAGAIN; -+ err = -EAGAIN; -+ -+ goto failed; - } - - /* We need to make sure that the retransmitted segment can be sent on a -@@ -1699,9 +1719,6 @@ int mptcp_retransmit_skb(struct sock *meta_sk, struct sk_buff *skb) - if (!mptcp_skb_entail(subsk, skb, -1)) - goto failed; - -- /* Update global TCP statistics. */ -- MPTCP_INC_STATS(sock_net(meta_sk), MPTCP_MIB_RETRANSSEGS); -- - /* Diff to tcp_retransmit_skb */ - - /* Save stamp of the first retransmit. */ -@@ -1718,6 +1735,12 @@ int mptcp_retransmit_skb(struct sock *meta_sk, struct sk_buff *skb) - - failed: - NET_INC_STATS(sock_net(meta_sk), LINUX_MIB_TCPRETRANSFAIL); -+ /* Save stamp of the first attempted retransmit. */ -+ if (!meta_tp->retrans_stamp) { -+ tcp_mstamp_refresh(meta_tp); -+ meta_tp->retrans_stamp = tcp_time_stamp(meta_tp); -+ } -+ - return err; - } -
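
The recurring change across the tcp_timer.c, tcp_ipv4.c and tcp_ipv6.c hunks above is that BH-context handlers (timers, ICMP error handlers, the compressed-ACK hrtimer) now lock and ownership-test the MPTCP meta socket instead of the subflow socket, since user space holds the meta-level lock. A minimal sketch of that pattern follows; the handler name is illustrative only (it is not a function from this patch), and it assumes the mptcp()/mptcp_meta_sk() helpers declared by this patch in include/net/mptcp.h:

static void example_bh_handler(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	/* For an MPTCP subflow, serialization happens at the meta level:
	 * user space owns the meta lock, so defer decisions key off it.
	 */
	struct sock *meta_sk = mptcp(tp) ? mptcp_meta_sk(sk) : sk;

	bh_lock_sock(meta_sk);
	if (!sock_owned_by_user(meta_sk)) {
		/* Nobody owns the meta socket: act on the subflow now. */
		tcp_send_ack(sk);
	} else {
		/* Owner is user space: flag the work so release_sock() on
		 * the meta socket replays it; hold a subflow reference
		 * until then, as the hunks above do.
		 */
		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
				      &sk->sk_tsq_flags))
			sock_hold(sk);
	}
	bh_unlock_sock(meta_sk);
}

This mirrors the tcp_delack_timer()/tcp_compressed_ack_kick() hunks: the lock and the sock_owned_by_user() test both move to meta_sk, while the actual TCP work and the deferred-flag bookkeeping stay on the subflow sk.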