1
0
Fork 0
mirror of https://github.com/Ysurac/openmptcprouter.git synced 2025-02-15 04:42:02 +00:00
openmptcprouter/6.1/target/linux/generic/hack-6.1/999-BBRv2.patch
2023-07-30 22:29:17 +02:00

10575 lines
348 KiB
Diff

From 092758f89a20d9d2f00ce022152ed447ee1b0380 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Tue, 11 Jun 2019 12:26:55 -0400
Subject: [PATCH 01/27] net-tcp_bbr: broaden app-limited rate sample detection
This commit is a bug fix for the Linux TCP app-limited
(application-limited) logic that is used for collecting rate
(bandwidth) samples.
Previously the app-limited logic only looked for "bubbles" of
silence in between application writes, by checking at the start
of each sendmsg. But "bubbles" of silence can also happen before
retransmits: e.g. bubbles can happen between an application write
and a retransmit, or between two retransmits.
Retransmits are triggered by ACKs or timers. So this commit checks
for bubbles of app-limited silence upon ACKs or timers.
Why does this commit check for app-limited state at the start of
ACKs and timer handling? Because at that point we know whether
inflight was fully using the cwnd. During processing the ACK or
timer event we often change the cwnd; after changing the cwnd we
can't know whether inflight was fully using the old cwnd.
Origin-9xx-SHA1: 3fe9b53291e018407780fb8c356adb5666722cbc
Change-Id: I37221506f5166877c2b110753d39bb0757985e68
---
net/ipv4/tcp_input.c | 1 +
net/ipv4/tcp_timer.c | 1 +
2 files changed, 2 insertions(+)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0640453fce54..1a308993c371 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3812,6 +3812,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una;
rs.prior_in_flight = tcp_packets_in_flight(tp);
+ tcp_rate_check_app_limited(sk);
/* ts_recent update must be made after we are sure that the packet
* is in window.
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index cb79127f45c3..70e4de876a7f 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -605,6 +605,7 @@ void tcp_write_timer_handler(struct sock *sk)
return;
}
+ tcp_rate_check_app_limited(sk);
tcp_mstamp_refresh(tcp_sk(sk));
event = icsk->icsk_pending;
--
2.34.1
From 8ad58ff1db4ac78232203dcdc1e28c49263d8b6e Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Sun, 24 Jun 2018 21:55:59 -0400
Subject: [PATCH 02/27] net-tcp_bbr: v2: shrink delivered_mstamp,
first_tx_mstamp to u32 to free up 8 bytes
Free up some space for tracking inflight and losses for each
bw sample, in upcoming commits.
These timestamps are in microseconds, and are now stored in 32
bits. So they can only hold time intervals up to roughly 2^12 = 4096
seconds. But Linux TCP RTT and RTO tracking has the same 32-bit
microsecond implementation approach and resulting deployment
limitations. So this is not introducing a new limit. And these should
not be a limitation for the foreseeable future.
Effort: net-tcp_bbr
Origin-9xx-SHA1: 238a7e6b5d51625fef1ce7769826a7b21b02ae55
Change-Id: I3b779603797263b52a61ad57c565eb91fe42680c
---
include/net/tcp.h | 9 +++++++--
net/ipv4/tcp_rate.c | 7 ++++---
2 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5b70b241ce71..b4c0f5bed5dc 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -823,6 +823,11 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
return max_t(s64, t1 - t0, 0);
}
+static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0)
+{
+ return max_t(s32, t1 - t0, 0);
+}
+
static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
{
return tcp_ns_to_ts(skb->skb_mstamp_ns);
@@ -898,9 +903,9 @@ struct tcp_skb_cb {
/* pkts S/ACKed so far upon tx of skb, incl retrans: */
__u32 delivered;
/* start of send pipeline phase */
- u64 first_tx_mstamp;
+ u32 first_tx_mstamp;
/* when we reached the "delivered" count */
- u64 delivered_mstamp;
+ u32 delivered_mstamp;
} tx; /* only used for outgoing skbs */
union {
struct inet_skb_parm h4;
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index a8f6d9d06f2e..763362159fe9 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -101,8 +101,9 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
/* Record send time of most recently ACKed packet: */
tp->first_tx_mstamp = tx_tstamp;
/* Find the duration of the "send phase" of this window: */
- rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp,
- scb->tx.first_tx_mstamp);
+ rs->interval_us = tcp_stamp32_us_delta(
+ tp->first_tx_mstamp,
+ scb->tx.first_tx_mstamp);
}
/* Mark off the skb delivered once it's sacked to avoid being
@@ -155,7 +156,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
* longer phase.
*/
snd_us = rs->interval_us; /* send phase */
- ack_us = tcp_stamp_us_delta(tp->tcp_mstamp,
+ ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp,
rs->prior_mstamp); /* ack phase */
rs->interval_us = max(snd_us, ack_us);
--
2.34.1
From a9db2da3c0026dddcdf6dab05c5c30d67fbeb32f Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Sat, 5 Aug 2017 11:49:50 -0400
Subject: [PATCH 03/27] net-tcp_bbr: v2: snapshot packets in flight at transmit
time and pass in rate_sample
For understanding the relationship between inflight and losses or ECN
signals, to try to find the highest inflight value that has acceptable
levels of loss/ECN marking.
Effort: net-tcp_bbr
Origin-9xx-SHA1: b3eb4f2d20efab4ca001f32c9294739036c493ea
Change-Id: I7314047d0ff14dd261a04b1969a46dc658c8836a
---
include/net/tcp.h | 5 +++++
net/ipv4/tcp_rate.c | 14 ++++++++++++++
2 files changed, 19 insertions(+)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b4c0f5bed5dc..21709e740fe2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -906,6 +906,10 @@ struct tcp_skb_cb {
u32 first_tx_mstamp;
/* when we reached the "delivered" count */
u32 delivered_mstamp;
+#define TCPCB_IN_FLIGHT_BITS 20
+#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1)
+ u32 in_flight:20, /* packets in flight at transmit */
+ unused2:12;
} tx; /* only used for outgoing skbs */
union {
struct inet_skb_parm h4;
@@ -1053,6 +1057,7 @@ struct rate_sample {
u64 prior_mstamp; /* starting timestamp for interval */
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
+ u32 tx_in_flight; /* packets in flight at starting timestamp */
s32 delivered; /* number of packets delivered over interval */
s32 delivered_ce; /* number of packets delivered w/ CE marks*/
long interval_us; /* time for tp->delivered to incr "delivered" */
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 763362159fe9..782f44b4ee70 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -40,6 +40,7 @@
void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+ u32 in_flight;
/* In general we need to start delivery rate samples from the
* time we received the most recent ACK, to ensure we include
@@ -67,6 +68,18 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
+
+ /* Check, sanitize, and record packets in flight after skb was sent. */
+ in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb);
+ WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX,
+ "insane in_flight %u cc %s mss %u "
+ "cwnd %u pif %u %u %u %u\n",
+ in_flight, inet_csk(sk)->icsk_ca_ops->name,
+ tp->mss_cache, tp->snd_cwnd,
+ tp->packets_out, tp->retrans_out,
+ tp->sacked_out, tp->lost_out);
+ in_flight = min(in_flight, TCPCB_IN_FLIGHT_MAX);
+ TCP_SKB_CB(skb)->tx.in_flight = in_flight;
}
/* When an skb is sacked or acked, we fill in the rate sample with the (prior)
@@ -97,6 +110,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
rs->is_app_limited = scb->tx.is_app_limited;
rs->is_retrans = scb->sacked & TCPCB_RETRANS;
rs->last_end_seq = scb->end_seq;
+ rs->tx_in_flight = scb->tx.in_flight;
/* Record send time of most recently ACKed packet: */
tp->first_tx_mstamp = tx_tstamp;
--
2.34.1
From 5a337e11773eae0a4eeb855751d1af54f78ac370 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Thu, 12 Oct 2017 23:44:27 -0400
Subject: [PATCH 04/27] net-tcp_bbr: v2: count packets lost over TCP rate
sampling interval
For understanding the relationship between inflight and packet loss
signals, to try to find the highest inflight value that has acceptable
levels of packet losses.
Effort: net-tcp_bbr
Origin-9xx-SHA1: 4527e26b2bd7756a88b5b9ef1ada3da33dd609ab
Change-Id: I594c2500868d9c530770e7ddd68ffc87c57f4fd5
---
include/net/tcp.h | 5 ++++-
net/ipv4/tcp_rate.c | 3 +++
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 21709e740fe2..9c5d7575e659 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -910,6 +910,7 @@ struct tcp_skb_cb {
#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1)
u32 in_flight:20, /* packets in flight at transmit */
unused2:12;
+ u32 lost; /* packets lost so far upon tx of skb */
} tx; /* only used for outgoing skbs */
union {
struct inet_skb_parm h4;
@@ -1055,11 +1056,13 @@ struct ack_sample {
*/
struct rate_sample {
u64 prior_mstamp; /* starting timestamp for interval */
+ u32 prior_lost; /* tp->lost at "prior_mstamp" */
u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
u32 tx_in_flight; /* packets in flight at starting timestamp */
+ s32 lost; /* number of packets lost over interval */
s32 delivered; /* number of packets delivered over interval */
- s32 delivered_ce; /* number of packets delivered w/ CE marks*/
+ s32 delivered_ce; /* packets delivered w/ CE mark over interval */
long interval_us; /* time for tp->delivered to incr "delivered" */
u32 snd_interval_us; /* snd interval for delivered packets */
u32 rcv_interval_us; /* rcv interval for delivered packets */
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 782f44b4ee70..e985026fc037 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -67,6 +67,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp;
TCP_SKB_CB(skb)->tx.delivered = tp->delivered;
TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
+ TCP_SKB_CB(skb)->tx.lost = tp->lost;
TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
/* Check, sanitize, and record packets in flight after skb was sent. */
@@ -104,6 +105,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
if (!rs->prior_delivered ||
tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
scb->end_seq, rs->last_end_seq)) {
+ rs->prior_lost = scb->tx.lost;
rs->prior_delivered_ce = scb->tx.delivered_ce;
rs->prior_delivered = scb->tx.delivered;
rs->prior_mstamp = scb->tx.delivered_mstamp;
@@ -159,6 +161,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
return;
}
rs->delivered = tp->delivered - rs->prior_delivered;
+ rs->lost = tp->lost - rs->prior_lost;
rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
/* delivered_ce occupies less than 32 bits in the skb control block */
--
2.34.1
From 48c6bc09a6028e11a74c694dd993b080a09abf75 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Mon, 19 Nov 2018 13:48:36 -0500
Subject: [PATCH 05/27] net-tcp_bbr: v2: export FLAG_ECE in rate_sample.is_ece
For understanding the relationship between inflight and ECN signals,
to try to find the highest inflight value that has acceptable levels
of ECN marking.
Effort: net-tcp_bbr
Origin-9xx-SHA1: 3eba998f2898541406c2666781182200934965a8
Change-Id: I3a964e04cee83e11649a54507043d2dfe769a3b3
---
include/net/tcp.h | 1 +
net/ipv4/tcp_input.c | 1 +
2 files changed, 2 insertions(+)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9c5d7575e659..7072b785839f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1074,6 +1074,7 @@ struct rate_sample {
bool is_app_limited; /* is sample from packet with bubble in pipe? */
bool is_retrans; /* is sample from retransmission? */
bool is_ack_delayed; /* is this (likely) a delayed ACK? */
+ bool is_ece; /* did this ACK have ECN marked? */
};
struct tcp_congestion_ops {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1a308993c371..b089111bbe62 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3911,6 +3911,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
delivered = tcp_newly_delivered(sk, delivered, flag);
lost = tp->lost - lost; /* freshly marked lost */
rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
+ rs.is_ece = !!(flag & FLAG_ECE);
tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
tcp_xmit_recovery(sk, rexmit);
--
2.34.1
From 721e70a3b75b30b517bb27d84f227322ac8a4e62 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Tue, 7 Aug 2018 21:52:06 -0400
Subject: [PATCH 06/27] net-tcp_bbr: v2: introduce ca_ops->skb_marked_lost() CC
module callback API
For connections experiencing reordering, RACK can mark packets lost
long after we receive the SACKs/ACKs hinting that the packets were
actually lost.
This means that CC modules cannot easily learn the volume of inflight
data at which packet loss happens by looking at the current inflight
or even the packets in flight when the most recently SACKed packet was
sent. To learn this, CC modules need to know how many packets were in
flight at the time lost packets were sent. This new callback, combined
with TCP_SKB_CB(skb)->tx.in_flight, allows them to learn this.
This also provides a consistent callback that is invoked whether
packets are marked lost upon ACK processing, using the RACK reordering
timer, or at RTO time.
Effort: net-tcp_bbr
Origin-9xx-SHA1: afcbebe3374e4632ac6714d39e4dc8a8455956f4
Change-Id: I54826ab53df636be537e5d3c618a46145d12d51a
---
include/net/tcp.h | 3 +++
net/ipv4/tcp_input.c | 5 +++++
2 files changed, 8 insertions(+)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 7072b785839f..49c925270337 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1101,6 +1101,9 @@ struct tcp_congestion_ops {
/* override sysctl_tcp_min_tso_segs */
u32 (*min_tso_segs)(struct sock *sk);
+ /* react to a specific lost skb (optional) */
+ void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb);
+
/* call when packets are delivered to update cwnd and pacing rate,
* after all the ca_state processing. (optional)
*/
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b089111bbe62..8f76bdea1bd9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1079,7 +1079,12 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
*/
static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb)
{
+ struct sock *sk = (struct sock *)tp;
+ const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
+
tp->lost += tcp_skb_pcount(skb);
+ if (ca_ops->skb_marked_lost)
+ ca_ops->skb_marked_lost(sk, skb);
}
void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
--
2.34.1
From ddc77e8fe5229a7cbd52d4af7edde7fa2734a0e1 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Tue, 7 May 2019 22:36:36 -0400
Subject: [PATCH 07/27] net-tcp_bbr: v2: factor out tx.in_flight setting into
tcp_set_tx_in_flight()
Factor out the code to set an skb's tx.in_flight field into its own
function, so that this code can be used for the TCP_REPAIR "fake send"
code path that inserts skbs into the rtx queue without sending
them. This is in preparation for the following patch, which fixes an
issue with TCP_REPAIR and tx.in_flight.
Tested: See last patch in series for sponge link.
Effort: net-tcp_bbr
Origin-9xx-SHA1: e880fc907d06ea7354333f60f712748ebce9497b
Change-Id: I4fbd4a6e18a51ab06d50ab1c9ad820ce5bea89af
---
include/net/tcp.h | 1 +
net/ipv4/tcp_rate.c | 32 +++++++++++++++++++-------------
2 files changed, 20 insertions(+), 13 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 49c925270337..1ab6b2753236 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1184,6 +1184,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
void tcp_set_ca_state(struct sock *sk, const u8 ca_state);
/* From tcp_rate.c */
+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb);
void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb);
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
struct rate_sample *rs);
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index e985026fc037..a8b4c9504570 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -34,13 +34,30 @@
* ready to send in the write queue.
*/
+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 in_flight;
+
+ /* Check, sanitize, and record packets in flight after skb was sent. */
+ in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb);
+ if (WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX,
+ "insane in_flight %u cc %s mss %u "
+ "cwnd %u pif %u %u %u %u\n",
+ in_flight, inet_csk(sk)->icsk_ca_ops->name,
+ tp->mss_cache, tp->snd_cwnd,
+ tp->packets_out, tp->retrans_out,
+ tp->sacked_out, tp->lost_out))
+ in_flight = TCPCB_IN_FLIGHT_MAX;
+ TCP_SKB_CB(skb)->tx.in_flight = in_flight;
+}
+
/* Snapshot the current delivery information in the skb, to generate
* a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered().
*/
void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
- u32 in_flight;
/* In general we need to start delivery rate samples from the
* time we received the most recent ACK, to ensure we include
@@ -69,18 +86,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce;
TCP_SKB_CB(skb)->tx.lost = tp->lost;
TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0;
-
- /* Check, sanitize, and record packets in flight after skb was sent. */
- in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb);
- WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX,
- "insane in_flight %u cc %s mss %u "
- "cwnd %u pif %u %u %u %u\n",
- in_flight, inet_csk(sk)->icsk_ca_ops->name,
- tp->mss_cache, tp->snd_cwnd,
- tp->packets_out, tp->retrans_out,
- tp->sacked_out, tp->lost_out);
- in_flight = min(in_flight, TCPCB_IN_FLIGHT_MAX);
- TCP_SKB_CB(skb)->tx.in_flight = in_flight;
+ tcp_set_tx_in_flight(sk, skb);
}
/* When an skb is sacked or acked, we fill in the rate sample with the (prior)
--
2.34.1
From 52dd9fd01e87674a3c57dbd8d2e10febc58130e6 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Wed, 1 May 2019 20:16:33 -0400
Subject: [PATCH 08/27] net-tcp_bbr: v2: adjust skb tx.in_flight upon merge in
tcp_shifted_skb()
When tcp_shifted_skb() updates state as adjacent SACKed skbs are
coalesced, previously the tx.in_flight was not adjusted, so we could
get contradictory state where the skb's recorded pcount was bigger
than the tx.in_flight (the number of segments that were in_flight
after sending the skb).
Normally having a SACKed skb with contradictory pcount/tx.in_flight
would not matter. However, with SACK reneging, the SACKed bit is
removed, and an skb once again becomes eligible for retransmitting,
fragmenting, SACKing, etc. Packetdrill testing verified the following
sequence is possible in a kernel that does not have this commit:
- skb N is SACKed
- skb N+1 is SACKed and combined with skb N using tcp_shifted_skb()
- tcp_shifted_skb() will increase the pcount of prev,
but leave tx.in_flight as-is
- so prev skb can have pcount > tx.in_flight
- RTO, tcp_timeout_mark_lost(), detect reneg,
remove "SACKed" bit, mark skb N as lost
- find pcount of skb N is greater than its tx.in_flight
I suspect this issue is what caused the bbr2_inflight_hi_from_lost_skb():
WARN_ON_ONCE(inflight_prev < 0)
to fire in production machines using bbr2.
Tested: See last commit in series for sponge link.
Effort: net-tcp_bbr
Origin-9xx-SHA1: 1a3e997e613d2dcf32b947992882854ebe873715
Change-Id: I1b0b75c27519953430c7db51c6f358f104c7af55
---
net/ipv4/tcp_input.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8f76bdea1bd9..394633fc9fa6 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1465,6 +1465,17 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
tcp_skb_pcount_add(skb, -pcount);
+ /* Adjust tx.in_flight as pcount is shifted from skb to prev. */
+ if (WARN_ONCE(TCP_SKB_CB(skb)->tx.in_flight < pcount,
+ "prev in_flight: %u skb in_flight: %u pcount: %u",
+ TCP_SKB_CB(prev)->tx.in_flight,
+ TCP_SKB_CB(skb)->tx.in_flight,
+ pcount))
+ TCP_SKB_CB(skb)->tx.in_flight = 0;
+ else
+ TCP_SKB_CB(skb)->tx.in_flight -= pcount;
+ TCP_SKB_CB(prev)->tx.in_flight += pcount;
+
/* When we're adding to gso_segs == 1, gso_size will be zero,
* in theory this shouldn't be necessary but as long as DSACK
* code can come after this skb later on it's better to keep
--
2.34.1
From 8d8c4a58577e23dfb57406d36f71da10d9c5f1bb Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Wed, 1 May 2019 20:16:25 -0400
Subject: [PATCH 09/27] net-tcp_bbr: v2: adjust skb tx.in_flight upon split in
tcp_fragment()
When we fragment an skb that has already been sent, we need to update
the tx.in_flight for the first skb in the resulting pair ("buff").
Because we were not updating the tx.in_flight, the tx.in_flight value
was inconsistent with the pcount of the "buff" skb (tx.in_flight would
be too high). That meant that if the "buff" skb was lost, then
bbr2_inflight_hi_from_lost_skb() would calculate an inflight_hi value
that is too high. This could result in longer queues and higher packet
loss.
Packetdrill testing verified that without this commit, when the second
half of an skb is SACKed and then later the first half of that skb is
marked lost, the calculated inflight_hi was incorrect.
Effort: net-tcp_bbr
Origin-9xx-SHA1: 385f1ddc610798fab2837f9f372857438b25f874
Change-Id: I617f8cab4e9be7a0b8e8d30b047bf8645393354d
---
net/ipv4/tcp_output.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c69f4d966024..82465a0cf921 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1533,7 +1533,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
- int nsize, old_factor;
+ int nsize, old_factor, inflight_prev;
long limit;
int nlen;
u8 flags;
@@ -1610,6 +1610,15 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
if (diff)
tcp_adjust_pcount(sk, skb, diff);
+
+ /* Set buff tx.in_flight as if buff were sent by itself. */
+ inflight_prev = TCP_SKB_CB(skb)->tx.in_flight - old_factor;
+ if (WARN_ONCE(inflight_prev < 0,
+ "inconsistent: tx.in_flight: %u old_factor: %d",
+ TCP_SKB_CB(skb)->tx.in_flight, old_factor))
+ inflight_prev = 0;
+ TCP_SKB_CB(buff)->tx.in_flight = inflight_prev +
+ tcp_skb_pcount(buff);
}
/* Link BUFF into the send queue. */
--
2.34.1
From 8418f129970ad19ed9eb1dd5e4cd2ef50634a5a3 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Tue, 7 May 2019 22:37:19 -0400
Subject: [PATCH 10/27] net-tcp_bbr: v2: set tx.in_flight for skbs in repair
write queue
Syzkaller was able to use TCP_REPAIR to reproduce the new warning
added in tcp_fragment():
WARNING: CPU: 0 PID: 118174 at net/ipv4/tcp_output.c:1487
tcp_fragment+0xdcc/0x10a0 net/ipv4/tcp_output.c:1487()
inconsistent: tx.in_flight: 0 old_factor: 53
The warning happens because skbs inserted into the tcp_rtx_queue
during the repair process go through a sort of "fake send" process,
and that process was setting pcount but not tx.in_flight, and thus the
warnings (where old_factor is the old pcount).
The fix of setting tx.in_flight in the TCP_REPAIR code path seems
simple enough, and indeed makes the repro code from syzkaller stop
producing warnings. Running through kokonut tests, and will send out
for review when all tests pass.
Effort: net-tcp_bbr
Origin-9xx-SHA1: 330f825a08a6fe92cef74d799cc468864c479f63
Change-Id: I0bc4a790f040fd4239620e1eedd5dc64666c6f05
---
net/ipv4/tcp_output.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 82465a0cf921..95816a1a5858 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2644,6 +2644,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
tcp_init_tso_segs(skb, mss_now);
+ tcp_set_tx_in_flight(sk, skb);
goto repair; /* Skip network transmission */
}
--
2.34.1
From cffd0b97c891dd51ce52c95d2738e4628d6d217c Mon Sep 17 00:00:00 2001
From: Yousuk Seung <ysseung@google.com>
Date: Wed, 23 May 2018 17:55:54 -0700
Subject: [PATCH 11/27] net-tcp: add new ca opts flag TCP_CONG_WANTS_CE_EVENTS
Add a new ca opts flag TCP_CONG_WANTS_CE_EVENTS that allows a
congestion control module to receive CE events.
Currently congestion control modules have to set the TCP_CONG_NEEDS_ECN
bit in opts flag to receive CE events but this may incur changes in ECN
behavior elsewhere. This patch adds a new bit TCP_CONG_WANTS_CE_EVENTS
that allows congestion control modules to receive CE events
independently of TCP_CONG_NEEDS_ECN.
Effort: net-tcp
Origin-9xx-SHA1: 9f7e14716cde760bc6c67ef8ef7e1ee48501d95b
Change-Id: I2255506985242f376d910c6fd37daabaf4744f24
---
include/net/tcp.h | 14 +++++++++++++-
net/ipv4/tcp_input.c | 4 ++--
2 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1ab6b2753236..989aaedc2e4a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1036,7 +1036,11 @@ enum tcp_ca_ack_event_flags {
#define TCP_CONG_NON_RESTRICTED 0x1
/* Requires ECN/ECT set on all packets */
#define TCP_CONG_NEEDS_ECN 0x2
-#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
+/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). */
+#define TCP_CONG_WANTS_CE_EVENTS 0x4
+#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | \
+ TCP_CONG_NEEDS_ECN | \
+ TCP_CONG_WANTS_CE_EVENTS)
union tcp_cc_info;
@@ -1165,6 +1169,14 @@ static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer)
}
#endif
+static inline bool tcp_ca_wants_ce_events(const struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN |
+ TCP_CONG_WANTS_CE_EVENTS);
+}
+
static inline bool tcp_ca_needs_ecn(const struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 394633fc9fa6..82a68b768be4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -349,7 +349,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
tcp_enter_quickack_mode(sk, 2);
break;
case INET_ECN_CE:
- if (tcp_ca_needs_ecn(sk))
+ if (tcp_ca_wants_ce_events(sk))
tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
@@ -360,7 +360,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
tp->ecn_flags |= TCP_ECN_SEEN;
break;
default:
- if (tcp_ca_needs_ecn(sk))
+ if (tcp_ca_wants_ce_events(sk))
tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
tp->ecn_flags |= TCP_ECN_SEEN;
break;
--
2.34.1
From a827b3564662d4b300dbc03e062740ec5df12c86 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Fri, 27 Sep 2019 17:10:26 -0400
Subject: [PATCH 12/27] net-tcp: re-generalize TSO sizing in TCP CC module API
Reorganize the API for CC modules so that the CC module once again
gets complete control of the TSO sizing decision. This is how the API
was set up around 2016 and the initial BBRv1 upstreaming. Later Eric
Dumazet simplified it. But with wider testing it now seems that to
avoid CPU regressions BBR needs to have a different TSO sizing
function.
This is necessary to handle cases where there are many flows
bottlenecked on the sender host's NIC, in which case BBR's pacing rate
is much lower than CUBIC/Reno/DCTCP's. Why does this happen? Because
BBR's pacing rate adapts to the low bandwidth share each flow sees. By
contrast, CUBIC/Reno/DCTCP see no loss or ECN, so they grow a very
large cwnd, and thus large pacing rate and large TSO burst size.
Change-Id: Ic8ccfdbe4010ee8d4bf6a6334c48a2fceb2171ea
---
include/net/tcp.h | 4 ++--
net/ipv4/tcp_bbr.c | 38 ++++++++++++++++++++++++++------------
net/ipv4/tcp_output.c | 11 +++++------
3 files changed, 33 insertions(+), 20 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 989aaedc2e4a..b65268d3a3c2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1102,8 +1102,8 @@ struct tcp_congestion_ops {
/* hook for packet ack accounting (optional) */
void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
- /* override sysctl_tcp_min_tso_segs */
- u32 (*min_tso_segs)(struct sock *sk);
+ /* pick target number of segments per TSO/GSO skb (optional): */
+ u32 (*tso_segs)(struct sock *sk, unsigned int mss_now);
/* react to a specific lost skb (optional) */
void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb);
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 54eec33c6e1c..fbd8fa11acca 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -294,26 +294,40 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
sk->sk_pacing_rate = rate;
}
-/* override sysctl_tcp_min_tso_segs */
static u32 bbr_min_tso_segs(struct sock *sk)
{
return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
}
+/* Return the number of segments BBR would like in a TSO/GSO skb, given
+ * a particular max gso size as a constraint.
+ */
+static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now,
+ u32 gso_max_size)
+{
+ u32 segs;
+ u32 bytes;
+
+ /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */
+ bytes = sk->sk_pacing_rate >> sk->sk_pacing_shift;
+
+ bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER);
+ segs = max_t(u32, bytes / mss_now, bbr_min_tso_segs(sk));
+ return segs;
+}
+
+/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */
+static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now)
+{
+ return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size);
+}
+
+/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */
static u32 bbr_tso_segs_goal(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- u32 segs, bytes;
-
- /* Sort of tcp_tso_autosize() but ignoring
- * driver provided sk_gso_max_size.
- */
- bytes = min_t(unsigned long,
- sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
- GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER);
- segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
- return min(segs, 0x7FU);
+ return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_MAX_SIZE);
}
/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
@@ -1149,7 +1163,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
.undo_cwnd = bbr_undo_cwnd,
.cwnd_event = bbr_cwnd_event,
.ssthresh = bbr_ssthresh,
- .min_tso_segs = bbr_min_tso_segs,
+ .tso_segs = bbr_tso_segs,
.get_info = bbr_get_info,
.set_state = bbr_set_state,
};
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 95816a1a5858..5284249f4689 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2002,13 +2002,12 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
{
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
- u32 min_tso, tso_segs;
+ u32 tso_segs;
- min_tso = ca_ops->min_tso_segs ?
- ca_ops->min_tso_segs(sk) :
- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
-
- tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
+ tso_segs = ca_ops->tso_segs ?
+ ca_ops->tso_segs(sk, mss_now) :
+ tcp_tso_autosize(sk, mss_now,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs));
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
}
--
2.34.1
From 443e2f20e82a9f975fcb02d79f6f882006bc448c Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Sat, 16 Nov 2019 13:16:25 -0500
Subject: [PATCH 13/27] net-tcp: add fast_ack_mode=1: skip rwin check in
tcp_fast_ack_mode__tcp_ack_snd_check()
Add logic for an experimental TCP connection behavior, enabled with
tp->fast_ack_mode = 1, which disables checking the receive window
before sending an ack in __tcp_ack_snd_check(). If this behavior is
enabled, the data receiver sends an ACK if the amount of data is >
RCV.MSS.
Change-Id: Iaa0a0fd7108221f883137a79d5bfa724f1b096d4
---
include/linux/tcp.h | 3 ++-
net/ipv4/tcp.c | 1 +
net/ipv4/tcp_cong.c | 1 +
net/ipv4/tcp_input.c | 5 +++--
4 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 41b1da621a45..d8f94ef1a297 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -255,7 +255,8 @@ struct tcp_sock {
u8 compressed_ack;
u8 dup_ack_counter:2,
tlp_retrans:1, /* TLP is a retransmission */
- unused:5;
+ fast_ack_mode:2, /* which fast ack mode ? */
+ unused:3;
u32 chrono_start; /* Start time in jiffies of a TCP chrono */
u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
u8 chrono_type:2, /* current chronograph type */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6667c3538f2a..34207ec358d8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3190,6 +3190,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->rx_opt.dsack = 0;
tp->rx_opt.num_sacks = 0;
tp->rcv_ooopack = 0;
+ tp->fast_ack_mode = 0;
/* Clean up fastopen related fields */
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index d3cae40749e8..0f268f2ff2e9 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -189,6 +189,7 @@ void tcp_init_congestion_control(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
tcp_sk(sk)->prior_ssthresh = 0;
+ tcp_sk(sk)->fast_ack_mode = 0;
if (icsk->icsk_ca_ops->init)
icsk->icsk_ca_ops->init(sk);
if (tcp_ca_needs_ecn(sk))
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 82a68b768be4..8a455eb0c552 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5527,13 +5527,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
/* More than one full frame received... */
if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
+ (tp->fast_ack_mode == 1 ||
/* ... and right edge of window advances far enough.
* (tcp_recvmsg() will send ACK otherwise).
* If application uses SO_RCVLOWAT, we want send ack now if
* we have not received enough bytes to satisfy the condition.
*/
- (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
- __tcp_select_window(sk) >= tp->rcv_wnd)) ||
+ (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
+ __tcp_select_window(sk) >= tp->rcv_wnd))) ||
/* We ACK each frame or... */
tcp_in_quickack_mode(sk) ||
/* Protocol state mandates a one-time immediate ACK */
--
2.34.1
From 09264a22823a54e020bfdf0ae86fc3644958db16 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Tue, 11 Jun 2019 12:54:22 -0400
Subject: [PATCH 14/27] net-tcp_bbr: v2: BBRv2 ("bbr2") congestion control for
Linux TCP
BBR v2 is an enhancement to the BBR v1 algorithm. It's designed to aim for lower
queues, lower loss, and better Reno/CUBIC coexistence than BBR v1.
BBR v2 maintains the core of BBR v1: an explicit model of the network
path that is two-dimensional, adapting to estimate the (a) maximum
available bandwidth and (b) maximum safe volume of data a flow can
keep in-flight in the network. It maintains the estimated BDP as a
core guide for estimating an appropriate level of in-flight data.
BBR v2 makes several key enhancements:
o Its bandwidth-probing time scale is adapted, within bounds, to allow improved
coexistence with Reno and CUBIC. The bandwidth-probing time scale is (a)
extended dynamically based on estimated BDP to improve coexistence with
Reno/CUBIC; (b) bounded by an interactive wall-clock time-scale to be more
scalable and responsive than Reno and CUBIC.
o Rather than being largely agnostic to loss and ECN marks, it explicitly uses
loss and (DCTCP-style) ECN signals to maintain its model.
o It aims for lower losses than v1 by adjusting its model to attempt to stay
within loss rate and ECN mark rate bounds (loss_thresh and ecn_thresh,
respectively).
o It adapts to loss/ECN signals even when the application is running out of
data ("application-limited"), in case the "application-limited" flow is also
"network-limited" (the bw and/or inflight available to this flow is lower than
previously estimated when the flow ran out of data).
o It has a three-part model: the model explicitly tracks three operating points,
where an operating point is a tuple: (bandwidth, inflight). The three operating
points are:
o latest: the latest measurement from the current round trip
o upper bound: robust, optimistic, long-term upper bound
o lower bound: robust, conservative, short-term lower bound
These are stored in the following state variables:
o latest: bw_latest, inflight_latest
o lo: bw_lo, inflight_lo
o hi: bw_hi[2], inflight_hi
To gain intuition about the meaning of the three operating points, it
may help to consider the analogs in CUBIC, which has a somewhat
analogous three-part model used by its probing state machine:
BBR param CUBIC param
----------- -------------
latest ~ cwnd
lo ~ ssthresh
hi ~ last_max_cwnd
The analogy is only a loose one, though, since the BBR operating
points are calculated differently, and are 2-dimensional (bw,inflight)
rather than CUBIC's one-dimensional notion of operating point
(inflight).
o It uses the three-part model to adapt the magnitude of its bandwidth
to match the estimated space available in the buffer, rather than (as
in BBR v1) assuming that it was always acceptable to place 0.25*BDP in
the bottleneck buffer when probing (commodity datacenter switches
commonly do not have that much buffer for WAN flows). When BBR v2
estimates it hit a buffer limit during probing, its bandwidth probing
then starts gently in case little space is still available in the
buffer, and then accelerates, slowly at first and then rapidly if it
can grow inflight without seeing congestion signals. In such cases,
probing is bounded by inflight_hi + inflight_probe, where
inflight_probe grows as: [0, 1, 2, 4, 8, 16,...]. This allows BBR to
keep losses low and bounded if a bottleneck remains congested, while
rapidly/scalably utilizing free bandwidth when it becomes available.
o It has a slightly revised state machine, to achieve the goals above.
BBR_BW_PROBE_UP: pushes up inflight to probe for bw/vol
BBR_BW_PROBE_DOWN: drain excess inflight from the queue
BBR_BW_PROBE_CRUISE: use pipe, w/ headroom in queue/pipe
BBR_BW_PROBE_REFILL: try refill the pipe again to 100%, leaving queue empty
o The estimated BDP: BBR v2 continues to maintain an estimate of the
path's two-way propagation delay, by tracking a windowed min_rtt, and
coordinating (on an as-needed basis) to try to expose the two-way
propagation delay by draining the bottleneck queue.
BBR v2 continues to use its min_rtt and (currently-applicable) bandwidth
estimate to estimate the current bandwidth-delay product. The estimated BDP
still provides one important guideline for bounding inflight data. However,
because any min-filtered RTT and max-filtered bw inherently tend to both
overestimate, the estimated BDP is often too high; in this case loss or ECN
marks can ensue, in which case BBR v2 adjusts inflight_hi and inflight_lo to
adapt its sending rate and inflight down to match the available capacity of the
path.
o Space: Note that ICSK_CA_PRIV_SIZE increased. This is because BBR v2
requires more space. Note that much of the space is due to support for
per-socket parameterization and debugging in this release for research
and debugging. With that state removed, the full "struct bbr" is 140
bytes, or 144 with padding. This is an increase of 40 bytes over the
existing ca_priv space.
o Code: BBR v2 reuses many pieces from BBR v1. But it omits the following
significant pieces:
o "packet conservation" (bbr_set_cwnd_to_recover_or_restore(),
bbr_can_grow_inflight())
o long-term bandwidth estimator ("policer mode")
The code layout tries to keep BBR v2 code near the bottom of the
file, so that v1-applicable code in the top does not accidentally
refer to v2 code.
o Docs:
See the following docs for more details and diagrams describing the BBR v2
algorithm:
https://datatracker.ietf.org/meeting/104/materials/slides-104-iccrg-an-update-on-bbr-00
https://datatracker.ietf.org/meeting/102/materials/slides-102-iccrg-an-update-on-bbr-work-at-google-00
o Internal notes:
For this upstream rebase, Neal started from:
git show fed518041ac6:net/ipv4/tcp_bbr.c > net/ipv4/tcp_bbr.c
then removed dev instrumentation (dynamic get/set for parameters)
and code that was only used by BBRv1
Effort: net-tcp_bbr
Origin-9xx-SHA1: 2c84098e60bed6d67dde23cd7538c51dee273102
Change-Id: I125cf26ba2a7a686f2fa5e87f4c2afceb65f7a05
---
include/net/inet_connection_sock.h | 5 +-
include/uapi/linux/inet_diag.h | 33 +
net/ipv4/Kconfig | 22 +
net/ipv4/Makefile | 1 +
net/ipv4/tcp_bbr2.c | 2683 ++++++++++++++++++++++++++++
5 files changed, 2742 insertions(+), 2 deletions(-)
create mode 100644 net/ipv4/tcp_bbr2.c
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c2b15f7e5516..1387e67a017f 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -135,8 +135,9 @@ struct inet_connection_sock {
u32 icsk_probes_tstamp;
u32 icsk_user_timeout;
- u64 icsk_ca_priv[104 / sizeof(u64)];
-#define ICSK_CA_PRIV_SIZE sizeof_field(struct inet_connection_sock, icsk_ca_priv)
+/* XXX inflated by temporary internal debugging info */
+#define ICSK_CA_PRIV_SIZE (216)
+ u64 icsk_ca_priv[ICSK_CA_PRIV_SIZE / sizeof(u64)];
};
#define ICSK_TIME_RETRANS 1 /* Retransmit timer */
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index 50655de04c9b..0e24f11627d5 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -231,9 +231,42 @@ struct tcp_bbr_info {
__u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */
};
+/* Phase as reported in netlink/ss stats. */
+enum tcp_bbr2_phase {
+ BBR2_PHASE_INVALID = 0,
+ BBR2_PHASE_STARTUP = 1,
+ BBR2_PHASE_DRAIN = 2,
+ BBR2_PHASE_PROBE_RTT = 3,
+ BBR2_PHASE_PROBE_BW_UP = 4,
+ BBR2_PHASE_PROBE_BW_DOWN = 5,
+ BBR2_PHASE_PROBE_BW_CRUISE = 6,
+ BBR2_PHASE_PROBE_BW_REFILL = 7
+};
+
+struct tcp_bbr2_info {
+ /* u64 bw: bandwidth (app throughput) estimate in Byte per sec: */
+ __u32 bbr_bw_lsb; /* lower 32 bits of bw */
+ __u32 bbr_bw_msb; /* upper 32 bits of bw */
+ __u32 bbr_min_rtt; /* min-filtered RTT in uSec */
+ __u32 bbr_pacing_gain; /* pacing gain shifted left 8 bits */
+ __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */
+ __u32 bbr_bw_hi_lsb; /* lower 32 bits of bw_hi */
+ __u32 bbr_bw_hi_msb; /* upper 32 bits of bw_hi */
+ __u32 bbr_bw_lo_lsb; /* lower 32 bits of bw_lo */
+ __u32 bbr_bw_lo_msb; /* upper 32 bits of bw_lo */
+ __u8 bbr_mode; /* current bbr_mode in state machine */
+ __u8 bbr_phase; /* current state machine phase */
+ __u8 unused1; /* alignment padding; not used yet */
+ __u8 bbr_version; /* MUST be at this offset in struct */
+ __u32 bbr_inflight_lo; /* lower/short-term data volume bound */
+ __u32 bbr_inflight_hi; /* higher/long-term data volume bound */
+ __u32 bbr_extra_acked; /* max excess packets ACKed in epoch */
+};
+
union tcp_cc_info {
struct tcpvegas_info vegas;
struct tcp_dctcp_info dctcp;
struct tcp_bbr_info bbr;
+ struct tcp_bbr2_info bbr2;
};
#endif /* _UAPI_INET_DIAG_H_ */
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 2dfb12230f08..b6bec331a82e 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -678,6 +678,24 @@ config TCP_CONG_BBR
AQM schemes that do not provide a delay signal. It requires the fq
("Fair Queue") pacing packet scheduler.
+config TCP_CONG_BBR2
+ tristate "BBR2 TCP"
+ default n
+ help
+
+ BBR2 TCP congestion control is a model-based congestion control
+ algorithm that aims to maximize network utilization, keep queues and
+ retransmit rates low, and to be able to coexist with Reno/CUBIC in
+ common scenarios. It builds an explicit model of the network path. It
+ tolerates a targeted degree of random packet loss and delay that are
+ unrelated to congestion. It can operate over LAN, WAN, cellular, wifi,
+ or cable modem links, and can use DCTCP-L4S-style ECN signals. It can
+ coexist with flows that use loss-based congestion control, and can
+ operate with shallow buffers, deep buffers, bufferbloat, policers, or
+ AQM schemes that do not provide a delay signal. It requires pacing,
+ using either TCP internal pacing or the fq ("Fair Queue") pacing packet
+ scheduler.
+
choice
prompt "Default TCP congestion control"
default DEFAULT_CUBIC
@@ -715,6 +733,9 @@ choice
config DEFAULT_BBR
bool "BBR" if TCP_CONG_BBR=y
+ config DEFAULT_BBR2
+ bool "BBR2" if TCP_CONG_BBR2=y
+
config DEFAULT_RENO
bool "Reno"
endchoice
@@ -739,6 +760,7 @@ config DEFAULT_TCP_CONG
default "dctcp" if DEFAULT_DCTCP
default "cdg" if DEFAULT_CDG
default "bbr" if DEFAULT_BBR
+ default "bbr2" if DEFAULT_BBR2
default "cubic"
config TCP_MD5SIG
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index bbdd9c44f14e..8dee1547d820 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o
obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o
+obj-$(CONFIG_TCP_CONG_BBR2) += tcp_bbr2.o
obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o
obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o
diff --git a/net/ipv4/tcp_bbr2.c b/net/ipv4/tcp_bbr2.c
new file mode 100644
index 000000000000..a6959b70e51d
--- /dev/null
+++ b/net/ipv4/tcp_bbr2.c
@@ -0,0 +1,2683 @@
+/* BBR (Bottleneck Bandwidth and RTT) congestion control, v2
+ *
+ * BBRv2 is a model-based congestion control algorithm that aims for low
+ * queues, low loss, and (bounded) Reno/CUBIC coexistence. To maintain a model
+ * of the network path, it uses measurements of bandwidth and RTT, as well as
+ * (if they occur) packet loss and/or DCTCP/L4S-style ECN signals. Note that
+ * although it can use ECN or loss signals explicitly, it does not require
+ * either; it can bound its in-flight data based on its estimate of the BDP.
+ *
+ * The model has both higher and lower bounds for the operating range:
+ * lo: bw_lo, inflight_lo: conservative short-term lower bound
+ * hi: bw_hi, inflight_hi: robust long-term upper bound
+ * The bandwidth-probing time scale is (a) extended dynamically based on
+ * estimated BDP to improve coexistence with Reno/CUBIC; (b) bounded by
+ * an interactive wall-clock time-scale to be more scalable and responsive
+ * than Reno and CUBIC.
+ *
+ * Here is a state transition diagram for BBR:
+ *
+ * |
+ * V
+ * +---> STARTUP ----+
+ * | | |
+ * | V |
+ * | DRAIN ----+
+ * | | |
+ * | V |
+ * +---> PROBE_BW ----+
+ * | ^ | |
+ * | | | |
+ * | +----+ |
+ * | |
+ * +---- PROBE_RTT <--+
+ *
+ * A BBR flow starts in STARTUP, and ramps up its sending rate quickly.
+ * When it estimates the pipe is full, it enters DRAIN to drain the queue.
+ * In steady state a BBR flow only uses PROBE_BW and PROBE_RTT.
+ * A long-lived BBR flow spends the vast majority of its time remaining
+ * (repeatedly) in PROBE_BW, fully probing and utilizing the pipe's bandwidth
+ * in a fair manner, with a small, bounded queue. *If* a flow has been
+ * continuously sending for the entire min_rtt window, and hasn't seen an RTT
+ * sample that matches or decreases its min_rtt estimate for 10 seconds, then
+ * it briefly enters PROBE_RTT to cut inflight to a minimum value to re-probe
+ * the path's two-way propagation delay (min_rtt). When exiting PROBE_RTT, if
+ * we estimated that we reached the full bw of the pipe then we enter PROBE_BW;
+ * otherwise we enter STARTUP to try to fill the pipe.
+ *
+ * BBR is described in detail in:
+ * "BBR: Congestion-Based Congestion Control",
+ * Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh,
+ * Van Jacobson. ACM Queue, Vol. 14 No. 5, September-October 2016.
+ *
+ * There is a public e-mail list for discussing BBR development and testing:
+ * https://groups.google.com/forum/#!forum/bbr-dev
+ *
+ * NOTE: BBR might be used with the fq qdisc ("man tc-fq") with pacing enabled,
+ * otherwise TCP stack falls back to an internal pacing using one high
+ * resolution timer per TCP socket and may use more resources.
+ */
+#include <linux/module.h>
+#include <net/tcp.h>
+#include <linux/inet_diag.h>
+#include <linux/inet.h>
+#include <linux/random.h>
+
+#include "tcp_dctcp.h"
+
+/* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth
+ * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps.
+ * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32.
+ * Since the minimum window is >=4 packets, the lower bound isn't
+ * an issue. The upper bound isn't an issue with existing technologies.
+ */
+#define BW_SCALE 24
+#define BW_UNIT (1 << BW_SCALE)
+
+#define BBR_SCALE 8 /* scaling factor for fractions in BBR (e.g. gains) */
+#define BBR_UNIT (1 << BBR_SCALE)
+
+#define FLAG_DEBUG_VERBOSE 0x1 /* Verbose debugging messages */
+#define FLAG_DEBUG_LOOPBACK 0x2 /* Do NOT skip loopback addr */
+
+#define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */
+
+/* BBR has the following modes for deciding how fast to send: */
+enum bbr_mode {
+ BBR_STARTUP, /* ramp up sending rate rapidly to fill pipe */
+ BBR_DRAIN, /* drain any queue created during startup */
+ BBR_PROBE_BW, /* discover, share bw: pace around estimated bw */
+ BBR_PROBE_RTT, /* cut inflight to min to probe min_rtt */
+};
+
+/* How does the incoming ACK stream relate to our bandwidth probing? */
+enum bbr_ack_phase {
+ BBR_ACKS_INIT, /* not probing; not getting probe feedback */
+ BBR_ACKS_REFILLING, /* sending at est. bw to fill pipe */
+ BBR_ACKS_PROBE_STARTING, /* inflight rising to probe bw */
+ BBR_ACKS_PROBE_FEEDBACK, /* getting feedback from bw probing */
+ BBR_ACKS_PROBE_STOPPING, /* stopped probing; still getting feedback */
+};
+
+/* BBR congestion control block */
+struct bbr {
+ u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */
+ u32 min_rtt_stamp; /* timestamp of min_rtt_us */
+ u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */
+ u32 probe_rtt_min_us; /* min RTT in bbr_probe_rtt_win_ms window */
+ u32 probe_rtt_min_stamp; /* timestamp of probe_rtt_min_us*/
+ u32 next_rtt_delivered; /* scb->tx.delivered at end of round */
+ u32 prior_rcv_nxt; /* tp->rcv_nxt when CE state last changed */
+ u64 cycle_mstamp; /* time of this cycle phase start */
+ u32 mode:3, /* current bbr_mode in state machine */
+ prev_ca_state:3, /* CA state on previous ACK */
+ packet_conservation:1, /* use packet conservation? */
+ round_start:1, /* start of packet-timed tx->ack round? */
+ ce_state:1, /* If most recent data has CE bit set */
+ bw_probe_up_rounds:5, /* cwnd-limited rounds in PROBE_UP */
+ try_fast_path:1, /* can we take fast path? */
+ unused2:11,
+ idle_restart:1, /* restarting after idle? */
+ probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */
+ cycle_idx:3, /* current index in pacing_gain cycle array */
+ has_seen_rtt:1; /* have we seen an RTT sample yet? */
+ u32 pacing_gain:11, /* current gain for setting pacing rate */
+ cwnd_gain:11, /* current gain for setting cwnd */
+ full_bw_reached:1, /* reached full bw in Startup? */
+ full_bw_cnt:2, /* number of rounds without large bw gains */
+ init_cwnd:7; /* initial cwnd */
+ u32 prior_cwnd; /* prior cwnd upon entering loss recovery */
+ u32 full_bw; /* recent bw, to estimate if pipe is full */
+
+ /* For tracking ACK aggregation: */
+ u64 ack_epoch_mstamp; /* start of ACK sampling epoch */
+ u16 extra_acked[2]; /* max excess data ACKed in epoch */
+ u32 ack_epoch_acked:20, /* packets (S)ACKed in sampling epoch */
+ extra_acked_win_rtts:5, /* age of extra_acked, in round trips */
+ extra_acked_win_idx:1, /* current index in extra_acked array */
+ /* BBR v2 state: */
+ unused1:2,
+ startup_ecn_rounds:2, /* consecutive hi ECN STARTUP rounds */
+ loss_in_cycle:1, /* packet loss in this cycle? */
+ ecn_in_cycle:1; /* ECN in this cycle? */
+ u32 loss_round_delivered; /* scb->tx.delivered ending loss round */
+ u32 undo_bw_lo; /* bw_lo before latest losses */
+ u32 undo_inflight_lo; /* inflight_lo before latest losses */
+ u32 undo_inflight_hi; /* inflight_hi before latest losses */
+ u32 bw_latest; /* max delivered bw in last round trip */
+ u32 bw_lo; /* lower bound on sending bandwidth */
+ u32 bw_hi[2]; /* upper bound of sending bandwidth range*/
+ u32 inflight_latest; /* max delivered data in last round trip */
+ u32 inflight_lo; /* lower bound of inflight data range */
+ u32 inflight_hi; /* upper bound of inflight data range */
+ u32 bw_probe_up_cnt; /* packets delivered per inflight_hi incr */
+ u32 bw_probe_up_acks; /* packets (S)ACKed since inflight_hi incr */
+ u32 probe_wait_us; /* PROBE_DOWN until next clock-driven probe */
+ u32 ecn_eligible:1, /* sender can use ECN (RTT, handshake)? */
+ ecn_alpha:9, /* EWMA delivered_ce/delivered; 0..256 */
+ bw_probe_samples:1, /* rate samples reflect bw probing? */
+ prev_probe_too_high:1, /* did last PROBE_UP go too high? */
+ stopped_risky_probe:1, /* last PROBE_UP stopped due to risk? */
+ rounds_since_probe:8, /* packet-timed rounds since probed bw */
+ loss_round_start:1, /* loss_round_delivered round trip? */
+ loss_in_round:1, /* loss marked in this round trip? */
+ ecn_in_round:1, /* ECN marked in this round trip? */
+ ack_phase:3, /* bbr_ack_phase: meaning of ACKs */
+ loss_events_in_round:4,/* losses in STARTUP round */
+ initialized:1; /* has bbr_init() been called? */
+ u32 alpha_last_delivered; /* tp->delivered at alpha update */
+ u32 alpha_last_delivered_ce; /* tp->delivered_ce at alpha update */
+
+ /* Params configurable using setsockopt. Refer to corresponding
+ * module param for detailed description of params.
+ */
+ struct bbr_params {
+ u32 high_gain:11, /* max allowed value: 2047 */
+ drain_gain:10, /* max allowed value: 1023 */
+ cwnd_gain:11; /* max allowed value: 2047 */
+ u32 cwnd_min_target:4, /* max allowed value: 15 */
+ min_rtt_win_sec:5, /* max allowed value: 31 */
+ probe_rtt_mode_ms:9, /* max allowed value: 511 */
+ full_bw_cnt:3, /* max allowed value: 7 */
+ bw_rtts:5, /* max allowed value: 31 */
+ cwnd_tso_budget:1, /* allowed values: {0, 1} */
+ unused3:1,
+ drain_to_target:1, /* boolean */
+ precise_ece_ack:1, /* boolean */
+ extra_acked_in_startup:1, /* allowed values: {0, 1} */
+ fast_path:1; /* boolean */
+ u32 full_bw_thresh:10, /* max allowed value: 1023 */
+ startup_cwnd_gain:11, /* max allowed value: 2047 */
+ bw_probe_pif_gain:9, /* max allowed value: 511 */
+ usage_based_cwnd:1, /* boolean */
+ unused2:1;
+ u16 probe_rtt_win_ms:14, /* max allowed value: 16383 */
+ refill_add_inc:2; /* max allowed value: 3 */
+ u16 extra_acked_gain:11, /* max allowed value: 2047 */
+ extra_acked_win_rtts:5; /* max allowed value: 31*/
+ u16 pacing_gain[CYCLE_LEN]; /* max allowed value: 1023 */
+ /* Mostly BBR v2 parameters below here: */
+ u32 ecn_alpha_gain:8, /* max allowed value: 255 */
+ ecn_factor:8, /* max allowed value: 255 */
+ ecn_thresh:8, /* max allowed value: 255 */
+ beta:8; /* max allowed value: 255 */
+ u32 ecn_max_rtt_us:19, /* max allowed value: 524287 */
+ bw_probe_reno_gain:9, /* max allowed value: 511 */
+ full_loss_cnt:4; /* max allowed value: 15 */
+ u32 probe_rtt_cwnd_gain:8, /* max allowed value: 255 */
+ inflight_headroom:8, /* max allowed value: 255 */
+ loss_thresh:8, /* max allowed value: 255 */
+ bw_probe_max_rounds:8; /* max allowed value: 255 */
+ u32 bw_probe_rand_rounds:4, /* max allowed value: 15 */
+ bw_probe_base_us:26, /* usecs: 0..2^26-1 (67 secs) */
+ full_ecn_cnt:2; /* max allowed value: 3 */
+ u32 bw_probe_rand_us:26, /* usecs: 0..2^26-1 (67 secs) */
+ undo:1, /* boolean */
+ tso_rtt_shift:4, /* max allowed value: 15 */
+ unused5:1;
+ u32 ecn_reprobe_gain:9, /* max allowed value: 511 */
+ unused1:14,
+ ecn_alpha_init:9; /* max allowed value: 256 */
+ } params;
+
+ struct {
+ u32 snd_isn; /* Initial sequence number */
+ u32 rs_bw; /* last valid rate sample bw */
+ u32 target_cwnd; /* target cwnd, based on BDP */
+ u8 undo:1, /* Undo event happened but not yet logged */
+ unused:7;
+ char event; /* single-letter event debug codes */
+ u16 unused2;
+ } debug;
+};
+
+struct bbr_context {
+ u32 sample_bw;
+ u32 target_cwnd;
+ u32 log:1;
+};
+
+/* Window length of bw filter (in rounds). Max allowed value is 31 (0x1F) */
+static int bbr_bw_rtts = CYCLE_LEN + 2;
+/* Window length of min_rtt filter (in sec). Max allowed value is 31 (0x1F) */
+static u32 bbr_min_rtt_win_sec = 10;
+/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode.
+ * Max allowed value is 511 (0x1FF).
+ */
+static u32 bbr_probe_rtt_mode_ms = 200;
+/* Window length of probe_rtt_min_us filter (in ms), and consequently the
+ * typical interval between PROBE_RTT mode entries.
+ * Note that bbr_probe_rtt_win_ms must be <= bbr_min_rtt_win_sec * MSEC_PER_SEC
+ */
+static u32 bbr_probe_rtt_win_ms = 5000;
+/* Skip TSO below the following bandwidth (bits/sec): */
+static int bbr_min_tso_rate = 1200000;
+
+/* Use min_rtt to help adapt TSO burst size, with smaller min_rtt resulting
+ * in bigger TSO bursts. By default we cut the RTT-based allowance in half
+ * for every 2^9 usec (aka 512 us) of RTT, so that the RTT-based allowance
+ * is below 1500 bytes after 6 * ~500 usec = 3ms.
+ */
+static u32 bbr_tso_rtt_shift = 9; /* halve allowance per 2^9 usecs, 512us */
+
+/* Select cwnd TSO budget approach:
+ * 0: padding
+ * 1: flooring
+ */
+static uint bbr_cwnd_tso_budget = 1;
+
+/* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck.
+ * In order to help drive the network toward lower queues and low latency while
+ * maintaining high utilization, the average pacing rate aims to be slightly
+ * lower than the estimated bandwidth. This is an important aspect of the
+ * design.
+ */
+static const int bbr_pacing_margin_percent = 1;
+
+/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain
+ * that will allow a smoothly increasing pacing rate that will double each RTT
+ * and send the same number of packets per RTT that an un-paced, slow-starting
+ * Reno or CUBIC flow would. Max allowed value is 2047 (0x7FF).
+ */
+static int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1;
+/* The gain for deriving startup cwnd. Max allowed value is 2047 (0x7FF). */
+static int bbr_startup_cwnd_gain = BBR_UNIT * 2885 / 1000 + 1;
+/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain
+ * the queue created in BBR_STARTUP in a single round. Max allowed value
+ * is 1023 (0x3FF).
+ */
+static int bbr_drain_gain = BBR_UNIT * 1000 / 2885;
+/* The gain for deriving steady-state cwnd tolerates delayed/stretched ACKs.
+ * Max allowed value is 2047 (0x7FF).
+ */
+static int bbr_cwnd_gain = BBR_UNIT * 2;
+/* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw.
+ * Max allowed value for each element is 1023 (0x3FF).
+ */
+enum bbr_pacing_gain_phase {
+ BBR_BW_PROBE_UP = 0, /* push up inflight to probe for bw/vol */
+ BBR_BW_PROBE_DOWN = 1, /* drain excess inflight from the queue */
+ BBR_BW_PROBE_CRUISE = 2, /* use pipe, w/ headroom in queue/pipe */
+ BBR_BW_PROBE_REFILL = 3, /* v2: refill the pipe again to 100% */
+};
+static int bbr_pacing_gain[] = {
+ BBR_UNIT * 5 / 4, /* probe for more available bw */
+ BBR_UNIT * 3 / 4, /* drain queue and/or yield bw to other flows */
+ BBR_UNIT, BBR_UNIT, BBR_UNIT, /* cruise at 1.0*bw to utilize pipe, */
+ BBR_UNIT, BBR_UNIT, BBR_UNIT /* without creating excess queue... */
+};
+/* Randomize the starting gain cycling phase over N phases: */
+static u32 bbr_cycle_rand = 7;
+
+/* Try to keep at least this many packets in flight, if things go smoothly. For
+ * smooth functioning, a sliding window protocol ACKing every other packet
+ * needs at least 4 packets in flight. Max allowed value is 15 (0xF).
+ */
+static u32 bbr_cwnd_min_target = 4;
+
+/* Cwnd to BDP proportion in PROBE_RTT mode scaled by BBR_UNIT. Default: 50%.
+ * Use 0 to disable. Max allowed value is 255.
+ */
+static u32 bbr_probe_rtt_cwnd_gain = BBR_UNIT * 1 / 2;
+
+/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */
+/* If bw has increased significantly (1.25x), there may be more bw available.
+ * Max allowed value is 1023 (0x3FF).
+ */
+static u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4;
+/* But after 3 rounds w/o significant bw growth, estimate pipe is full.
+ * Max allowed value is 7 (0x7).
+ */
+static u32 bbr_full_bw_cnt = 3;
+
+static u32 bbr_flags; /* Debugging related stuff */
+
+/* Whether to debug using printk.
+ */
+static bool bbr_debug_with_printk;
+
+/* Whether to debug using ftrace event tcp:tcp_bbr_event.
+ * Ignored when bbr_debug_with_printk is set.
+ */
+static bool bbr_debug_ftrace;
+
+/* Experiment: each cycle, try to hold sub-unity gain until inflight <= BDP. */
+static bool bbr_drain_to_target = true; /* default: enabled */
+
+/* Experiment: Flags to control BBR with ECN behavior.
+ */
+static bool bbr_precise_ece_ack = true; /* default: enabled */
+
+/* The max rwin scaling shift factor is 14 (RFC 1323), so the max sane rwin is
+ * (2^(16+14) B)/(1024 B/packet) = 1M packets.
+ */
+static u32 bbr_cwnd_warn_val = 1U << 20;
+
+static u16 bbr_debug_port_mask;
+
+/* BBR module parameters. These are module parameters only in Google prod.
+ * Upstream these are intentionally not module parameters.
+ */
+static int bbr_pacing_gain_size = CYCLE_LEN;
+
+/* Gain factor for adding extra_acked to target cwnd: */
+static int bbr_extra_acked_gain = 256;
+
+/* Window length of extra_acked window. Max allowed val is 31. */
+static u32 bbr_extra_acked_win_rtts = 5;
+
+/* Max allowed val for ack_epoch_acked, after which sampling epoch is reset */
+static u32 bbr_ack_epoch_acked_reset_thresh = 1U << 20;
+
+/* Time period for clamping cwnd increment due to ack aggregation */
+static u32 bbr_extra_acked_max_us = 100 * 1000;
+
+/* Use extra acked in startup ?
+ * 0: disabled
+ * 1: use latest extra_acked value from 1-2 rtt in startup
+ */
+static int bbr_extra_acked_in_startup = 1; /* default: enabled */
+
+/* Experiment: don't grow cwnd beyond twice of what we just probed. */
+static bool bbr_usage_based_cwnd; /* default: disabled */
+
+/* For lab testing, researchers can enable BBRv2 ECN support with this flag,
+ * when they know that any ECN marks that the connections experience will be
+ * DCTCP/L4S-style ECN marks, rather than RFC3168 ECN marks.
+ * TODO(ncardwell): Production use of the BBRv2 ECN functionality depends on
+ * negotiation or configuration that is outside the scope of the BBRv2
+ * alpha release.
+ */
+static bool bbr_ecn_enable = false;
+
+module_param_named(bw_rtts, bbr_bw_rtts, int, 0644);
+module_param_named(min_tso_rate, bbr_min_tso_rate, int, 0644);
+module_param_named(tso_rtt_shift, bbr_tso_rtt_shift, uint, 0644);
+module_param_named(high_gain, bbr_high_gain, int, 0644);
+module_param_named(drain_gain, bbr_drain_gain, int, 0644);
+module_param_named(startup_cwnd_gain, bbr_startup_cwnd_gain, int, 0644);
+module_param_named(cwnd_gain, bbr_cwnd_gain, int, 0644);
+module_param_array_named(pacing_gain, bbr_pacing_gain, int,
+ &bbr_pacing_gain_size, 0644);
+module_param_named(cycle_rand, bbr_cycle_rand, uint, 0644);
+module_param_named(cwnd_min_target, bbr_cwnd_min_target, uint, 0644);
+module_param_named(probe_rtt_cwnd_gain,
+ bbr_probe_rtt_cwnd_gain, uint, 0664);
+module_param_named(cwnd_warn_val, bbr_cwnd_warn_val, uint, 0664);
+module_param_named(debug_port_mask, bbr_debug_port_mask, ushort, 0644);
+module_param_named(flags, bbr_flags, uint, 0644);
+module_param_named(debug_ftrace, bbr_debug_ftrace, bool, 0644);
+module_param_named(debug_with_printk, bbr_debug_with_printk, bool, 0644);
+module_param_named(min_rtt_win_sec, bbr_min_rtt_win_sec, uint, 0644);
+module_param_named(probe_rtt_mode_ms, bbr_probe_rtt_mode_ms, uint, 0644);
+module_param_named(probe_rtt_win_ms, bbr_probe_rtt_win_ms, uint, 0644);
+module_param_named(full_bw_thresh, bbr_full_bw_thresh, uint, 0644);
+module_param_named(full_bw_cnt, bbr_full_bw_cnt, uint, 0644);
+module_param_named(cwnd_tso_bduget, bbr_cwnd_tso_budget, uint, 0664);
+module_param_named(extra_acked_gain, bbr_extra_acked_gain, int, 0664);
+module_param_named(extra_acked_win_rtts,
+ bbr_extra_acked_win_rtts, uint, 0664);
+module_param_named(extra_acked_max_us,
+ bbr_extra_acked_max_us, uint, 0664);
+module_param_named(ack_epoch_acked_reset_thresh,
+ bbr_ack_epoch_acked_reset_thresh, uint, 0664);
+module_param_named(drain_to_target, bbr_drain_to_target, bool, 0664);
+module_param_named(precise_ece_ack, bbr_precise_ece_ack, bool, 0664);
+module_param_named(extra_acked_in_startup,
+ bbr_extra_acked_in_startup, int, 0664);
+module_param_named(usage_based_cwnd, bbr_usage_based_cwnd, bool, 0664);
+module_param_named(ecn_enable, bbr_ecn_enable, bool, 0664);
+
+/* Forward declarations of helpers that are defined further down the file. */
+static void bbr_check_probe_rtt_done(struct sock *sk);
+static void bbr2_reset_congestion_signals(struct sock *sk);
+static void bbr2_exit_probe_rtt(struct sock *sk);
+
+/* Report whether BBR has already recorded that STARTUP filled the pipe. */
+static bool bbr_full_bw_reached(const struct sock *sk)
+{
+	const struct bbr *ca = inet_csk_ca(sk);
+	bool pipe_filled = ca->full_bw_reached;
+
+	return pipe_filled;
+}
+
+/* Windowed max of recent bandwidth samples, in pkts/uS << BW_SCALE:
+ * the larger of the two bw_hi[] filter slots.
+ */
+static u32 bbr_max_bw(const struct sock *sk)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+
+	if (ca->bw_hi[0] > ca->bw_hi[1])
+		return ca->bw_hi[0];
+	return ca->bw_hi[1];
+}
+
+/* Estimated path bandwidth, in pkts/uS << BW_SCALE: the windowed max
+ * bandwidth, capped by bw_lo.
+ */
+static u32 bbr_bw(const struct sock *sk)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+	u32 windowed_max = bbr_max_bw(sk);
+
+	return min(windowed_max, ca->bw_lo);
+}
+
+/* Maximum extra acked over the past k-2k round trips, where
+ * k = bbr_extra_acked_win_rtts: the larger of the two extra_acked[] slots.
+ */
+static u16 bbr_extra_acked(const struct sock *sk)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+
+	if (ca->extra_acked[0] > ca->extra_acked[1])
+		return ca->extra_acked[0];
+	return ca->extra_acked[1];
+}
+
+/* Convert a rate in BBR bandwidth units into bytes per second, scaled by
+ * @gain and reduced by @margin percent.
+ * The multiply/shift sequence is ordered to avoid overflow of u64. This
+ * should work for input rates of up to 2.9Tbit/sec and gain of 2.89x.
+ * The result is never smaller than 1.
+ */
+static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain,
+				  int margin)
+{
+	const unsigned int mss = tcp_sk(sk)->mss_cache;
+	u64 bytes = rate;
+
+	/* packets -> bytes, then apply the gain and drop its scaling */
+	bytes = ((bytes * mss) * gain) >> BBR_SCALE;
+	/* per-usec -> per-sec minus the margin, then drop the bw scaling */
+	bytes = (bytes * (USEC_PER_SEC / 100 * (100 - margin))) >> BW_SCALE;
+
+	return max(bytes, 1ULL);
+}
+
+/* Bytes-per-second form of a BBR bandwidth, with unit gain and no margin. */
+static u64 bbr_bw_bytes_per_sec(struct sock *sk, u64 rate)
+{
+	const int unit_gain = BBR_UNIT;
+	const int no_margin = 0;
+
+	return bbr_rate_bytes_per_sec(sk, rate, unit_gain, no_margin);
+}
+
+/* Express a BBR bandwidth in kbit/sec: bytes/sec * 8 / 1000. */
+static u64 bbr_rate_kbps(struct sock *sk, u64 rate)
+{
+	u64 bits_per_sec = bbr_bw_bytes_per_sec(sk, rate) * 8;
+
+	return div_u64(bits_per_sec, 1000);
+}
+
+static u32 bbr_tso_segs_goal(struct sock *sk); /* forward declaration: bbr_debug() calls it before its definition */
+static void bbr_debug(struct sock *sk, u32 acked,
+ const struct rate_sample *rs, struct bbr_context *ctx)
+{ /* Debug helper: warns once on an implausible cwnd, then, when debugging is
+ * enabled for this socket, formats one line of connection state and
+ * hands it to printk.
+ */
+ static const char ca_states[] = { /* one-letter tag per icsk_ca_state */
+ [TCP_CA_Open] = 'O',
+ [TCP_CA_Disorder] = 'D',
+ [TCP_CA_CWR] = 'C',
+ [TCP_CA_Recovery] = 'R',
+ [TCP_CA_Loss] = 'L',
+ };
+ static const char mode[] = { /* one-letter tag per bbr->mode, indexed by mode */
+ 'G', /* Growing - BBR_STARTUP */
+ 'D', /* Drain - BBR_DRAIN */
+ 'W', /* Window - BBR_PROBE_BW */
+ 'M', /* Min RTT - BBR_PROBE_RTT */
+ };
+ static const char ack_phase[] = { /* bbr_ack_phase strings */
+ 'I', /* BBR_ACKS_INIT - 'Init' */
+ 'R', /* BBR_ACKS_REFILLING - 'Refilling' */
+ 'B', /* BBR_ACKS_PROBE_STARTING - 'Before' */
+ 'F', /* BBR_ACKS_PROBE_FEEDBACK - 'Feedback' */
+ 'A', /* BBR_ACKS_PROBE_STOPPING - 'After' */
+ };
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ const u32 una = tp->snd_una - bbr->debug.snd_isn; /* snd_una relative to the value saved by bbr_init() */
+ const u32 fack = tcp_highest_sack_seq(tp);
+ const u16 dport = ntohs(inet_sk(sk)->inet_dport);
+ bool is_port_match = (bbr_debug_port_mask &&
+ ((dport & bbr_debug_port_mask) == 0)); /* mask must be set and dport must share no bits with it */
+ char debugmsg[320];
+
+ if (sk->sk_state == TCP_SYN_SENT)
+ return; /* no bbr_init() yet if SYN retransmit -> CA_Loss */
+
+ if (!tp->snd_cwnd || tp->snd_cwnd > bbr_cwnd_warn_val) { /* cwnd is zero or above the warn threshold */
+ char addr[INET6_ADDRSTRLEN + 10] = { 0 };
+
+ if (sk->sk_family == AF_INET)
+ snprintf(addr, sizeof(addr), "%pI4:%u",
+ &inet_sk(sk)->inet_daddr, dport);
+ else if (sk->sk_family == AF_INET6)
+ snprintf(addr, sizeof(addr), "%pI6:%u",
+ &sk->sk_v6_daddr, dport);
+
+ WARN_ONCE(1,
+ "BBR %s cwnd alert: %u "
+ "snd_una: %u ca: %d pacing_gain: %u cwnd_gain: %u "
+ "bw: %u rtt: %u min_rtt: %u "
+ "acked: %u tso_segs: %u "
+ "bw: %d %ld %d pif: %u\n",
+ addr, tp->snd_cwnd,
+ una, inet_csk(sk)->icsk_ca_state,
+ bbr->pacing_gain, bbr->cwnd_gain,
+ bbr_max_bw(sk), (tp->srtt_us >> 3), bbr->min_rtt_us,
+ acked, bbr_tso_segs_goal(sk),
+ rs->delivered, rs->interval_us, rs->is_retrans,
+ tcp_packets_in_flight(tp));
+ }
+
+ if (likely(!bbr_debug_with_printk && !bbr_debug_ftrace))
+ return; /* neither debug output is enabled */
+
+ if (!sock_flag(sk, SOCK_DBG) && !is_port_match)
+ return; /* socket is neither flagged SOCK_DBG nor selected by port */
+
+ if (!ctx->log && !tp->app_limited && !(bbr_flags & FLAG_DEBUG_VERBOSE))
+ return; /* nothing flagged for logging, not app-limited, not verbose */
+
+ if (ipv4_is_loopback(inet_sk(sk)->inet_daddr) &&
+ !(bbr_flags & FLAG_DEBUG_LOOPBACK))
+ return; /* loopback destinations only with FLAG_DEBUG_LOOPBACK */
+
+ snprintf(debugmsg, sizeof(debugmsg) - 1,
+ "BBR %pI4:%-5u %5u,%03u:%-7u %c "
+ "%c %2u br %2u cr %2d rtt %5ld d %2d i %5ld mrtt %d %cbw %llu "
+ "bw %llu lb %llu ib %llu qb %llu "
+ "a %u if %2u %c %c dl %u l %u al %u # %u t %u %c %c "
+ "lr %d er %d ea %d bwl %lld il %d ih %d c %d "
+ "v %d %c %u %c %s\n",
+ &inet_sk(sk)->inet_daddr, dport,
+ una / 1000, una % 1000, fack - tp->snd_una,
+ ca_states[inet_csk(sk)->icsk_ca_state],
+ bbr->debug.undo ? '@' : mode[bbr->mode],
+ tp->snd_cwnd,
+ bbr_extra_acked(sk), /* br (legacy): extra_acked */
+ rs->tx_in_flight, /* cr (legacy): tx_inflight */
+ rs->rtt_us,
+ rs->delivered,
+ rs->interval_us,
+ bbr->min_rtt_us,
+ rs->is_app_limited ? '_' : 'l',
+ bbr_rate_kbps(sk, ctx->sample_bw), /* lbw: latest sample bw */
+ bbr_rate_kbps(sk, bbr_max_bw(sk)), /* bw: max bw */
+ 0ULL, /* lb: [obsolete] */
+ 0ULL, /* ib: [obsolete] */
+ (u64)sk->sk_pacing_rate * 8 / 1000,
+ acked,
+ tcp_packets_in_flight(tp),
+ rs->is_ack_delayed ? 'd' : '.',
+ bbr->round_start ? '*' : '.',
+ tp->delivered, tp->lost,
+ tp->app_limited,
+ 0, /* #: [obsolete] */
+ ctx->target_cwnd,
+ tp->reord_seen ? 'r' : '.', /* r: reordering seen? */
+ ca_states[bbr->prev_ca_state],
+ (rs->lost + rs->delivered) > 0 ?
+ (1000 * rs->lost /
+ (rs->lost + rs->delivered)) : 0, /* lr: loss rate x1000 */
+ (rs->delivered) > 0 ?
+ (1000 * rs->delivered_ce /
+ (rs->delivered)) : 0, /* er: ECN rate x1000 */
+ 1000 * bbr->ecn_alpha >> BBR_SCALE, /* ea: ECN alpha x1000 */
+ bbr->bw_lo == ~0U ?
+ -1 : (s64)bbr_rate_kbps(sk, bbr->bw_lo), /* bwl */
+ bbr->inflight_lo, /* il */
+ bbr->inflight_hi, /* ih */
+ bbr->bw_probe_up_cnt, /* c */
+ 2, /* v: version */
+ bbr->debug.event,
+ bbr->cycle_idx,
+ ack_phase[bbr->ack_phase],
+ bbr->bw_probe_samples ? "Y" : "N");
+ debugmsg[sizeof(debugmsg) - 1] = 0; /* force NUL termination of the buffer */
+
+ /* printk takes a higher precedence. */
+ if (bbr_debug_with_printk)
+ printk(KERN_DEBUG "%s", debugmsg);
+
+ if (unlikely(bbr->debug.undo))
+ bbr->debug.undo = 0; /* clear the undo marker now that it has been reported */
+}
+
+/* Turn a BBR bandwidth and gain into a pacing rate in bytes per second,
+ * after subtracting bbr_pacing_margin_percent and capping the result at the
+ * socket's sk_max_pacing_rate.
+ */
+static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
+{
+	u64 bytes_per_sec = bbr_rate_bytes_per_sec(sk, bw, gain,
+						   bbr_pacing_margin_percent);
+	u64 ceiling = sk->sk_max_pacing_rate;
+
+	return bytes_per_sec < ceiling ? bytes_per_sec : ceiling;
+}
+
+/* Seed the pacing rate with high_gain * init_cwnd / RTT.
+ * When no RTT sample exists yet, a nominal 1 ms RTT is used and has_seen_rtt
+ * is left untouched.
+ */
+static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *ca = inet_csk_ca(sk);
+	u32 rtt_us = USEC_PER_MSEC;	/* nominal default RTT */
+	u64 bw;
+
+	if (tp->srtt_us) {		/* an RTT sample is available */
+		rtt_us = max(tp->srtt_us >> 3, 1U);
+		ca->has_seen_rtt = 1;
+	}
+
+	bw = (u64)tp->snd_cwnd * BW_UNIT;
+	do_div(bw, rtt_us);
+	sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw,
+						   ca->params.high_gain);
+}
+
+/* Pace at the given bw estimate scaled by a gain factor.
+ * Until the pipe is estimated to be full, the pacing rate is only ever
+ * raised, never lowered.
+ */
+static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *ca = inet_csk_ca(sk);
+	unsigned long new_rate = bbr_bw_to_pacing_rate(sk, bw, gain);
+
+	/* First RTT sample has arrived: re-seed the rate from it. */
+	if (unlikely(tp->srtt_us && !ca->has_seen_rtt))
+		bbr_init_pacing_rate_from_rtt(sk);
+
+	if (!bbr_full_bw_reached(sk) && new_rate <= sk->sk_pacing_rate)
+		return;
+	sk->sk_pacing_rate = new_rate;
+}
+
+/* Minimum segments per skb: 1 when pacing below bbr_min_tso_rate >> 3,
+ * otherwise 2.
+ */
+static u32 bbr_min_tso_segs(struct sock *sk)
+{
+	if (sk->sk_pacing_rate < (bbr_min_tso_rate >> 3))
+		return 1;
+	return 2;
+}
+
+/* Return the number of segments BBR would like in a TSO/GSO skb, given
+ * a particular max gso size as a constraint.
+ *
+ * @sk:           socket whose pacing rate and min_rtt drive the budget
+ * @mss_now:      MSS in bytes, used to convert the byte budget to segments
+ * @gso_max_size: upper bound, in bytes, on the skb size
+ *
+ * The result is never smaller than bbr_min_tso_segs(sk).
+ */
+static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now,
+				u32 gso_max_size)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 segs, r;
+	u64 bytes;
+
+	/* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */
+	bytes = sk->sk_pacing_rate >> sk->sk_pacing_shift;
+
+	/* Budget a TSO/GSO burst size allowance based on min_rtt. For every
+	 * K = 2^tso_rtt_shift microseconds of min_rtt, halve the burst.
+	 * The min_rtt-based burst allowance is: 64 KBytes / 2^(min_rtt/K)
+	 *
+	 * Use GSO_LEGACY_MAX_SIZE for the 64 KByte base: GSO_MAX_SIZE was
+	 * raised well above 64 KBytes in Linux 5.19, which would weaken
+	 * the min_rtt-based halving on this 6.1 kernel.
+	 */
+	if (bbr->params.tso_rtt_shift) {
+		r = bbr->min_rtt_us >> bbr->params.tso_rtt_shift;
+		if (r < BITS_PER_TYPE(u32))   /* prevent undefined behavior */
+			bytes += GSO_LEGACY_MAX_SIZE >> r;
+	}
+
+	/* Leave room for headers within the caller's size limit. */
+	bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER);
+	segs = max_t(u32, bytes / mss_now, bbr_min_tso_segs(sk));
+	return segs;
+}
+
+/* BBR's replacement for tcp_tso_autosize(), used at transmit time to cap the
+ * skb size; constrained by the socket's sk_gso_max_size.
+ */
+static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now)
+{
+	const u32 sk_limit = sk->sk_gso_max_size;
+
+	return bbr_tso_segs_generic(sk, mss_now, sk_limit);
+}
+
+/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size.
+ *
+ * The size limit is GSO_LEGACY_MAX_SIZE (64 KBytes) rather than
+ * GSO_MAX_SIZE: the latter was raised well above 64 KBytes in Linux 5.19,
+ * which would inflate the segment goal (and every cwnd budget derived from
+ * it) on this 6.1 kernel.
+ */
+static u32 bbr_tso_segs_goal(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_LEGACY_MAX_SIZE);
+}
+
+/* Remember a "last known good" cwnd so that it can be restored after losses
+ * or PROBE_RTT.
+ */
+static void bbr_save_cwnd(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *ca = inet_csk_ca(sk);
+	/* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */
+	bool cwnd_was_cut = ca->prev_ca_state >= TCP_CA_Recovery ||
+			    ca->mode == BBR_PROBE_RTT;
+
+	if (cwnd_was_cut && ca->prior_cwnd > tp->snd_cwnd)
+		return;		/* keep the larger remembered value */
+
+	ca->prior_cwnd = tp->snd_cwnd;	/* this cwnd is good enough */
+}
+
+/* Congestion-control event hook.
+ * Handles two cases: a transmit start while app-limited (restart from idle),
+ * and ECN CE / no-CE events when ECN and precise ECE ACKs are both enabled.
+ * Every other event is ignored.
+ */
+static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *ca = inet_csk_ca(sk);
+	u32 ce_state;
+
+	if (event == CA_EVENT_TX_START && tp->app_limited) {
+		ca->idle_restart = 1;
+		ca->ack_epoch_mstamp = tp->tcp_mstamp;
+		ca->ack_epoch_acked = 0;
+		/* Avoid pointless buffer overflows: pace at est. bw if we
+		 * don't need more speed (we're restarting from idle and
+		 * app-limited).
+		 */
+		switch (ca->mode) {
+		case BBR_PROBE_BW:
+			bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT);
+			break;
+		case BBR_PROBE_RTT:
+			bbr_check_probe_rtt_done(sk);
+			break;
+		default:
+			break;
+		}
+		return;
+	}
+
+	if (event != CA_EVENT_ECN_IS_CE && event != CA_EVENT_ECN_NO_CE)
+		return;
+	if (!bbr_ecn_enable || !ca->params.precise_ece_ack)
+		return;
+
+	/* ce_state goes through a full-width temporary for the helper. */
+	ce_state = ca->ce_state;
+	dctcp_ece_ack_update(sk, event, &ca->prior_rcv_nxt, &ce_state);
+	ca->ce_state = ce_state;
+
+	if (event == CA_EVENT_ECN_IS_CE && tp->fast_ack_mode == 2)
+		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
+}
+
+/* Compute the bandwidth-delay product from min RTT and the estimated
+ * bottleneck bandwidth:
+ *
+ *	bdp = ceil(bw * min_rtt * gain)
+ *
+ * The gain controls how much queue is built. A small gain builds a smaller
+ * queue but is more exposed to noise in RTT measurements (e.g., delayed ACKs
+ * or other ACK compression effects), which may cause BBR to under-estimate
+ * the rate.
+ */
+static u32 bbr_bdp(struct sock *sk, u32 bw, int gain)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+	u64 scaled;
+
+	/* With no valid RTT sample at all, cap cwnd at the initial default.
+	 * This should only happen when the connection is not using TCP
+	 * timestamps and has retransmitted all of the SYN/SYNACK/data packets
+	 * ACKed so far. In that case an RTO can cut cwnd to 1, and we need to
+	 * slow-start up toward something safe: initial cwnd.
+	 */
+	if (unlikely(ca->min_rtt_us == ~0U))
+		return ca->init_cwnd;
+
+	scaled = (u64)bw * ca->min_rtt_us;
+	/* Apply the gain and drop its scaling. */
+	scaled = (scaled * gain) >> BBR_SCALE;
+	/* Remove the BW_SCALE shift, rounding up to avoid a negative
+	 * feedback loop.
+	 */
+	scaled = (scaled + BW_UNIT - 1) / BW_UNIT;
+
+	return scaled;
+}
+
+/* Add headroom to a cwnd so that full-sized skbs can be in flight on both
+ * end hosts and the path stays fully utilized at high speed:
+ *   - one skb in sending host Qdisc,
+ *   - one skb in sending host TSO/GSO engine
+ *   - one skb being received by receiver host LRO/GRO/delayed-ACK engine
+ * At low rates (bbr_min_tso_rate) this won't bloat cwnd because in such
+ * cases tso_segs_goal is 1. The minimum cwnd is 4 packets, which allows
+ * 2 outstanding 2-packet sequences, to try to keep the pipe full even with
+ * ACK-every-other-packet delayed ACKs.
+ */
+static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+	const u32 skb_budget = 3 * bbr_tso_segs_goal(sk);
+
+	/* Allow enough full-sized skbs in flight to utilize end systems. */
+	if (ca->params.cwnd_tso_budget != 1) {
+		cwnd += skb_budget;
+		cwnd = (cwnd + 1) & ~1U;	/* round up to an even count */
+	} else {
+		cwnd = max_t(u32, cwnd, skb_budget);
+		cwnd = max_t(u32, cwnd, ca->params.cwnd_min_target);
+	}
+
+	/* Ensure gain cycling gets inflight above BDP even for small BDPs. */
+	if (ca->mode != BBR_PROBE_BW || ca->cycle_idx != BBR_BW_PROBE_UP)
+		return cwnd;
+	return cwnd + 2;
+}
+
+/* Target inflight for a bandwidth and gain: the BDP from min RTT and the
+ * estimated bottleneck bandwidth, plus the quantization budget.
+ */
+static u32 bbr_inflight(struct sock *sk, u32 bw, int gain)
+{
+	return bbr_quantization_budget(sk, bbr_bdp(sk, bw, gain));
+}
+
+/* Estimate how many of our packets will be in the network at the earliest
+ * departure time (EDT) of the next scheduled skb.
+ *
+ * With pacing at lower layers, there's often less data "in the network" than
+ * "in flight". With TSQ and departure time pacing at lower layers (e.g. fq),
+ * several skbs are often queued in the pacing layer with a pre-scheduled
+ * EDT. BBR adapts its pacing rate based on the inflight level that it
+ * estimates has already been "baked in" by previous departure time
+ * decisions:
+ *	in_network_at_edt = inflight_at_edt - (EDT - now) * bw
+ * If we're increasing inflight, we want to know whether the transmit of the
+ * EDT skb will push inflight above the target, so inflight_at_edt includes
+ * bbr_tso_segs_goal() from the skb departing at EDT. If decreasing inflight,
+ * estimate whether inflight will sink too low just before the EDT transmit.
+ */
+static u32 bbr_packets_in_net_at_edt(struct sock *sk, u32 inflight_now)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *ca = inet_csk_ca(sk);
+	u64 now_ns = tp->tcp_clock_cache;
+	u64 edt_ns = max(tp->tcp_wstamp_ns, now_ns);
+	u64 wait_us = div_u64(edt_ns - now_ns, NSEC_PER_USEC);
+	/* packets expected to be delivered while waiting for the EDT */
+	u32 drained = (u64)bbr_bw(sk) * wait_us >> BW_SCALE;
+	u32 at_edt = inflight_now;
+
+	if (ca->pacing_gain > BBR_UNIT)		/* increasing inflight */
+		at_edt += bbr_tso_segs_goal(sk);	/* include EDT skb */
+
+	return drained < at_edt ? at_edt - drained : 0;
+}
+
+/* Extra cwnd to provision for the estimated degree of ack aggregation.
+ * Returns 0 when the extra_acked gain is zero, or while the pipe is not yet
+ * full and extra_acked_in_startup is off. Otherwise the gained extra_acked
+ * value, capped at what bbr_bw() delivers in bbr_extra_acked_max_us.
+ */
+static u32 bbr_ack_aggregation_cwnd(struct sock *sk)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+	u32 cap, extra;
+
+	if (!ca->params.extra_acked_gain)
+		return 0;
+	if (!bbr_full_bw_reached(sk) && !ca->params.extra_acked_in_startup)
+		return 0;
+
+	cap = ((u64)bbr_bw(sk) * bbr_extra_acked_max_us)
+		/ BW_UNIT;
+	extra = (ca->params.extra_acked_gain * bbr_extra_acked(sk))
+		>> BBR_SCALE;
+
+	return min(extra, cap);
+}
+
+/* cwnd to use while in PROBE_RTT mode: cwnd_min_target, or the BDP scaled
+ * by probe_rtt_cwnd_gain when that gain is non-zero and the result is larger.
+ */
+static u32 bbr_probe_rtt_cwnd(struct sock *sk)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+	u32 min_cwnd = ca->params.cwnd_min_target;
+	u32 bdp_cwnd;
+
+	if (!ca->params.probe_rtt_cwnd_gain)
+		return min_cwnd;
+
+	bdp_cwnd = bbr_bdp(sk, bbr_bw(sk), ca->params.probe_rtt_cwnd_gain);
+	return max_t(u32, min_cwnd, bdp_cwnd);
+}
+
+/* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss
+ * has drawn us down below target), or snap down to target if we're above it.
+ *
+ * @rs is not read by this function. @acked is the number of packets newly
+ * ACKed; when it is zero only the caps are applied. @bw and @gain feed the
+ * BDP used as the target. @cwnd is the caller's starting value; the adjusted
+ * result is stored in tp->snd_cwnd. On return ctx->target_cwnd holds the
+ * computed target (0 when @acked was zero) and ctx->log tells whether
+ * snd_cwnd changed.
+ */
+static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
+ u32 acked, u32 bw, int gain, u32 cwnd,
+ struct bbr_context *ctx)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ u32 target_cwnd = 0, prev_cwnd = tp->snd_cwnd, max_probe;
+
+ if (!acked)
+ goto done; /* no packet fully ACKed; just apply caps */
+
+ target_cwnd = bbr_bdp(sk, bw, gain);
+
+ /* Increment the cwnd to account for excess ACKed data that seems
+ * due to aggregation (of data and/or ACKs) visible in the ACK stream.
+ */
+ target_cwnd += bbr_ack_aggregation_cwnd(sk);
+ target_cwnd = bbr_quantization_budget(sk, target_cwnd);
+
+ /* Remember the target for debugging. */
+ bbr->debug.target_cwnd = target_cwnd;
+
+ /* Update cwnd and enable fast path if cwnd reaches target_cwnd.
+ * If we're below target cwnd, slow start cwnd toward target cwnd.
+ */
+ bbr->try_fast_path = 0;
+ if (bbr_full_bw_reached(sk)) { /* only cut cwnd if we filled the pipe */
+ cwnd += acked;
+ if (cwnd >= target_cwnd) {
+ cwnd = target_cwnd;
+ bbr->try_fast_path = 1;
+ }
+ } else if (cwnd < target_cwnd || cwnd < 2 * bbr->init_cwnd) {
+ cwnd += acked; /* pipe not full yet: keep growing, never cut */
+ } else {
+ bbr->try_fast_path = 1;
+ }
+
+ /* When growing cwnd, don't grow beyond twice what we just probed. */
+ if (bbr->params.usage_based_cwnd) {
+ max_probe = max(2 * tp->max_packets_out, tp->snd_cwnd);
+ cwnd = min(cwnd, max_probe);
+ }
+
+ cwnd = max_t(u32, cwnd, bbr->params.cwnd_min_target); /* never below the configured floor */
+done:
+ tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); /* apply global cap */
+ if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */
+ tp->snd_cwnd = min_t(u32, tp->snd_cwnd, bbr_probe_rtt_cwnd(sk));
+
+ ctx->target_cwnd = target_cwnd;
+ ctx->log = (tp->snd_cwnd != prev_cwnd); /* read by bbr_debug() */
+}
+
+/* Detect the start of a new packet-timed round trip.
+ * round_start is cleared on every call and set only when this sample's
+ * prior_delivered has reached next_rtt_delivered.
+ */
+static void bbr_update_round_start(struct sock *sk,
+		const struct rate_sample *rs, struct bbr_context *ctx)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *ca = inet_csk_ca(sk);
+
+	ca->round_start = 0;
+
+	if (rs->interval_us <= 0)
+		return;
+	/* Still inside the current round trip? */
+	if (before(rs->prior_delivered, ca->next_rtt_delivered))
+		return;
+
+	ca->next_rtt_delivered = tp->delivered;
+	ca->round_start = 1;
+}
+
+/* Derive a bandwidth sample from how fast packets were delivered and store
+ * it in ctx->sample_bw and bbr->debug.rs_bw.
+ * A sample with a negative delivered count triggers a one-time warning and
+ * leaves both outputs untouched.
+ */
+static void bbr_calculate_bw_sample(struct sock *sk,
+			const struct rate_sample *rs, struct bbr_context *ctx)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+	const bool have_interval = rs->interval_us > 0;
+	u64 sample = 0;
+
+	if (have_interval &&
+	    WARN_ONCE(rs->delivered < 0,
+		      "negative delivered: %d interval_us: %ld\n",
+		      rs->delivered, rs->interval_us))
+		return;
+
+	/* Divide delivered by the interval to find a (lower bound) bottleneck
+	 * bandwidth sample. Delivered is in packets and interval_us in uS and
+	 * ratio will be <<1 for most connections. So delivered is first scaled.
+	 * Round up to allow growth at low rates, even with integer division.
+	 */
+	if (have_interval)
+		sample = DIV_ROUND_UP_ULL((u64)rs->delivered * BW_UNIT,
+					  rs->interval_us);
+
+	ctx->sample_bw = sample;
+	ca->debug.rs_bw = sample;
+}
+
+/* Estimates the windowed max degree of ack aggregation.
+ * This is used to provision extra in-flight data to keep sending during
+ * inter-ACK silences.
+ *
+ * Degree of ack aggregation is estimated as extra data acked beyond expected.
+ *
+ * max_extra_acked = "maximum recent excess data ACKed beyond max_bw * interval"
+ * cwnd += max_extra_acked
+ *
+ * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us (100 ms).
+ * Max filter is an approximate sliding window of 5-10 (packet timed) round
+ * trips for non-startup phase, and 1-2 round trips for startup.
+ *
+ * Does nothing when the extra_acked gain is zero or when the rate sample has
+ * no newly acked/sacked packets, a negative delivered count, or a
+ * non-positive interval.
+ */
+static void bbr_update_ack_aggregation(struct sock *sk,
+ const struct rate_sample *rs)
+{
+ u32 epoch_us, expected_acked, extra_acked;
+ struct bbr *bbr = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 extra_acked_win_rtts_thresh = bbr->params.extra_acked_win_rtts;
+
+ if (!bbr->params.extra_acked_gain || rs->acked_sacked <= 0 ||
+ rs->delivered < 0 || rs->interval_us <= 0)
+ return;
+
+ if (bbr->round_start) {
+ bbr->extra_acked_win_rtts = min(0x1F,
+ bbr->extra_acked_win_rtts + 1); /* round counter saturates at 0x1F */
+ if (bbr->params.extra_acked_in_startup &&
+ !bbr_full_bw_reached(sk))
+ extra_acked_win_rtts_thresh = 1; /* before the pipe is full, rotate the window every round */
+ if (bbr->extra_acked_win_rtts >=
+ extra_acked_win_rtts_thresh) {
+ bbr->extra_acked_win_rtts = 0;
+ bbr->extra_acked_win_idx = bbr->extra_acked_win_idx ?
+ 0 : 1; /* switch to the other filter slot */
+ bbr->extra_acked[bbr->extra_acked_win_idx] = 0; /* and start it empty */
+ }
+ }
+
+ /* Compute how many packets we expected to be delivered over epoch. */
+ epoch_us = tcp_stamp_us_delta(tp->delivered_mstamp,
+ bbr->ack_epoch_mstamp);
+ expected_acked = ((u64)bbr_bw(sk) * epoch_us) / BW_UNIT;
+
+ /* Reset the aggregation epoch if ACK rate is below expected rate or
+ * significantly large no. of ack received since epoch (potentially
+ * quite old epoch).
+ */
+ if (bbr->ack_epoch_acked <= expected_acked ||
+ (bbr->ack_epoch_acked + rs->acked_sacked >=
+ bbr_ack_epoch_acked_reset_thresh)) {
+ bbr->ack_epoch_acked = 0;
+ bbr->ack_epoch_mstamp = tp->delivered_mstamp;
+ expected_acked = 0;
+ }
+
+ /* Compute excess data delivered, beyond what was expected. */
+ bbr->ack_epoch_acked = min_t(u32, 0xFFFFF,
+ bbr->ack_epoch_acked + rs->acked_sacked); /* saturate at 0xFFFFF */
+ extra_acked = bbr->ack_epoch_acked - expected_acked;
+ extra_acked = min(extra_acked, tp->snd_cwnd); /* clamp by cwnd */
+ if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx])
+ bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked; /* keep the max seen in the current slot */
+}
+
+/* Estimate when the pipe is full, using the change in delivery rate: BBR
+ * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by
+ * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited
+ * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the
+ * higher rwin, 3: we get higher delivery rate samples. Or transient
+ * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar
+ * design goal, but uses delay and inter-ACK spacing instead of bandwidth.
+ *
+ * Only evaluated at the start of a round, for samples that are not
+ * app-limited, and only until the pipe has been declared full.
+ */
+static void bbr_check_full_bw_reached(struct sock *sk,
+				      const struct rate_sample *rs)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+	u32 growth_target, max_bw;
+
+	if (bbr_full_bw_reached(sk) || !ca->round_start || rs->is_app_limited)
+		return;
+
+	max_bw = bbr_max_bw(sk);
+	growth_target = (u64)ca->full_bw * ca->params.full_bw_thresh >> BBR_SCALE;
+
+	if (max_bw < growth_target) {
+		/* bw stalled this round: count it toward the limit */
+		++ca->full_bw_cnt;
+		ca->full_bw_reached = ca->full_bw_cnt >= ca->params.full_bw_cnt;
+		return;
+	}
+
+	/* bw is still growing: record the new baseline and start over */
+	ca->full_bw = max_bw;
+	ca->full_bw_cnt = 0;
+}
+
+/* If the pipe is probably full, move from STARTUP to DRAIN.
+ * Returns true when DRAIN should be exited now, i.e. when the estimated
+ * packets in the network at EDT are already at or below the unit-gain
+ * inflight target.
+ */
+static bool bbr_check_drain(struct sock *sk, const struct rate_sample *rs,
+			    struct bbr_context *ctx)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+	u32 in_net, drain_target;
+
+	if (ca->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
+		ca->mode = BBR_DRAIN;	/* drain queue we created */
+		tcp_sk(sk)->snd_ssthresh =
+				bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT);
+		bbr2_reset_congestion_signals(sk);
+	}
+
+	/* Also covers the case where in-flight is already small right after
+	 * entering DRAIN above.
+	 */
+	if (ca->mode != BBR_DRAIN)
+		return false;
+
+	in_net = bbr_packets_in_net_at_edt(sk,
+					   tcp_packets_in_flight(tcp_sk(sk)));
+	drain_target = bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT);
+
+	return in_net <= drain_target;	/* true: exiting DRAIN now */
+}
+
+/* Leave PROBE_RTT once its done-stamp is set and has passed: restart the
+ * probe_rtt_min_stamp clock, restore cwnd to at least prior_cwnd and hand
+ * over to bbr2_exit_probe_rtt().
+ */
+static void bbr_check_probe_rtt_done(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *ca = inet_csk_ca(sk);
+
+	if (!ca->probe_rtt_done_stamp)
+		return;
+	if (!after(tcp_jiffies32, ca->probe_rtt_done_stamp))
+		return;
+
+	ca->probe_rtt_min_stamp = tcp_jiffies32; /* schedule next PROBE_RTT */
+	tp->snd_cwnd = max(tp->snd_cwnd, ca->prior_cwnd);
+	bbr2_exit_probe_rtt(sk);
+}
+
+/* The goal of PROBE_RTT mode is to have BBR flows cooperatively and
+ * periodically drain the bottleneck queue, to converge to measure the true
+ * min_rtt (unloaded propagation delay). This allows the flows to keep queues
+ * small (reducing queuing delay and packet loss) and achieve fairness among
+ * BBR flows.
+ *
+ * The min_rtt filter window is 10 seconds. When the min_rtt estimate expires,
+ * we enter PROBE_RTT mode and cap the cwnd at bbr_cwnd_min_target=4 packets.
+ * After at least bbr_probe_rtt_mode_ms=200ms and at least one packet-timed
+ * round trip elapsed with that flight size <= 4, we leave PROBE_RTT mode and
+ * re-enter the previous mode. BBR uses 200ms to approximately bound the
+ * performance penalty of PROBE_RTT's cwnd capping to roughly 2% (200ms/10s).
+ *
+ * Note that flows need only pay 2% if they are busy sending over the last 10
+ * seconds. Interactive applications (e.g., Web, RPCs, video chunks) often have
+ * natural silences or low-rate periods within 10 seconds where the rate is low
+ * enough for long enough to drain its queue in the bottleneck. We pick up
+ * these min RTT measurements opportunistically with our min_rtt filter. :-)
+ *
+ * This function maintains two filters (probe_rtt_min_us over
+ * probe_rtt_win_ms, and min_rtt_us over min_rtt_win_sec), decides when to
+ * enter PROBE_RTT, and drives the checks for leaving it.
+ */
+static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ bool probe_rtt_expired, min_rtt_expired;
+ u32 expire;
+
+ /* Track min RTT in probe_rtt_win_ms to time next PROBE_RTT state. */
+ expire = bbr->probe_rtt_min_stamp +
+ msecs_to_jiffies(bbr->params.probe_rtt_win_ms);
+ probe_rtt_expired = after(tcp_jiffies32, expire);
+ if (rs->rtt_us >= 0 &&
+ (rs->rtt_us <= bbr->probe_rtt_min_us ||
+ (probe_rtt_expired && !rs->is_ack_delayed))) { /* new minimum, or window expired and the ACK was not delayed */
+ bbr->probe_rtt_min_us = rs->rtt_us;
+ bbr->probe_rtt_min_stamp = tcp_jiffies32;
+ }
+ /* Track min RTT seen in the min_rtt_win_sec filter window: */
+ expire = bbr->min_rtt_stamp + bbr->params.min_rtt_win_sec * HZ;
+ min_rtt_expired = after(tcp_jiffies32, expire);
+ if (bbr->probe_rtt_min_us <= bbr->min_rtt_us ||
+ min_rtt_expired) {
+ bbr->min_rtt_us = bbr->probe_rtt_min_us;
+ bbr->min_rtt_stamp = bbr->probe_rtt_min_stamp;
+ }
+
+ if (bbr->params.probe_rtt_mode_ms > 0 && probe_rtt_expired &&
+ !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) {
+ bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */
+ bbr_save_cwnd(sk); /* note cwnd so we can restore it */
+ bbr->probe_rtt_done_stamp = 0; /* exit deadline not armed yet; set below once inflight is low enough */
+ bbr->ack_phase = BBR_ACKS_PROBE_STOPPING;
+ bbr->next_rtt_delivered = tp->delivered;
+ }
+
+ if (bbr->mode == BBR_PROBE_RTT) {
+ /* Ignore low rate samples during this mode. */
+ tp->app_limited =
+ (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; /* "?: 1" keeps the marker non-zero */
+ /* Maintain min packets in flight for max(200 ms, 1 round). */
+ if (!bbr->probe_rtt_done_stamp &&
+ tcp_packets_in_flight(tp) <= bbr_probe_rtt_cwnd(sk)) {
+ bbr->probe_rtt_done_stamp = tcp_jiffies32 +
+ msecs_to_jiffies(bbr->params.probe_rtt_mode_ms);
+ bbr->probe_rtt_round_done = 0;
+ bbr->next_rtt_delivered = tp->delivered;
+ } else if (bbr->probe_rtt_done_stamp) {
+ if (bbr->round_start)
+ bbr->probe_rtt_round_done = 1;
+ if (bbr->probe_rtt_round_done)
+ bbr_check_probe_rtt_done(sk); /* exits only once the deadline has also passed */
+ }
+ }
+ /* Restart after idle ends only once we process a new S/ACK for data */
+ if (rs->delivered > 0)
+ bbr->idle_restart = 0;
+}
+
+/* Load pacing_gain and cwnd_gain for the current mode from the per-socket
+ * parameters. An unknown mode triggers a one-time warning and leaves both
+ * gains unchanged.
+ */
+static void bbr_update_gains(struct sock *sk)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+
+	if (ca->mode == BBR_STARTUP) {
+		ca->pacing_gain = ca->params.high_gain;
+		ca->cwnd_gain = ca->params.startup_cwnd_gain;
+	} else if (ca->mode == BBR_DRAIN) {
+		/* pace slowly to drain, but keep the startup cwnd */
+		ca->pacing_gain = ca->params.drain_gain;
+		ca->cwnd_gain = ca->params.startup_cwnd_gain;
+	} else if (ca->mode == BBR_PROBE_BW) {
+		ca->pacing_gain = ca->params.pacing_gain[ca->cycle_idx];
+		ca->cwnd_gain = ca->params.cwnd_gain;
+	} else if (ca->mode == BBR_PROBE_RTT) {
+		ca->pacing_gain = BBR_UNIT;
+		ca->cwnd_gain = BBR_UNIT;
+	} else {
+		WARN_ONCE(1, "BBR bad mode: %u\n", ca->mode);
+	}
+}
+
+/* Per-socket initialisation of the BBR state.
+ * Snapshots the module-level tunables into bbr->params (each clamped by the
+ * mask used below), resets the model and filter state, seeds the pacing
+ * rate and requests pacing for the socket.
+ */
+static void bbr_init(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *ca = inet_csk_ca(sk);
+	int idx;
+
+	WARN_ON_ONCE(tp->snd_cwnd >= bbr_cwnd_warn_val);
+
+	ca->initialized = 1;
+
+	/* Numeric tunables, clamped on copy. */
+	ca->params.high_gain = min(0x7FF, bbr_high_gain);
+	ca->params.drain_gain = min(0x3FF, bbr_drain_gain);
+	ca->params.startup_cwnd_gain = min(0x7FF, bbr_startup_cwnd_gain);
+	ca->params.cwnd_gain = min(0x7FF, bbr_cwnd_gain);
+	ca->params.cwnd_tso_budget = min(0x1U, bbr_cwnd_tso_budget);
+	ca->params.cwnd_min_target = min(0xFU, bbr_cwnd_min_target);
+	ca->params.min_rtt_win_sec = min(0x1FU, bbr_min_rtt_win_sec);
+	ca->params.probe_rtt_mode_ms = min(0x1FFU, bbr_probe_rtt_mode_ms);
+	ca->params.full_bw_cnt = min(0x7U, bbr_full_bw_cnt);
+	ca->params.bw_rtts = min(0x1F, bbr_bw_rtts);
+	ca->params.full_bw_thresh = min(0x3FFU, bbr_full_bw_thresh);
+	ca->params.extra_acked_gain = min(0x7FF, bbr_extra_acked_gain);
+	ca->params.extra_acked_win_rtts = min(0x1FU, bbr_extra_acked_win_rtts);
+	ca->params.probe_rtt_cwnd_gain = min(0xFFU, bbr_probe_rtt_cwnd_gain);
+	/* The probe window is also bounded by the min_rtt window set above. */
+	ca->params.probe_rtt_win_ms =
+		min(0x3FFFU,
+		    min_t(u32, bbr_probe_rtt_win_ms,
+			  ca->params.min_rtt_win_sec * MSEC_PER_SEC));
+	for (idx = 0; idx < CYCLE_LEN; idx++)
+		ca->params.pacing_gain[idx] = min(0x3FF, bbr_pacing_gain[idx]);
+	ca->params.tso_rtt_shift = min(0xFU, bbr_tso_rtt_shift);
+
+	/* On/off tunables, normalised to 0 or 1. */
+	ca->params.drain_to_target = !!bbr_drain_to_target;
+	ca->params.precise_ece_ack = !!bbr_precise_ece_ack;
+	ca->params.extra_acked_in_startup = !!bbr_extra_acked_in_startup;
+	ca->params.usage_based_cwnd = !!bbr_usage_based_cwnd;
+
+	/* Debug bookkeeping. */
+	ca->debug.snd_isn = tp->snd_una;
+	ca->debug.target_cwnd = 0;
+	ca->debug.undo = 0;
+
+	/* cwnd and recovery state. */
+	ca->init_cwnd = min(0x7FU, tp->snd_cwnd);
+	ca->prior_cwnd = tp->prior_cwnd;
+	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
+	ca->next_rtt_delivered = 0;
+	ca->prev_ca_state = TCP_CA_Open;
+	ca->packet_conservation = 0;
+
+	/* min_rtt filters start from the connection's current min RTT. */
+	ca->probe_rtt_done_stamp = 0;
+	ca->probe_rtt_round_done = 0;
+	ca->probe_rtt_min_us = tcp_min_rtt(tp);
+	ca->probe_rtt_min_stamp = tcp_jiffies32;
+	ca->min_rtt_us = tcp_min_rtt(tp);
+	ca->min_rtt_stamp = tcp_jiffies32;
+
+	/* Needs params.high_gain and a cleared has_seen_rtt. */
+	ca->has_seen_rtt = 0;
+	bbr_init_pacing_rate_from_rtt(sk);
+
+	/* Bandwidth model: begin in STARTUP with an empty history. */
+	ca->round_start = 0;
+	ca->idle_restart = 0;
+	ca->full_bw_reached = 0;
+	ca->full_bw = 0;
+	ca->full_bw_cnt = 0;
+	ca->cycle_mstamp = 0;
+	ca->cycle_idx = 0;
+	ca->mode = BBR_STARTUP;
+	ca->debug.rs_bw = 0;
+
+	/* ACK aggregation estimator. */
+	ca->ack_epoch_mstamp = tp->tcp_mstamp;
+	ca->ack_epoch_acked = 0;
+	ca->extra_acked_win_rtts = 0;
+	ca->extra_acked_win_idx = 0;
+	ca->extra_acked[0] = 0;
+	ca->extra_acked[1] = 0;
+
+	ca->ce_state = 0;
+	ca->prior_rcv_nxt = tp->rcv_nxt;
+	ca->try_fast_path = 0;
+
+	/* Ask for pacing unless a pacing status is already set. */
+	cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
+}
+
+/* Return the multiple of cwnd that the send buffer should be provisioned for. */
+static u32 bbr_sndbuf_expand(struct sock *sk)
+{
+	/* BBR may slow-start even during recovery, so provision 3 * cwnd. */
+	const u32 cwnd_multiple = 3;
+
+	return cwnd_multiple;
+}
+
+/* __________________________________________________________________________
+ *
+ * Functions new to BBR v2 ("bbr") congestion control are below here.
+ * __________________________________________________________________________
+ */
+
+/* Fold a new bw sample into the current window (slot 1) of the bw max filter:
+ * the slot only ever moves upward.
+ */
+static void bbr2_take_bw_hi_sample(struct sock *sk, u32 bw)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+
+	if (bw > ca->bw_hi[1])
+		ca->bw_hi[1] = bw;
+}
+
+/* The bw max filter covers the last 1-2 cycles. Once per PROBE_BW cycle the
+ * window is flipped: the current slot (1) becomes the old slot (0) and a fresh,
+ * empty current slot is started.
+ */
+static void bbr2_advance_bw_hi_filter(struct sock *sk)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+
+	/* With no samples in the current window, keep remembering the old
+	 * window instead of overwriting it with zero.
+	 */
+	if (ca->bw_hi[1]) {
+		ca->bw_hi[0] = ca->bw_hi[1];
+		ca->bw_hi[1] = 0;
+	}
+}
+
+/* Target inflight: the estimated BDP (at a gain of BBR_UNIT), unless
+ * congestion has cut cwnd below it, in which case cwnd.
+ */
+static u32 bbr2_target_inflight(struct sock *sk)
+{
+	u32 bdp = bbr_inflight(sk, bbr_bw(sk), BBR_UNIT);
+	u32 cwnd = tcp_sk(sk)->snd_cwnd;
+
+	return bdp < cwnd ? bdp : cwnd;
+}
+
+/* True while in STARTUP, or in the REFILL or UP phase of PROBE_BW. */
+static bool bbr2_is_probing_bandwidth(struct sock *sk)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+
+	if (ca->mode == BBR_STARTUP)
+		return true;
+	if (ca->mode != BBR_PROBE_BW)
+		return false;
+
+	return ca->cycle_idx == BBR_BW_PROBE_REFILL ||
+	       ca->cycle_idx == BBR_BW_PROBE_UP;
+}
+
+/* Has more than interval_us passed since cycle_mstamp, the timestamp recorded
+ * when the current phase started?
+ */
+static bool bbr2_has_elapsed_in_phase(const struct sock *sk, u32 interval_us)
+{
+	const struct bbr *ca = inet_csk_ca(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
+	u64 deadline = ca->cycle_mstamp + interval_us;
+
+	return tcp_stamp_us_delta(tp->tcp_mstamp, deadline) > 0;
+}
+
+/* Congestion signals say the queue is too high during STARTUP: mark full bw
+ * as reached and set inflight_hi from the max bw at a gain of BBR_UNIT.
+ */
+static void bbr2_handle_queue_too_high_in_startup(struct sock *sk)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+	u32 max_bw;
+
+	ca->full_bw_reached = 1;
+
+	max_bw = bbr_max_bw(sk);
+	ca->inflight_hi = bbr_inflight(sk, max_bw, BBR_UNIT);
+}
+
+/* Exit STARTUP once full_ecn_cnt consecutive rounds have had a CE mark ratio
+ * of at least ecn_thresh. ce_ratio is the mark ratio of the round that just
+ * ended; a round below the threshold restarts the count.
+ */
+static void bbr2_check_ecn_too_high_in_startup(struct sock *sk, u32 ce_ratio)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+
+	/* Nothing to do once full bw was reached, or if ECN is not in use. */
+	if (bbr_full_bw_reached(sk) || !ca->ecn_eligible ||
+	    !ca->params.full_ecn_cnt || !ca->params.ecn_thresh)
+		return;
+
+	if (ce_ratio < ca->params.ecn_thresh) {
+		/* full_ecn_cnt is non-zero here, so a reset count can never
+		 * satisfy the exit condition.
+		 */
+		ca->startup_ecn_rounds = 0;
+		return;
+	}
+
+	ca->startup_ecn_rounds++;
+	if (ca->startup_ecn_rounds < ca->params.full_ecn_cnt)
+		return;
+
+	ca->debug.event = 'E'; /* ECN caused STARTUP exit */
+	bbr2_handle_queue_too_high_in_startup(sk);
+}
+
+/* Update ecn_alpha, a moving average of the CE mark ratio, from the packets
+ * delivered since the previous update, then pass the new ratio on to the
+ * STARTUP ECN exit check. Does nothing when ecn_factor is 0.
+ */
+static void bbr2_update_ecn_alpha(struct sock *sk)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	s32 newly_delivered, newly_delivered_ce;
+	u64 ce_ratio, new_alpha;
+	u32 gain;
+
+	if (!ca->params.ecn_factor)
+		return;
+
+	newly_delivered = tp->delivered - ca->alpha_last_delivered;
+	newly_delivered_ce = tp->delivered_ce - ca->alpha_last_delivered_ce;
+
+	if (!newly_delivered)	/* avoid divide by zero */
+		return;
+	if (WARN_ON_ONCE(newly_delivered < 0 || newly_delivered_ce < 0))
+		return;		/* counters went backwards? */
+
+	/* See if we should use ECN sender logic for this connection: ECN must
+	 * be enabled and min_rtt must be within ecn_max_rtt_us (an
+	 * ecn_max_rtt_us of 0 disables the RTT limit).
+	 */
+	if (!ca->ecn_eligible && bbr_ecn_enable) {
+		if (ca->min_rtt_us <= ca->params.ecn_max_rtt_us ||
+		    !ca->params.ecn_max_rtt_us)
+			ca->ecn_eligible = 1;
+	}
+
+	/* CE ratio for this interval, in BBR_SCALE fixed point. */
+	ce_ratio = (u64)newly_delivered_ce << BBR_SCALE;
+	do_div(ce_ratio, newly_delivered);
+
+	/* alpha = (1 - gain) * alpha + gain * ce_ratio, capped at BBR_UNIT. */
+	gain = ca->params.ecn_alpha_gain;
+	new_alpha = ((BBR_UNIT - gain) * ca->ecn_alpha) >> BBR_SCALE;
+	new_alpha += (gain * ce_ratio) >> BBR_SCALE;
+	ca->ecn_alpha = min_t(u32, new_alpha, BBR_UNIT);
+
+	/* Remember where this interval ended. */
+	ca->alpha_last_delivered = tp->delivered;
+	ca->alpha_last_delivered_ce = tp->delivered_ce;
+
+	bbr2_check_ecn_too_high_in_startup(sk, ce_ratio);
+}
+
+/* Each round trip of BBR_BW_PROBE_UP, double volume of probing data.
+ * Recomputes bw_probe_up_cnt, the number of ACKed packets needed per
+ * one-packet increase of inflight_hi, as snd_cwnd / 2^bw_probe_up_rounds
+ * (but at least 1), and advances bw_probe_up_rounds, capped at 30.
+ */
+static void bbr2_raise_inflight_hi_slope(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ u32 growth_this_round, cnt;
+
+ /* Calculate "slope": packets S/Acked per inflight_hi increment.
+ * growth_this_round uses the round count from before the increment.
+ */
+ growth_this_round = 1 << bbr->bw_probe_up_rounds;
+ bbr->bw_probe_up_rounds = min(bbr->bw_probe_up_rounds + 1, 30);
+ cnt = tp->snd_cwnd / growth_this_round;
+ cnt = max(cnt, 1U); /* never let the divisor used by callers be 0 */
+ bbr->bw_probe_up_cnt = cnt;
+ bbr->debug.event = 'G'; /* Grow inflight_hi slope */
+}
+
+/* In BBR_BW_PROBE_UP, not seeing high loss/ECN/queue, so raise inflight_hi.
+ * ACKed/SACKed packets accumulate as credits in bw_probe_up_acks; every
+ * bw_probe_up_cnt credits raise inflight_hi by one packet. Credits are
+ * discarded while the flow is not cwnd-limited or cwnd is below inflight_hi.
+ * At a round start the slope is steepened via bbr2_raise_inflight_hi_slope().
+ */
+static void bbr2_probe_inflight_hi_upward(struct sock *sk,
+ const struct rate_sample *rs)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ u32 delta;
+
+ if (!tp->is_cwnd_limited || tp->snd_cwnd < bbr->inflight_hi) {
+ bbr->bw_probe_up_acks = 0; /* don't accumulate unused credits */
+ return; /* not fully using inflight_hi, so don't grow it */
+ }
+
+ /* For each bw_probe_up_cnt packets ACKed, increase inflight_hi by 1.
+ * The remainder of the division stays in bw_probe_up_acks as credit.
+ */
+ bbr->bw_probe_up_acks += rs->acked_sacked;
+ if (bbr->bw_probe_up_acks >= bbr->bw_probe_up_cnt) {
+ delta = bbr->bw_probe_up_acks / bbr->bw_probe_up_cnt;
+ bbr->bw_probe_up_acks -= delta * bbr->bw_probe_up_cnt;
+ bbr->inflight_hi += delta;
+ bbr->debug.event = 'I'; /* Increment inflight_hi */
+ }
+
+ if (bbr->round_start)
+ bbr2_raise_inflight_hi_slope(sk);
+}
+
+/* Does loss/ECN rate for this sample say inflight is "too high"?
+ * This is used by both the bbr2_check_loss_too_high_in_startup() function,
+ * which can be used in either v1 or v2, and the PROBE_UP phase of v2, which
+ * uses it to notice when loss/ECN rates suggest inflight is too high.
+ *
+ * Returns true if rs->lost exceeds the loss_thresh fraction (BBR_SCALE fixed
+ * point) of rs->tx_in_flight, or if the connection is ECN-eligible and
+ * rs->delivered_ce reaches the ecn_thresh fraction of rs->delivered.
+ * Returns false otherwise.
+ */
+static bool bbr2_is_inflight_too_high(const struct sock *sk,
+ const struct rate_sample *rs)
+{
+ const struct bbr *bbr = inet_csk_ca(sk);
+ u32 loss_thresh, ecn_thresh;
+
+ if (rs->lost > 0 && rs->tx_in_flight) {
+ loss_thresh = (u64)rs->tx_in_flight * bbr->params.loss_thresh >>
+ BBR_SCALE;
+ if (rs->lost > loss_thresh) /* strictly above the loss budget */
+ return true;
+ }
+
+ if (rs->delivered_ce > 0 && rs->delivered > 0 &&
+ bbr->ecn_eligible && bbr->params.ecn_thresh) {
+ ecn_thresh = (u64)rs->delivered * bbr->params.ecn_thresh >>
+ BBR_SCALE;
+ if (rs->delivered_ce >= ecn_thresh) /* at or above the ECN budget */
+ return true;
+ }
+
+ return false;
+}
+
+/* Calculate the tx_in_flight level that corresponded to excessive loss.
+ * We find "lost_prefix" segs of the skb where loss rate went too high,
+ * by solving for "lost_prefix" in the following equation:
+ * lost / inflight >= loss_thresh
+ * (lost_prev + lost_prefix) / (inflight_prev + lost_prefix) >= loss_thresh
+ * Then we take that equation, convert it to fixed point, and
+ * round up to the nearest packet.
+ *
+ * Returns inflight_prev + lost_prefix, or ~0U if one of the sanity checks
+ * (negative inflight_prev or lost_prev, zero divisor) fires.
+ */
+static u32 bbr2_inflight_hi_from_lost_skb(const struct sock *sk,
+ const struct rate_sample *rs,
+ const struct sk_buff *skb)
+{
+ const struct bbr *bbr = inet_csk_ca(sk);
+ u32 loss_thresh = bbr->params.loss_thresh;
+ u32 pcount, divisor, inflight_hi;
+ s32 inflight_prev, lost_prev;
+ u64 loss_budget, lost_prefix;
+
+ pcount = tcp_skb_pcount(skb);
+
+ /* How much data was in flight before this skb? */
+ inflight_prev = rs->tx_in_flight - pcount;
+ if (WARN_ONCE(inflight_prev < 0,
+ "tx_in_flight: %u pcount: %u reneg: %u",
+ rs->tx_in_flight, pcount, tcp_sk(sk)->is_sack_reneg))
+ return ~0U;
+
+ /* How much inflight data was marked lost before this skb? */
+ lost_prev = rs->lost - pcount;
+ if (WARN_ON_ONCE(lost_prev < 0))
+ return ~0U;
+
+ /* At what prefix of this lost skb did loss rate exceed loss_thresh?
+ * loss_budget is inflight_prev * loss_thresh, rounded up to a packet.
+ */
+ loss_budget = (u64)inflight_prev * loss_thresh + BBR_UNIT - 1;
+ loss_budget >>= BBR_SCALE;
+ if (lost_prev >= loss_budget) {
+ lost_prefix = 0; /* previous losses crossed loss_thresh */
+ } else {
+ lost_prefix = loss_budget - lost_prev;
+ lost_prefix <<= BBR_SCALE;
+ divisor = BBR_UNIT - loss_thresh;
+ if (WARN_ON_ONCE(!divisor)) /* loss_thresh is 8 bits */
+ return ~0U;
+ do_div(lost_prefix, divisor);
+ }
+
+ inflight_hi = inflight_prev + lost_prefix;
+ return inflight_hi;
+}
+
+/* If loss/ECN rates during probing indicated we may have overfilled a
+ * buffer, return an operating point that tries to leave unutilized headroom in
+ * the path for other flows, for fairness convergence and lower RTTs and loss.
+ *
+ * Returns ~0U while inflight_hi is unset (~0U). Otherwise returns inflight_hi
+ * minus the inflight_headroom fraction of it (at least one packet of
+ * headroom), but not less than cwnd_min_target.
+ */
+static u32 bbr2_inflight_with_headroom(const struct sock *sk)
+{
+ struct bbr *bbr = inet_csk_ca(sk);
+ u32 headroom, headroom_fraction;
+
+ if (bbr->inflight_hi == ~0U)
+ return ~0U;
+
+ headroom_fraction = bbr->params.inflight_headroom;
+ headroom = ((u64)bbr->inflight_hi * headroom_fraction) >> BBR_SCALE;
+ headroom = max(headroom, 1U);
+ return max_t(s32, bbr->inflight_hi - headroom,
+ bbr->params.cwnd_min_target); /* compared as signed values */
+}
+
+/* Bound cwnd to a sensible level, based on our current probing state
+ * machine phase and model of a good inflight level (inflight_lo, inflight_hi).
+ *
+ * The cap starts from inflight_hi in the non-CRUISE phases of PROBE_BW, from
+ * the headroom-adjusted inflight_hi in PROBE_RTT and PROBE_BW CRUISE, and is
+ * unlimited otherwise. It is then lowered to inflight_lo, raised to at least
+ * cwnd_min_target, and snd_cwnd is reduced to it if above.
+ */
+static void bbr2_bound_cwnd_for_inflight_model(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ u32 cap;
+
+ /* tcp_rcv_synsent_state_process() currently calls tcp_ack()
+ * and thus cong_control() without first initializing us(!).
+ */
+ if (!bbr->initialized)
+ return;
+
+ cap = ~0U; /* ~0U means "no cap" */
+ if (bbr->mode == BBR_PROBE_BW &&
+ bbr->cycle_idx != BBR_BW_PROBE_CRUISE) {
+ /* Probe to see if more packets fit in the path. */
+ cap = bbr->inflight_hi;
+ } else {
+ if (bbr->mode == BBR_PROBE_RTT ||
+ (bbr->mode == BBR_PROBE_BW &&
+ bbr->cycle_idx == BBR_BW_PROBE_CRUISE))
+ cap = bbr2_inflight_with_headroom(sk);
+ }
+ /* Adapt to any loss/ECN since our last bw probe. */
+ cap = min(cap, bbr->inflight_lo);
+
+ cap = max_t(u32, cap, bbr->params.cwnd_min_target);
+ tp->snd_cwnd = min(cap, tp->snd_cwnd); /* only ever lowers cwnd */
+}
+
+/* Estimate a short-term lower bound on the capacity available now, based
+ * on measurements of the current delivery process and recent history. When we
+ * are seeing loss/ECN at times when we are not probing bw, then conservatively
+ * move toward flow balance by multiplicatively cutting our short-term
+ * estimated safe rate and volume of data (bw_lo and inflight_lo). We use a
+ * multiplicative decrease in order to converge to a lower capacity in time
+ * logarithmic in the magnitude of the decrease.
+ *
+ * However, we do not cut our short-term estimates lower than the current rate
+ * and volume of delivered data from this round trip, since from the current
+ * delivery process we can estimate the measured capacity available now.
+ *
+ * Anything faster than that approach would knowingly risk high loss, which can
+ * cause low bw for Reno/CUBIC and high loss recovery latency for
+ * request/response flows using any congestion control.
+ */
+static void bbr2_adapt_lower_bounds(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ u32 ecn_cut, ecn_inflight_lo, beta;
+
+ /* We only use lower-bound estimates when not probing bw.
+ * When probing we need to push inflight higher to probe bw.
+ */
+ if (bbr2_is_probing_bandwidth(sk))
+ return;
+
+ /* ECN response: computes a candidate bound in ecn_inflight_lo, which
+ * is only applied by the final min() at the end of the function.
+ */
+ if (bbr->ecn_in_round && bbr->ecn_eligible && bbr->params.ecn_factor) {
+ /* Reduce inflight to (1 - alpha*ecn_factor). */
+ ecn_cut = (BBR_UNIT -
+ ((bbr->ecn_alpha * bbr->params.ecn_factor) >>
+ BBR_SCALE));
+ if (bbr->inflight_lo == ~0U) /* unset: start from current cwnd */
+ bbr->inflight_lo = tp->snd_cwnd;
+ ecn_inflight_lo = (u64)bbr->inflight_lo * ecn_cut >> BBR_SCALE;
+ } else {
+ ecn_inflight_lo = ~0U; /* no ECN-implied bound this round */
+ }
+
+ /* Loss response: cut both bounds, but never below the rate and volume
+ * measured in the latest round (bw_latest, inflight_latest).
+ */
+ if (bbr->loss_in_round) {
+ /* Reduce bw and inflight to (1 - beta). */
+ if (bbr->bw_lo == ~0U)
+ bbr->bw_lo = bbr_max_bw(sk);
+ if (bbr->inflight_lo == ~0U)
+ bbr->inflight_lo = tp->snd_cwnd;
+ beta = bbr->params.beta;
+ bbr->bw_lo =
+ max_t(u32, bbr->bw_latest,
+ (u64)bbr->bw_lo *
+ (BBR_UNIT - beta) >> BBR_SCALE);
+ bbr->inflight_lo =
+ max_t(u32, bbr->inflight_latest,
+ (u64)bbr->inflight_lo *
+ (BBR_UNIT - beta) >> BBR_SCALE);
+ }
+
+ /* Adjust to the lower of the levels implied by loss or ECN. */
+ bbr->inflight_lo = min(bbr->inflight_lo, ecn_inflight_lo);
+}
+
+/* Forget any short-term lower-bound adaptation to congestion by marking both
+ * bw_lo and inflight_lo as unset (~0U), so that inflight can be pushed up.
+ */
+static void bbr2_reset_lower_bounds(struct sock *sk)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+	const u32 unset = ~0U;
+
+	ca->inflight_lo = unset;
+	ca->bw_lo = unset;
+}
+
+/* Called after bw probing (STARTUP/PROBE_UP), before entering a state machine
+ * phase that adapts the lower bounds to congestion signals: clear all the
+ * recorded signals so that the new phase starts from a clean slate.
+ */
+static void bbr2_reset_congestion_signals(struct sock *sk)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+
+	/* Loss flags, per round and per cycle. */
+	ca->loss_in_round = 0;
+	ca->loss_in_cycle = 0;
+
+	/* ECN flags, per round and per cycle. */
+	ca->ecn_in_round = 0;
+	ca->ecn_in_cycle = 0;
+
+	/* Latest-round delivery volume and rate. */
+	ca->inflight_latest = 0;
+	ca->bw_latest = 0;
+}
+
+/* Update (most of) our congestion signals: track the recent rate and volume of
+ * delivered data, presence of loss, and EWMA degree of ECN marking.
+ *
+ * loss_round_start is cleared on every call and set to 1 only when this
+ * sample begins a new loss round; in that case the lower bounds are adapted
+ * and the per-round filters are restarted from this sample.
+ */
+static void bbr2_update_congestion_signals(
+ struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ u64 bw;
+
+ bbr->loss_round_start = 0;
+ if (rs->interval_us <= 0 || !rs->acked_sacked)
+ return; /* Not a valid observation */
+ bw = ctx->sample_bw;
+
+ if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) /* app-limited samples only count if they raise the max */
+ bbr2_take_bw_hi_sample(sk, bw);
+
+ bbr->loss_in_round |= (rs->losses > 0);
+
+ /* Update rate and volume of delivered data from latest round trip: */
+ bbr->bw_latest = max_t(u32, bbr->bw_latest, ctx->sample_bw);
+ bbr->inflight_latest = max_t(u32, bbr->inflight_latest, rs->delivered);
+
+ if (before(rs->prior_delivered, bbr->loss_round_delivered))
+ return; /* skip the per-round-trip updates */
+ /* Now do per-round-trip updates. */
+ bbr->loss_round_delivered = tp->delivered; /* mark round trip */
+ bbr->loss_round_start = 1;
+ bbr2_adapt_lower_bounds(sk); /* uses the loss/ECN flags before they are cleared below */
+
+ /* Update windowed "latest" (single-round-trip) filters. */
+ bbr->loss_in_round = 0;
+ bbr->ecn_in_round = 0;
+ bbr->bw_latest = ctx->sample_bw;
+ bbr->inflight_latest = rs->delivered;
+}
+
+/* Bandwidth probing can cause loss. To coexist with loss-based congestion
+ * control, probes are spaced out in a Reno-conscious way: given the shape of
+ * the Reno sawtooth, an idealized Reno flow needs a number of round trips
+ * between loss epochs equal to its BDP. Packet-timed round trips are counted
+ * directly, since measured RTT can vary widely and Reno itself is driven by
+ * packet-timed round trips.
+ *
+ * Returns true once rounds_since_probe has reached the round-trip bound.
+ */
+static bool bbr2_is_reno_coexistence_probe_time(struct sock *sk)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+	/* Random loss can shave some small percentage off of our inflight
+	 * in each round. To survive this, flows need robust periodic probes,
+	 * hence the fixed upper bound on the wait.
+	 */
+	u32 limit = ca->params.bw_probe_max_rounds;
+	u32 gain = ca->params.bw_probe_reno_gain;
+
+	if (gain) {
+		u32 reno_rounds;
+
+		/* Reno-equivalent wait: target inflight scaled by the gain. */
+		reno_rounds = ((u64)bbr2_target_inflight(sk) * gain) >>
+			      BBR_SCALE;
+		if (reno_rounds < limit)
+			limit = reno_rounds;
+	}
+
+	return ca->rounds_since_probe >= limit;
+}
+
+/* How long do we want to wait before probing for bandwidth (and risking
+ * loss)? The wait is randomized, for better mixing and fairness convergence.
+ *
+ * The Reno-coexistence inter-bw-probe time is bounded to 62-63 round trips.
+ * This is calculated to allow fairness with a 25Mbps, 30ms Reno flow
+ * (e.g. 4K video to a broadband user):
+ * BDP = 25Mbps * .030sec / (1514bytes) = 61.9 packets
+ *
+ * The BBR-native inter-bw-probe wall clock time is bounded to be:
+ * (a) higher than 2 sec: to try to avoid causing loss for a long enough time
+ *     to allow Reno at 30ms to get 4K video bw, the inter-bw-probe time must
+ *     be at least: 25Mbps * .030sec / (1514bytes) * 0.030sec = 1.9secs
+ * (b) lower than 3 sec: to ensure flows can start probing in a reasonable
+ *     amount of time to discover unutilized bw on human-scale interactive
+ *     time-scales (e.g. perhaps traffic from a web page download that we
+ *     were competing with is now complete).
+ */
+static void bbr2_pick_probe_wait(struct sock *sk)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+	u32 rand_rounds, rand_us;
+
+	/* Random round-trip bound: seed the round counter with a random
+	 * head start below bw_probe_rand_rounds.
+	 */
+	rand_rounds = prandom_u32_max(ca->params.bw_probe_rand_rounds);
+	ca->rounds_since_probe = rand_rounds;
+
+	/* Random wall clock bound: fixed base plus a random extra. */
+	rand_us = prandom_u32_max(ca->params.bw_probe_rand_us);
+	ca->probe_wait_us = ca->params.bw_probe_base_us + rand_us;
+}
+
+/* Record cycle_idx as the current cycle phase. */
+static void bbr2_set_cycle_idx(struct sock *sk, int cycle_idx)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+
+	/* A new phase means cwnd and pacing rate must be recomputed, so the
+	 * fast path must not be taken.
+	 */
+	ca->try_fast_path = 0;
+	ca->cycle_idx = cycle_idx;
+}
+
+/* Send at estimated bw to fill the pipe, but not queue. We need this phase
+ * before PROBE_UP, because as soon as we send faster than the available bw
+ * we will start building a queue, and if the buffer is shallow we can cause
+ * loss. If we do not fill the pipe before we cause this loss, our bw_hi and
+ * inflight_hi estimates will underestimate.
+ *
+ * bw_probe_up_rounds is stored as the starting round count, which
+ * bbr2_raise_inflight_hi_slope() later uses as the exponent of its growth.
+ */
+static void bbr2_start_bw_probe_refill(struct sock *sk, u32 bw_probe_up_rounds)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+
+ bbr2_reset_lower_bounds(sk);
+ if (bbr->inflight_hi != ~0U) /* leave an unset inflight_hi unset */
+ bbr->inflight_hi += bbr->params.refill_add_inc;
+ bbr->bw_probe_up_rounds = bw_probe_up_rounds;
+ bbr->bw_probe_up_acks = 0;
+ bbr->stopped_risky_probe = 0;
+ bbr->ack_phase = BBR_ACKS_REFILLING;
+ bbr->next_rtt_delivered = tp->delivered;
+ bbr2_set_cycle_idx(sk, BBR_BW_PROBE_REFILL);
+}
+
+/* Enter BBR_BW_PROBE_UP: probe for the max deliverable data rate and volume.
+ * Stamps the phase start, switches the cycle phase, and sets the initial
+ * inflight_hi growth slope.
+ */
+static void bbr2_start_bw_probe_up(struct sock *sk)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	ca->cycle_mstamp = tp->tcp_mstamp;	/* phase start time */
+	ca->next_rtt_delivered = tp->delivered;
+	ca->ack_phase = BBR_ACKS_PROBE_STARTING;
+
+	bbr2_set_cycle_idx(sk, BBR_BW_PROBE_UP);
+	bbr2_raise_inflight_hi_slope(sk);
+}
+
+/* Start a new PROBE_BW probing cycle of some wall clock length. Pick a wall
+ * clock time at which to probe beyond an inflight that we think to be
+ * safe. This will knowingly risk packet loss, so we want to do this rarely, to
+ * keep packet loss rates low. Also start a round-trip counter, to probe faster
+ * if we estimate a Reno flow at our BDP would probe faster.
+ *
+ * Also clears the recorded congestion signals and enters BBR_BW_PROBE_DOWN.
+ */
+static void bbr2_start_bw_probe_down(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+
+ bbr2_reset_congestion_signals(sk);
+ bbr->bw_probe_up_cnt = ~0U; /* not growing inflight_hi any more */
+ bbr2_pick_probe_wait(sk); /* sets probe_wait_us and rounds_since_probe */
+ bbr->cycle_mstamp = tp->tcp_mstamp; /* start wall clock */
+ bbr->ack_phase = BBR_ACKS_PROBE_STOPPING;
+ bbr->next_rtt_delivered = tp->delivered;
+ bbr2_set_cycle_idx(sk, BBR_BW_PROBE_DOWN);
+}
+
+/* Enter BBR_BW_PROBE_CRUISE: hold what we estimate to be a neutral,
+ * conservative operating point, neither probing up for bandwidth nor down for
+ * RTT, and only reducing inflight in response to loss/ECN signals.
+ */
+static void bbr2_start_bw_probe_cruise(struct sock *sk)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+
+	/* If a lower bound is set, keep it from exceeding inflight_hi. */
+	if (ca->inflight_lo != ~0U && ca->inflight_hi < ca->inflight_lo)
+		ca->inflight_lo = ca->inflight_hi;
+
+	bbr2_set_cycle_idx(sk, BBR_BW_PROBE_CRUISE);
+}
+
+/* Loss and/or ECN rate is too high while probing.
+ * Adapt (once per bw probe) by cutting inflight_hi and then restarting cycle.
+ *
+ * inflight_hi becomes the larger of rs->tx_in_flight and (1 - beta) times the
+ * target inflight, unless the sample is app-limited. If currently in
+ * PROBE_BW's PROBE_UP phase, the cycle restarts in PROBE_DOWN.
+ */
+static void bbr2_handle_inflight_too_high(struct sock *sk,
+ const struct rate_sample *rs)
+{
+ struct bbr *bbr = inet_csk_ca(sk);
+ const u32 beta = bbr->params.beta;
+
+ bbr->prev_probe_too_high = 1;
+ bbr->bw_probe_samples = 0; /* only react once per probe */
+ bbr->debug.event = 'L'; /* Loss/ECN too high */
+ /* If we are app-limited then we are not robustly
+ * probing the max volume of inflight data we think
+ * might be safe (analogous to how app-limited bw
+ * samples are not known to be robustly probing bw).
+ */
+ if (!rs->is_app_limited)
+ bbr->inflight_hi = max_t(u32, rs->tx_in_flight,
+ (u64)bbr2_target_inflight(sk) *
+ (BBR_UNIT - beta) >> BBR_SCALE);
+ if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP)
+ bbr2_start_bw_probe_down(sk);
+}
+
+/* If we're seeing bw and loss samples reflecting our bw probing, adapt
+ * using the signals we see. If loss or ECN mark rate gets too high, then adapt
+ * inflight_hi downward. If we're able to push inflight higher without such
+ * signals, push higher: adapt inflight_hi upward.
+ *
+ * Returns true only when this call itself decided a state transition (a
+ * re-probe that restarts the cycle in REFILL); returns false otherwise.
+ */
+static bool bbr2_adapt_upper_bounds(struct sock *sk,
+ const struct rate_sample *rs)
+{
+ struct bbr *bbr = inet_csk_ca(sk);
+
+ /* Track when we'll see bw/loss samples resulting from our bw probes. */
+ if (bbr->ack_phase == BBR_ACKS_PROBE_STARTING && bbr->round_start)
+ bbr->ack_phase = BBR_ACKS_PROBE_FEEDBACK;
+ if (bbr->ack_phase == BBR_ACKS_PROBE_STOPPING && bbr->round_start) {
+ /* End of samples from bw probing phase. */
+ bbr->bw_probe_samples = 0;
+ bbr->ack_phase = BBR_ACKS_INIT;
+ /* At this point in the cycle, our current bw sample is also
+ * our best recent chance at finding the highest available bw
+ * for this flow. So now is the best time to forget the bw
+ * samples from the previous cycle, by advancing the window.
+ */
+ if (bbr->mode == BBR_PROBE_BW && !rs->is_app_limited)
+ bbr2_advance_bw_hi_filter(sk);
+ /* If we had an inflight_hi, then probed and pushed inflight all
+ * the way up to hit that inflight_hi without seeing any
+ * high loss/ECN in all the resulting ACKs from that probing,
+ * then probe up again, this time letting inflight persist at
+ * inflight_hi for a round trip, then accelerating beyond.
+ */
+ if (bbr->mode == BBR_PROBE_BW &&
+ bbr->stopped_risky_probe && !bbr->prev_probe_too_high) {
+ bbr->debug.event = 'R'; /* reprobe */
+ bbr2_start_bw_probe_refill(sk, 0);
+ return true; /* yes, decided state transition */
+ }
+ }
+
+ if (bbr2_is_inflight_too_high(sk, rs)) {
+ if (bbr->bw_probe_samples) /* sample is from bw probing? */
+ bbr2_handle_inflight_too_high(sk, rs);
+ } else {
+ /* Loss/ECN rate is declared safe. Adjust upper bound upward. */
+ if (bbr->inflight_hi == ~0U) /* no excess queue signals yet? */
+ return false;
+
+ /* To be resilient to random loss, we must raise inflight_hi
+ * if we observe in any phase that a higher level is safe.
+ */
+ if (rs->tx_in_flight > bbr->inflight_hi) {
+ bbr->inflight_hi = rs->tx_in_flight;
+ bbr->debug.event = 'U'; /* raise up inflight_hi */
+ }
+
+ if (bbr->mode == BBR_PROBE_BW &&
+ bbr->cycle_idx == BBR_BW_PROBE_UP)
+ bbr2_probe_inflight_hi_upward(sk, rs);
+ }
+
+ return false;
+}
+
+/* Check if it's time to probe for bandwidth now, and if so, kick it off.
+ *
+ * Returns true if a transition to BBR_BW_PROBE_REFILL was started, false if
+ * the current phase should simply continue.
+ */
+static bool bbr2_check_time_to_probe_bw(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	u64 reprobe_inflight;
+	u32 n;
+
+	/* If we seem to be at an operating point where we are not seeing loss
+	 * but we are seeing ECN marks, then when the ECN marks cease we reprobe
+	 * quickly (in case a burst of cross-traffic has ceased and freed up bw,
+	 * or in case we are sharing with multiplicatively probing traffic).
+	 */
+	if (bbr->params.ecn_reprobe_gain && bbr->ecn_eligible &&
+	    bbr->ecn_in_cycle && !bbr->loss_in_cycle &&
+	    inet_csk(sk)->icsk_ca_state == TCP_CA_Open) {
+		bbr->debug.event = 'A'; /* *A*ll clear to probe *A*gain */
+		/* Calculate n so that when bbr2_raise_inflight_hi_slope()
+		 * computes growth_this_round as 2^n it will be roughly the
+		 * desired volume of data (inflight_hi*ecn_reprobe_gain).
+		 */
+		reprobe_inflight = ((u64)bbr->inflight_hi *
+				    bbr->params.ecn_reprobe_gain) >> BBR_SCALE;
+		/* A small inflight_hi can scale down to 0 here. ilog2(0) does
+		 * not yield a usable exponent (the run-time result wraps to a
+		 * huge u32), so start from the gentlest growth, 2^0, instead.
+		 */
+		n = reprobe_inflight ? ilog2(reprobe_inflight) : 0;
+		bbr2_start_bw_probe_refill(sk, n);
+		return true;
+	}
+
+	/* Otherwise probe once either the wall clock wait or the
+	 * Reno-coexistence round-trip wait has run out.
+	 */
+	if (bbr2_has_elapsed_in_phase(sk, bbr->probe_wait_us) ||
+	    bbr2_is_reno_coexistence_probe_time(sk)) {
+		bbr2_start_bw_probe_refill(sk, 0);
+		return true;
+	}
+	return false;
+}
+
+/* Is it time to transition from PROBE_DOWN to PROBE_CRUISE?
+ * inflight is the current inflight estimate and bw the bandwidth used to
+ * compute the BDP it is compared against.
+ */
+static bool bbr2_check_time_to_cruise(struct sock *sk, u32 inflight, u32 bw)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+
+	/* Always need to pull inflight down to leave headroom in queue. */
+	if (inflight > bbr2_inflight_with_headroom(sk))
+		return false;
+
+	/* At or below the estimated BDP: ready to cruise in either mode. */
+	if (inflight <= bbr_inflight(sk, bw, BBR_UNIT))
+		return true;
+
+	/* With drain_to_target, only reaching the BDP ends PROBE_DOWN. */
+	if (ca->params.drain_to_target)
+		return false;
+
+	/* Otherwise, having spent min_rtt in this phase is enough. */
+	return bbr2_has_elapsed_in_phase(sk, ca->min_rtt_us);
+}
+
+/* PROBE_BW state machine: cruise, refill, probe for bw, or drain?
+ * Does nothing until full bw has been reached. The upper bounds are adapted
+ * in any mode; the phase switch below only runs in BBR_PROBE_BW.
+ */
+static void bbr2_update_cycle_phase(struct sock *sk,
+ const struct rate_sample *rs)
+{
+ struct bbr *bbr = inet_csk_ca(sk);
+ bool is_risky = false, is_queuing = false;
+ u32 inflight, bw;
+
+ if (!bbr_full_bw_reached(sk))
+ return;
+
+ /* In DRAIN, PROBE_BW, or PROBE_RTT, adjust upper bounds. */
+ if (bbr2_adapt_upper_bounds(sk, rs))
+ return; /* already decided state transition */
+
+ if (bbr->mode != BBR_PROBE_BW)
+ return;
+
+ inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight);
+ bw = bbr_max_bw(sk);
+
+ switch (bbr->cycle_idx) {
+ /* First we spend most of our time cruising with a pacing_gain of 1.0,
+ * which paces at the estimated bw, to try to fully use the pipe
+ * without building queue. If we encounter loss/ECN marks, we adapt
+ * by slowing down.
+ */
+ case BBR_BW_PROBE_CRUISE:
+ if (bbr2_check_time_to_probe_bw(sk))
+ return; /* already decided state transition */
+ break;
+
+ /* After cruising, when it's time to probe, we first "refill": we send
+ * at the estimated bw to fill the pipe, before probing higher and
+ * knowingly risking overflowing the bottleneck buffer (causing loss).
+ */
+ case BBR_BW_PROBE_REFILL:
+ if (bbr->round_start) {
+ /* After one full round trip of sending in REFILL, we
+ * start to see bw samples reflecting our REFILL, which
+ * may be putting too much data in flight.
+ */
+ bbr->bw_probe_samples = 1;
+ bbr2_start_bw_probe_up(sk);
+ }
+ break;
+
+ /* After we refill the pipe, we probe by using a pacing_gain > 1.0, to
+ * probe for bw. If we have not seen loss/ECN, we try to raise inflight
+ * to at least pacing_gain*BDP; note that this may take more than
+ * min_rtt if min_rtt is small (e.g. on a LAN).
+ *
+ * We terminate PROBE_UP bandwidth probing upon any of the following:
+ *
+ * (1) We've pushed inflight up to hit the inflight_hi target set in the
+ * most recent previous bw probe phase. Thus we want to start
+ * draining the queue immediately because it's very likely the most
+ * recently sent packets will fill the queue and cause drops.
+ * (checked here)
+ * (2) We have probed for at least 1*min_rtt_us, and the
+ * estimated queue is high enough (inflight > 1.25 * estimated_bdp).
+ * (checked here)
+ * (3) Loss filter says loss rate is "too high".
+ * (checked in bbr2_is_inflight_too_high())
+ * (4) ECN filter says ECN mark rate is "too high".
+ * (checked in bbr2_is_inflight_too_high())
+ */
+ case BBR_BW_PROBE_UP:
+ if (bbr->prev_probe_too_high &&
+ inflight >= bbr->inflight_hi) {
+ bbr->stopped_risky_probe = 1;
+ is_risky = true;
+ bbr->debug.event = 'D'; /* D for danger */
+ } else if (bbr2_has_elapsed_in_phase(sk, bbr->min_rtt_us) &&
+ inflight >=
+ bbr_inflight(sk, bw,
+ bbr->params.bw_probe_pif_gain)) {
+ is_queuing = true;
+ bbr->debug.event = 'Q'; /* building Queue */
+ }
+ if (is_risky || is_queuing) {
+ bbr->prev_probe_too_high = 0; /* no loss/ECN (yet) */
+ bbr2_start_bw_probe_down(sk); /* restart w/ down */
+ }
+ break;
+
+ /* After probing in PROBE_UP, we have usually accumulated some data in
+ * the bottleneck buffer (if bw probing didn't find more bw). We next
+ * enter PROBE_DOWN to try to drain any excess data from the queue. To
+ * do this, we use a pacing_gain < 1.0. We hold this pacing gain until
+ * our inflight is less than that target cruising point, which is the
+ * minimum of (a) the amount needed to leave headroom, and (b) the
+ * estimated BDP. Once inflight falls to match the target, we estimate
+ * the queue is drained; persisting would underutilize the pipe.
+ */
+ case BBR_BW_PROBE_DOWN:
+ if (bbr2_check_time_to_probe_bw(sk))
+ return; /* already decided state transition */
+ if (bbr2_check_time_to_cruise(sk, inflight, bw))
+ bbr2_start_bw_probe_cruise(sk);
+ break;
+
+ default:
+ WARN_ONCE(1, "BBR invalid cycle index %u\n", bbr->cycle_idx);
+ }
+}
+
+/* Leave PROBE_RTT and go back to bandwidth probing: STARTUP if full bw has
+ * not been reached yet, PROBE_BW otherwise. The lower bounds are reset in
+ * both cases.
+ */
+static void bbr2_exit_probe_rtt(struct sock *sk)
+{
+	struct bbr *ca = inet_csk_ca(sk);
+
+	bbr2_reset_lower_bounds(sk);
+
+	if (!bbr_full_bw_reached(sk)) {
+		ca->mode = BBR_STARTUP;
+		return;
+	}
+
+	ca->mode = BBR_PROBE_BW;
+	/* Raising inflight after PROBE_RTT may cause loss, so reset
+	 * the PROBE_BW clock and schedule the next bandwidth probe for
+	 * a friendly and randomized future point in time.
+	 */
+	bbr2_start_bw_probe_down(sk);
+	/* Since we are exiting PROBE_RTT, we know inflight is
+	 * below our estimated BDP, so it is reasonable to cruise.
+	 */
+	bbr2_start_bw_probe_cruise(sk);
+}
+
+/* Exit STARTUP based on loss rate > 1% and loss gaps in round >= N. Wait until
+ * the end of the round in recovery to get a good estimate of how many packets
+ * have been lost, and how many we need to drain with a low pacing rate.
+ *
+ * loss_events_in_round counts ACKs reporting losses within the current loss
+ * round; it saturates at 0xf and is cleared at each loss round start that
+ * does not trigger the exit.
+ */
+static void bbr2_check_loss_too_high_in_startup(struct sock *sk,
+ const struct rate_sample *rs)
+{
+ struct bbr *bbr = inet_csk_ca(sk);
+
+ if (bbr_full_bw_reached(sk))
+ return;
+
+ /* For STARTUP exit, check the loss rate at the end of each round trip
+ * of Recovery episodes in STARTUP. We check the loss rate at the end
+ * of the round trip to filter out noisy/low loss and have a better
+ * sense of inflight (extent of loss), so we can drain more accurately.
+ */
+ if (rs->losses && bbr->loss_events_in_round < 0xf)
+ bbr->loss_events_in_round++; /* update saturating counter */
+ if (bbr->params.full_loss_cnt && bbr->loss_round_start &&
+ inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery &&
+ bbr->loss_events_in_round >= bbr->params.full_loss_cnt &&
+ bbr2_is_inflight_too_high(sk, rs)) {
+ bbr->debug.event = 'P'; /* Packet loss caused STARTUP exit */
+ bbr2_handle_queue_too_high_in_startup(sk);
+ return;
+ }
+ if (bbr->loss_round_start)
+ bbr->loss_events_in_round = 0; /* new round: restart the count */
+}
+
+/* Once bbr_check_drain() reports that draining is done, advance into steady
+ * state operation: switch to PROBE_BW, starting the cycle in PROBE_DOWN.
+ */
+static void bbr2_check_drain(struct sock *sk, const struct rate_sample *rs,
+			     struct bbr_context *ctx)
+{
+	struct bbr *ca;
+
+	if (!bbr_check_drain(sk, rs, ctx))
+		return;
+
+	ca = inet_csk_ca(sk);
+	ca->mode = BBR_PROBE_BW;
+	bbr2_start_bw_probe_down(sk);
+}
+
+static void bbr2_update_model(struct sock *sk, const struct rate_sample *rs,
+ struct bbr_context *ctx)
+{
+ bbr2_update_congestion_signals(sk, rs, ctx); /* sets loss_round_start for this ACK */
+ bbr_update_ack_aggregation(sk, rs);
+ bbr2_check_loss_too_high_in_startup(sk, rs); /* reads loss_round_start, so must follow the signal update */
+ bbr_check_full_bw_reached(sk, rs);
+ bbr2_check_drain(sk, rs, ctx); /* may switch mode to PROBE_BW */
+ bbr2_update_cycle_phase(sk, rs); /* no-op until full bw is reached */
+ bbr_update_min_rtt(sk, rs);
+}
+
+/* Fast path for app-limited case.
+ *
+ * On each ack, we execute bbr state machine, which primarily consists of:
+ * 1) update model based on new rate sample, and
+ * 2) update control based on updated model or state change.
+ *
+ * There are certain workload/scenarios, e.g. app-limited case, where
+ * either we can skip updating model or we can skip update of both model
+ * as well as control. This provides significant softirq cpu savings for
+ * processing incoming acks.
+ *
+ * In case of app-limited, if there is no congestion (loss/ecn) and
+ * if observed bw sample is less than current estimated bw, then we can
+ * skip some of the computation in bbr state processing:
+ *
+ * - if there is no rtt/mode/phase change: In this case, since all the
+ * parameters of the network model are constant, we can skip model
+ * as well as control update.
+ *
+ * - else we can skip rest of the model update. But we still need to
+ * update the control to account for the new rtt/mode/phase.
+ *
+ * Returns whether we can take fast path or not. When false is returned after
+ * the fast-path conditions were met, *update_model is set to false so that
+ * the caller skips the model update; otherwise *update_model is untouched.
+ */
+static bool bbr2_fast_path(struct sock *sk, bool *update_model,
+ const struct rate_sample *rs, struct bbr_context *ctx)
+{
+ struct bbr *bbr = inet_csk_ca(sk);
+ u32 prev_min_rtt_us, prev_mode;
+
+ if (bbr->params.fast_path && bbr->try_fast_path &&
+ rs->is_app_limited && ctx->sample_bw < bbr_max_bw(sk) &&
+ !bbr->loss_in_round && !bbr->ecn_in_round) {
+ prev_mode = bbr->mode;
+ prev_min_rtt_us = bbr->min_rtt_us;
+ bbr2_check_drain(sk, rs, ctx);
+ bbr2_update_cycle_phase(sk, rs); /* a phase change clears try_fast_path */
+ bbr_update_min_rtt(sk, rs);
+
+ if (bbr->mode == prev_mode &&
+ bbr->min_rtt_us == prev_min_rtt_us &&
+ bbr->try_fast_path)
+ return true;
+
+ /* Skip model update, but control still needs to be updated */
+ *update_model = false;
+ }
+ return false;
+}
+
+static void bbr2_main(struct sock *sk, const struct rate_sample *rs)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ struct bbr_context ctx = { 0 };
+ bool update_model = true; /* may be cleared by bbr2_fast_path() */
+ u32 bw;
+
+ bbr->debug.event = '.'; /* init to default NOP (no event yet) */
+
+ bbr_update_round_start(sk, rs, &ctx);
+ if (bbr->round_start) {
+ bbr->rounds_since_probe =
+ min_t(s32, bbr->rounds_since_probe + 1, 0xFF); /* cap at 255 */
+ bbr2_update_ecn_alpha(sk);
+ }
+
+ bbr->ecn_in_round |= rs->is_ece;
+ bbr_calculate_bw_sample(sk, rs, &ctx);
+
+ if (bbr2_fast_path(sk, &update_model, rs, &ctx))
+ goto out; /* skip both the model and the control updates */
+
+ if (update_model)
+ bbr2_update_model(sk, rs, &ctx);
+
+ bbr_update_gains(sk);
+ bw = bbr_bw(sk);
+ bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
+ bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain,
+ tp->snd_cwnd, &ctx);
+ bbr2_bound_cwnd_for_inflight_model(sk);
+
+out: /* reached on both the fast path and the full path */
+ bbr->prev_ca_state = inet_csk(sk)->icsk_ca_state;
+ bbr->loss_in_cycle |= rs->lost > 0;
+ bbr->ecn_in_cycle |= rs->delivered_ce > 0;
+
+ bbr_debug(sk, rs->acked_sacked, rs, &ctx);
+}
+
+/* Module parameters that are settable by TCP_CONGESTION_PARAMS are declared
+ * down here, so that the algorithm functions that use the parameters must use
+ * the per-socket parameters; if they accidentally use the global version
+ * then there will be a compile error.
+ * TODO(ncardwell): move all per-socket parameters down to this section.
+ */
+
+/* On losses, scale down inflight and pacing rate by beta scaled by BBR_SCALE.
+ * No loss response when 0. Max allowed value is 255.
+ */
+static u32 bbr_beta = BBR_UNIT * 30 / 100;
+
+/* Gain factor for ECN mark ratio samples, scaled by BBR_SCALE.
+ * Max allowed value is 255.
+ */
+static u32 bbr_ecn_alpha_gain = BBR_UNIT * 1 / 16; /* 1/16 = 6.25% */
+
+/* The initial value for the ecn_alpha state variable. Default and max
+ * BBR_UNIT (256), representing 1.0. This allows a flow to respond quickly
+ * to congestion if the bottleneck is congested when the flow starts up.
+ */
+static u32 bbr_ecn_alpha_init = BBR_UNIT; /* 1.0, to respond quickly */
+
+/* On ECN, cut inflight_lo to (1 - ecn_factor * ecn_alpha) scaled by BBR_SCALE.
+ * No ECN based bounding when 0. Max allowed value is 255.
+ */
+static u32 bbr_ecn_factor = BBR_UNIT * 1 / 3; /* 1/3 = 33% */
+
+/* Estimate bw probing has gone too far if CE ratio exceeds this threshold.
+ * Scaled by BBR_SCALE. Disabled when 0. Max allowed is 255.
+ */
+static u32 bbr_ecn_thresh = BBR_UNIT * 1 / 2; /* 1/2 = 50% */
+
+/* Max RTT (in usec) at which to use sender-side ECN logic.
+ * Disabled when 0 (ECN allowed at any RTT).
+ * Max allowed for the parameter is 524287 (0x7ffff) us, ~524 ms.
+ */
+static u32 bbr_ecn_max_rtt_us = 5000;
+
+/* If non-zero, if in a cycle with no losses but some ECN marks, after ECN
+ * clears then use a multiplicative increase to quickly reprobe bw by
+ * starting inflight probing at the given multiple of inflight_hi.
+ * Default for this experimental knob is 0 (disabled).
+ * Planned value for experiments: BBR_UNIT * 1 / 2 = 128, representing 0.5.
+ */
+static u32 bbr_ecn_reprobe_gain;
+
+/* Estimate bw probing has gone too far if loss rate exceeds this level. */
+static u32 bbr_loss_thresh = BBR_UNIT * 2 / 100; /* 2% loss */
+
+/* Exit STARTUP if number of loss marking events in a Recovery round is >= N,
+ * and loss rate is higher than bbr_loss_thresh.
+ * Disabled if 0. Max allowed value is 15 (0xF).
+ */
+static u32 bbr_full_loss_cnt = 8;
+
+/* Exit STARTUP if number of round trips with ECN mark rate above ecn_thresh
+ * meets this count. Max allowed value is 3.
+ */
+static u32 bbr_full_ecn_cnt = 2;
+
+/* Fraction of unutilized headroom to try to leave in path upon high loss. */
+static u32 bbr_inflight_headroom = BBR_UNIT * 15 / 100;
+
+/* Multiplier to get target inflight (as multiple of BDP) for PROBE_UP phase.
+ * Default is 1.25x, as in BBR v1. Max allowed is 511.
+ */
+static u32 bbr_bw_probe_pif_gain = BBR_UNIT * 5 / 4;
+
+/* Multiplier to get Reno-style probe epoch duration as: k * BDP round trips.
+ * If zero, disables this BBR v2 Reno-style BDP-scaled coexistence mechanism.
+ * Max allowed is 511.
+ */
+static u32 bbr_bw_probe_reno_gain = BBR_UNIT;
+
+/* Max number of packet-timed rounds to wait before probing for bandwidth. If
+ * we want to tolerate 1% random loss per round, and not have this cut our
+ * inflight too much, we must probe for bw periodically on roughly this scale.
+ * If low, limits Reno/CUBIC coexistence; if high, limits loss tolerance.
+ * We aim to be fair with Reno/CUBIC up to a BDP of at least:
+ * BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets
+ */
+static u32 bbr_bw_probe_max_rounds = 63;
+
+/* Max amount of randomness to inject in round counting for Reno-coexistence.
+ * Max value is 15.
+ */
+static u32 bbr_bw_probe_rand_rounds = 2;
+
+/* Use BBR-native probe time scale starting at this many usec.
+ * We aim to be fair with Reno/CUBIC up to an inter-loss time epoch of at least:
+ * BDP*RTT = 25Mbps * .030sec /(1514bytes) * 0.030sec = 1.9 secs
+ */
+static u32 bbr_bw_probe_base_us = 2 * USEC_PER_SEC; /* 2 secs */
+
+/* Use BBR-native probes spread over this many usec: */
+static u32 bbr_bw_probe_rand_us = 1 * USEC_PER_SEC; /* 1 sec */
+
+/* Undo the model changes made in loss recovery if recovery was spurious? */
+static bool bbr_undo = true;
+
+/* Use fast path if app-limited, no loss/ECN, and target cwnd was reached? */
+static bool bbr_fast_path = true; /* default: enabled */
+
+/* Use fast ack mode? Unsigned to match its "uint" module_param type. */
+static u32 bbr_fast_ack_mode = 1; /* default: rwnd check off */
+
+/* How much to additively increase inflight_hi when entering REFILL? */
+static u32 bbr_refill_add_inc; /* default: disabled */
+
+module_param_named(beta, bbr_beta, uint, 0644); /* all: owner rw, rest ro */
+module_param_named(ecn_alpha_gain, bbr_ecn_alpha_gain, uint, 0644);
+module_param_named(ecn_alpha_init, bbr_ecn_alpha_init, uint, 0644);
+module_param_named(ecn_factor, bbr_ecn_factor, uint, 0644);
+module_param_named(ecn_thresh, bbr_ecn_thresh, uint, 0644);
+module_param_named(ecn_max_rtt_us, bbr_ecn_max_rtt_us, uint, 0644);
+module_param_named(ecn_reprobe_gain, bbr_ecn_reprobe_gain, uint, 0644);
+module_param_named(loss_thresh, bbr_loss_thresh, uint, 0644);
+module_param_named(full_loss_cnt, bbr_full_loss_cnt, uint, 0644);
+module_param_named(full_ecn_cnt, bbr_full_ecn_cnt, uint, 0644);
+module_param_named(inflight_headroom, bbr_inflight_headroom, uint, 0644);
+module_param_named(bw_probe_pif_gain, bbr_bw_probe_pif_gain, uint, 0644);
+module_param_named(bw_probe_reno_gain, bbr_bw_probe_reno_gain, uint, 0644);
+module_param_named(bw_probe_max_rounds, bbr_bw_probe_max_rounds, uint, 0644);
+module_param_named(bw_probe_rand_rounds, bbr_bw_probe_rand_rounds, uint, 0644);
+module_param_named(bw_probe_base_us, bbr_bw_probe_base_us, uint, 0644);
+module_param_named(bw_probe_rand_us, bbr_bw_probe_rand_us, uint, 0644);
+module_param_named(undo, bbr_undo, bool, 0644);
+module_param_named(fast_path, bbr_fast_path, bool, 0644);
+module_param_named(fast_ack_mode, bbr_fast_ack_mode, uint, 0644);
+module_param_named(refill_add_inc, bbr_refill_add_inc, uint, 0644);
+
+static void bbr2_init(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+
+ bbr_init(sk); /* run shared init code for v1 and v2 */
+
+ /* BBR v2 parameters: per-socket copies, numeric ones clamped to a max: */
+ bbr->params.beta = min_t(u32, 0xFFU, bbr_beta);
+ bbr->params.ecn_alpha_gain = min_t(u32, 0xFFU, bbr_ecn_alpha_gain);
+ bbr->params.ecn_alpha_init = min_t(u32, BBR_UNIT, bbr_ecn_alpha_init);
+ bbr->params.ecn_factor = min_t(u32, 0xFFU, bbr_ecn_factor);
+ bbr->params.ecn_thresh = min_t(u32, 0xFFU, bbr_ecn_thresh);
+ bbr->params.ecn_max_rtt_us = min_t(u32, 0x7ffffU, bbr_ecn_max_rtt_us);
+ bbr->params.ecn_reprobe_gain = min_t(u32, 0x1FF, bbr_ecn_reprobe_gain);
+ bbr->params.loss_thresh = min_t(u32, 0xFFU, bbr_loss_thresh);
+ bbr->params.full_loss_cnt = min_t(u32, 0xFU, bbr_full_loss_cnt);
+ bbr->params.full_ecn_cnt = min_t(u32, 0x3U, bbr_full_ecn_cnt);
+ bbr->params.inflight_headroom =
+ min_t(u32, 0xFFU, bbr_inflight_headroom);
+ bbr->params.bw_probe_pif_gain =
+ min_t(u32, 0x1FFU, bbr_bw_probe_pif_gain);
+ bbr->params.bw_probe_reno_gain =
+ min_t(u32, 0x1FFU, bbr_bw_probe_reno_gain);
+ bbr->params.bw_probe_max_rounds =
+ min_t(u32, 0xFFU, bbr_bw_probe_max_rounds);
+ bbr->params.bw_probe_rand_rounds =
+ min_t(u32, 0xFU, bbr_bw_probe_rand_rounds);
+ bbr->params.bw_probe_base_us =
+ min_t(u32, (1 << 26) - 1, bbr_bw_probe_base_us);
+ bbr->params.bw_probe_rand_us =
+ min_t(u32, (1 << 26) - 1, bbr_bw_probe_rand_us);
+ bbr->params.undo = bbr_undo;
+ bbr->params.fast_path = bbr_fast_path ? 1 : 0;
+ bbr->params.refill_add_inc = min_t(u32, 0x3U, bbr_refill_add_inc);
+
+ /* BBR v2 state: */
+ bbr->initialized = 1;
+ /* Start sampling ECN mark rate after first full flight is ACKed: */
+ bbr->loss_round_delivered = tp->delivered + 1;
+ bbr->loss_round_start = 0;
+ bbr->undo_bw_lo = 0;
+ bbr->undo_inflight_lo = 0;
+ bbr->undo_inflight_hi = 0;
+ bbr->loss_events_in_round = 0;
+ bbr->startup_ecn_rounds = 0;
+ bbr2_reset_congestion_signals(sk);
+ bbr->bw_lo = ~0U;
+ bbr->bw_hi[0] = 0;
+ bbr->bw_hi[1] = 0;
+ bbr->inflight_lo = ~0U; /* bbr2_set_state() tests for this ~0U value */
+ bbr->inflight_hi = ~0U;
+ bbr->bw_probe_up_cnt = ~0U;
+ bbr->bw_probe_up_acks = 0;
+ bbr->bw_probe_up_rounds = 0;
+ bbr->probe_wait_us = 0;
+ bbr->stopped_risky_probe = 0;
+ bbr->ack_phase = BBR_ACKS_INIT;
+ bbr->rounds_since_probe = 0;
+ bbr->bw_probe_samples = 0;
+ bbr->prev_probe_too_high = 0;
+ bbr->ecn_eligible = 0;
+ bbr->ecn_alpha = bbr->params.ecn_alpha_init; /* the clamped copy above */
+ bbr->alpha_last_delivered = 0;
+ bbr->alpha_last_delivered_ce = 0;
+
+ tp->fast_ack_mode = min_t(u32, 0x2U, bbr_fast_ack_mode); /* at most 2 */
+}
+
+/* Core TCP stack informs us that the given skb was just marked lost. */
+static void bbr2_skb_marked_lost(struct sock *sk, const struct sk_buff *skb)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+ struct rate_sample rs; /* zeroed below, only once it is needed */
+
+ /* Capture "current" data over the full round trip of loss,
+ * to have a better chance to see the full capacity of the path.
+ */
+ if (!bbr->loss_in_round) /* first loss in this round trip? */
+ bbr->loss_round_delivered = tp->delivered; /* set round trip */
+ bbr->loss_in_round = 1;
+ bbr->loss_in_cycle = 1;
+
+ if (!bbr->bw_probe_samples)
+ return; /* not an skb sent while probing for bandwidth */
+ if (unlikely(!scb->tx.delivered_mstamp))
+ return; /* skb was SACKed, reneged, marked lost; ignore it */
+ /* We are probing for bandwidth. Construct a rate sample that
+ * estimates what happened in the flight leading up to this lost skb,
+ * then see if the loss rate went too high, and if so at which packet.
+ */
+ memset(&rs, 0, sizeof(rs));
+ rs.tx_in_flight = scb->tx.in_flight;
+ rs.lost = tp->lost - scb->tx.lost;
+ rs.delivered_ce = tp->delivered_ce - scb->tx.delivered_ce;
+ rs.is_app_limited = scb->tx.is_app_limited;
+ if (bbr2_is_inflight_too_high(sk, &rs)) {
+ rs.tx_in_flight = bbr2_inflight_hi_from_lost_skb(sk, &rs, skb);
+ bbr2_handle_inflight_too_high(sk, &rs); /* sees updated tx_in_flight */
+ }
+}
+
+/* Revert short-term model if current loss recovery event was spurious. */
+static u32 bbr2_undo_cwnd(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+
+ bbr->debug.undo = 1;
+ bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */
+ bbr->full_bw_cnt = 0;
+ bbr->loss_in_round = 0;
+
+ if (!bbr->params.undo)
+ return tp->snd_cwnd; /* undo disabled: report the current cwnd */
+
+ /* Revert to saved pre-loss state; max() means bounds are only raised. */
+ bbr->bw_lo = max(bbr->bw_lo, bbr->undo_bw_lo);
+ bbr->inflight_lo = max(bbr->inflight_lo, bbr->undo_inflight_lo);
+ bbr->inflight_hi = max(bbr->inflight_hi, bbr->undo_inflight_hi);
+ return bbr->prior_cwnd;
+}
+
+/* Entering loss recovery, so save state for when we undo recovery. */
+static u32 bbr2_ssthresh(struct sock *sk)
+{
+ struct bbr *bbr = inet_csk_ca(sk);
+
+ bbr_save_cwnd(sk);
+ /* For undo, save state that adapts based on loss signal. */
+ bbr->undo_bw_lo = bbr->bw_lo; /* read back by bbr2_undo_cwnd() */
+ bbr->undo_inflight_lo = bbr->inflight_lo;
+ bbr->undo_inflight_hi = bbr->inflight_hi;
+ return tcp_sk(sk)->snd_ssthresh; /* hand back the existing value as-is */
+}
+
+static enum tcp_bbr2_phase bbr2_get_phase(struct bbr *bbr)
+{
+ switch (bbr->mode) { /* every mode but PROBE_BW maps to one phase */
+ case BBR_STARTUP:
+ return BBR2_PHASE_STARTUP;
+ case BBR_DRAIN:
+ return BBR2_PHASE_DRAIN;
+ case BBR_PROBE_BW:
+ break; /* phase depends on cycle_idx; resolved below */
+ case BBR_PROBE_RTT:
+ return BBR2_PHASE_PROBE_RTT;
+ default:
+ return BBR2_PHASE_INVALID;
+ }
+ switch (bbr->cycle_idx) { /* only reached in BBR_PROBE_BW mode */
+ case BBR_BW_PROBE_UP:
+ return BBR2_PHASE_PROBE_BW_UP;
+ case BBR_BW_PROBE_DOWN:
+ return BBR2_PHASE_PROBE_BW_DOWN;
+ case BBR_BW_PROBE_CRUISE:
+ return BBR2_PHASE_PROBE_BW_CRUISE;
+ case BBR_BW_PROBE_REFILL:
+ return BBR2_PHASE_PROBE_BW_REFILL;
+ default:
+ return BBR2_PHASE_INVALID;
+ }
+}
+
+static size_t bbr2_get_info(struct sock *sk, u32 ext, int *attr,
+ union tcp_cc_info *info)
+{
+ if (ext & (1 << (INET_DIAG_BBRINFO - 1)) ||
+ ext & (1 << (INET_DIAG_VEGASINFO - 1))) { /* either request bit */
+ struct bbr *bbr = inet_csk_ca(sk);
+ u64 bw = bbr_bw_bytes_per_sec(sk, bbr_bw(sk));
+ u64 bw_hi = bbr_bw_bytes_per_sec(sk, bbr_max_bw(sk));
+ u64 bw_lo = bbr->bw_lo == ~0U ? /* all-ones stays all-ones */
+ ~0ULL : bbr_bw_bytes_per_sec(sk, bbr->bw_lo);
+
+ memset(&info->bbr2, 0, sizeof(info->bbr2));
+ info->bbr2.bbr_bw_lsb = (u32)bw; /* u64 split into lsb/msb u32s */
+ info->bbr2.bbr_bw_msb = (u32)(bw >> 32);
+ info->bbr2.bbr_min_rtt = bbr->min_rtt_us;
+ info->bbr2.bbr_pacing_gain = bbr->pacing_gain;
+ info->bbr2.bbr_cwnd_gain = bbr->cwnd_gain;
+ info->bbr2.bbr_bw_hi_lsb = (u32)bw_hi;
+ info->bbr2.bbr_bw_hi_msb = (u32)(bw_hi >> 32);
+ info->bbr2.bbr_bw_lo_lsb = (u32)bw_lo;
+ info->bbr2.bbr_bw_lo_msb = (u32)(bw_lo >> 32);
+ info->bbr2.bbr_mode = bbr->mode;
+ info->bbr2.bbr_phase = (__u8)bbr2_get_phase(bbr);
+ info->bbr2.bbr_version = (__u8)2;
+ info->bbr2.bbr_inflight_lo = bbr->inflight_lo;
+ info->bbr2.bbr_inflight_hi = bbr->inflight_hi;
+ info->bbr2.bbr_extra_acked = bbr_extra_acked(sk);
+ *attr = INET_DIAG_BBRINFO; /* reported as BBRINFO in both cases */
+ return sizeof(info->bbr2);
+ }
+ return 0; /* neither bit set: nothing written to *attr or *info */
+}
+
+static void bbr2_set_state(struct sock *sk, u8 new_state)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+
+ if (new_state == TCP_CA_Loss) {
+ struct rate_sample rs = { .losses = 1 }; /* only for bbr_debug() */
+ struct bbr_context ctx = { 0 };
+
+ bbr->prev_ca_state = TCP_CA_Loss;
+ bbr->full_bw = 0;
+ if (!bbr2_is_probing_bandwidth(sk) && bbr->inflight_lo == ~0U) {
+ /* bbr_adapt_lower_bounds() needs cwnd before
+ * we suffered an RTO, to update inflight_lo:
+ */
+ WARN_ON_ONCE(bbr->prior_cwnd == 0);
+ WARN_ON_ONCE(bbr->prior_cwnd == ~0U);
+ bbr->inflight_lo = bbr->prior_cwnd;
+ }
+ bbr_debug(sk, 0, &rs, &ctx);
+ } else if (bbr->prev_ca_state == TCP_CA_Loss &&
+ new_state != TCP_CA_Loss) { /* always true in this else branch */
+ WARN_ON_ONCE(bbr->prior_cwnd == 0);
+ WARN_ON_ONCE(bbr->prior_cwnd == ~0U);
+ tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd); /* never shrinks */
+ bbr->try_fast_path = 0; /* bound cwnd using latest model */
+ }
+}
+
+static struct tcp_congestion_ops tcp_bbr2_cong_ops __read_mostly = {
+ .flags = TCP_CONG_NON_RESTRICTED | TCP_CONG_WANTS_CE_EVENTS,
+ .name = "bbr2", /* ops table is registered by bbr_register() */
+ .owner = THIS_MODULE,
+ .init = bbr2_init,
+ .cong_control = bbr2_main,
+ .sndbuf_expand = bbr_sndbuf_expand,
+ .skb_marked_lost = bbr2_skb_marked_lost,
+ .undo_cwnd = bbr2_undo_cwnd,
+ .cwnd_event = bbr_cwnd_event,
+ .ssthresh = bbr2_ssthresh,
+ .tso_segs = bbr_tso_segs,
+ .get_info = bbr2_get_info,
+ .set_state = bbr2_set_state,
+};
+
+static int __init bbr_register(void)
+{
+ BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE); /* size check */
+ return tcp_register_congestion_control(&tcp_bbr2_cong_ops);
+}
+
+static void __exit bbr_unregister(void)
+{
+ tcp_unregister_congestion_control(&tcp_bbr2_cong_ops); /* undo register */
+}
+
+module_init(bbr_register);
+module_exit(bbr_unregister);
+
+MODULE_AUTHOR("Van Jacobson <vanj@google.com>");
+MODULE_AUTHOR("Neal Cardwell <ncardwell@google.com>");
+MODULE_AUTHOR("Yuchung Cheng <ycheng@google.com>");
+MODULE_AUTHOR("Soheil Hassas Yeganeh <soheil@google.com>");
+MODULE_AUTHOR("Priyaranjan Jha <priyarjha@google.com>");
+MODULE_AUTHOR("Yousuk Seung <ysseung@google.com>");
+MODULE_AUTHOR("Kevin Yang <yyd@google.com>");
+MODULE_AUTHOR("Arjun Roy <arjunroy@google.com>");
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)");
--
2.34.1
From 0a65dddf457f76a09570a284f260eda9eb6504de Mon Sep 17 00:00:00 2001
From: Soheil Hassas Yeganeh <soheil@google.com>
Date: Sat, 23 Jan 2016 20:51:41 -0500
Subject: [PATCH 15/27] net-test: add .config for kernel circa v5.10, with many
TCP CC modules enabled
This commit provides a kernel config file for GCE. It builds most
(all?) of the available congestion control modules and uses bbr2 as
the default.
Tested: On GCE.
Effort: net-test
Change-Id: Ibc4dfdc119c804f1ad2853b3ee2c1c503bca01a9
---
config.gce | 3807 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 3807 insertions(+)
create mode 100644 config.gce
diff --git a/config.gce b/config.gce
new file mode 100644
index 000000000000..1538acf6818b
--- /dev/null
+++ b/config.gce
@@ -0,0 +1,3807 @@
+#
+# Automatically generated file; DO NOT EDIT.
+# Linux/x86 4.8.0 Kernel Configuration
+#
+CONFIG_64BIT=y
+CONFIG_X86_64=y
+CONFIG_X86=y
+CONFIG_INSTRUCTION_DECODER=y
+CONFIG_OUTPUT_FORMAT="elf64-x86-64"
+CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_MMU=y
+CONFIG_ARCH_MMAP_RND_BITS_MIN=28
+CONFIG_ARCH_MMAP_RND_BITS_MAX=32
+CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8
+CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16
+CONFIG_NEED_DMA_MAP_STATE=y
+CONFIG_NEED_SG_DMA_LENGTH=y
+CONFIG_GENERIC_BUG=y
+CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_ARCH_HAS_CPU_RELAX=y
+CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
+CONFIG_HAVE_SETUP_PER_CPU_AREA=y
+CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
+CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
+CONFIG_ARCH_HIBERNATION_POSSIBLE=y
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
+CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y
+CONFIG_ARCH_WANT_GENERAL_HUGETLB=y
+CONFIG_ZONE_DMA32=y
+CONFIG_AUDIT_ARCH=y
+CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
+CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
+CONFIG_HAVE_INTEL_TXT=y
+CONFIG_X86_64_SMP=y
+CONFIG_ARCH_SUPPORTS_UPROBES=y
+CONFIG_FIX_EARLYCON_MEM=y
+CONFIG_DEBUG_RODATA=y
+CONFIG_PGTABLE_LEVELS=4
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+CONFIG_IRQ_WORK=y
+CONFIG_BUILDTIME_EXTABLE_SORT=y
+
+#
+# General setup
+#
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_CROSS_COMPILE=""
+# CONFIG_COMPILE_TEST is not set
+CONFIG_LOCALVERSION=""
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_HAVE_KERNEL_GZIP=y
+CONFIG_HAVE_KERNEL_BZIP2=y
+CONFIG_HAVE_KERNEL_LZMA=y
+CONFIG_HAVE_KERNEL_XZ=y
+CONFIG_HAVE_KERNEL_LZO=y
+CONFIG_HAVE_KERNEL_LZ4=y
+# CONFIG_KERNEL_GZIP is not set
+# CONFIG_KERNEL_BZIP2 is not set
+CONFIG_KERNEL_LZMA=y
+# CONFIG_KERNEL_XZ is not set
+# CONFIG_KERNEL_LZO is not set
+# CONFIG_KERNEL_LZ4 is not set
+CONFIG_DEFAULT_HOSTNAME="(none)"
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
+CONFIG_CROSS_MEMORY_ATTACH=y
+CONFIG_FHANDLE=y
+CONFIG_USELIB=y
+CONFIG_AUDIT=y
+CONFIG_HAVE_ARCH_AUDITSYSCALL=y
+CONFIG_AUDITSYSCALL=y
+CONFIG_AUDIT_WATCH=y
+CONFIG_AUDIT_TREE=y
+
+#
+# IRQ subsystem
+#
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_GENERIC_IRQ_SHOW=y
+CONFIG_GENERIC_PENDING_IRQ=y
+CONFIG_IRQ_DOMAIN=y
+CONFIG_IRQ_DOMAIN_HIERARCHY=y
+CONFIG_GENERIC_MSI_IRQ=y
+CONFIG_GENERIC_MSI_IRQ_DOMAIN=y
+# CONFIG_IRQ_DOMAIN_DEBUG is not set
+CONFIG_IRQ_FORCED_THREADING=y
+CONFIG_SPARSE_IRQ=y
+CONFIG_CLOCKSOURCE_WATCHDOG=y
+CONFIG_ARCH_CLOCKSOURCE_DATA=y
+CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE=y
+CONFIG_GENERIC_TIME_VSYSCALL=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
+CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y
+CONFIG_GENERIC_CMOS_UPDATE=y
+
+#
+# Timers subsystem
+#
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ_COMMON=y
+# CONFIG_HZ_PERIODIC is not set
+CONFIG_NO_HZ_IDLE=y
+# CONFIG_NO_HZ_FULL is not set
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+
+#
+# CPU/Task time and stats accounting
+#
+CONFIG_TICK_CPU_ACCOUNTING=y
+# CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set
+CONFIG_IRQ_TIME_ACCOUNTING=y
+CONFIG_BSD_PROCESS_ACCT=y
+# CONFIG_BSD_PROCESS_ACCT_V3 is not set
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+
+#
+# RCU Subsystem
+#
+CONFIG_TREE_RCU=y
+# CONFIG_RCU_EXPERT is not set
+CONFIG_SRCU=y
+# CONFIG_TASKS_RCU is not set
+CONFIG_RCU_STALL_COMMON=y
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_RCU_EXPEDITE_BOOT is not set
+CONFIG_BUILD_BIN2C=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=20
+CONFIG_LOG_CPU_MAX_BUF_SHIFT=12
+CONFIG_NMI_LOG_BUF_SHIFT=13
+CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y
+CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y
+CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y
+CONFIG_ARCH_SUPPORTS_INT128=y
+# CONFIG_NUMA_BALANCING is not set
+CONFIG_CGROUPS=y
+CONFIG_PAGE_COUNTER=y
+CONFIG_MEMCG=y
+# CONFIG_MEMCG_SWAP is not set
+CONFIG_BLK_CGROUP=y
+CONFIG_DEBUG_BLK_CGROUP=y
+CONFIG_CGROUP_WRITEBACK=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+CONFIG_CFS_BANDWIDTH=y
+# CONFIG_RT_GROUP_SCHED is not set
+# CONFIG_CGROUP_PIDS is not set
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CPUSETS=y
+CONFIG_PROC_PID_CPUSET=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+# CONFIG_CGROUP_DEBUG is not set
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_NAMESPACES=y
+CONFIG_UTS_NS=y
+CONFIG_IPC_NS=y
+CONFIG_USER_NS=y
+CONFIG_PID_NS=y
+CONFIG_NET_NS=y
+# CONFIG_SCHED_AUTOGROUP is not set
+# CONFIG_SYSFS_DEPRECATED is not set
+CONFIG_RELAY=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_RD_GZIP=y
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
+CONFIG_RD_XZ=y
+CONFIG_RD_LZO=y
+CONFIG_RD_LZ4=y
+CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_HAVE_UID16=y
+CONFIG_SYSCTL_EXCEPTION_TRACE=y
+CONFIG_HAVE_PCSPKR_PLATFORM=y
+CONFIG_BPF=y
+CONFIG_EXPERT=y
+CONFIG_UID16=y
+CONFIG_MULTIUSER=y
+CONFIG_SGETMASK_SYSCALL=y
+CONFIG_SYSFS_SYSCALL=y
+# CONFIG_SYSCTL_SYSCALL is not set
+CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y
+CONFIG_KALLSYMS_BASE_RELATIVE=y
+CONFIG_PRINTK=y
+CONFIG_PRINTK_NMI=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_PCSPKR_PLATFORM=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+CONFIG_ADVISE_SYSCALLS=y
+# CONFIG_USERFAULTFD is not set
+CONFIG_PCI_QUIRKS=y
+CONFIG_MEMBARRIER=y
+# CONFIG_EMBEDDED is not set
+CONFIG_HAVE_PERF_EVENTS=y
+
+#
+# Kernel Performance Events And Counters
+#
+CONFIG_PERF_EVENTS=y
+# CONFIG_DEBUG_PERF_USE_VMALLOC is not set
+CONFIG_VM_EVENT_COUNTERS=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+# CONFIG_SLAB_FREELIST_RANDOM is not set
+# CONFIG_SYSTEM_DATA_VERIFICATION is not set
+CONFIG_PROFILING=y
+CONFIG_TRACEPOINTS=y
+CONFIG_KEXEC_CORE=y
+# CONFIG_OPROFILE is not set
+CONFIG_HAVE_OPROFILE=y
+CONFIG_OPROFILE_NMI_TIMER=y
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+# CONFIG_STATIC_KEYS_SELFTEST is not set
+CONFIG_OPTPROBES=y
+CONFIG_KPROBES_ON_FTRACE=y
+# CONFIG_UPROBES is not set
+# CONFIG_HAVE_64BIT_ALIGNED_ACCESS is not set
+CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y
+CONFIG_ARCH_USE_BUILTIN_BSWAP=y
+CONFIG_KRETPROBES=y
+CONFIG_USER_RETURN_NOTIFIER=y
+CONFIG_HAVE_IOREMAP_PROT=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_OPTPROBES=y
+CONFIG_HAVE_KPROBES_ON_FTRACE=y
+CONFIG_HAVE_NMI=y
+CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_HAVE_DMA_CONTIGUOUS=y
+CONFIG_GENERIC_SMP_IDLE_THREAD=y
+CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT=y
+CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+CONFIG_HAVE_HW_BREAKPOINT=y
+CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y
+CONFIG_HAVE_USER_RETURN_NOTIFIER=y
+CONFIG_HAVE_PERF_EVENTS_NMI=y
+CONFIG_HAVE_PERF_REGS=y
+CONFIG_HAVE_PERF_USER_STACK_DUMP=y
+CONFIG_HAVE_ARCH_JUMP_LABEL=y
+CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y
+CONFIG_HAVE_CMPXCHG_LOCAL=y
+CONFIG_HAVE_CMPXCHG_DOUBLE=y
+CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y
+CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y
+CONFIG_HAVE_ARCH_SECCOMP_FILTER=y
+CONFIG_SECCOMP_FILTER=y
+CONFIG_HAVE_GCC_PLUGINS=y
+# CONFIG_GCC_PLUGINS is not set
+CONFIG_HAVE_CC_STACKPROTECTOR=y
+# CONFIG_CC_STACKPROTECTOR is not set
+CONFIG_CC_STACKPROTECTOR_NONE=y
+# CONFIG_CC_STACKPROTECTOR_REGULAR is not set
+# CONFIG_CC_STACKPROTECTOR_STRONG is not set
+CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES=y
+CONFIG_HAVE_CONTEXT_TRACKING=y
+CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y
+CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y
+CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y
+CONFIG_HAVE_ARCH_HUGE_VMAP=y
+CONFIG_HAVE_ARCH_SOFT_DIRTY=y
+CONFIG_MODULES_USE_ELF_RELA=y
+CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y
+CONFIG_ARCH_HAS_ELF_RANDOMIZE=y
+CONFIG_HAVE_ARCH_MMAP_RND_BITS=y
+CONFIG_HAVE_EXIT_THREAD=y
+CONFIG_ARCH_MMAP_RND_BITS=28
+CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS=y
+CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8
+CONFIG_HAVE_COPY_THREAD_TLS=y
+CONFIG_HAVE_STACK_VALIDATION=y
+# CONFIG_HAVE_ARCH_HASH is not set
+# CONFIG_ISA_BUS_API is not set
+CONFIG_OLD_SIGSUSPEND3=y
+CONFIG_COMPAT_OLD_SIGACTION=y
+# CONFIG_CPU_NO_EFFICIENT_FFS is not set
+
+#
+# GCOV-based kernel profiling
+#
+# CONFIG_GCOV_KERNEL is not set
+CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y
+# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+# CONFIG_MODULE_SIG is not set
+# CONFIG_MODULE_COMPRESS is not set
+# CONFIG_TRIM_UNUSED_KSYMS is not set
+CONFIG_MODULES_TREE_LOOKUP=y
+CONFIG_BLOCK=y
+CONFIG_BLK_DEV_BSG=y
+CONFIG_BLK_DEV_BSGLIB=y
+# CONFIG_BLK_DEV_INTEGRITY is not set
+# CONFIG_BLK_DEV_THROTTLING is not set
+# CONFIG_BLK_CMDLINE_PARSER is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_AIX_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_KARMA_PARTITION is not set
+CONFIG_EFI_PARTITION=y
+# CONFIG_SYSV68_PARTITION is not set
+# CONFIG_CMDLINE_PARTITION is not set
+CONFIG_BLOCK_COMPAT=y
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_CFQ_GROUP_IOSCHED is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+CONFIG_DEFAULT_CFQ=y
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="cfq"
+CONFIG_PREEMPT_NOTIFIERS=y
+CONFIG_INLINE_SPIN_UNLOCK_IRQ=y
+CONFIG_INLINE_READ_UNLOCK=y
+CONFIG_INLINE_READ_UNLOCK_IRQ=y
+CONFIG_INLINE_WRITE_UNLOCK=y
+CONFIG_INLINE_WRITE_UNLOCK_IRQ=y
+CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y
+CONFIG_MUTEX_SPIN_ON_OWNER=y
+CONFIG_RWSEM_SPIN_ON_OWNER=y
+CONFIG_LOCK_SPIN_ON_OWNER=y
+CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y
+CONFIG_QUEUED_SPINLOCKS=y
+CONFIG_ARCH_USE_QUEUED_RWLOCKS=y
+CONFIG_QUEUED_RWLOCKS=y
+CONFIG_FREEZER=y
+
+#
+# Processor type and features
+#
+# CONFIG_ZONE_DMA is not set
+CONFIG_SMP=y
+CONFIG_X86_FEATURE_NAMES=y
+CONFIG_X86_FAST_FEATURE_TESTS=y
+CONFIG_X86_X2APIC=y
+CONFIG_X86_MPPARSE=y
+# CONFIG_GOLDFISH is not set
+CONFIG_X86_EXTENDED_PLATFORM=y
+# CONFIG_X86_NUMACHIP is not set
+# CONFIG_X86_VSMP is not set
+# CONFIG_X86_UV is not set
+# CONFIG_X86_GOLDFISH is not set
+# CONFIG_X86_INTEL_MID is not set
+# CONFIG_X86_INTEL_LPSS is not set
+# CONFIG_X86_AMD_PLATFORM_DEVICE is not set
+# CONFIG_IOSF_MBI is not set
+CONFIG_X86_SUPPORTS_MEMORY_FAILURE=y
+CONFIG_SCHED_OMIT_FRAME_POINTER=y
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_PARAVIRT=y
+# CONFIG_PARAVIRT_DEBUG is not set
+# CONFIG_PARAVIRT_SPINLOCKS is not set
+# CONFIG_XEN is not set
+CONFIG_KVM_GUEST=y
+# CONFIG_KVM_DEBUG_FS is not set
+# CONFIG_PARAVIRT_TIME_ACCOUNTING is not set
+CONFIG_PARAVIRT_CLOCK=y
+CONFIG_NO_BOOTMEM=y
+# CONFIG_MK8 is not set
+# CONFIG_MPSC is not set
+# CONFIG_MCORE2 is not set
+# CONFIG_MATOM is not set
+CONFIG_GENERIC_CPU=y
+CONFIG_X86_INTERNODE_CACHE_SHIFT=6
+CONFIG_X86_L1_CACHE_SHIFT=6
+CONFIG_X86_TSC=y
+CONFIG_X86_CMPXCHG64=y
+CONFIG_X86_CMOV=y
+CONFIG_X86_MINIMUM_CPU_FAMILY=64
+CONFIG_X86_DEBUGCTLMSR=y
+# CONFIG_PROCESSOR_SELECT is not set
+CONFIG_CPU_SUP_INTEL=y
+CONFIG_CPU_SUP_AMD=y
+CONFIG_CPU_SUP_CENTAUR=y
+CONFIG_HPET_TIMER=y
+CONFIG_DMI=y
+# CONFIG_GART_IOMMU is not set
+# CONFIG_CALGARY_IOMMU is not set
+CONFIG_SWIOTLB=y
+CONFIG_IOMMU_HELPER=y
+# CONFIG_MAXSMP is not set
+CONFIG_NR_CPUS=96
+CONFIG_SCHED_SMT=y
+CONFIG_SCHED_MC=y
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_X86_LOCAL_APIC=y
+CONFIG_X86_IO_APIC=y
+CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
+CONFIG_X86_MCE=y
+CONFIG_X86_MCE_INTEL=y
+CONFIG_X86_MCE_AMD=y
+CONFIG_X86_MCE_THRESHOLD=y
+CONFIG_X86_MCE_INJECT=m
+CONFIG_X86_THERMAL_VECTOR=y
+
+#
+# Performance monitoring
+#
+CONFIG_PERF_EVENTS_INTEL_UNCORE=y
+CONFIG_PERF_EVENTS_INTEL_RAPL=y
+CONFIG_PERF_EVENTS_INTEL_CSTATE=y
+# CONFIG_PERF_EVENTS_AMD_POWER is not set
+# CONFIG_VM86 is not set
+CONFIG_X86_16BIT=y
+CONFIG_X86_ESPFIX64=y
+CONFIG_X86_VSYSCALL_EMULATION=y
+# CONFIG_I8K is not set
+CONFIG_MICROCODE=y
+CONFIG_MICROCODE_INTEL=y
+CONFIG_MICROCODE_AMD=y
+CONFIG_MICROCODE_OLD_INTERFACE=y
+CONFIG_X86_MSR=m
+CONFIG_X86_CPUID=m
+CONFIG_ARCH_PHYS_ADDR_T_64BIT=y
+CONFIG_ARCH_DMA_ADDR_T_64BIT=y
+CONFIG_X86_DIRECT_GBPAGES=y
+CONFIG_NUMA=y
+CONFIG_AMD_NUMA=y
+CONFIG_X86_64_ACPI_NUMA=y
+CONFIG_NODES_SPAN_OTHER_NODES=y
+# CONFIG_NUMA_EMU is not set
+CONFIG_NODES_SHIFT=2
+CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_ARCH_SPARSEMEM_DEFAULT=y
+CONFIG_ARCH_SELECT_MEMORY_MODEL=y
+CONFIG_ARCH_PROC_KCORE_TEXT=y
+CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_SPARSEMEM_MANUAL=y
+CONFIG_SPARSEMEM=y
+CONFIG_NEED_MULTIPLE_NODES=y
+CONFIG_HAVE_MEMORY_PRESENT=y
+CONFIG_SPARSEMEM_EXTREME=y
+CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
+CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER=y
+CONFIG_SPARSEMEM_VMEMMAP=y
+CONFIG_HAVE_MEMBLOCK=y
+CONFIG_HAVE_MEMBLOCK_NODE_MAP=y
+CONFIG_ARCH_DISCARD_MEMBLOCK=y
+# CONFIG_MOVABLE_NODE is not set
+# CONFIG_HAVE_BOOTMEM_INFO_NODE is not set
+# CONFIG_MEMORY_HOTPLUG is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y
+CONFIG_COMPACTION=y
+CONFIG_MIGRATION=y
+CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y
+CONFIG_PHYS_ADDR_T_64BIT=y
+CONFIG_VIRT_TO_BUS=y
+CONFIG_MMU_NOTIFIER=y
+# CONFIG_KSM is not set
+CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
+CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y
+# CONFIG_MEMORY_FAILURE is not set
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y
+# CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set
+CONFIG_TRANSPARENT_HUGE_PAGECACHE=y
+# CONFIG_CLEANCACHE is not set
+CONFIG_FRONTSWAP=y
+# CONFIG_CMA is not set
+CONFIG_MEM_SOFT_DIRTY=y
+CONFIG_ZSWAP=y
+CONFIG_ZPOOL=y
+# CONFIG_ZBUD is not set
+# CONFIG_Z3FOLD is not set
+# CONFIG_ZSMALLOC is not set
+CONFIG_GENERIC_EARLY_IOREMAP=y
+CONFIG_ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT=y
+# CONFIG_IDLE_PAGE_TRACKING is not set
+CONFIG_ARCH_USES_HIGH_VMA_FLAGS=y
+CONFIG_ARCH_HAS_PKEYS=y
+# CONFIG_X86_PMEM_LEGACY is not set
+# CONFIG_X86_CHECK_BIOS_CORRUPTION is not set
+CONFIG_X86_RESERVE_LOW=64
+CONFIG_MTRR=y
+# CONFIG_MTRR_SANITIZER is not set
+CONFIG_X86_PAT=y
+CONFIG_ARCH_USES_PG_UNCACHED=y
+CONFIG_ARCH_RANDOM=y
+CONFIG_X86_SMAP=y
+# CONFIG_X86_INTEL_MPX is not set
+CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS=y
+CONFIG_EFI=y
+CONFIG_EFI_STUB=y
+CONFIG_SECCOMP=y
+# CONFIG_HZ_100 is not set
+# CONFIG_HZ_250 is not set
+# CONFIG_HZ_300 is not set
+CONFIG_HZ_1000=y
+CONFIG_HZ=1000
+CONFIG_SCHED_HRTICK=y
+CONFIG_KEXEC=y
+# CONFIG_KEXEC_FILE is not set
+CONFIG_CRASH_DUMP=y
+CONFIG_PHYSICAL_START=0x1000000
+CONFIG_RELOCATABLE=y
+# CONFIG_RANDOMIZE_BASE is not set
+CONFIG_PHYSICAL_ALIGN=0x200000
+CONFIG_HOTPLUG_CPU=y
+# CONFIG_BOOTPARAM_HOTPLUG_CPU0 is not set
+# CONFIG_DEBUG_HOTPLUG_CPU0 is not set
+# CONFIG_COMPAT_VDSO is not set
+# CONFIG_LEGACY_VSYSCALL_NATIVE is not set
+CONFIG_LEGACY_VSYSCALL_EMULATE=y
+# CONFIG_LEGACY_VSYSCALL_NONE is not set
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="oops=panic panic=10 io_delay=0xed libata.allow_tpm=1 nmi_watchdog=panic tco_start=1 quiet svm.nested=0 acpi_enforce_resources=lax"
+# CONFIG_CMDLINE_OVERRIDE is not set
+CONFIG_MODIFY_LDT_SYSCALL=y
+CONFIG_HAVE_LIVEPATCH=y
+# CONFIG_LIVEPATCH is not set
+CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
+CONFIG_USE_PERCPU_NUMA_NODE_ID=y
+
+#
+# Power management and ACPI options
+#
+CONFIG_SUSPEND=y
+CONFIG_SUSPEND_FREEZER=y
+# CONFIG_SUSPEND_SKIP_SYNC is not set
+# CONFIG_HIBERNATION is not set
+CONFIG_PM_SLEEP=y
+CONFIG_PM_SLEEP_SMP=y
+# CONFIG_PM_AUTOSLEEP is not set
+# CONFIG_PM_WAKELOCKS is not set
+CONFIG_PM=y
+CONFIG_PM_DEBUG=y
+# CONFIG_PM_ADVANCED_DEBUG is not set
+CONFIG_PM_SLEEP_DEBUG=y
+CONFIG_PM_TRACE=y
+CONFIG_PM_TRACE_RTC=y
+# CONFIG_WQ_POWER_EFFICIENT_DEFAULT is not set
+CONFIG_ACPI=y
+CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y
+CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC=y
+CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT=y
+# CONFIG_ACPI_DEBUGGER is not set
+CONFIG_ACPI_SLEEP=y
+# CONFIG_ACPI_PROCFS_POWER is not set
+CONFIG_ACPI_REV_OVERRIDE_POSSIBLE=y
+# CONFIG_ACPI_EC_DEBUGFS is not set
+CONFIG_ACPI_AC=y
+CONFIG_ACPI_BATTERY=y
+CONFIG_ACPI_BUTTON=y
+CONFIG_ACPI_FAN=y
+# CONFIG_ACPI_DOCK is not set
+CONFIG_ACPI_CPU_FREQ_PSS=y
+CONFIG_ACPI_PROCESSOR_CSTATE=y
+CONFIG_ACPI_PROCESSOR_IDLE=y
+CONFIG_ACPI_PROCESSOR=m
+# CONFIG_ACPI_IPMI is not set
+CONFIG_ACPI_HOTPLUG_CPU=y
+# CONFIG_ACPI_PROCESSOR_AGGREGATOR is not set
+CONFIG_ACPI_THERMAL=m
+CONFIG_ACPI_NUMA=y
+# CONFIG_ACPI_CUSTOM_DSDT is not set
+CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y
+CONFIG_ACPI_TABLE_UPGRADE=y
+# CONFIG_ACPI_DEBUG is not set
+# CONFIG_ACPI_PCI_SLOT is not set
+CONFIG_X86_PM_TIMER=y
+CONFIG_ACPI_CONTAINER=y
+CONFIG_ACPI_HOTPLUG_IOAPIC=y
+# CONFIG_ACPI_SBS is not set
+# CONFIG_ACPI_HED is not set
+# CONFIG_ACPI_CUSTOM_METHOD is not set
+# CONFIG_ACPI_BGRT is not set
+# CONFIG_ACPI_REDUCED_HARDWARE_ONLY is not set
+# CONFIG_ACPI_NFIT is not set
+CONFIG_HAVE_ACPI_APEI=y
+CONFIG_HAVE_ACPI_APEI_NMI=y
+# CONFIG_ACPI_APEI is not set
+# CONFIG_DPTF_POWER is not set
+# CONFIG_ACPI_EXTLOG is not set
+# CONFIG_PMIC_OPREGION is not set
+# CONFIG_ACPI_CONFIGFS is not set
+# CONFIG_SFI is not set
+
+#
+# CPU Frequency scaling
+#
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_GOV_ATTR_SET=y
+CONFIG_CPU_FREQ_GOV_COMMON=y
+# CONFIG_CPU_FREQ_STAT is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
+# CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=m
+CONFIG_CPU_FREQ_GOV_USERSPACE=m
+CONFIG_CPU_FREQ_GOV_ONDEMAND=m
+# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
+# CONFIG_CPU_FREQ_GOV_SCHEDUTIL is not set
+
+#
+# CPU frequency scaling drivers
+#
+# CONFIG_X86_INTEL_PSTATE is not set
+# CONFIG_X86_PCC_CPUFREQ is not set
+CONFIG_X86_ACPI_CPUFREQ=m
+CONFIG_X86_ACPI_CPUFREQ_CPB=y
+CONFIG_X86_POWERNOW_K8=m
+# CONFIG_X86_AMD_FREQ_SENSITIVITY is not set
+CONFIG_X86_SPEEDSTEP_CENTRINO=m
+# CONFIG_X86_P4_CLOCKMOD is not set
+
+#
+# shared options
+#
+# CONFIG_X86_SPEEDSTEP_LIB is not set
+
+#
+# CPU Idle
+#
+CONFIG_CPU_IDLE=y
+CONFIG_CPU_IDLE_GOV_LADDER=y
+CONFIG_CPU_IDLE_GOV_MENU=y
+# CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED is not set
+CONFIG_INTEL_IDLE=y
+
+#
+# Memory power savings
+#
+# CONFIG_I7300_IDLE is not set
+
+#
+# Bus options (PCI etc.)
+#
+CONFIG_PCI=y
+CONFIG_PCI_DIRECT=y
+CONFIG_PCI_MMCONFIG=y
+CONFIG_PCI_DOMAINS=y
+# CONFIG_PCI_CNB20LE_QUIRK is not set
+# CONFIG_PCIEPORTBUS is not set
+CONFIG_PCI_BUS_ADDR_T_64BIT=y
+CONFIG_PCI_MSI=y
+CONFIG_PCI_MSI_IRQ_DOMAIN=y
+# CONFIG_PCI_DEBUG is not set
+# CONFIG_PCI_REALLOC_ENABLE_AUTO is not set
+# CONFIG_PCI_STUB is not set
+CONFIG_HT_IRQ=y
+CONFIG_PCI_ATS=y
+# CONFIG_PCI_IOV is not set
+CONFIG_PCI_PRI=y
+CONFIG_PCI_PASID=y
+CONFIG_PCI_LABEL=y
+CONFIG_HOTPLUG_PCI=y
+# CONFIG_HOTPLUG_PCI_ACPI is not set
+# CONFIG_HOTPLUG_PCI_CPCI is not set
+# CONFIG_HOTPLUG_PCI_SHPC is not set
+
+#
+# PCI host controller drivers
+#
+# CONFIG_PCIE_DW_PLAT is not set
+# CONFIG_ISA_BUS is not set
+# CONFIG_ISA_DMA_API is not set
+CONFIG_AMD_NB=y
+# CONFIG_PCCARD is not set
+# CONFIG_RAPIDIO is not set
+# CONFIG_X86_SYSFB is not set
+
+#
+# Executable file formats / Emulations
+#
+CONFIG_BINFMT_ELF=y
+CONFIG_COMPAT_BINFMT_ELF=y
+CONFIG_ELFCORE=y
+CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
+CONFIG_BINFMT_SCRIPT=y
+# CONFIG_HAVE_AOUT is not set
+CONFIG_BINFMT_MISC=y
+CONFIG_COREDUMP=y
+CONFIG_IA32_EMULATION=y
+# CONFIG_IA32_AOUT is not set
+# CONFIG_X86_X32 is not set
+CONFIG_COMPAT=y
+CONFIG_COMPAT_FOR_U64_ALIGNMENT=y
+CONFIG_SYSVIPC_COMPAT=y
+CONFIG_KEYS_COMPAT=y
+CONFIG_X86_DEV_DMA_OPS=y
+CONFIG_PMC_ATOM=y
+CONFIG_VMD=y
+CONFIG_NET=y
+CONFIG_NET_INGRESS=y
+CONFIG_NET_EGRESS=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+# CONFIG_XFRM_USER is not set
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+# CONFIG_IP_FIB_TRIE_STATS is not set
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_ROUTE_CLASSID=y
+# CONFIG_IP_PNP is not set
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_NET_IP_TUNNEL=y
+CONFIG_NET_IPGRE=y
+# CONFIG_NET_IPGRE_BROADCAST is not set
+# CONFIG_IP_MROUTE is not set
+CONFIG_SYN_COOKIES=y
+# CONFIG_NET_UDP_TUNNEL is not set
+# CONFIG_NET_FOU is not set
+# CONFIG_NET_FOU_IP_TUNNELS is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+CONFIG_INET_TUNNEL=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+CONFIG_INET_UDP_DIAG=y
+CONFIG_INET_DIAG_DESTROY=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_BIC=y
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_TCP_CONG_WESTWOOD=y
+CONFIG_TCP_CONG_HTCP=y
+CONFIG_TCP_CONG_HSTCP=y
+CONFIG_TCP_CONG_HYBLA=y
+CONFIG_TCP_CONG_VEGAS=y
+CONFIG_TCP_CONG_NV=y
+CONFIG_TCP_CONG_SCALABLE=y
+CONFIG_TCP_CONG_LP=y
+CONFIG_TCP_CONG_VENO=y
+CONFIG_TCP_CONG_YEAH=y
+CONFIG_TCP_CONG_ILLINOIS=y
+CONFIG_TCP_CONG_DCTCP=y
+CONFIG_TCP_CONG_CDG=y
+CONFIG_TCP_CONG_BBR=y
+CONFIG_TCP_CONG_BBR2=y
+# CONFIG_DEFAULT_CUBIC is not set
+# CONFIG_DEFAULT_DCTCP is not set
+CONFIG_DEFAULT_BBR2=y
+# CONFIG_DEFAULT_RENO is not set
+CONFIG_DEFAULT_TCP_CONG="bbr2"
+CONFIG_TCP_MD5SIG=y
+CONFIG_IPV6=y
+# CONFIG_IPV6_ROUTER_PREF is not set
+# CONFIG_IPV6_OPTIMISTIC_DAD is not set
+# CONFIG_INET6_AH is not set
+# CONFIG_INET6_ESP is not set
+# CONFIG_INET6_IPCOMP is not set
+# CONFIG_IPV6_MIP6 is not set
+# CONFIG_IPV6_ILA is not set
+# CONFIG_INET6_XFRM_TUNNEL is not set
+CONFIG_INET6_TUNNEL=y
+# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET6_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET6_XFRM_MODE_BEET is not set
+# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
+CONFIG_IPV6_SIT=y
+CONFIG_IPV6_SIT_6RD=y
+CONFIG_IPV6_NDISC_NODETYPE=y
+CONFIG_IPV6_TUNNEL=y
+CONFIG_IPV6_GRE=y
+# CONFIG_IPV6_FOU is not set
+# CONFIG_IPV6_FOU_TUNNEL is not set
+CONFIG_IPV6_MULTIPLE_TABLES=y
+# CONFIG_IPV6_SUBTREES is not set
+# CONFIG_IPV6_MROUTE is not set
+# CONFIG_NETLABEL is not set
+# CONFIG_NETWORK_SECMARK is not set
+CONFIG_NET_PTP_CLASSIFY=y
+# CONFIG_NETWORK_PHY_TIMESTAMPING is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+CONFIG_NETFILTER_ADVANCED=y
+# CONFIG_BRIDGE_NETFILTER is not set
+
+#
+# Core Netfilter Configuration
+#
+CONFIG_NETFILTER_INGRESS=y
+CONFIG_NETFILTER_NETLINK=m
+# CONFIG_NETFILTER_NETLINK_ACCT is not set
+CONFIG_NETFILTER_NETLINK_QUEUE=m
+CONFIG_NETFILTER_NETLINK_LOG=m
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_LOG_COMMON=m
+CONFIG_NF_CONNTRACK_MARK=y
+# CONFIG_NF_CONNTRACK_ZONES is not set
+CONFIG_NF_CONNTRACK_PROCFS=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+# CONFIG_NF_CONNTRACK_TIMEOUT is not set
+# CONFIG_NF_CONNTRACK_TIMESTAMP is not set
+# CONFIG_NF_CT_PROTO_DCCP is not set
+# CONFIG_NF_CT_PROTO_SCTP is not set
+# CONFIG_NF_CT_PROTO_UDPLITE is not set
+# CONFIG_NF_CONNTRACK_AMANDA is not set
+CONFIG_NF_CONNTRACK_FTP=m
+# CONFIG_NF_CONNTRACK_H323 is not set
+# CONFIG_NF_CONNTRACK_IRC is not set
+# CONFIG_NF_CONNTRACK_NETBIOS_NS is not set
+# CONFIG_NF_CONNTRACK_SNMP is not set
+# CONFIG_NF_CONNTRACK_PPTP is not set
+# CONFIG_NF_CONNTRACK_SANE is not set
+# CONFIG_NF_CONNTRACK_SIP is not set
+# CONFIG_NF_CONNTRACK_TFTP is not set
+CONFIG_NF_CT_NETLINK=m
+# CONFIG_NF_CT_NETLINK_TIMEOUT is not set
+# CONFIG_NETFILTER_NETLINK_GLUE_CT is not set
+CONFIG_NF_NAT=m
+CONFIG_NF_NAT_NEEDED=y
+# CONFIG_NF_NAT_AMANDA is not set
+CONFIG_NF_NAT_FTP=m
+# CONFIG_NF_NAT_IRC is not set
+# CONFIG_NF_NAT_SIP is not set
+# CONFIG_NF_NAT_TFTP is not set
+# CONFIG_NF_NAT_REDIRECT is not set
+# CONFIG_NF_TABLES is not set
+CONFIG_NETFILTER_XTABLES=y
+
+#
+# Xtables combined modules
+#
+CONFIG_NETFILTER_XT_MARK=m
+CONFIG_NETFILTER_XT_CONNMARK=m
+CONFIG_NETFILTER_XT_SET=m
+
+#
+# Xtables targets
+#
+# CONFIG_NETFILTER_XT_TARGET_AUDIT is not set
+# CONFIG_NETFILTER_XT_TARGET_CHECKSUM is not set
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=y
+CONFIG_NETFILTER_XT_TARGET_HL=m
+# CONFIG_NETFILTER_XT_TARGET_HMARK is not set
+# CONFIG_NETFILTER_XT_TARGET_IDLETIMER is not set
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_NAT=m
+# CONFIG_NETFILTER_XT_TARGET_NETMAP is not set
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
+CONFIG_NETFILTER_XT_TARGET_RATEEST=m
+# CONFIG_NETFILTER_XT_TARGET_REDIRECT is not set
+# CONFIG_NETFILTER_XT_TARGET_TEE is not set
+# CONFIG_NETFILTER_XT_TARGET_TPROXY is not set
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+
+#
+# Xtables matches
+#
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y
+# CONFIG_NETFILTER_XT_MATCH_BPF is not set
+# CONFIG_NETFILTER_XT_MATCH_CGROUP is not set
+# CONFIG_NETFILTER_XT_MATCH_CLUSTER is not set
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+# CONFIG_NETFILTER_XT_MATCH_CONNLABEL is not set
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+# CONFIG_NETFILTER_XT_MATCH_CPU is not set
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+# CONFIG_NETFILTER_XT_MATCH_DEVGROUP is not set
+CONFIG_NETFILTER_XT_MATCH_DSCP=y
+CONFIG_NETFILTER_XT_MATCH_ECN=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_HL=m
+# CONFIG_NETFILTER_XT_MATCH_IPCOMP is not set
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+# CONFIG_NETFILTER_XT_MATCH_L2TP is not set
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+# CONFIG_NETFILTER_XT_MATCH_LIMIT is not set
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=y
+# CONFIG_NETFILTER_XT_MATCH_NFACCT is not set
+# CONFIG_NETFILTER_XT_MATCH_OSF is not set
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+# CONFIG_NETFILTER_XT_MATCH_RECENT is not set
+CONFIG_NETFILTER_XT_MATCH_SCTP=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_SET=m
+CONFIG_IP_SET_MAX=1024
+# CONFIG_IP_SET_BITMAP_IP is not set
+# CONFIG_IP_SET_BITMAP_IPMAC is not set
+# CONFIG_IP_SET_BITMAP_PORT is not set
+CONFIG_IP_SET_HASH_IP=m
+# CONFIG_IP_SET_HASH_IPMARK is not set
+# CONFIG_IP_SET_HASH_IPPORT is not set
+# CONFIG_IP_SET_HASH_IPPORTIP is not set
+# CONFIG_IP_SET_HASH_IPPORTNET is not set
+# CONFIG_IP_SET_HASH_MAC is not set
+# CONFIG_IP_SET_HASH_NETPORTNET is not set
+CONFIG_IP_SET_HASH_NET=m
+# CONFIG_IP_SET_HASH_NETNET is not set
+# CONFIG_IP_SET_HASH_NETPORT is not set
+# CONFIG_IP_SET_HASH_NETIFACE is not set
+# CONFIG_IP_SET_LIST_SET is not set
+# CONFIG_IP_VS is not set
+
+#
+# IP: Netfilter Configuration
+#
+CONFIG_NF_DEFRAG_IPV4=m
+CONFIG_NF_CONNTRACK_IPV4=m
+# CONFIG_NF_DUP_IPV4 is not set
+# CONFIG_NF_LOG_ARP is not set
+CONFIG_NF_LOG_IPV4=m
+CONFIG_NF_REJECT_IPV4=m
+CONFIG_NF_NAT_IPV4=m
+CONFIG_NF_NAT_MASQUERADE_IPV4=m
+# CONFIG_NF_NAT_PPTP is not set
+# CONFIG_NF_NAT_H323 is not set
+CONFIG_IP_NF_IPTABLES=y
+# CONFIG_IP_NF_MATCH_AH is not set
+CONFIG_IP_NF_MATCH_ECN=m
+# CONFIG_IP_NF_MATCH_RPFILTER is not set
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+# CONFIG_IP_NF_TARGET_SYNPROXY is not set
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+# CONFIG_IP_NF_TARGET_NETMAP is not set
+# CONFIG_IP_NF_TARGET_REDIRECT is not set
+CONFIG_IP_NF_MANGLE=y
+# CONFIG_IP_NF_TARGET_CLUSTERIP is not set
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+# CONFIG_IP_NF_SECURITY is not set
+# CONFIG_IP_NF_ARPTABLES is not set
+
+#
+# IPv6: Netfilter Configuration
+#
+CONFIG_NF_DEFRAG_IPV6=m
+CONFIG_NF_CONNTRACK_IPV6=m
+# CONFIG_NF_DUP_IPV6 is not set
+CONFIG_NF_REJECT_IPV6=m
+CONFIG_NF_LOG_IPV6=m
+# CONFIG_NF_NAT_IPV6 is not set
+CONFIG_IP6_NF_IPTABLES=y
+# CONFIG_IP6_NF_MATCH_AH is not set
+# CONFIG_IP6_NF_MATCH_EUI64 is not set
+CONFIG_IP6_NF_MATCH_FRAG=m
+# CONFIG_IP6_NF_MATCH_OPTS is not set
+CONFIG_IP6_NF_MATCH_HL=m
+# CONFIG_IP6_NF_MATCH_IPV6HEADER is not set
+# CONFIG_IP6_NF_MATCH_MH is not set
+# CONFIG_IP6_NF_MATCH_RPFILTER is not set
+# CONFIG_IP6_NF_MATCH_RT is not set
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+# CONFIG_IP6_NF_TARGET_SYNPROXY is not set
+CONFIG_IP6_NF_MANGLE=y
+CONFIG_IP6_NF_RAW=m
+# CONFIG_IP6_NF_SECURITY is not set
+# CONFIG_IP6_NF_NAT is not set
+# CONFIG_BRIDGE_NF_EBTABLES is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_RDS is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_L2TP is not set
+CONFIG_STP=m
+CONFIG_BRIDGE=m
+CONFIG_BRIDGE_IGMP_SNOOPING=y
+# CONFIG_BRIDGE_VLAN_FILTERING is not set
+CONFIG_HAVE_NET_DSA=y
+CONFIG_VLAN_8021Q=m
+# CONFIG_VLAN_8021Q_GVRP is not set
+# CONFIG_VLAN_8021Q_MVRP is not set
+# CONFIG_DECNET is not set
+CONFIG_LLC=m
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_PHONET is not set
+# CONFIG_6LOWPAN is not set
+# CONFIG_IEEE802154 is not set
+CONFIG_NET_SCHED=y
+
+#
+# Queueing/Scheduling
+#
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=y
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=y
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+# CONFIG_NET_SCH_SFB is not set
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=y
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_MQPRIO=m
+# CONFIG_NET_SCH_CHOKE is not set
+# CONFIG_NET_SCH_QFQ is not set
+CONFIG_NET_SCH_CODEL=y
+CONFIG_NET_SCH_FQ_CODEL=y
+CONFIG_NET_SCH_FQ=y
+# CONFIG_NET_SCH_HHF is not set
+# CONFIG_NET_SCH_PIE is not set
+CONFIG_NET_SCH_INGRESS=m
+# CONFIG_NET_SCH_PLUG is not set
+
+#
+# Classification
+#
+CONFIG_NET_CLS=y
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=y
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=y
+# CONFIG_CLS_U32_PERF is not set
+# CONFIG_CLS_U32_MARK is not set
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+# CONFIG_NET_CLS_CGROUP is not set
+# CONFIG_NET_CLS_BPF is not set
+# CONFIG_NET_CLS_FLOWER is not set
+# CONFIG_NET_CLS_MATCHALL is not set
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_STACK=32
+CONFIG_NET_EMATCH_CMP=m
+CONFIG_NET_EMATCH_NBYTE=m
+CONFIG_NET_EMATCH_U32=m
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_EMATCH_TEXT=m
+# CONFIG_NET_EMATCH_IPSET is not set
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+# CONFIG_GACT_PROB is not set
+CONFIG_NET_ACT_MIRRED=y
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+# CONFIG_NET_ACT_CSUM is not set
+# CONFIG_NET_ACT_VLAN is not set
+# CONFIG_NET_ACT_BPF is not set
+# CONFIG_NET_ACT_CONNMARK is not set
+# CONFIG_NET_ACT_SKBMOD is not set
+CONFIG_NET_ACT_IFE=m
+# CONFIG_NET_ACT_TUNNEL_KEY is not set
+CONFIG_NET_IFE_SKBMARK=m
+CONFIG_NET_IFE_SKBPRIO=m
+# CONFIG_NET_IFE_SKBTCINDEX is not set
+# CONFIG_NET_CLS_IND is not set
+CONFIG_NET_SCH_FIFO=y
+CONFIG_DCB=y
+CONFIG_DNS_RESOLVER=m
+# CONFIG_BATMAN_ADV is not set
+CONFIG_OPENVSWITCH=m
+CONFIG_OPENVSWITCH_GRE=m
+# CONFIG_VSOCKETS is not set
+CONFIG_NETLINK_DIAG=y
+CONFIG_MPLS=y
+CONFIG_NET_MPLS_GSO=m
+# CONFIG_MPLS_ROUTING is not set
+# CONFIG_HSR is not set
+# CONFIG_NET_SWITCHDEV is not set
+# CONFIG_NET_L3_MASTER_DEV is not set
+# CONFIG_NET_NCSI is not set
+CONFIG_RPS=y
+CONFIG_RFS_ACCEL=y
+CONFIG_XPS=y
+# CONFIG_SOCK_CGROUP_DATA is not set
+# CONFIG_CGROUP_NET_PRIO is not set
+# CONFIG_CGROUP_NET_CLASSID is not set
+CONFIG_NET_RX_BUSY_POLL=y
+CONFIG_BQL=y
+CONFIG_BPF_JIT=y
+CONFIG_NET_FLOW_LIMIT=y
+
+#
+# Network testing
+#
+CONFIG_NET_PKTGEN=m
+# CONFIG_NET_TCPPROBE is not set
+# CONFIG_NET_DROP_MONITOR is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+# CONFIG_AF_KCM is not set
+# CONFIG_STREAM_PARSER is not set
+CONFIG_FIB_RULES=y
+# CONFIG_WIRELESS is not set
+# CONFIG_WIMAX is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+# CONFIG_CAIF is not set
+# CONFIG_CEPH_LIB is not set
+# CONFIG_NFC is not set
+# CONFIG_LWTUNNEL is not set
+CONFIG_DST_CACHE=y
+CONFIG_NET_DEVLINK=y
+CONFIG_MAY_USE_DEVLINK=y
+CONFIG_HAVE_EBPF_JIT=y
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+# CONFIG_DEVTMPFS_MOUNT is not set
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
+CONFIG_FW_LOADER_USER_HELPER=y
+# CONFIG_FW_LOADER_USER_HELPER_FALLBACK is not set
+CONFIG_ALLOW_DEV_COREDUMP=y
+# CONFIG_DEBUG_DRIVER is not set
+CONFIG_DEBUG_DEVRES=y
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_GENERIC_CPU_DEVICES is not set
+CONFIG_GENERIC_CPU_AUTOPROBE=y
+# CONFIG_DMA_SHARED_BUFFER is not set
+
+#
+# Bus devices
+#
+CONFIG_CONNECTOR=y
+CONFIG_PROC_EVENTS=y
+# CONFIG_MTD is not set
+# CONFIG_OF is not set
+CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y
+# CONFIG_PARPORT is not set
+CONFIG_PNP=y
+CONFIG_PNP_DEBUG_MESSAGES=y
+
+#
+# Protocols
+#
+CONFIG_PNPACPI=y
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_NULL_BLK is not set
+# CONFIG_BLK_DEV_PCIESSD_MTIP32XX is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_LOOP_MIN_COUNT=8
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+# CONFIG_BLK_DEV_DRBD is not set
+CONFIG_BLK_DEV_NBD=m
+# CONFIG_BLK_DEV_SKD is not set
+# CONFIG_BLK_DEV_SX8 is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=16384
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_VIRTIO_BLK is not set
+# CONFIG_BLK_DEV_HD is not set
+# CONFIG_BLK_DEV_RBD is not set
+# CONFIG_BLK_DEV_RSXX is not set
+# CONFIG_BLK_DEV_NVME is not set
+# CONFIG_NVME_RDMA is not set
+# CONFIG_NVME_TARGET is not set
+
+#
+# Misc devices
+#
+# CONFIG_SENSORS_LIS3LV02D is not set
+# CONFIG_AD525X_DPOT is not set
+# CONFIG_DUMMY_IRQ is not set
+# CONFIG_IBM_ASM is not set
+# CONFIG_PHANTOM is not set
+# CONFIG_SGI_IOC4 is not set
+# CONFIG_TIFM_CORE is not set
+# CONFIG_ICS932S401 is not set
+# CONFIG_ENCLOSURE_SERVICES is not set
+# CONFIG_HP_ILO is not set
+# CONFIG_APDS9802ALS is not set
+# CONFIG_ISL29003 is not set
+# CONFIG_ISL29020 is not set
+# CONFIG_SENSORS_TSL2550 is not set
+# CONFIG_SENSORS_BH1770 is not set
+# CONFIG_SENSORS_APDS990X is not set
+# CONFIG_HMC6352 is not set
+# CONFIG_DS1682 is not set
+# CONFIG_BMP085_I2C is not set
+# CONFIG_USB_SWITCH_FSA9480 is not set
+# CONFIG_SRAM is not set
+# CONFIG_C2PORT is not set
+
+#
+# EEPROM support
+#
+# CONFIG_EEPROM_AT24 is not set
+CONFIG_EEPROM_LEGACY=m
+# CONFIG_EEPROM_MAX6875 is not set
+# CONFIG_EEPROM_93CX6 is not set
+# CONFIG_CB710_CORE is not set
+
+#
+# Texas Instruments shared transport line discipline
+#
+# CONFIG_TI_ST is not set
+# CONFIG_SENSORS_LIS3_I2C is not set
+
+#
+# Altera FPGA firmware download module
+#
+# CONFIG_ALTERA_STAPL is not set
+# CONFIG_INTEL_MEI is not set
+# CONFIG_INTEL_MEI_ME is not set
+# CONFIG_INTEL_MEI_TXE is not set
+# CONFIG_VMWARE_VMCI is not set
+
+#
+# Intel MIC Bus Driver
+#
+# CONFIG_INTEL_MIC_BUS is not set
+
+#
+# SCIF Bus Driver
+#
+# CONFIG_SCIF_BUS is not set
+
+#
+# VOP Bus Driver
+#
+# CONFIG_VOP_BUS is not set
+
+#
+# Intel MIC Host Driver
+#
+
+#
+# Intel MIC Card Driver
+#
+
+#
+# SCIF Driver
+#
+
+#
+# Intel MIC Coprocessor State Management (COSM) Drivers
+#
+
+#
+# VOP Driver
+#
+# CONFIG_GENWQE is not set
+# CONFIG_ECHO is not set
+# CONFIG_CXL_BASE is not set
+# CONFIG_CXL_AFU_DRIVER_OPS is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+CONFIG_SCSI_MOD=y
+# CONFIG_RAID_ATTRS is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_SCSI_MQ_DEFAULT is not set
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+# CONFIG_CHR_DEV_OSST is not set
+CONFIG_BLK_DEV_SR=m
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=y
+# CONFIG_CHR_DEV_SCH is not set
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+# CONFIG_SCSI_SCAN_ASYNC is not set
+
+#
+# SCSI Transports
+#
+CONFIG_SCSI_SPI_ATTRS=m
+# CONFIG_SCSI_FC_ATTRS is not set
+CONFIG_SCSI_ISCSI_ATTRS=y
+# CONFIG_SCSI_SAS_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
+# CONFIG_SCSI_SRP_ATTRS is not set
+CONFIG_SCSI_LOWLEVEL=y
+CONFIG_ISCSI_TCP=y
+# CONFIG_ISCSI_BOOT_SYSFS is not set
+# CONFIG_SCSI_CXGB3_ISCSI is not set
+# CONFIG_SCSI_CXGB4_ISCSI is not set
+# CONFIG_SCSI_BNX2_ISCSI is not set
+# CONFIG_BE2ISCSI is not set
+# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+# CONFIG_SCSI_HPSA is not set
+# CONFIG_SCSI_3W_9XXX is not set
+# CONFIG_SCSI_3W_SAS is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AACRAID is not set
+CONFIG_SCSI_AIC7XXX=m
+CONFIG_AIC7XXX_CMDS_PER_DEVICE=253
+CONFIG_AIC7XXX_RESET_DELAY_MS=15000
+CONFIG_AIC7XXX_DEBUG_ENABLE=y
+CONFIG_AIC7XXX_DEBUG_MASK=0
+CONFIG_AIC7XXX_REG_PRETTY_PRINT=y
+# CONFIG_SCSI_AIC79XX is not set
+# CONFIG_SCSI_AIC94XX is not set
+# CONFIG_SCSI_MVSAS is not set
+# CONFIG_SCSI_MVUMI is not set
+# CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_SCSI_ADVANSYS is not set
+# CONFIG_SCSI_ARCMSR is not set
+# CONFIG_SCSI_ESAS2R is not set
+CONFIG_MEGARAID_NEWGEN=y
+CONFIG_MEGARAID_MM=y
+CONFIG_MEGARAID_MAILBOX=y
+# CONFIG_MEGARAID_LEGACY is not set
+CONFIG_MEGARAID_SAS=y
+# CONFIG_SCSI_MPT3SAS is not set
+# CONFIG_SCSI_MPT2SAS is not set
+# CONFIG_SCSI_UFSHCD is not set
+# CONFIG_SCSI_HPTIOP is not set
+# CONFIG_VMWARE_PVSCSI is not set
+# CONFIG_SCSI_SNIC is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_ISCI is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_STEX is not set
+# CONFIG_SCSI_SYM53C8XX_2 is not set
+# CONFIG_SCSI_IPR is not set
+# CONFIG_SCSI_QLOGIC_1280 is not set
+# CONFIG_SCSI_QLA_ISCSI is not set
+# CONFIG_SCSI_DC395x is not set
+# CONFIG_SCSI_AM53C974 is not set
+# CONFIG_SCSI_WD719X is not set
+# CONFIG_SCSI_DEBUG is not set
+# CONFIG_SCSI_PMCRAID is not set
+# CONFIG_SCSI_PM8001 is not set
+CONFIG_SCSI_VIRTIO=y
+# CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
+CONFIG_ATA=y
+# CONFIG_ATA_NONSTANDARD is not set
+CONFIG_ATA_VERBOSE_ERROR=y
+CONFIG_ATA_ACPI=y
+# CONFIG_SATA_ZPODD is not set
+CONFIG_SATA_PMP=y
+
+#
+# Controllers with non-SFF native interface
+#
+CONFIG_SATA_AHCI=y
+# CONFIG_SATA_AHCI_PLATFORM is not set
+# CONFIG_SATA_INIC162X is not set
+# CONFIG_SATA_ACARD_AHCI is not set
+CONFIG_SATA_SIL24=m
+CONFIG_ATA_SFF=y
+
+#
+# SFF controllers with custom DMA interface
+#
+# CONFIG_PDC_ADMA is not set
+# CONFIG_SATA_QSTOR is not set
+# CONFIG_SATA_SX4 is not set
+CONFIG_ATA_BMDMA=y
+
+#
+# SATA SFF controllers with BMDMA
+#
+CONFIG_ATA_PIIX=y
+# CONFIG_SATA_DWC is not set
+CONFIG_SATA_MV=m
+CONFIG_SATA_NV=y
+# CONFIG_SATA_PROMISE is not set
+CONFIG_SATA_SIL=m
+# CONFIG_SATA_SIS is not set
+# CONFIG_SATA_SVW is not set
+# CONFIG_SATA_ULI is not set
+# CONFIG_SATA_VIA is not set
+# CONFIG_SATA_VITESSE is not set
+
+#
+# PATA SFF controllers with BMDMA
+#
+# CONFIG_PATA_ALI is not set
+CONFIG_PATA_AMD=y
+# CONFIG_PATA_ARTOP is not set
+# CONFIG_PATA_ATIIXP is not set
+CONFIG_PATA_ATP867X=m
+# CONFIG_PATA_CMD64X is not set
+# CONFIG_PATA_CYPRESS is not set
+# CONFIG_PATA_EFAR is not set
+# CONFIG_PATA_HPT366 is not set
+# CONFIG_PATA_HPT37X is not set
+# CONFIG_PATA_HPT3X2N is not set
+# CONFIG_PATA_HPT3X3 is not set
+# CONFIG_PATA_IT8213 is not set
+# CONFIG_PATA_IT821X is not set
+# CONFIG_PATA_JMICRON is not set
+# CONFIG_PATA_MARVELL is not set
+# CONFIG_PATA_NETCELL is not set
+# CONFIG_PATA_NINJA32 is not set
+# CONFIG_PATA_NS87415 is not set
+CONFIG_PATA_OLDPIIX=y
+# CONFIG_PATA_OPTIDMA is not set
+# CONFIG_PATA_PDC2027X is not set
+# CONFIG_PATA_PDC_OLD is not set
+# CONFIG_PATA_RADISYS is not set
+# CONFIG_PATA_RDC is not set
+CONFIG_PATA_SCH=y
+# CONFIG_PATA_SERVERWORKS is not set
+# CONFIG_PATA_SIL680 is not set
+# CONFIG_PATA_SIS is not set
+# CONFIG_PATA_TOSHIBA is not set
+# CONFIG_PATA_TRIFLEX is not set
+# CONFIG_PATA_VIA is not set
+# CONFIG_PATA_WINBOND is not set
+
+#
+# PIO-only SFF controllers
+#
+# CONFIG_PATA_CMD640_PCI is not set
+# CONFIG_PATA_MPIIX is not set
+# CONFIG_PATA_NS87410 is not set
+# CONFIG_PATA_OPTI is not set
+# CONFIG_PATA_PLATFORM is not set
+# CONFIG_PATA_RZ1000 is not set
+
+#
+# Generic fallback / legacy drivers
+#
+# CONFIG_PATA_ACPI is not set
+# CONFIG_ATA_GENERIC is not set
+# CONFIG_PATA_LEGACY is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_AUTODETECT=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=y
+CONFIG_MD_RAID10=m
+# CONFIG_MD_RAID456 is not set
+# CONFIG_MD_MULTIPATH is not set
+# CONFIG_MD_FAULTY is not set
+# CONFIG_BCACHE is not set
+CONFIG_BLK_DEV_DM_BUILTIN=y
+CONFIG_BLK_DEV_DM=y
+# CONFIG_DM_MQ_DEFAULT is not set
+# CONFIG_DM_DEBUG is not set
+CONFIG_DM_BUFIO=y
+# CONFIG_DM_DEBUG_BLOCK_STACK_TRACING is not set
+CONFIG_DM_BIO_PRISON=y
+CONFIG_DM_PERSISTENT_DATA=y
+CONFIG_DM_CRYPT=y
+# CONFIG_DM_SNAPSHOT is not set
+CONFIG_DM_THIN_PROVISIONING=y
+# CONFIG_DM_CACHE is not set
+# CONFIG_DM_ERA is not set
+CONFIG_DM_MIRROR=y
+# CONFIG_DM_LOG_USERSPACE is not set
+# CONFIG_DM_RAID is not set
+CONFIG_DM_ZERO=y
+CONFIG_DM_MULTIPATH=y
+CONFIG_DM_MULTIPATH_QL=y
+CONFIG_DM_MULTIPATH_ST=y
+# CONFIG_DM_DELAY is not set
+# CONFIG_DM_UEVENT is not set
+# CONFIG_DM_FLAKEY is not set
+CONFIG_DM_VERITY=y
+# CONFIG_DM_VERITY_FEC is not set
+# CONFIG_DM_SWITCH is not set
+# CONFIG_DM_LOG_WRITES is not set
+# CONFIG_TARGET_CORE is not set
+# CONFIG_FUSION is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+CONFIG_FIREWIRE=m
+CONFIG_FIREWIRE_OHCI=m
+# CONFIG_FIREWIRE_SBP2 is not set
+# CONFIG_FIREWIRE_NET is not set
+# CONFIG_FIREWIRE_NOSY is not set
+# CONFIG_MACINTOSH_DRIVERS is not set
+CONFIG_NETDEVICES=y
+CONFIG_NET_CORE=y
+CONFIG_BONDING=m
+# CONFIG_DUMMY is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_NET_FC is not set
+CONFIG_IFB=m
+# CONFIG_NET_TEAM is not set
+CONFIG_MACVLAN=m
+# CONFIG_MACVTAP is not set
+# CONFIG_VXLAN is not set
+# CONFIG_MACSEC is not set
+CONFIG_NETCONSOLE=m
+# CONFIG_NETCONSOLE_DYNAMIC is not set
+CONFIG_NETPOLL=y
+CONFIG_NET_POLL_CONTROLLER=y
+CONFIG_TUN=y
+# CONFIG_TUN_VNET_CROSS_LE is not set
+CONFIG_VETH=y
+CONFIG_VIRTIO_NET=y
+# CONFIG_NLMON is not set
+# CONFIG_ARCNET is not set
+
+#
+# CAIF transport drivers
+#
+
+#
+# Distributed Switch Architecture drivers
+#
+CONFIG_ETHERNET=y
+CONFIG_MDIO=m
+CONFIG_NET_VENDOR_3COM=y
+# CONFIG_VORTEX is not set
+# CONFIG_TYPHOON is not set
+CONFIG_NET_VENDOR_ADAPTEC=y
+# CONFIG_ADAPTEC_STARFIRE is not set
+CONFIG_NET_VENDOR_AGERE=y
+# CONFIG_ET131X is not set
+CONFIG_NET_VENDOR_ALTEON=y
+# CONFIG_ACENIC is not set
+# CONFIG_ALTERA_TSE is not set
+CONFIG_NET_VENDOR_AMAZON=y
+# CONFIG_ENA_ETHERNET is not set
+CONFIG_NET_VENDOR_AMD=y
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_PCNET32 is not set
+CONFIG_NET_VENDOR_ARC=y
+CONFIG_NET_VENDOR_ATHEROS=y
+# CONFIG_ATL2 is not set
+# CONFIG_ATL1 is not set
+# CONFIG_ATL1E is not set
+# CONFIG_ATL1C is not set
+# CONFIG_ALX is not set
+# CONFIG_NET_VENDOR_AURORA is not set
+CONFIG_NET_CADENCE=y
+# CONFIG_MACB is not set
+CONFIG_NET_VENDOR_BROADCOM=y
+# CONFIG_B44 is not set
+# CONFIG_BCMGENET is not set
+CONFIG_BNX2=m
+# CONFIG_CNIC is not set
+CONFIG_TIGON3=m
+CONFIG_BNX2X=m
+# CONFIG_BNXT is not set
+CONFIG_NET_VENDOR_BROCADE=y
+# CONFIG_BNA is not set
+CONFIG_NET_VENDOR_CAVIUM=y
+# CONFIG_THUNDER_NIC_PF is not set
+# CONFIG_THUNDER_NIC_VF is not set
+# CONFIG_THUNDER_NIC_BGX is not set
+# CONFIG_THUNDER_NIC_RGX is not set
+# CONFIG_LIQUIDIO is not set
+CONFIG_NET_VENDOR_CHELSIO=y
+# CONFIG_CHELSIO_T1 is not set
+CONFIG_CHELSIO_T3=m
+# CONFIG_CHELSIO_T4 is not set
+# CONFIG_CHELSIO_T4VF is not set
+CONFIG_NET_VENDOR_CISCO=y
+# CONFIG_ENIC is not set
+# CONFIG_CX_ECAT is not set
+# CONFIG_DNET is not set
+CONFIG_NET_VENDOR_DEC=y
+# CONFIG_NET_TULIP is not set
+CONFIG_NET_VENDOR_DLINK=y
+# CONFIG_DL2K is not set
+# CONFIG_SUNDANCE is not set
+CONFIG_NET_VENDOR_EMULEX=y
+# CONFIG_BE2NET is not set
+CONFIG_NET_VENDOR_EZCHIP=y
+CONFIG_NET_VENDOR_EXAR=y
+# CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
+CONFIG_NET_VENDOR_HP=y
+# CONFIG_HP100 is not set
+CONFIG_NET_VENDOR_INTEL=y
+# CONFIG_E100 is not set
+CONFIG_E1000=y
+CONFIG_E1000E=m
+CONFIG_E1000E_HWTS=y
+# CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
+# CONFIG_IXGB is not set
+# CONFIG_IXGBE is not set
+# CONFIG_IXGBEVF is not set
+# CONFIG_I40E is not set
+# CONFIG_I40EVF is not set
+# CONFIG_FM10K is not set
+CONFIG_NET_VENDOR_I825XX=y
+# CONFIG_JME is not set
+CONFIG_NET_VENDOR_MARVELL=y
+# CONFIG_MVMDIO is not set
+# CONFIG_MVNETA_BM is not set
+# CONFIG_SKGE is not set
+CONFIG_SKY2=m
+# CONFIG_SKY2_DEBUG is not set
+CONFIG_NET_VENDOR_MELLANOX=y
+CONFIG_MLX4_EN=m
+# CONFIG_MLX4_EN_DCB is not set
+CONFIG_MLX4_CORE=m
+CONFIG_MLX4_DEBUG=y
+# CONFIG_MLX5_CORE is not set
+# CONFIG_MLXSW_CORE is not set
+CONFIG_NET_VENDOR_MICREL=y
+# CONFIG_KS8851_MLL is not set
+# CONFIG_KSZ884X_PCI is not set
+CONFIG_NET_VENDOR_MYRI=y
+# CONFIG_MYRI10GE is not set
+# CONFIG_FEALNX is not set
+CONFIG_NET_VENDOR_NATSEMI=y
+# CONFIG_NATSEMI is not set
+# CONFIG_NS83820 is not set
+CONFIG_NET_VENDOR_NETRONOME=y
+# CONFIG_NFP_NETVF is not set
+CONFIG_NET_VENDOR_8390=y
+# CONFIG_NE2K_PCI is not set
+CONFIG_NET_VENDOR_NVIDIA=y
+# CONFIG_FORCEDETH is not set
+CONFIG_NET_VENDOR_OKI=y
+# CONFIG_ETHOC is not set
+CONFIG_NET_PACKET_ENGINE=y
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+CONFIG_NET_VENDOR_QLOGIC=y
+# CONFIG_QLA3XXX is not set
+# CONFIG_QLCNIC is not set
+# CONFIG_QLGE is not set
+# CONFIG_NETXEN_NIC is not set
+# CONFIG_QED is not set
+CONFIG_NET_VENDOR_QUALCOMM=y
+# CONFIG_QCOM_EMAC is not set
+CONFIG_NET_VENDOR_REALTEK=y
+# CONFIG_8139CP is not set
+# CONFIG_8139TOO is not set
+# CONFIG_R8169 is not set
+CONFIG_NET_VENDOR_RENESAS=y
+CONFIG_NET_VENDOR_RDC=y
+# CONFIG_R6040 is not set
+CONFIG_NET_VENDOR_ROCKER=y
+CONFIG_NET_VENDOR_SAMSUNG=y
+# CONFIG_SXGBE_ETH is not set
+CONFIG_NET_VENDOR_SEEQ=y
+CONFIG_NET_VENDOR_SILAN=y
+# CONFIG_SC92031 is not set
+CONFIG_NET_VENDOR_SIS=y
+# CONFIG_SIS900 is not set
+# CONFIG_SIS190 is not set
+# CONFIG_SFC is not set
+CONFIG_NET_VENDOR_SMSC=y
+# CONFIG_EPIC100 is not set
+# CONFIG_SMSC911X is not set
+# CONFIG_SMSC9420 is not set
+CONFIG_NET_VENDOR_STMICRO=y
+# CONFIG_STMMAC_ETH is not set
+CONFIG_NET_VENDOR_SUN=y
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNGEM is not set
+# CONFIG_CASSINI is not set
+# CONFIG_NIU is not set
+CONFIG_NET_VENDOR_SYNOPSYS=y
+CONFIG_NET_VENDOR_TEHUTI=y
+# CONFIG_TEHUTI is not set
+CONFIG_NET_VENDOR_TI=y
+# CONFIG_TI_CPSW_ALE is not set
+# CONFIG_TLAN is not set
+CONFIG_NET_VENDOR_VIA=y
+# CONFIG_VIA_RHINE is not set
+# CONFIG_VIA_VELOCITY is not set
+CONFIG_NET_VENDOR_WIZNET=y
+# CONFIG_WIZNET_W5100 is not set
+# CONFIG_WIZNET_W5300 is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_NET_SB1000 is not set
+CONFIG_PHYLIB=y
+
+#
+# MDIO bus device drivers
+#
+# CONFIG_MDIO_BCM_UNIMAC is not set
+# CONFIG_MDIO_BITBANG is not set
+# CONFIG_MDIO_OCTEON is not set
+# CONFIG_MDIO_THUNDER is not set
+# CONFIG_MDIO_XGENE is not set
+
+#
+# MII PHY device drivers
+#
+# CONFIG_AMD_PHY is not set
+# CONFIG_AQUANTIA_PHY is not set
+# CONFIG_AT803X_PHY is not set
+# CONFIG_BCM7XXX_PHY is not set
+# CONFIG_BCM87XX_PHY is not set
+# CONFIG_BROADCOM_PHY is not set
+# CONFIG_CICADA_PHY is not set
+# CONFIG_DAVICOM_PHY is not set
+# CONFIG_DP83848_PHY is not set
+# CONFIG_DP83867_PHY is not set
+# CONFIG_FIXED_PHY is not set
+# CONFIG_ICPLUS_PHY is not set
+# CONFIG_INTEL_XWAY_PHY is not set
+# CONFIG_LSI_ET1011C_PHY is not set
+# CONFIG_LXT_PHY is not set
+# CONFIG_MARVELL_PHY is not set
+# CONFIG_MICREL_PHY is not set
+# CONFIG_MICROCHIP_PHY is not set
+# CONFIG_MICROSEMI_PHY is not set
+# CONFIG_NATIONAL_PHY is not set
+# CONFIG_QSEMI_PHY is not set
+# CONFIG_REALTEK_PHY is not set
+# CONFIG_SMSC_PHY is not set
+# CONFIG_STE10XP is not set
+# CONFIG_TERANETICS_PHY is not set
+# CONFIG_VITESSE_PHY is not set
+# CONFIG_XILINX_GMII2RGMII is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+CONFIG_USB_NET_DRIVERS=y
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_RTL8152 is not set
+# CONFIG_USB_LAN78XX is not set
+# CONFIG_USB_USBNET is not set
+# CONFIG_USB_IPHETH is not set
+# CONFIG_WLAN is not set
+
+#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
+# CONFIG_WAN is not set
+# CONFIG_VMXNET3 is not set
+# CONFIG_FUJITSU_ES is not set
+# CONFIG_ISDN is not set
+# CONFIG_NVM is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+CONFIG_INPUT_SPARSEKMAP=m
+# CONFIG_INPUT_MATRIXKMAP is not set
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+# CONFIG_INPUT_JOYDEV is not set
+CONFIG_INPUT_EVDEV=m
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+# CONFIG_KEYBOARD_ADP5588 is not set
+# CONFIG_KEYBOARD_ADP5589 is not set
+CONFIG_KEYBOARD_ATKBD=y
+# CONFIG_KEYBOARD_QT1070 is not set
+# CONFIG_KEYBOARD_QT2160 is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_GPIO is not set
+# CONFIG_KEYBOARD_GPIO_POLLED is not set
+# CONFIG_KEYBOARD_TCA6416 is not set
+# CONFIG_KEYBOARD_TCA8418 is not set
+# CONFIG_KEYBOARD_MATRIX is not set
+# CONFIG_KEYBOARD_LM8333 is not set
+# CONFIG_KEYBOARD_MAX7359 is not set
+# CONFIG_KEYBOARD_MCS is not set
+# CONFIG_KEYBOARD_MPR121 is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+# CONFIG_KEYBOARD_OPENCORES is not set
+# CONFIG_KEYBOARD_STOWAWAY is not set
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+CONFIG_INPUT_MISC=y
+# CONFIG_INPUT_AD714X is not set
+# CONFIG_INPUT_BMA150 is not set
+# CONFIG_INPUT_E3X0_BUTTON is not set
+CONFIG_INPUT_PCSPKR=m
+# CONFIG_INPUT_MMA8450 is not set
+# CONFIG_INPUT_MPU3050 is not set
+# CONFIG_INPUT_GP2A is not set
+# CONFIG_INPUT_GPIO_BEEPER is not set
+# CONFIG_INPUT_GPIO_TILT_POLLED is not set
+# CONFIG_INPUT_ATLAS_BTNS is not set
+# CONFIG_INPUT_ATI_REMOTE2 is not set
+# CONFIG_INPUT_KEYSPAN_REMOTE is not set
+# CONFIG_INPUT_KXTJ9 is not set
+# CONFIG_INPUT_POWERMATE is not set
+# CONFIG_INPUT_YEALINK is not set
+# CONFIG_INPUT_CM109 is not set
+# CONFIG_INPUT_UINPUT is not set
+# CONFIG_INPUT_PCF8574 is not set
+# CONFIG_INPUT_GPIO_ROTARY_ENCODER is not set
+# CONFIG_INPUT_ADXL34X is not set
+# CONFIG_INPUT_CMA3000 is not set
+# CONFIG_INPUT_IDEAPAD_SLIDEBAR is not set
+# CONFIG_INPUT_DRV260X_HAPTICS is not set
+# CONFIG_INPUT_DRV2665_HAPTICS is not set
+# CONFIG_INPUT_DRV2667_HAPTICS is not set
+# CONFIG_RMI4_CORE is not set
+
+#
+# Hardware I/O ports
+#
+CONFIG_SERIO=y
+CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y
+CONFIG_SERIO_I8042=y
+CONFIG_SERIO_SERPORT=y
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PCIPS2 is not set
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_SERIO_RAW is not set
+# CONFIG_SERIO_ALTERA_PS2 is not set
+# CONFIG_SERIO_PS2MULT is not set
+# CONFIG_SERIO_ARC_PS2 is not set
+# CONFIG_USERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_TTY=y
+CONFIG_VT=y
+CONFIG_CONSOLE_TRANSLATIONS=y
+CONFIG_VT_CONSOLE=y
+CONFIG_VT_CONSOLE_SLEEP=y
+CONFIG_HW_CONSOLE=y
+CONFIG_VT_HW_CONSOLE_BINDING=y
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_SERIAL_NONSTANDARD is not set
+# CONFIG_NOZOMI is not set
+# CONFIG_N_GSM is not set
+# CONFIG_TRACE_SINK is not set
+CONFIG_DEVMEM=y
+CONFIG_DEVKMEM=y
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_EARLYCON=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_DEPRECATED_OPTIONS=y
+CONFIG_SERIAL_8250_PNP=y
+# CONFIG_SERIAL_8250_FINTEK is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DMA=y
+CONFIG_SERIAL_8250_PCI=y
+CONFIG_SERIAL_8250_NR_UARTS=32
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_RSA=y
+# CONFIG_SERIAL_8250_FSL is not set
+# CONFIG_SERIAL_8250_DW is not set
+# CONFIG_SERIAL_8250_RT288X is not set
+# CONFIG_SERIAL_8250_MID is not set
+# CONFIG_SERIAL_8250_MOXA is not set
+
+#
+# Non-8250 serial port support
+#
+# CONFIG_SERIAL_UARTLITE is not set
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
+# CONFIG_SERIAL_SCCNXP is not set
+# CONFIG_SERIAL_SC16IS7XX is not set
+# CONFIG_SERIAL_ALTERA_JTAGUART is not set
+# CONFIG_SERIAL_ALTERA_UART is not set
+# CONFIG_SERIAL_ARC is not set
+# CONFIG_SERIAL_RP2 is not set
+# CONFIG_SERIAL_FSL_LPUART is not set
+# CONFIG_TTY_PRINTK is not set
+# CONFIG_VIRTIO_CONSOLE is not set
+CONFIG_IPMI_HANDLER=m
+# CONFIG_IPMI_PANIC_EVENT is not set
+CONFIG_IPMI_DEVICE_INTERFACE=m
+CONFIG_IPMI_SI=m
+# CONFIG_IPMI_SSIF is not set
+# CONFIG_IPMI_WATCHDOG is not set
+CONFIG_IPMI_POWEROFF=m
+# CONFIG_HW_RANDOM is not set
+CONFIG_NVRAM=m
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+# CONFIG_MWAVE is not set
+# CONFIG_RAW_DRIVER is not set
+CONFIG_HPET=y
+CONFIG_HPET_MMAP=y
+CONFIG_HPET_MMAP_DEFAULT=y
+# CONFIG_HANGCHECK_TIMER is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
+CONFIG_DEVPORT=y
+# CONFIG_XILLYBUS is not set
+
+#
+# I2C support
+#
+CONFIG_I2C=m
+CONFIG_I2C_BOARDINFO=y
+CONFIG_I2C_COMPAT=y
+CONFIG_I2C_CHARDEV=m
+CONFIG_I2C_MUX=m
+
+#
+# Multiplexer I2C Chip support
+#
+# CONFIG_I2C_MUX_GPIO is not set
+CONFIG_I2C_MUX_PCA9541=m
+CONFIG_I2C_MUX_PCA954x=m
+# CONFIG_I2C_MUX_REG is not set
+CONFIG_I2C_HELPER_AUTO=y
+CONFIG_I2C_SMBUS=m
+CONFIG_I2C_ALGOBIT=m
+
+#
+# I2C Hardware Bus support
+#
+
+#
+# PC SMBus host controller drivers
+#
+# CONFIG_I2C_ALI1535 is not set
+# CONFIG_I2C_ALI1563 is not set
+# CONFIG_I2C_ALI15X3 is not set
+# CONFIG_I2C_AMD756 is not set
+# CONFIG_I2C_AMD8111 is not set
+CONFIG_I2C_I801=m
+# CONFIG_I2C_ISCH is not set
+# CONFIG_I2C_ISMT is not set
+CONFIG_I2C_PIIX4=m
+CONFIG_I2C_NFORCE2=m
+# CONFIG_I2C_NFORCE2_S4985 is not set
+# CONFIG_I2C_SIS5595 is not set
+# CONFIG_I2C_SIS630 is not set
+# CONFIG_I2C_SIS96X is not set
+# CONFIG_I2C_VIA is not set
+# CONFIG_I2C_VIAPRO is not set
+
+#
+# ACPI drivers
+#
+# CONFIG_I2C_SCMI is not set
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+# CONFIG_I2C_CBUS_GPIO is not set
+# CONFIG_I2C_DESIGNWARE_PCI is not set
+# CONFIG_I2C_GPIO is not set
+# CONFIG_I2C_OCORES is not set
+# CONFIG_I2C_PCA_PLATFORM is not set
+# CONFIG_I2C_PXA_PCI is not set
+# CONFIG_I2C_SIMTEC is not set
+# CONFIG_I2C_XILINX is not set
+
+#
+# External I2C/SMBus adapter drivers
+#
+# CONFIG_I2C_DIOLAN_U2C is not set
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_ROBOTFUZZ_OSIF is not set
+# CONFIG_I2C_TAOS_EVM is not set
+# CONFIG_I2C_TINY_USB is not set
+
+#
+# Other I2C/SMBus bus drivers
+#
+# CONFIG_I2C_STUB is not set
+# CONFIG_I2C_SLAVE is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_SPI is not set
+# CONFIG_SPMI is not set
+# CONFIG_HSI is not set
+
+#
+# PPS support
+#
+CONFIG_PPS=y
+# CONFIG_PPS_DEBUG is not set
+
+#
+# PPS clients support
+#
+# CONFIG_PPS_CLIENT_KTIMER is not set
+# CONFIG_PPS_CLIENT_LDISC is not set
+# CONFIG_PPS_CLIENT_GPIO is not set
+
+#
+# PPS generators support
+#
+
+#
+# PTP clock support
+#
+CONFIG_PTP_1588_CLOCK=y
+
+#
+# Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks.
+#
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_DEVRES=y
+CONFIG_GPIO_ACPI=y
+# CONFIG_DEBUG_GPIO is not set
+# CONFIG_GPIO_SYSFS is not set
+
+#
+# Memory mapped GPIO drivers
+#
+# CONFIG_GPIO_AMDPT is not set
+# CONFIG_GPIO_DWAPB is not set
+# CONFIG_GPIO_GENERIC_PLATFORM is not set
+# CONFIG_GPIO_ICH is not set
+# CONFIG_GPIO_LYNXPOINT is not set
+# CONFIG_GPIO_VX855 is not set
+# CONFIG_GPIO_ZX is not set
+
+#
+# Port-mapped I/O GPIO drivers
+#
+# CONFIG_GPIO_F7188X is not set
+# CONFIG_GPIO_IT87 is not set
+# CONFIG_GPIO_SCH is not set
+# CONFIG_GPIO_SCH311X is not set
+
+#
+# I2C GPIO expanders
+#
+# CONFIG_GPIO_ADP5588 is not set
+# CONFIG_GPIO_MAX7300 is not set
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+# CONFIG_GPIO_TPIC2810 is not set
+
+#
+# MFD GPIO expanders
+#
+
+#
+# PCI GPIO expanders
+#
+# CONFIG_GPIO_AMD8111 is not set
+# CONFIG_GPIO_BT8XX is not set
+# CONFIG_GPIO_ML_IOH is not set
+# CONFIG_GPIO_RDC321X is not set
+
+#
+# SPI or I2C GPIO expanders
+#
+
+#
+# USB GPIO expanders
+#
+CONFIG_W1=m
+CONFIG_W1_CON=y
+
+#
+# 1-wire Bus Masters
+#
+# CONFIG_W1_MASTER_MATROX is not set
+# CONFIG_W1_MASTER_DS2490 is not set
+CONFIG_W1_MASTER_DS2482=m
+# CONFIG_W1_MASTER_DS1WM is not set
+# CONFIG_W1_MASTER_GPIO is not set
+
+#
+# 1-wire Slaves
+#
+CONFIG_W1_SLAVE_THERM=m
+# CONFIG_W1_SLAVE_SMEM is not set
+# CONFIG_W1_SLAVE_DS2408 is not set
+# CONFIG_W1_SLAVE_DS2413 is not set
+# CONFIG_W1_SLAVE_DS2406 is not set
+# CONFIG_W1_SLAVE_DS2423 is not set
+# CONFIG_W1_SLAVE_DS2431 is not set
+# CONFIG_W1_SLAVE_DS2433 is not set
+# CONFIG_W1_SLAVE_DS2760 is not set
+# CONFIG_W1_SLAVE_DS2780 is not set
+# CONFIG_W1_SLAVE_DS2781 is not set
+# CONFIG_W1_SLAVE_DS28E04 is not set
+# CONFIG_W1_SLAVE_BQ27000 is not set
+CONFIG_POWER_SUPPLY=y
+# CONFIG_POWER_SUPPLY_DEBUG is not set
+# CONFIG_PDA_POWER is not set
+# CONFIG_TEST_POWER is not set
+# CONFIG_BATTERY_DS2780 is not set
+# CONFIG_BATTERY_DS2781 is not set
+# CONFIG_BATTERY_DS2782 is not set
+# CONFIG_BATTERY_SBS is not set
+# CONFIG_BATTERY_BQ27XXX is not set
+# CONFIG_BATTERY_MAX17040 is not set
+# CONFIG_BATTERY_MAX17042 is not set
+# CONFIG_CHARGER_MAX8903 is not set
+# CONFIG_CHARGER_LP8727 is not set
+# CONFIG_CHARGER_GPIO is not set
+# CONFIG_CHARGER_BQ2415X is not set
+# CONFIG_CHARGER_BQ24190 is not set
+# CONFIG_CHARGER_BQ24735 is not set
+# CONFIG_CHARGER_BQ25890 is not set
+# CONFIG_CHARGER_SMB347 is not set
+# CONFIG_BATTERY_GAUGE_LTC2941 is not set
+# CONFIG_CHARGER_RT9455 is not set
+# CONFIG_POWER_RESET is not set
+# CONFIG_POWER_AVS is not set
+CONFIG_HWMON=y
+CONFIG_HWMON_VID=m
+# CONFIG_HWMON_DEBUG_CHIP is not set
+
+#
+# Native drivers
+#
+# CONFIG_SENSORS_ABITUGURU is not set
+# CONFIG_SENSORS_ABITUGURU3 is not set
+# CONFIG_SENSORS_AD7414 is not set
+# CONFIG_SENSORS_AD7418 is not set
+# CONFIG_SENSORS_ADM1021 is not set
+# CONFIG_SENSORS_ADM1025 is not set
+# CONFIG_SENSORS_ADM1026 is not set
+# CONFIG_SENSORS_ADM1029 is not set
+# CONFIG_SENSORS_ADM1031 is not set
+# CONFIG_SENSORS_ADM9240 is not set
+# CONFIG_SENSORS_ADT7410 is not set
+# CONFIG_SENSORS_ADT7411 is not set
+# CONFIG_SENSORS_ADT7462 is not set
+# CONFIG_SENSORS_ADT7470 is not set
+# CONFIG_SENSORS_ADT7475 is not set
+# CONFIG_SENSORS_ASC7621 is not set
+# CONFIG_SENSORS_K8TEMP is not set
+# CONFIG_SENSORS_K10TEMP is not set
+# CONFIG_SENSORS_FAM15H_POWER is not set
+# CONFIG_SENSORS_APPLESMC is not set
+# CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_ATXP1 is not set
+# CONFIG_SENSORS_DS620 is not set
+# CONFIG_SENSORS_DS1621 is not set
+# CONFIG_SENSORS_DELL_SMM is not set
+# CONFIG_SENSORS_I5K_AMB is not set
+# CONFIG_SENSORS_F71805F is not set
+# CONFIG_SENSORS_F71882FG is not set
+# CONFIG_SENSORS_F75375S is not set
+# CONFIG_SENSORS_FSCHMD is not set
+# CONFIG_SENSORS_FTSTEUTATES is not set
+# CONFIG_SENSORS_GL518SM is not set
+# CONFIG_SENSORS_GL520SM is not set
+# CONFIG_SENSORS_G760A is not set
+# CONFIG_SENSORS_G762 is not set
+# CONFIG_SENSORS_GPIO_FAN is not set
+# CONFIG_SENSORS_HIH6130 is not set
+# CONFIG_SENSORS_IBMAEM is not set
+# CONFIG_SENSORS_IBMPEX is not set
+# CONFIG_SENSORS_I5500 is not set
+CONFIG_SENSORS_CORETEMP=m
+CONFIG_SENSORS_IT87=m
+# CONFIG_SENSORS_JC42 is not set
+# CONFIG_SENSORS_POWR1220 is not set
+# CONFIG_SENSORS_LINEAGE is not set
+# CONFIG_SENSORS_LTC2945 is not set
+# CONFIG_SENSORS_LTC2990 is not set
+# CONFIG_SENSORS_LTC4151 is not set
+# CONFIG_SENSORS_LTC4215 is not set
+# CONFIG_SENSORS_LTC4222 is not set
+# CONFIG_SENSORS_LTC4245 is not set
+# CONFIG_SENSORS_LTC4260 is not set
+# CONFIG_SENSORS_LTC4261 is not set
+# CONFIG_SENSORS_MAX16065 is not set
+# CONFIG_SENSORS_MAX1619 is not set
+# CONFIG_SENSORS_MAX1668 is not set
+# CONFIG_SENSORS_MAX197 is not set
+# CONFIG_SENSORS_MAX6639 is not set
+# CONFIG_SENSORS_MAX6642 is not set
+# CONFIG_SENSORS_MAX6650 is not set
+# CONFIG_SENSORS_MAX6697 is not set
+# CONFIG_SENSORS_MAX31790 is not set
+# CONFIG_SENSORS_MCP3021 is not set
+# CONFIG_SENSORS_LM63 is not set
+# CONFIG_SENSORS_LM73 is not set
+# CONFIG_SENSORS_LM75 is not set
+# CONFIG_SENSORS_LM77 is not set
+# CONFIG_SENSORS_LM78 is not set
+# CONFIG_SENSORS_LM80 is not set
+# CONFIG_SENSORS_LM83 is not set
+# CONFIG_SENSORS_LM85 is not set
+# CONFIG_SENSORS_LM87 is not set
+# CONFIG_SENSORS_LM90 is not set
+# CONFIG_SENSORS_LM92 is not set
+# CONFIG_SENSORS_LM93 is not set
+# CONFIG_SENSORS_LM95234 is not set
+# CONFIG_SENSORS_LM95241 is not set
+# CONFIG_SENSORS_LM95245 is not set
+# CONFIG_SENSORS_PC87360 is not set
+# CONFIG_SENSORS_PC87427 is not set
+# CONFIG_SENSORS_NTC_THERMISTOR is not set
+# CONFIG_SENSORS_NCT6683 is not set
+# CONFIG_SENSORS_NCT6775 is not set
+# CONFIG_SENSORS_NCT7802 is not set
+# CONFIG_SENSORS_NCT7904 is not set
+# CONFIG_SENSORS_PCF8591 is not set
+# CONFIG_PMBUS is not set
+# CONFIG_SENSORS_SHT15 is not set
+# CONFIG_SENSORS_SHT21 is not set
+# CONFIG_SENSORS_SHT3x is not set
+# CONFIG_SENSORS_SHTC1 is not set
+# CONFIG_SENSORS_SIS5595 is not set
+# CONFIG_SENSORS_DME1737 is not set
+# CONFIG_SENSORS_EMC1403 is not set
+# CONFIG_SENSORS_EMC2103 is not set
+# CONFIG_SENSORS_EMC6W201 is not set
+# CONFIG_SENSORS_SMSC47M1 is not set
+# CONFIG_SENSORS_SMSC47M192 is not set
+# CONFIG_SENSORS_SMSC47B397 is not set
+# CONFIG_SENSORS_SCH56XX_COMMON is not set
+# CONFIG_SENSORS_SCH5627 is not set
+# CONFIG_SENSORS_SCH5636 is not set
+# CONFIG_SENSORS_SMM665 is not set
+# CONFIG_SENSORS_ADC128D818 is not set
+# CONFIG_SENSORS_ADS1015 is not set
+# CONFIG_SENSORS_ADS7828 is not set
+# CONFIG_SENSORS_AMC6821 is not set
+# CONFIG_SENSORS_INA209 is not set
+# CONFIG_SENSORS_INA2XX is not set
+# CONFIG_SENSORS_INA3221 is not set
+# CONFIG_SENSORS_TC74 is not set
+# CONFIG_SENSORS_THMC50 is not set
+# CONFIG_SENSORS_TMP102 is not set
+# CONFIG_SENSORS_TMP103 is not set
+# CONFIG_SENSORS_TMP401 is not set
+# CONFIG_SENSORS_TMP421 is not set
+# CONFIG_SENSORS_VIA_CPUTEMP is not set
+# CONFIG_SENSORS_VIA686A is not set
+# CONFIG_SENSORS_VT1211 is not set
+# CONFIG_SENSORS_VT8231 is not set
+# CONFIG_SENSORS_W83781D is not set
+# CONFIG_SENSORS_W83791D is not set
+# CONFIG_SENSORS_W83792D is not set
+# CONFIG_SENSORS_W83793 is not set
+# CONFIG_SENSORS_W83795 is not set
+# CONFIG_SENSORS_W83L785TS is not set
+# CONFIG_SENSORS_W83L786NG is not set
+# CONFIG_SENSORS_W83627HF is not set
+# CONFIG_SENSORS_W83627EHF is not set
+
+#
+# ACPI drivers
+#
+# CONFIG_SENSORS_ACPI_POWER is not set
+# CONFIG_SENSORS_ATK0110 is not set
+CONFIG_THERMAL=y
+CONFIG_THERMAL_HWMON=y
+CONFIG_THERMAL_WRITABLE_TRIPS=y
+CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y
+# CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set
+# CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set
+# CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR is not set
+# CONFIG_THERMAL_GOV_FAIR_SHARE is not set
+CONFIG_THERMAL_GOV_STEP_WISE=y
+# CONFIG_THERMAL_GOV_BANG_BANG is not set
+CONFIG_THERMAL_GOV_USER_SPACE=y
+# CONFIG_THERMAL_GOV_POWER_ALLOCATOR is not set
+# CONFIG_THERMAL_EMULATION is not set
+# CONFIG_INTEL_POWERCLAMP is not set
+CONFIG_X86_PKG_TEMP_THERMAL=m
+# CONFIG_INTEL_SOC_DTS_THERMAL is not set
+
+#
+# ACPI INT340X thermal drivers
+#
+# CONFIG_INT340X_THERMAL is not set
+# CONFIG_INTEL_PCH_THERMAL is not set
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_CORE=y
+# CONFIG_WATCHDOG_NOWAYOUT is not set
+# CONFIG_WATCHDOG_SYSFS is not set
+
+#
+# Watchdog Device Drivers
+#
+# CONFIG_SOFT_WATCHDOG is not set
+# CONFIG_XILINX_WATCHDOG is not set
+# CONFIG_ZIIRAVE_WATCHDOG is not set
+# CONFIG_CADENCE_WATCHDOG is not set
+# CONFIG_DW_WATCHDOG is not set
+# CONFIG_MAX63XX_WATCHDOG is not set
+# CONFIG_ACQUIRE_WDT is not set
+# CONFIG_ADVANTECH_WDT is not set
+# CONFIG_ALIM1535_WDT is not set
+# CONFIG_ALIM7101_WDT is not set
+# CONFIG_F71808E_WDT is not set
+CONFIG_SP5100_TCO=y
+# CONFIG_SBC_FITPC2_WATCHDOG is not set
+# CONFIG_EUROTECH_WDT is not set
+# CONFIG_IB700_WDT is not set
+# CONFIG_IBMASR is not set
+# CONFIG_WAFER_WDT is not set
+# CONFIG_I6300ESB_WDT is not set
+# CONFIG_IE6XX_WDT is not set
+CONFIG_ITCO_WDT=m
+# CONFIG_ITCO_VENDOR_SUPPORT is not set
+CONFIG_IT8712F_WDT=m
+# CONFIG_IT87_WDT is not set
+# CONFIG_HP_WATCHDOG is not set
+# CONFIG_SC1200_WDT is not set
+# CONFIG_PC87413_WDT is not set
+CONFIG_NV_TCO=y
+# CONFIG_60XX_WDT is not set
+# CONFIG_CPU5_WDT is not set
+# CONFIG_SMSC_SCH311X_WDT is not set
+# CONFIG_SMSC37B787_WDT is not set
+# CONFIG_VIA_WDT is not set
+# CONFIG_W83627HF_WDT is not set
+# CONFIG_W83877F_WDT is not set
+# CONFIG_W83977F_WDT is not set
+# CONFIG_MACHZ_WDT is not set
+# CONFIG_SBC_EPX_C3_WATCHDOG is not set
+# CONFIG_NI903X_WDT is not set
+# CONFIG_MEN_A21_WDT is not set
+
+#
+# PCI-based Watchdog Cards
+#
+# CONFIG_PCIPCWATCHDOG is not set
+# CONFIG_WDTPCI is not set
+
+#
+# USB-based Watchdog Cards
+#
+# CONFIG_USBPCWATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+CONFIG_BCMA_POSSIBLE=y
+
+#
+# Broadcom specific AMBA
+#
+# CONFIG_BCMA is not set
+
+#
+# Multifunction device drivers
+#
+CONFIG_MFD_CORE=y
+# CONFIG_MFD_BCM590XX is not set
+# CONFIG_MFD_AXP20X_I2C is not set
+# CONFIG_MFD_CROS_EC is not set
+# CONFIG_MFD_DA9062 is not set
+# CONFIG_MFD_DA9063 is not set
+# CONFIG_MFD_DA9150 is not set
+# CONFIG_MFD_DLN2 is not set
+# CONFIG_MFD_MC13XXX_I2C is not set
+# CONFIG_HTC_PASIC3 is not set
+CONFIG_LPC_ICH=y
+# CONFIG_LPC_SCH is not set
+# CONFIG_MFD_INTEL_LPSS_ACPI is not set
+# CONFIG_MFD_INTEL_LPSS_PCI is not set
+# CONFIG_MFD_JANZ_CMODIO is not set
+# CONFIG_MFD_KEMPLD is not set
+# CONFIG_MFD_88PM800 is not set
+# CONFIG_MFD_88PM805 is not set
+# CONFIG_MFD_MAX14577 is not set
+# CONFIG_MFD_MAX77693 is not set
+# CONFIG_MFD_MAX8907 is not set
+# CONFIG_MFD_MT6397 is not set
+# CONFIG_MFD_MENF21BMC is not set
+# CONFIG_MFD_VIPERBOARD is not set
+# CONFIG_MFD_RETU is not set
+# CONFIG_MFD_PCF50633 is not set
+# CONFIG_MFD_RDC321X is not set
+# CONFIG_MFD_RTSX_PCI is not set
+# CONFIG_MFD_RT5033 is not set
+# CONFIG_MFD_RTSX_USB is not set
+# CONFIG_MFD_SI476X_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_MFD_SKY81452 is not set
+# CONFIG_ABX500_CORE is not set
+# CONFIG_MFD_SYSCON is not set
+# CONFIG_MFD_TI_AM335X_TSCADC is not set
+# CONFIG_MFD_LP3943 is not set
+# CONFIG_TPS6105X is not set
+# CONFIG_TPS65010 is not set
+# CONFIG_TPS6507X is not set
+# CONFIG_MFD_TPS65086 is not set
+# CONFIG_MFD_TPS65217 is not set
+# CONFIG_MFD_TPS65218 is not set
+# CONFIG_MFD_TPS65912_I2C is not set
+# CONFIG_MFD_WL1273_CORE is not set
+# CONFIG_MFD_LM3533 is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_MFD_VX855 is not set
+# CONFIG_MFD_ARIZONA_I2C is not set
+# CONFIG_MFD_WM8994 is not set
+# CONFIG_REGULATOR is not set
+# CONFIG_MEDIA_SUPPORT is not set
+
+#
+# Graphics support
+#
+CONFIG_AGP=y
+CONFIG_AGP_AMD64=y
+# CONFIG_AGP_INTEL is not set
+# CONFIG_AGP_SIS is not set
+# CONFIG_AGP_VIA is not set
+CONFIG_VGA_ARB=y
+CONFIG_VGA_ARB_MAX_GPUS=16
+# CONFIG_VGA_SWITCHEROO is not set
+# CONFIG_DRM is not set
+
+#
+# ACP (Audio CoProcessor) Configuration
+#
+
+#
+# Frame buffer Devices
+#
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+# CONFIG_VGASTATE is not set
+
+#
+# Console display driver support
+#
+CONFIG_VGA_CONSOLE=y
+# CONFIG_VGACON_SOFT_SCROLLBACK is not set
+CONFIG_DUMMY_CONSOLE=y
+CONFIG_DUMMY_CONSOLE_COLUMNS=80
+CONFIG_DUMMY_CONSOLE_ROWS=25
+# CONFIG_SOUND is not set
+
+#
+# HID support
+#
+CONFIG_HID=y
+# CONFIG_HID_BATTERY_STRENGTH is not set
+CONFIG_HIDRAW=y
+# CONFIG_UHID is not set
+CONFIG_HID_GENERIC=y
+
+#
+# Special HID drivers
+#
+# CONFIG_HID_A4TECH is not set
+# CONFIG_HID_ACRUX is not set
+# CONFIG_HID_APPLE is not set
+# CONFIG_HID_APPLEIR is not set
+# CONFIG_HID_AUREAL is not set
+# CONFIG_HID_BELKIN is not set
+# CONFIG_HID_BETOP_FF is not set
+# CONFIG_HID_CHERRY is not set
+# CONFIG_HID_CHICONY is not set
+# CONFIG_HID_CMEDIA is not set
+# CONFIG_HID_CP2112 is not set
+# CONFIG_HID_CYPRESS is not set
+# CONFIG_HID_DRAGONRISE is not set
+# CONFIG_HID_EMS_FF is not set
+# CONFIG_HID_ELECOM is not set
+# CONFIG_HID_ELO is not set
+# CONFIG_HID_EZKEY is not set
+# CONFIG_HID_GEMBIRD is not set
+# CONFIG_HID_GFRM is not set
+# CONFIG_HID_HOLTEK is not set
+# CONFIG_HID_KEYTOUCH is not set
+# CONFIG_HID_KYE is not set
+# CONFIG_HID_UCLOGIC is not set
+# CONFIG_HID_WALTOP is not set
+# CONFIG_HID_GYRATION is not set
+# CONFIG_HID_ICADE is not set
+# CONFIG_HID_TWINHAN is not set
+# CONFIG_HID_KENSINGTON is not set
+# CONFIG_HID_LCPOWER is not set
+# CONFIG_HID_LENOVO is not set
+# CONFIG_HID_LOGITECH is not set
+# CONFIG_HID_MAGICMOUSE is not set
+# CONFIG_HID_MICROSOFT is not set
+# CONFIG_HID_MONTEREY is not set
+# CONFIG_HID_MULTITOUCH is not set
+# CONFIG_HID_NTRIG is not set
+# CONFIG_HID_ORTEK is not set
+# CONFIG_HID_PANTHERLORD is not set
+# CONFIG_HID_PENMOUNT is not set
+# CONFIG_HID_PETALYNX is not set
+# CONFIG_HID_PICOLCD is not set
+# CONFIG_HID_PLANTRONICS is not set
+# CONFIG_HID_PRIMAX is not set
+# CONFIG_HID_ROCCAT is not set
+# CONFIG_HID_SAITEK is not set
+# CONFIG_HID_SAMSUNG is not set
+# CONFIG_HID_SPEEDLINK is not set
+# CONFIG_HID_STEELSERIES is not set
+# CONFIG_HID_SUNPLUS is not set
+# CONFIG_HID_RMI is not set
+# CONFIG_HID_GREENASIA is not set
+# CONFIG_HID_SMARTJOYPLUS is not set
+# CONFIG_HID_TIVO is not set
+# CONFIG_HID_TOPSEED is not set
+# CONFIG_HID_THRUSTMASTER is not set
+# CONFIG_HID_WACOM is not set
+# CONFIG_HID_XINMO is not set
+# CONFIG_HID_ZEROPLUS is not set
+# CONFIG_HID_ZYDACRON is not set
+# CONFIG_HID_SENSOR_HUB is not set
+# CONFIG_HID_ALPS is not set
+
+#
+# USB HID support
+#
+CONFIG_USB_HID=y
+CONFIG_HID_PID=y
+CONFIG_USB_HIDDEV=y
+
+#
+# I2C HID support
+#
+# CONFIG_I2C_HID is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
+CONFIG_USB_SUPPORT=y
+CONFIG_USB_COMMON=y
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+
+#
+# Miscellaneous USB options
+#
+CONFIG_USB_DEFAULT_PERSIST=y
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_OTG is not set
+# CONFIG_USB_OTG_WHITELIST is not set
+# CONFIG_USB_OTG_BLACKLIST_HUB is not set
+# CONFIG_USB_ULPI_BUS is not set
+CONFIG_USB_MON=y
+# CONFIG_USB_WUSB_CBAF is not set
+
+#
+# USB Host Controller Drivers
+#
+# CONFIG_USB_C67X00_HCD is not set
+# CONFIG_USB_XHCI_HCD is not set
+CONFIG_USB_EHCI_HCD=m
+# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_PCI=m
+# CONFIG_USB_EHCI_HCD_PLATFORM is not set
+# CONFIG_USB_OXU210HP_HCD is not set
+# CONFIG_USB_ISP116X_HCD is not set
+# CONFIG_USB_ISP1362_HCD is not set
+# CONFIG_USB_FOTG210_HCD is not set
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_OHCI_HCD_PCI=m
+# CONFIG_USB_OHCI_HCD_PLATFORM is not set
+CONFIG_USB_UHCI_HCD=m
+# CONFIG_USB_SL811_HCD is not set
+# CONFIG_USB_R8A66597_HCD is not set
+# CONFIG_USB_HCD_TEST_MODE is not set
+
+#
+# USB Device Class drivers
+#
+CONFIG_USB_ACM=m
+# CONFIG_USB_PRINTER is not set
+# CONFIG_USB_WDM is not set
+# CONFIG_USB_TMC is not set
+
+#
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
+#
+
+#
+# also be needed; see USB_STORAGE Help for more info
+#
+CONFIG_USB_STORAGE=m
+# CONFIG_USB_STORAGE_DEBUG is not set
+# CONFIG_USB_STORAGE_REALTEK is not set
+# CONFIG_USB_STORAGE_DATAFAB is not set
+# CONFIG_USB_STORAGE_FREECOM is not set
+# CONFIG_USB_STORAGE_ISD200 is not set
+# CONFIG_USB_STORAGE_USBAT is not set
+# CONFIG_USB_STORAGE_SDDR09 is not set
+# CONFIG_USB_STORAGE_SDDR55 is not set
+# CONFIG_USB_STORAGE_JUMPSHOT is not set
+# CONFIG_USB_STORAGE_ALAUDA is not set
+# CONFIG_USB_STORAGE_ONETOUCH is not set
+# CONFIG_USB_STORAGE_KARMA is not set
+# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set
+# CONFIG_USB_STORAGE_ENE_UB6250 is not set
+# CONFIG_USB_UAS is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+# CONFIG_USBIP_CORE is not set
+# CONFIG_USB_MUSB_HDRC is not set
+# CONFIG_USB_DWC3 is not set
+# CONFIG_USB_DWC2 is not set
+# CONFIG_USB_CHIPIDEA is not set
+# CONFIG_USB_ISP1760 is not set
+
+#
+# USB port drivers
+#
+CONFIG_USB_SERIAL=m
+CONFIG_USB_SERIAL_GENERIC=y
+# CONFIG_USB_SERIAL_SIMPLE is not set
+# CONFIG_USB_SERIAL_AIRCABLE is not set
+# CONFIG_USB_SERIAL_ARK3116 is not set
+# CONFIG_USB_SERIAL_BELKIN is not set
+# CONFIG_USB_SERIAL_CH341 is not set
+# CONFIG_USB_SERIAL_WHITEHEAT is not set
+# CONFIG_USB_SERIAL_DIGI_ACCELEPORT is not set
+# CONFIG_USB_SERIAL_CP210X is not set
+# CONFIG_USB_SERIAL_CYPRESS_M8 is not set
+# CONFIG_USB_SERIAL_EMPEG is not set
+CONFIG_USB_SERIAL_FTDI_SIO=m
+# CONFIG_USB_SERIAL_VISOR is not set
+# CONFIG_USB_SERIAL_IPAQ is not set
+# CONFIG_USB_SERIAL_IR is not set
+# CONFIG_USB_SERIAL_EDGEPORT is not set
+# CONFIG_USB_SERIAL_EDGEPORT_TI is not set
+# CONFIG_USB_SERIAL_F81232 is not set
+# CONFIG_USB_SERIAL_GARMIN is not set
+# CONFIG_USB_SERIAL_IPW is not set
+# CONFIG_USB_SERIAL_IUU is not set
+# CONFIG_USB_SERIAL_KEYSPAN_PDA is not set
+CONFIG_USB_SERIAL_KEYSPAN=m
+# CONFIG_USB_SERIAL_KEYSPAN_MPR is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA28X is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA28XA is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA28XB is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA19W is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA19QW is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA19QI is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA49W is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA49WLC is not set
+# CONFIG_USB_SERIAL_KLSI is not set
+# CONFIG_USB_SERIAL_KOBIL_SCT is not set
+# CONFIG_USB_SERIAL_MCT_U232 is not set
+# CONFIG_USB_SERIAL_METRO is not set
+# CONFIG_USB_SERIAL_MOS7720 is not set
+# CONFIG_USB_SERIAL_MOS7840 is not set
+# CONFIG_USB_SERIAL_MXUPORT is not set
+# CONFIG_USB_SERIAL_NAVMAN is not set
+CONFIG_USB_SERIAL_PL2303=m
+# CONFIG_USB_SERIAL_OTI6858 is not set
+# CONFIG_USB_SERIAL_QCAUX is not set
+# CONFIG_USB_SERIAL_QUALCOMM is not set
+# CONFIG_USB_SERIAL_SPCP8X5 is not set
+# CONFIG_USB_SERIAL_SAFE is not set
+# CONFIG_USB_SERIAL_SIERRAWIRELESS is not set
+# CONFIG_USB_SERIAL_SYMBOL is not set
+# CONFIG_USB_SERIAL_TI is not set
+# CONFIG_USB_SERIAL_CYBERJACK is not set
+# CONFIG_USB_SERIAL_XIRCOM is not set
+# CONFIG_USB_SERIAL_OPTION is not set
+# CONFIG_USB_SERIAL_OMNINET is not set
+# CONFIG_USB_SERIAL_OPTICON is not set
+# CONFIG_USB_SERIAL_XSENS_MT is not set
+# CONFIG_USB_SERIAL_WISHBONE is not set
+# CONFIG_USB_SERIAL_SSU100 is not set
+# CONFIG_USB_SERIAL_QT2 is not set
+# CONFIG_USB_SERIAL_DEBUG is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_ADUTUX is not set
+# CONFIG_USB_SEVSEG is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_CYPRESS_CY7C63 is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_FTDI_ELAN is not set
+# CONFIG_USB_APPLEDISPLAY is not set
+# CONFIG_USB_SISUSBVGA is not set
+# CONFIG_USB_LD is not set
+# CONFIG_USB_TRANCEVIBRATOR is not set
+# CONFIG_USB_IOWARRIOR is not set
+CONFIG_USB_TEST=m
+# CONFIG_USB_EHSET_TEST_FIXTURE is not set
+# CONFIG_USB_ISIGHTFW is not set
+# CONFIG_USB_YUREX is not set
+CONFIG_USB_EZUSB_FX2=m
+# CONFIG_USB_HSIC_USB3503 is not set
+# CONFIG_USB_LINK_LAYER_TEST is not set
+# CONFIG_UCSI is not set
+
+#
+# USB Physical Layer drivers
+#
+# CONFIG_USB_PHY is not set
+# CONFIG_NOP_USB_XCEIV is not set
+# CONFIG_USB_GPIO_VBUS is not set
+# CONFIG_USB_ISP1301 is not set
+# CONFIG_USB_GADGET is not set
+# CONFIG_UWB is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_USER_MEM=y
+CONFIG_INFINIBAND_ON_DEMAND_PAGING=y
+CONFIG_INFINIBAND_ADDR_TRANS=y
+CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS=y
+# CONFIG_INFINIBAND_MTHCA is not set
+# CONFIG_INFINIBAND_QIB is not set
+# CONFIG_INFINIBAND_CXGB3 is not set
+CONFIG_MLX4_INFINIBAND=m
+# CONFIG_INFINIBAND_NES is not set
+# CONFIG_INFINIBAND_OCRDMA is not set
+# CONFIG_INFINIBAND_USNIC is not set
+# CONFIG_INFINIBAND_IPOIB is not set
+# CONFIG_INFINIBAND_SRP is not set
+# CONFIG_INFINIBAND_ISER is not set
+CONFIG_INFINIBAND_RDMAVT=m
+CONFIG_INFINIBAND_HFI1=m
+# CONFIG_HFI1_DEBUG_SDMA_ORDER is not set
+CONFIG_HFI1_VERBS_31BIT_PSN=y
+# CONFIG_SDMA_VERBOSITY is not set
+CONFIG_EDAC_ATOMIC_SCRUB=y
+CONFIG_EDAC_SUPPORT=y
+# CONFIG_EDAC is not set
+CONFIG_RTC_LIB=y
+CONFIG_RTC_MC146818_LIB=y
+# CONFIG_RTC_CLASS is not set
+CONFIG_DMADEVICES=y
+# CONFIG_DMADEVICES_DEBUG is not set
+
+#
+# DMA Devices
+#
+CONFIG_DMA_ACPI=y
+# CONFIG_INTEL_IDMA64 is not set
+# CONFIG_INTEL_IOATDMA is not set
+# CONFIG_QCOM_HIDMA_MGMT is not set
+# CONFIG_QCOM_HIDMA is not set
+# CONFIG_DW_DMAC is not set
+# CONFIG_DW_DMAC_PCI is not set
+
+#
+# DMABUF options
+#
+# CONFIG_SYNC_FILE is not set
+# CONFIG_AUXDISPLAY is not set
+# CONFIG_UIO is not set
+# CONFIG_VFIO is not set
+CONFIG_IRQ_BYPASS_MANAGER=y
+# CONFIG_VIRT_DRIVERS is not set
+CONFIG_VIRTIO=y
+
+#
+# Virtio drivers
+#
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_PCI_LEGACY=y
+# CONFIG_VIRTIO_BALLOON is not set
+# CONFIG_VIRTIO_INPUT is not set
+# CONFIG_VIRTIO_MMIO is not set
+
+#
+# Microsoft Hyper-V guest support
+#
+# CONFIG_HYPERV is not set
+# CONFIG_STAGING is not set
+CONFIG_X86_PLATFORM_DEVICES=y
+# CONFIG_ACERHDF is not set
+# CONFIG_DELL_SMBIOS is not set
+# CONFIG_DELL_SMO8800 is not set
+# CONFIG_FUJITSU_TABLET is not set
+# CONFIG_HP_ACCEL is not set
+# CONFIG_HP_WIRELESS is not set
+# CONFIG_SENSORS_HDAPS is not set
+# CONFIG_INTEL_MENLOW is not set
+# CONFIG_ASUS_WIRELESS is not set
+# CONFIG_ACPI_WMI is not set
+# CONFIG_TOPSTAR_LAPTOP is not set
+# CONFIG_TOSHIBA_BT_RFKILL is not set
+# CONFIG_TOSHIBA_HAPS is not set
+# CONFIG_ACPI_CMPC is not set
+CONFIG_INTEL_HID_EVENT=m
+# CONFIG_INTEL_VBTN is not set
+# CONFIG_INTEL_IPS is not set
+# CONFIG_INTEL_PMC_CORE is not set
+# CONFIG_IBM_RTL is not set
+# CONFIG_SAMSUNG_Q10 is not set
+# CONFIG_INTEL_RST is not set
+# CONFIG_INTEL_SMARTCONNECT is not set
+# CONFIG_PVPANIC is not set
+# CONFIG_INTEL_PMC_IPC is not set
+# CONFIG_SURFACE_PRO3_BUTTON is not set
+CONFIG_INTEL_PUNIT_IPC=m
+# CONFIG_CHROME_PLATFORMS is not set
+
+#
+# Hardware Spinlock drivers
+#
+
+#
+# Clock Source drivers
+#
+CONFIG_CLKEVT_I8253=y
+CONFIG_I8253_LOCK=y
+CONFIG_CLKBLD_I8253=y
+# CONFIG_ATMEL_PIT is not set
+# CONFIG_SH_TIMER_CMT is not set
+# CONFIG_SH_TIMER_MTU2 is not set
+# CONFIG_SH_TIMER_TMU is not set
+# CONFIG_EM_TIMER_STI is not set
+# CONFIG_MAILBOX is not set
+CONFIG_IOMMU_API=y
+CONFIG_IOMMU_SUPPORT=y
+
+#
+# Generic IOMMU Pagetable Support
+#
+CONFIG_IOMMU_IOVA=y
+CONFIG_AMD_IOMMU=y
+# CONFIG_AMD_IOMMU_V2 is not set
+CONFIG_DMAR_TABLE=y
+CONFIG_INTEL_IOMMU=y
+# CONFIG_INTEL_IOMMU_SVM is not set
+# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
+CONFIG_INTEL_IOMMU_FLOPPY_WA=y
+CONFIG_IRQ_REMAP=y
+
+#
+# Remoteproc drivers
+#
+# CONFIG_STE_MODEM_RPROC is not set
+
+#
+# Rpmsg drivers
+#
+
+#
+# SOC (System On Chip) specific Drivers
+#
+
+#
+# Broadcom SoC drivers
+#
+# CONFIG_SUNXI_SRAM is not set
+# CONFIG_SOC_TI is not set
+# CONFIG_PM_DEVFREQ is not set
+# CONFIG_EXTCON is not set
+# CONFIG_MEMORY is not set
+# CONFIG_IIO is not set
+# CONFIG_NTB is not set
+# CONFIG_VME_BUS is not set
+# CONFIG_PWM is not set
+CONFIG_ARM_GIC_MAX_NR=1
+# CONFIG_IPACK_BUS is not set
+# CONFIG_RESET_CONTROLLER is not set
+# CONFIG_FMC is not set
+
+#
+# PHY Subsystem
+#
+CONFIG_GENERIC_PHY=y
+# CONFIG_PHY_PXA_28NM_HSIC is not set
+# CONFIG_PHY_PXA_28NM_USB2 is not set
+# CONFIG_BCM_KONA_USB2_PHY is not set
+# CONFIG_POWERCAP is not set
+# CONFIG_MCB is not set
+
+#
+# Performance monitor support
+#
+# CONFIG_RAS is not set
+# CONFIG_THUNDERBOLT is not set
+
+#
+# Android
+#
+# CONFIG_ANDROID is not set
+# CONFIG_LIBNVDIMM is not set
+# CONFIG_DEV_DAX is not set
+# CONFIG_NVMEM is not set
+# CONFIG_STM is not set
+# CONFIG_INTEL_TH is not set
+
+#
+# FPGA Configuration Support
+#
+# CONFIG_FPGA is not set
+
+#
+# Firmware Drivers
+#
+# CONFIG_EDD is not set
+CONFIG_FIRMWARE_MEMMAP=y
+CONFIG_DELL_RBU=m
+CONFIG_DCDBAS=m
+CONFIG_DMIID=y
+# CONFIG_DMI_SYSFS is not set
+CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y
+CONFIG_ISCSI_IBFT_FIND=y
+# CONFIG_ISCSI_IBFT is not set
+# CONFIG_FW_CFG_SYSFS is not set
+CONFIG_GOOGLE_FIRMWARE=y
+
+#
+# Google Firmware Drivers
+#
+CONFIG_GOOGLE_SMI=y
+CONFIG_GOOGLE_MEMCONSOLE=y
+
+#
+# EFI (Extensible Firmware Interface) Support
+#
+CONFIG_EFI_VARS=y
+CONFIG_EFI_ESRT=y
+CONFIG_EFI_RUNTIME_MAP=y
+# CONFIG_EFI_FAKE_MEMMAP is not set
+CONFIG_EFI_RUNTIME_WRAPPERS=y
+# CONFIG_EFI_BOOTLOADER_CONTROL is not set
+# CONFIG_EFI_CAPSULE_LOADER is not set
+
+#
+# File systems
+#
+CONFIG_DCACHE_WORD_ACCESS=y
+# CONFIG_EXT2_FS is not set
+# CONFIG_EXT3_FS is not set
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_USE_FOR_EXT2=y
+# CONFIG_EXT4_FS_POSIX_ACL is not set
+CONFIG_EXT4_FS_SECURITY=y
+# CONFIG_EXT4_ENCRYPTION is not set
+# CONFIG_EXT4_DEBUG is not set
+CONFIG_JBD2=y
+CONFIG_JBD2_DEBUG=y
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_GFS2_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
+# CONFIG_NILFS2_FS is not set
+# CONFIG_F2FS_FS is not set
+# CONFIG_FS_DAX is not set
+CONFIG_FS_POSIX_ACL=y
+CONFIG_EXPORTFS=y
+# CONFIG_EXPORTFS_BLOCK_OPS is not set
+CONFIG_FILE_LOCKING=y
+CONFIG_MANDATORY_FILE_LOCKING=y
+# CONFIG_FS_ENCRYPTION is not set
+CONFIG_FSNOTIFY=y
+CONFIG_DNOTIFY=y
+CONFIG_INOTIFY_USER=y
+CONFIG_FANOTIFY=y
+# CONFIG_FANOTIFY_ACCESS_PERMISSIONS is not set
+CONFIG_QUOTA=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+# CONFIG_PRINT_QUOTA_WARNING is not set
+# CONFIG_QUOTA_DEBUG is not set
+CONFIG_QUOTA_TREE=y
+# CONFIG_QFMT_V1 is not set
+CONFIG_QFMT_V2=y
+CONFIG_QUOTACTL=y
+CONFIG_QUOTACTL_COMPAT=y
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=y
+# CONFIG_CUSE is not set
+# CONFIG_OVERLAY_FS is not set
+
+#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_FAT_DEFAULT_UTF8 is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROC_VMCORE=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_PROC_CHILDREN=y
+CONFIG_KERNFS=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS_XATTR=y
+CONFIG_HUGETLBFS=y
+CONFIG_HUGETLB_PAGE=y
+CONFIG_CONFIGFS_FS=y
+CONFIG_EFIVAR_FS=m
+CONFIG_MISC_FILESYSTEMS=y
+# CONFIG_ORANGEFS_FS is not set
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_ECRYPT_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_LOGFS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_SQUASHFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_QNX6FS_FS is not set
+CONFIG_ROMFS_FS=m
+CONFIG_ROMFS_BACKED_BY_BLOCK=y
+CONFIG_ROMFS_ON_BLOCK=y
+# CONFIG_PSTORE is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=m
+CONFIG_NFS_V2=m
+CONFIG_NFS_V3=m
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=m
+# CONFIG_NFS_SWAP is not set
+# CONFIG_NFS_V4_1 is not set
+# CONFIG_NFS_USE_LEGACY_DNS is not set
+CONFIG_NFS_USE_KERNEL_DNS=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3=y
+# CONFIG_NFSD_V3_ACL is not set
+# CONFIG_NFSD_V4 is not set
+CONFIG_GRACE_PERIOD=m
+CONFIG_LOCKD=m
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_ACL_SUPPORT=m
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=m
+CONFIG_SUNRPC_GSS=m
+# CONFIG_SUNRPC_DEBUG is not set
+CONFIG_SUNRPC_XPRT_RDMA=m
+# CONFIG_CEPH_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_MAC_ROMAN is not set
+# CONFIG_NLS_MAC_CELTIC is not set
+# CONFIG_NLS_MAC_CENTEURO is not set
+# CONFIG_NLS_MAC_CROATIAN is not set
+# CONFIG_NLS_MAC_CYRILLIC is not set
+# CONFIG_NLS_MAC_GAELIC is not set
+# CONFIG_NLS_MAC_GREEK is not set
+# CONFIG_NLS_MAC_ICELAND is not set
+# CONFIG_NLS_MAC_INUIT is not set
+# CONFIG_NLS_MAC_ROMANIAN is not set
+# CONFIG_NLS_MAC_TURKISH is not set
+CONFIG_NLS_UTF8=y
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+
+#
+# printk and dmesg options
+#
+CONFIG_PRINTK_TIME=y
+CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_DYNAMIC_DEBUG is not set
+
+#
+# Compile-time checks and compiler options
+#
+CONFIG_DEBUG_INFO=y
+# CONFIG_DEBUG_INFO_REDUCED is not set
+# CONFIG_DEBUG_INFO_SPLIT is not set
+# CONFIG_DEBUG_INFO_DWARF4 is not set
+# CONFIG_GDB_SCRIPTS is not set
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=2048
+# CONFIG_STRIP_ASM_SYMS is not set
+# CONFIG_READABLE_ASM is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_PAGE_OWNER is not set
+CONFIG_DEBUG_FS=y
+# CONFIG_HEADERS_CHECK is not set
+# CONFIG_DEBUG_SECTION_MISMATCH is not set
+CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+CONFIG_ARCH_WANT_FRAME_POINTERS=y
+CONFIG_FRAME_POINTER=y
+# CONFIG_STACK_VALIDATION is not set
+# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x1
+CONFIG_DEBUG_KERNEL=y
+
+#
+# Memory Debugging
+#
+# CONFIG_PAGE_EXTENSION is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_PAGE_POISONING is not set
+# CONFIG_DEBUG_PAGE_REF is not set
+# CONFIG_DEBUG_OBJECTS is not set
+# CONFIG_DEBUG_SLAB is not set
+CONFIG_HAVE_DEBUG_KMEMLEAK=y
+# CONFIG_DEBUG_KMEMLEAK is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_VIRTUAL is not set
+CONFIG_DEBUG_MEMORY_INIT=y
+# CONFIG_DEBUG_PER_CPU_MAPS is not set
+CONFIG_HAVE_DEBUG_STACKOVERFLOW=y
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+CONFIG_HAVE_ARCH_KMEMCHECK=y
+CONFIG_HAVE_ARCH_KASAN=y
+# CONFIG_KASAN is not set
+CONFIG_ARCH_HAS_KCOV=y
+# CONFIG_KCOV is not set
+# CONFIG_DEBUG_SHIRQ is not set
+
+#
+# Debug Lockups and Hangs
+#
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+# CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=0
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+# CONFIG_DETECT_HUNG_TASK is not set
+# CONFIG_WQ_WATCHDOG is not set
+# CONFIG_PANIC_ON_OOPS is not set
+CONFIG_PANIC_ON_OOPS_VALUE=0
+CONFIG_PANIC_TIMEOUT=0
+CONFIG_SCHED_DEBUG=y
+CONFIG_SCHED_INFO=y
+CONFIG_SCHEDSTATS=y
+# CONFIG_SCHED_STACK_END_CHECK is not set
+# CONFIG_DEBUG_TIMEKEEPING is not set
+CONFIG_TIMER_STATS=y
+
+#
+# Lock Debugging (spinlocks, mutexes, etc...)
+#
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
+# CONFIG_DEBUG_ATOMIC_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_LOCK_TORTURE_TEST is not set
+CONFIG_STACKTRACE=y
+# CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_PI_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_DEBUG_CREDENTIALS is not set
+
+#
+# RCU Debugging
+#
+# CONFIG_PROVE_RCU is not set
+# CONFIG_SPARSE_RCU_POINTER is not set
+# CONFIG_TORTURE_TEST is not set
+# CONFIG_RCU_PERF_TEST is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+CONFIG_RCU_CPU_STALL_TIMEOUT=21
+# CONFIG_RCU_TRACE is not set
+# CONFIG_RCU_EQS_DEBUG is not set
+CONFIG_DEBUG_WQ_FORCE_RR_CPU=y
+# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set
+# CONFIG_NOTIFIER_ERROR_INJECTION is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_LATENCYTOP is not set
+CONFIG_USER_STACKTRACE_SUPPORT=y
+CONFIG_NOP_TRACER=y
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
+CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+CONFIG_HAVE_SYSCALL_TRACEPOINTS=y
+CONFIG_HAVE_FENTRY=y
+CONFIG_HAVE_C_RECORDMCOUNT=y
+CONFIG_TRACE_CLOCK=y
+CONFIG_RING_BUFFER=y
+CONFIG_EVENT_TRACING=y
+CONFIG_CONTEXT_SWITCH_TRACER=y
+CONFIG_TRACING=y
+CONFIG_GENERIC_TRACER=y
+CONFIG_TRACING_SUPPORT=y
+CONFIG_FTRACE=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_FUNCTION_GRAPH_TRACER=y
+# CONFIG_IRQSOFF_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+CONFIG_FTRACE_SYSCALLS=y
+# CONFIG_TRACER_SNAPSHOT is not set
+CONFIG_BRANCH_PROFILE_NONE=y
+# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set
+# CONFIG_PROFILE_ALL_BRANCHES is not set
+# CONFIG_STACK_TRACER is not set
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_KPROBE_EVENT=y
+# CONFIG_UPROBE_EVENT is not set
+CONFIG_PROBE_EVENTS=y
+CONFIG_DYNAMIC_FTRACE=y
+CONFIG_DYNAMIC_FTRACE_WITH_REGS=y
+# CONFIG_FUNCTION_PROFILER is not set
+CONFIG_FTRACE_MCOUNT_RECORD=y
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_MMIOTRACE is not set
+# CONFIG_HIST_TRIGGERS is not set
+# CONFIG_TRACEPOINT_BENCHMARK is not set
+# CONFIG_RING_BUFFER_BENCHMARK is not set
+# CONFIG_RING_BUFFER_STARTUP_TEST is not set
+# CONFIG_TRACE_ENUM_MAP_FILE is not set
+CONFIG_TRACING_EVENTS_GPIO=y
+
+#
+# Runtime Testing
+#
+CONFIG_LKDTM=y
+# CONFIG_TEST_LIST_SORT is not set
+# CONFIG_KPROBES_SANITY_TEST is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_RBTREE_TEST is not set
+# CONFIG_INTERVAL_TREE_TEST is not set
+# CONFIG_PERCPU_TEST is not set
+# CONFIG_ATOMIC64_SELFTEST is not set
+# CONFIG_TEST_HEXDUMP is not set
+# CONFIG_TEST_STRING_HELPERS is not set
+# CONFIG_TEST_KSTRTOX is not set
+# CONFIG_TEST_PRINTF is not set
+CONFIG_TEST_BITMAP=y
+# CONFIG_TEST_UUID is not set
+# CONFIG_TEST_RHASHTABLE is not set
+# CONFIG_TEST_HASH is not set
+CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
+# CONFIG_DMA_API_DEBUG is not set
+# CONFIG_TEST_LKM is not set
+# CONFIG_TEST_USER_COPY is not set
+# CONFIG_TEST_BPF is not set
+# CONFIG_TEST_FIRMWARE is not set
+# CONFIG_TEST_UDELAY is not set
+# CONFIG_MEMTEST is not set
+# CONFIG_TEST_STATIC_KEYS is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_KGDB is not set
+CONFIG_ARCH_HAS_UBSAN_SANITIZE_ALL=y
+# CONFIG_UBSAN is not set
+CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y
+# CONFIG_STRICT_DEVMEM is not set
+CONFIG_X86_VERBOSE_BOOTUP=y
+CONFIG_EARLY_PRINTK=y
+CONFIG_EARLY_PRINTK_DBGP=y
+# CONFIG_EARLY_PRINTK_EFI is not set
+# CONFIG_X86_PTDUMP_CORE is not set
+# CONFIG_X86_PTDUMP is not set
+# CONFIG_EFI_PGT_DUMP is not set
+# CONFIG_DEBUG_RODATA_TEST is not set
+# CONFIG_DEBUG_WX is not set
+# CONFIG_DEBUG_SET_MODULE_RONX is not set
+CONFIG_DEBUG_NX_TEST=m
+CONFIG_DOUBLEFAULT=y
+# CONFIG_DEBUG_TLBFLUSH is not set
+# CONFIG_IOMMU_STRESS is not set
+CONFIG_HAVE_MMIOTRACE_SUPPORT=y
+# CONFIG_X86_DECODER_SELFTEST is not set
+CONFIG_IO_DELAY_TYPE_0X80=0
+CONFIG_IO_DELAY_TYPE_0XED=1
+CONFIG_IO_DELAY_TYPE_UDELAY=2
+CONFIG_IO_DELAY_TYPE_NONE=3
+CONFIG_IO_DELAY_0X80=y
+# CONFIG_IO_DELAY_0XED is not set
+# CONFIG_IO_DELAY_UDELAY is not set
+# CONFIG_IO_DELAY_NONE is not set
+CONFIG_DEFAULT_IO_DELAY_TYPE=0
+CONFIG_DEBUG_BOOT_PARAMS=y
+# CONFIG_CPA_DEBUG is not set
+CONFIG_OPTIMIZE_INLINING=y
+# CONFIG_DEBUG_ENTRY is not set
+# CONFIG_DEBUG_NMI_SELFTEST is not set
+CONFIG_X86_DEBUG_FPU=y
+# CONFIG_PUNIT_ATOM_DEBUG is not set
+
+#
+# Security options
+#
+CONFIG_KEYS=y
+# CONFIG_PERSISTENT_KEYRINGS is not set
+# CONFIG_BIG_KEYS is not set
+# CONFIG_ENCRYPTED_KEYS is not set
+# CONFIG_KEY_DH_OPERATIONS is not set
+# CONFIG_SECURITY_DMESG_RESTRICT is not set
+CONFIG_SECURITY=y
+# CONFIG_SECURITYFS is not set
+CONFIG_SECURITY_NETWORK=y
+# CONFIG_SECURITY_PATH is not set
+# CONFIG_INTEL_TXT is not set
+CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y
+CONFIG_HAVE_ARCH_HARDENED_USERCOPY=y
+# CONFIG_HARDENED_USERCOPY is not set
+# CONFIG_SECURITY_SELINUX is not set
+# CONFIG_SECURITY_SMACK is not set
+# CONFIG_SECURITY_TOMOYO is not set
+# CONFIG_SECURITY_APPARMOR is not set
+# CONFIG_SECURITY_LOADPIN is not set
+# CONFIG_SECURITY_YAMA is not set
+CONFIG_INTEGRITY=y
+# CONFIG_INTEGRITY_SIGNATURE is not set
+CONFIG_INTEGRITY_AUDIT=y
+# CONFIG_IMA is not set
+# CONFIG_EVM is not set
+CONFIG_DEFAULT_SECURITY_DAC=y
+CONFIG_DEFAULT_SECURITY=""
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
+CONFIG_CRYPTO_AEAD=y
+CONFIG_CRYPTO_AEAD2=y
+CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
+CONFIG_CRYPTO_HASH=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG=m
+CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_RNG_DEFAULT=m
+CONFIG_CRYPTO_AKCIPHER2=y
+CONFIG_CRYPTO_KPP2=y
+# CONFIG_CRYPTO_RSA is not set
+# CONFIG_CRYPTO_DH is not set
+# CONFIG_CRYPTO_ECDH is not set
+CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
+# CONFIG_CRYPTO_USER is not set
+CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y
+# CONFIG_CRYPTO_GF128MUL is not set
+CONFIG_CRYPTO_NULL=y
+CONFIG_CRYPTO_NULL2=y
+# CONFIG_CRYPTO_PCRYPT is not set
+CONFIG_CRYPTO_WORKQUEUE=y
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_MCRYPTD is not set
+CONFIG_CRYPTO_AUTHENC=y
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_CHACHA20POLY1305 is not set
+# CONFIG_CRYPTO_SEQIV is not set
+CONFIG_CRYPTO_ECHAINIV=m
+
+#
+# Block modes
+#
+CONFIG_CRYPTO_CBC=y
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+# CONFIG_CRYPTO_ECB is not set
+# CONFIG_CRYPTO_LRW is not set
+# CONFIG_CRYPTO_PCBC is not set
+# CONFIG_CRYPTO_XTS is not set
+# CONFIG_CRYPTO_KEYWRAP is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_CMAC is not set
+CONFIG_CRYPTO_HMAC=y
+# CONFIG_CRYPTO_XCBC is not set
+CONFIG_CRYPTO_VMAC=y
+
+#
+# Digest
+#
+CONFIG_CRYPTO_CRC32C=y
+# CONFIG_CRYPTO_CRC32C_INTEL is not set
+# CONFIG_CRYPTO_CRC32 is not set
+# CONFIG_CRYPTO_CRC32_PCLMUL is not set
+# CONFIG_CRYPTO_CRCT10DIF is not set
+# CONFIG_CRYPTO_GHASH is not set
+# CONFIG_CRYPTO_POLY1305 is not set
+# CONFIG_CRYPTO_POLY1305_X86_64 is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+CONFIG_CRYPTO_SHA1=y
+# CONFIG_CRYPTO_SHA1_SSSE3 is not set
+# CONFIG_CRYPTO_SHA256_SSSE3 is not set
+# CONFIG_CRYPTO_SHA512_SSSE3 is not set
+# CONFIG_CRYPTO_SHA1_MB is not set
+# CONFIG_CRYPTO_SHA256_MB is not set
+# CONFIG_CRYPTO_SHA512_MB is not set
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_SHA512=y
+# CONFIG_CRYPTO_SHA3 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+# CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL is not set
+
+#
+# Ciphers
+#
+CONFIG_CRYPTO_AES=y
+# CONFIG_CRYPTO_AES_X86_64 is not set
+# CONFIG_CRYPTO_AES_NI_INTEL is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+CONFIG_CRYPTO_ARC4=y
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_BLOWFISH_X86_64 is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAMELLIA_X86_64 is not set
+# CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64 is not set
+# CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64 is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST5_AVX_X86_64 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_CAST6_AVX_X86_64 is not set
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_DES3_EDE_X86_64 is not set
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SALSA20_X86_64 is not set
+# CONFIG_CRYPTO_CHACHA20 is not set
+# CONFIG_CRYPTO_CHACHA20_X86_64 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_SERPENT_SSE2_X86_64 is not set
+# CONFIG_CRYPTO_SERPENT_AVX_X86_64 is not set
+# CONFIG_CRYPTO_SERPENT_AVX2_X86_64 is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+# CONFIG_CRYPTO_TWOFISH_X86_64 is not set
+# CONFIG_CRYPTO_TWOFISH_X86_64_3WAY is not set
+# CONFIG_CRYPTO_TWOFISH_AVX_X86_64 is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+CONFIG_CRYPTO_LZO=y
+# CONFIG_CRYPTO_842 is not set
+# CONFIG_CRYPTO_LZ4 is not set
+# CONFIG_CRYPTO_LZ4HC is not set
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_DRBG_MENU=m
+CONFIG_CRYPTO_DRBG_HMAC=y
+# CONFIG_CRYPTO_DRBG_HASH is not set
+CONFIG_CRYPTO_DRBG=m
+CONFIG_CRYPTO_JITTERENTROPY=m
+# CONFIG_CRYPTO_USER_API_HASH is not set
+# CONFIG_CRYPTO_USER_API_SKCIPHER is not set
+# CONFIG_CRYPTO_USER_API_RNG is not set
+# CONFIG_CRYPTO_USER_API_AEAD is not set
+CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_DEV_PADLOCK is not set
+# CONFIG_CRYPTO_DEV_CCP is not set
+# CONFIG_CRYPTO_DEV_QAT_DH895xCC is not set
+# CONFIG_CRYPTO_DEV_QAT_C3XXX is not set
+# CONFIG_CRYPTO_DEV_QAT_C62X is not set
+# CONFIG_CRYPTO_DEV_QAT_DH895xCCVF is not set
+# CONFIG_CRYPTO_DEV_QAT_C3XXXVF is not set
+# CONFIG_CRYPTO_DEV_QAT_C62XVF is not set
+# CONFIG_ASYMMETRIC_KEY_TYPE is not set
+
+#
+# Certificates for signature checking
+#
+CONFIG_HAVE_KVM=y
+CONFIG_HAVE_KVM_IRQCHIP=y
+CONFIG_HAVE_KVM_IRQFD=y
+CONFIG_HAVE_KVM_IRQ_ROUTING=y
+CONFIG_HAVE_KVM_EVENTFD=y
+CONFIG_KVM_MMIO=y
+CONFIG_KVM_ASYNC_PF=y
+CONFIG_HAVE_KVM_MSI=y
+CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT=y
+CONFIG_KVM_VFIO=y
+CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT=y
+CONFIG_KVM_COMPAT=y
+CONFIG_HAVE_KVM_IRQ_BYPASS=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM=y
+CONFIG_KVM_INTEL=y
+CONFIG_KVM_AMD=y
+# CONFIG_KVM_MMU_AUDIT is not set
+# CONFIG_KVM_DEVICE_ASSIGNMENT is not set
+# CONFIG_VHOST_NET is not set
+# CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set
+CONFIG_BINARY_PRINTF=y
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+# CONFIG_HAVE_ARCH_BITREVERSE is not set
+CONFIG_RATIONAL=y
+CONFIG_GENERIC_STRNCPY_FROM_USER=y
+CONFIG_GENERIC_STRNLEN_USER=y
+CONFIG_GENERIC_NET_UTILS=y
+CONFIG_GENERIC_FIND_FIRST_BIT=y
+CONFIG_GENERIC_PCI_IOMAP=y
+CONFIG_GENERIC_IOMAP=y
+CONFIG_GENERIC_IO=y
+CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y
+CONFIG_ARCH_HAS_FAST_MULTIPLIER=y
+# CONFIG_CRC_CCITT is not set
+CONFIG_CRC16=y
+# CONFIG_CRC_T10DIF is not set
+CONFIG_CRC_ITU_T=m
+CONFIG_CRC32=y
+# CONFIG_CRC32_SELFTEST is not set
+CONFIG_CRC32_SLICEBY8=y
+# CONFIG_CRC32_SLICEBY4 is not set
+# CONFIG_CRC32_SARWATE is not set
+# CONFIG_CRC32_BIT is not set
+# CONFIG_CRC7 is not set
+CONFIG_LIBCRC32C=y
+# CONFIG_CRC8 is not set
+# CONFIG_AUDIT_ARCH_COMPAT_GENERIC is not set
+# CONFIG_RANDOM32_SELFTEST is not set
+CONFIG_ZLIB_INFLATE=y
+CONFIG_LZO_COMPRESS=y
+CONFIG_LZO_DECOMPRESS=y
+CONFIG_LZ4_DECOMPRESS=y
+CONFIG_XZ_DEC=y
+CONFIG_XZ_DEC_X86=y
+CONFIG_XZ_DEC_POWERPC=y
+CONFIG_XZ_DEC_IA64=y
+CONFIG_XZ_DEC_ARM=y
+CONFIG_XZ_DEC_ARMTHUMB=y
+CONFIG_XZ_DEC_SPARC=y
+CONFIG_XZ_DEC_BCJ=y
+# CONFIG_XZ_DEC_TEST is not set
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_DECOMPRESS_BZIP2=y
+CONFIG_DECOMPRESS_LZMA=y
+CONFIG_DECOMPRESS_XZ=y
+CONFIG_DECOMPRESS_LZO=y
+CONFIG_DECOMPRESS_LZ4=y
+CONFIG_GENERIC_ALLOCATOR=y
+CONFIG_TEXTSEARCH=y
+CONFIG_TEXTSEARCH_KMP=m
+CONFIG_TEXTSEARCH_BM=m
+CONFIG_TEXTSEARCH_FSM=m
+CONFIG_RADIX_TREE_MULTIORDER=y
+CONFIG_ASSOCIATIVE_ARRAY=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT_MAP=y
+CONFIG_HAS_DMA=y
+CONFIG_CHECK_SIGNATURE=y
+CONFIG_CPU_RMAP=y
+CONFIG_DQL=y
+CONFIG_GLOB=y
+# CONFIG_GLOB_SELFTEST is not set
+CONFIG_NLATTR=y
+CONFIG_ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE=y
+# CONFIG_CORDIC is not set
+# CONFIG_DDR is not set
+CONFIG_IRQ_POLL=y
+CONFIG_OID_REGISTRY=m
+CONFIG_UCS2_STRING=y
+# CONFIG_SG_SPLIT is not set
+CONFIG_SG_POOL=y
+CONFIG_ARCH_HAS_SG_CHAIN=y
+CONFIG_ARCH_HAS_PMEM_API=y
+CONFIG_ARCH_HAS_MMIO_FLUSH=y
--
2.34.1
From 2867ba805abf8e11292f5287188684e2679bdf80 Mon Sep 17 00:00:00 2001
From: Soheil Hassas Yeganeh <soheil@google.com>
Date: Sat, 23 Jan 2016 20:54:46 -0500
Subject: [PATCH 16/27] net-test: adds a gce-install.sh script to build and
install kernel on GCE machine
This commit adds a script to build an upstream Linux kernel and
install it and boot it on a Google Cloud (GCE) virtual machine.
Usage:
./gce-install.sh -m <MACHINE_IP>
e.g.:
./gce-install.sh -m 1.2.3.4
ssh 1.2.3.4
Tested: On GCE.
Effort: net-test
Change-Id: I149233b802202335af93183728050aadb52cca2c
---
.gitignore | 3 ++
gce-install.sh | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 92 insertions(+)
create mode 100755 gce-install.sh
diff --git a/.gitignore b/.gitignore
index 5da004814678..f09d026f4820 100644
--- a/.gitignore
+++ b/.gitignore
@@ -154,6 +154,9 @@ x509.genkey
/allrandom.config
/allyes.config
+# tmp build/install directory for /gce-install.sh:
+/gce/
+
# Kconfig savedefconfig output
/defconfig
diff --git a/gce-install.sh b/gce-install.sh
new file mode 100755
index 000000000000..1d84fe1d313e
--- /dev/null
+++ b/gce-install.sh
@@ -0,0 +1,89 @@
+#!/bin/bash
+# Build a Linux kernel, install it on a remote machine, and reboot the machine.
+# This script is only known to work for Debian/Ubuntu-based GCE VMs.
+# Usage: ./gce-install.sh [-v] -m <MACHINE_IP>   (-v traces commands via "set -x")
+set -e
+
+usage() {
+ echo "gce-install.sh -m <MACHINE_IP>"
+}
+
+MACHINE=""
+VERBOSE=""
+# NOTE(review): -p and -z are accepted (each with an argument) but never handled below.
+while getopts "h?vm:p:z:" opt; do
+ case "${opt}" in
+ h|\?)
+ usage
+ exit 0
+ ;;
+ v)
+ VERBOSE="set -x"
+ ;;
+ m)
+ MACHINE=${OPTARG}
+ ;;
+ esac
+done
+
+if [ -z "${MACHINE}" ]; then
+ usage
+ exit -1
+fi
+
+umask 022
+
+${VERBOSE}
+# Version suffix: '-' and '/' in the branch name become '+' so it is safe in file names.
+BRANCH=$(git rev-parse --abbrev-ref HEAD | sed 's|[-/]|+|g')
+SHA1=$(git rev-parse --short HEAD)
+LOCALVERSION=+${BRANCH}+${SHA1}+GCE
+GCE_PKG_DIR=${PWD}/gce/${LOCALVERSION}/pkg
+GCE_INSTALL_DIR=${PWD}/gce/${LOCALVERSION}/install
+GCE_BUILD_DIR=${PWD}/gce/${LOCALVERSION}/build
+KERNEL_PKG=kernel-${LOCALVERSION}.tar.gz2
+MAKE_OPTS="-j$(nproc) \
+ LOCALVERSION=${LOCALVERSION} \
+ EXTRAVERSION="" \
+ INSTALL_PATH=${GCE_INSTALL_DIR}/boot \
+ INSTALL_MOD_PATH=${GCE_INSTALL_DIR}"
+
+echo "cleaning..."
+mkdir -p ${GCE_BUILD_DIR}
+mkdir -p ${GCE_INSTALL_DIR}/boot
+mkdir -p ${GCE_PKG_DIR}
+# NOTE(review): failures in the config/build steps below are ignored (set +e); presumably deliberate -- confirm.
+set +e
+echo "copying config.gce to .config ..."
+cp config.gce .config
+echo "running make olddefconfig ..."
+make olddefconfig > /tmp/make.olddefconfig
+make ${MAKE_OPTS} prepare > /tmp/make.prepare
+echo "making..."
+make ${MAKE_OPTS} > /tmp/make.default
+echo "making modules ..."
+make ${MAKE_OPTS} modules > /tmp/make.modules
+echo "making install ..."
+make ${MAKE_OPTS} install > /tmp/make.install
+echo "making modules_install ..."
+make ${MAKE_OPTS} modules_install > /tmp/make.modules_install
+set -e
+
+echo "making tarball ..."
+(cd ${GCE_INSTALL_DIR}; tar -cvzf ${GCE_PKG_DIR}/${KERNEL_PKG} boot/* lib/modules/* --owner=0 --group=0 > /tmp/make.tarball)
+
+echo "running: scp $GCE_PKG_DIR/$KERNEL_PKG ${MACHINE}:~/"
+scp ${GCE_PKG_DIR}/${KERNEL_PKG} ${MACHINE}:~/
+# Remote side: unpack the tarball over /, build an initramfs per vmlinuz-* in /boot, update grub, reboot.
+ssh ${MACHINE} "
+$VERBOSE
+sudo rm -rf /boot/*GCE /lib/modules/*GCE
+sudo tar --no-same-owner -xzvf ${KERNEL_PKG} -C / > /tmp/tar.out.txt
+cd /boot
+for v in \$(ls vmlinuz-* | sed s/vmlinuz-//g); do
+ sudo mkinitramfs -k -o initrd.img-\${v} \${v}
+done
+sudo update-grub
+sudo reboot
+"
+umask 027
--
2.34.1
From 59c67beb45a1d4d4684d06400b90109bbdb6320e Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Wed, 29 Aug 2018 10:27:59 -0400
Subject: [PATCH 17/27] net-test: scripts for testing bbr2 with upstream Linux
kernels
- runs a small set of simple tests
- sets up netem to emulate a configured network scenario
- runs /usr/bin/netperf and /usr/bin/netserver to generate traffic
- writes pcaps and ss logs
- analyzes test results
- generates graphs
Usage:
./run_tests.sh
./graph_tests.sh
Effort: net-test
Change-Id: I38662f554b3c905aa79947a2c52a2ecfe3943f8c
---
gtests/net/tcp/bbr/nsperf/graph_tests.sh | 306 ++++++++++++
gtests/net/tcp/bbr/nsperf/median.py | 43 ++
gtests/net/tcp/bbr/nsperf/nsperf.py | 540 +++++++++++++++++++++
gtests/net/tcp/bbr/nsperf/run_tests.sh | 201 ++++++++
gtests/net/tcp/bbr/nsperf/ss_log_parser.py | 193 ++++++++
5 files changed, 1283 insertions(+)
create mode 100755 gtests/net/tcp/bbr/nsperf/graph_tests.sh
create mode 100755 gtests/net/tcp/bbr/nsperf/median.py
create mode 100755 gtests/net/tcp/bbr/nsperf/nsperf.py
create mode 100755 gtests/net/tcp/bbr/nsperf/run_tests.sh
create mode 100755 gtests/net/tcp/bbr/nsperf/ss_log_parser.py
diff --git a/gtests/net/tcp/bbr/nsperf/graph_tests.sh b/gtests/net/tcp/bbr/nsperf/graph_tests.sh
new file mode 100755
index 000000000000..6277c4ffa1cc
--- /dev/null
+++ b/gtests/net/tcp/bbr/nsperf/graph_tests.sh
@@ -0,0 +1,306 @@
+#!/bin/bash
+# For all test results, generate png graphs and an HTML page linking to them.
+#
+# By default, graphs all tests:
+# ./graph_tests.sh
+# But you can also graph a subset of tests by setting the "tests"
+# environment variable:
+# tests="coexist shallow" ./graph_tests.sh
+#
+# fancier usage:
+# indir=out.180.sec/ outdir=graphs/ ./graph_tests.sh
+
+if [ "$indir" = "" ]; then
+ indir="out/"
+fi
+
+if [ "$outdir" = "" ]; then
+ outdir="graphs/"
+fi
+
+# By default graph all tests.
+# To graph a subset of tests, set the environment variable: tests="foo bar".
+if [ "$tests" = "" ]; then
+ tests="coexist random_loss shallow bufferbloat ecn_bulk"
+fi
+
+format=png
+if [ "$format" = "png" ]; then
+ PNG_SIZE="1024,768"
+ TERMINAL="set terminal pngcairo noenhanced size $PNG_SIZE"
+else
+ TERMINAL="set terminal wxt noenhanced size 1024,768"
+fi
+
+mkdir -p $outdir
+
+# Start HTML for a web page showing all png graphs we generate.
+TITLE="bbr v2 alpha upstream core tests"
+HTML_PATH="${outdir}/index.html"
+echo > $HTML_PATH
+echo -e "<html><title>$TITLE</title><body> <b> $TITLE </b> <br>\n" >> $HTML_PATH
+
+if [[ $tests == *"coexist"* ]]; then
+    #######
+    # show acceptable coexistence w/ cubic:
+    # graph tput of 1 cubic, 1 BBR at a range of buffer depths:
+    # (bw=50M, rtt=30ms, buf={...}xBDP)
+    rm -f $outdir/coexist.*
+    for cc_combo in cubic:1,bbr:1 cubic:1,bbr2:1; do
+        for bdp_of_buf in 0.1 1 2 4 8 16; do
+            echo -n "$bdp_of_buf " >> $outdir/coexist.${cc_combo}
+            grep THROUGHPUT $indir/coexist/${cc_combo}/${bdp_of_buf}/netperf.out.1.txt | \
+                cut -d= -f2 >> $outdir/coexist.${cc_combo}
+        done
+    done
+
+    OUTPNG="$outdir/coexist_1xcubic_1xbbr2_50M_30ms_varybuf.png"
+    OUTPUT="\n\
+set output '$OUTPNG'"
+
+    echo -e "set y2tics\n\
+    $TERMINAL $OUTPUT\n\
+    set key top left\n\
+    set ytics nomirror\n\
+    set grid\n\
+    set title 'cubic vs BBR throughput'\n\
+    set xlabel 'buffer size (as a multiple of BDP)'\n\
+    set ylabel 'throughput in Mbit/sec'\n\
+    set yrange [0:50]\n\
+    plot '$outdir/coexist.cubic:1,bbr:1' u 1:2 t 'bbr' w lp lw 2 pt 7 lt rgb \"#abd9e9\",\
+    '$outdir/coexist.cubic:1,bbr2:1' u 1:2 t 'bbr2' w lp lw 2 pt 7 lt rgb \"#2c7bb6\"\
+    \n" > $outdir/coexist.gnuplot
+    gnuplot -persist $outdir/coexist.gnuplot
+    # index.html sits in $outdir next to the png, so link it by basename.
+    echo -e "<img src='$(basename "$OUTPNG")'>\n" >> $HTML_PATH
+fi
+
+
+if [[ $tests == *"random_loss"* ]]; then
+    #######
+    # show high throughput with random loss up to design parameter:
+    # graph tput of cubic, bbr2 at a range of random loss rates
+    # (bw=1G, rtt=100ms, loss={...})
+    rm -f $outdir/random_loss.*
+    loss_rates="0.00001 0.0001 0.001 0.01 0.1 0.2 0.5 1 2 3 10 15 20"
+    for loss_rate in $loss_rates; do
+        for cc_name in cubic bbr bbr2; do
+            cc="${cc_name}:1"
+            sumd="$indir/random_loss/${cc}/${loss_rate}/summary/"
+            mkdir -p $sumd
+            rm -f "${sumd}"/*txt  # glob stays outside the quotes so it expands
+            for rep in `seq 1 10`; do
+                d="$indir/random_loss/${cc}/${loss_rate}/rep-${rep}"
+                grep THROUGHPUT ${d}/netperf.out.0.txt | cut -d= -f2 >> ${sumd}/THROUGHPUT.samples.txt
+            done
+            infile="${sumd}/THROUGHPUT.samples.txt" ./median.py > \
+                ${sumd}/THROUGHPUT.median.txt
+            echo -n "$loss_rate " >> $outdir/random_loss.${cc}
+            cat ${sumd}/THROUGHPUT.median.txt >> $outdir/random_loss.${cc}
+        done
+    done
+
+    OUTPNG="$outdir/random_loss_1G_100ms_varyloss.png"
+    OUTPUT="\n\
+set output '$OUTPNG'"
+
+    echo -e "set y2tics\n\
+    $TERMINAL $OUTPUT\n\
+    set key top right\n\
+    set ytics nomirror\n\
+    set grid\n\
+    set logscale x\n\
+    set title 'cubic, bbr, and bbr2 throughput with random loss'\n\
+    set xlabel 'random loss rate, in percent'\n\
+    set ylabel 'throughput in Mbit/sec'\n\
+    set yrange [0:1000]\n\
+    set xrange [:20]\n\
+    plot '$outdir/random_loss.cubic:1' u 1:2 t 'cubic' w lp lw 2 pt 7 lt rgb \"#d7191c\",\
+    '$outdir/random_loss.bbr:1' u 1:2 t 'bbr' w lp lw 2 pt 7 lt rgb \"#abd9e9\",\
+    '$outdir/random_loss.bbr2:1' u 1:2 t 'bbr2' w lp lw 2 pt 7 lt rgb \"#2c7bb6\"\
+    \n" > $outdir/random_loss.gnuplot
+    gnuplot -persist $outdir/random_loss.gnuplot
+    # index.html sits in $outdir next to the png, so link it by basename.
+    echo -e "<img src='$(basename "$OUTPNG")'>\n" >> $HTML_PATH
+fi
+
+
+if [[ $tests == *"shallow"* ]]; then
+    #######
+    # show reasonably low loss rates in shallow buffers:
+    # graph retransmit rate for range of flow counts
+    # (bw=1G, rtt=100ms, buf=2% of BDP (2ms), num_flows={...})
+    # BDP is 1G*100ms = 8256 packets
+    rm -f $outdir/shallow_buf.*
+    for num_flows in 1 10 30 60 100; do
+        for cc_name in cubic bbr bbr2; do
+            echo -n "$num_flows " >> $outdir/shallow_buf.${cc_name}
+            d="$indir/shallow/${cc_name}:${num_flows}/${num_flows}"
+            infile=${d}/ss.log outdir=${d}/ ./ss_log_parser.py
+            cat ${d}/retrans.out.total.txt >> $outdir/shallow_buf.${cc_name}
+        done
+    done
+
+    OUTPNG="$outdir/shallow_buf_1G_100ms_varynumflows.png"
+    OUTPUT="\n\
+set output '$OUTPNG'"
+
+    echo -e "set y2tics\n\
+    $TERMINAL $OUTPUT\n\
+    set key top left\n\
+    set ytics nomirror\n\
+    set grid\n\
+    set logscale x\n\
+    set title 'cubic, bbr, and bbr2 retransmit rate in shallow buffers'\n\
+    set xlabel 'number of flows'\n\
+    set ylabel 'retransmit rate (percent)'\n\
+    set yrange [0:15]\n\
+    set xrange [:]\n\
+    plot '$outdir/shallow_buf.cubic' u 1:2 t 'cubic' w lp lw 2 pt 7 lt rgb \"#d7191c\",\
+    '$outdir/shallow_buf.bbr' u 1:2 t 'bbr' w lp lw 2 pt 7 lt rgb \"#abd9e9\",\
+    '$outdir/shallow_buf.bbr2' u 1:2 t 'bbr2' w lp lw 2 pt 7 lt rgb \"#2c7bb6\"\
+    \n" > $outdir/shallow_buf.gnuplot
+    gnuplot -persist $outdir/shallow_buf.gnuplot
+    # index.html sits in $outdir next to the png, so link it by basename.
+    echo -e "<img src='$(basename "$OUTPNG")'>\n" >> $HTML_PATH
+fi
+
+
+if [[ $tests == *"bufferbloat"* ]]; then
+    #######
+    # show low delay in deep buffers, even without ECN signal:
+    # graph p50 RTT for two flows using either cubic or bbr2,
+    # at a range of buffer depths.
+    # (bw=50M, rtt=30ms, buf={...}xBDP)
+    rm -f $outdir/bufferbloat.*
+    for bdp_of_buf in 1 10 50 100; do
+        for cc_name in cubic bbr bbr2; do
+            echo -n "$bdp_of_buf " >> $outdir/bufferbloat.${cc_name}
+            num_flows=2
+            d="$indir/bufferbloat/${cc_name}:${num_flows}/${bdp_of_buf}"
+            infile=${d}/ss.log outdir=${d}/ ./ss_log_parser.py
+            cat ${d}/rtt_p50.out.total.txt >> $outdir/bufferbloat.${cc_name}
+        done
+    done
+
+    OUTPNG="$outdir/bufferbloat_50M_30ms_varybuf.png"
+    OUTPUT="\n\
+set output '$OUTPNG'"
+
+    echo -e "set y2tics\n\
+    $TERMINAL $OUTPUT\n\
+    set key top left\n\
+    set ytics nomirror\n\
+    set grid\n\
+    set title 'cubic, bbr, and bbr2 median RTT'\n\
+    set xlabel 'buffer size (as a multiple of BDP)'\n\
+    set ylabel 'median srtt sample (ms)'\n\
+    set yrange [0:]\n\
+    set xrange [1:100]\n\
+    plot '$outdir/bufferbloat.cubic' u 1:2 t 'cubic' w lp lw 2 pt 7 lt rgb \"#d7191c\",\
+    '$outdir/bufferbloat.bbr' u 1:2 t 'bbr' w lp lw 2 pt 7 lt rgb \"#abd9e9\",\
+    '$outdir/bufferbloat.bbr2' u 1:2 t 'bbr2' w lp lw 2 pt 7 lt rgb \"#2c7bb6\"\
+    \n" > $outdir/bufferbloat.gnuplot
+    gnuplot -persist $outdir/bufferbloat.gnuplot
+    # index.html sits in $outdir next to the png, so link it by basename.
+    echo -e "<img src='$(basename "$OUTPNG")'>\n" >> $HTML_PATH
+fi
+
+if [[ $tests == *"ecn_bulk"* ]]; then
+    rm -f $outdir/ecn_bulk.*
+
+    #######
+    # show ECN support can keep queues very low:
+    # graph p50 for range of flow counts.
+    # (bw=1G, rtt=1ms, num_flows={...})
+    # For each CC and flow count, show the median of the p50 RTT from N trials.
+    for cc_name in dctcp bbr2 bbr; do
+        for num_flows in 1 4 10 40 100; do
+            sumd="$indir/ecn_bulk/${cc_name}/${num_flows}/summary/"
+            mkdir -p $sumd
+            rm -f "${sumd}"/*txt  # glob stays outside the quotes so it expands
+            for rep in `seq 1 10`; do
+                # Find median srtt for this rep, and add it to list
+                # of all samples.
+                d="$indir/ecn_bulk/${cc_name}/${num_flows}/rep-${rep}"
+                infile=${d}/ss.log outdir=${d}/ ./ss_log_parser.py
+                cat ${d}/rtt_p50.out.total.txt >> ${sumd}/rtt_p50.out.samples.txt
+            done
+            infile="${sumd}/rtt_p50.out.samples.txt" ./median.py > \
+                ${sumd}/rtt_p50.out.median.txt
+            echo -n "$num_flows " >> $outdir/ecn_bulk.${cc_name}
+            cat ${sumd}/rtt_p50.out.median.txt >> $outdir/ecn_bulk.${cc_name}
+        done
+    done
+
+    OUTPNG="$outdir/ecn_bulk_1G_1ms_rtt_varynumflows.png"
+    OUTPUT="\n\
+set output '$OUTPNG'"
+
+    echo -e "set y2tics\n\
+    $TERMINAL $OUTPUT\n\
+    set key top left\n\
+    set ytics nomirror\n\
+    set grid\n\
+    set logscale x\n\
+    set title 'dctcp, bbr, and bbr2 median RTT'\n\
+    set xlabel 'number of flows'\n\
+    set ylabel 'median srtt sample (ms)'\n\
+    set yrange [0:]\n\
+    set xrange [1:100]\n\
+    plot '$outdir/ecn_bulk.dctcp' u 1:2 t 'dctcp' w lp lw 2 pt 7 lt rgb \"#d7191c\",\
+    '$outdir/ecn_bulk.bbr' u 1:2 t 'bbr' w lp lw 2 pt 7 lt rgb \"#abd9e9\",\
+    '$outdir/ecn_bulk.bbr2' u 1:2 t 'bbr2' w lp lw 2 pt 7 lt rgb \"#2c7bb6\"\
+    \n" > $outdir/ecn_bulk_rtt.gnuplot
+
+    gnuplot -persist $outdir/ecn_bulk_rtt.gnuplot
+    # index.html sits in $outdir next to the png, so link it by basename.
+    echo -e "<img src='$(basename "$OUTPNG")'>\n" >> $HTML_PATH
+
+    #######
+    # show ECN support can keep queues very low:
+    # graph median of retrans rates across N trials:
+    for cc_name in dctcp bbr2 bbr; do
+        for num_flows in 1 4 10 40 100; do
+            sumd="$indir/ecn_bulk/${cc_name}/${num_flows}/summary/"
+            mkdir -p $sumd
+            rm -f "${sumd}"/retrans.out.*txt  # keep the rtt_p50 summaries
+            for rep in `seq 1 10`; do
+                # Find overall retrans rate for this rep, and add it to list
+                # of all samples.
+                d="$indir/ecn_bulk/${cc_name}/${num_flows}/rep-${rep}"
+                cat ${d}/retrans.out.total.txt >> ${sumd}/retrans.out.samples.txt
+            done
+            infile="${sumd}/retrans.out.samples.txt" ./median.py > \
+                ${sumd}/retrans.out.median.txt
+            echo -n "$num_flows " >> $outdir/ecn_bulk.retrans.${cc_name}
+            cat ${sumd}/retrans.out.median.txt >> $outdir/ecn_bulk.retrans.${cc_name}
+        done
+    done
+
+    OUTPNG="$outdir/ecn_bulk_1G_1ms_retrans_varynumflows.png"
+    OUTPUT="\n\
+set output '$OUTPNG'"
+
+    echo -e "set y2tics\n\
+    $TERMINAL $OUTPUT\n\
+    set key top left\n\
+    set grid\n\
+    set logscale x\n\
+    set logscale y\n\
+    set title 'dctcp, bbr, and bbr2 retransmit rate'\n\
+    set xlabel 'number of flows'\n\
+    set ylabel 'retransmit rate (percent)'\n\
+    set yrange [:]\n\
+    set xrange [1:100]\n\
+    plot '$outdir/ecn_bulk.retrans.dctcp' u 1:2 t 'dctcp' axis x1y1 w lp lw 2 pt 7 lt rgb \"#d7191c\",\
+    '$outdir/ecn_bulk.retrans.bbr' u 1:2 t 'bbr' axis x1y1 w lp lw 2 pt 7 lt rgb \"#abd9e9\",\
+    '$outdir/ecn_bulk.retrans.bbr2' u 1:2 t 'bbr2' axis x1y1 w lp lw 2 pt 7 lt rgb \"#2c7bb6\"\
+    \n" > $outdir/ecn_bulk_retrans.gnuplot
+
+    gnuplot -persist $outdir/ecn_bulk_retrans.gnuplot
+    # index.html sits in $outdir next to the png, so link it by basename.
+    echo -e "<img src='$(basename "$OUTPNG")'>\n" >> $HTML_PATH
+fi
+
+echo "done graphing all tests: $tests"
diff --git a/gtests/net/tcp/bbr/nsperf/median.py b/gtests/net/tcp/bbr/nsperf/median.py
new file mode 100755
index 000000000000..f09ae072e0fd
--- /dev/null
+++ b/gtests/net/tcp/bbr/nsperf/median.py
@@ -0,0 +1,43 @@
+#!/usr/bin/python
+#
+# Read a file with one float per line, and print the median of all numbers.
+# Usage:
+# infile=numbers.txt ./median.py
+
+import os
+import sys
+
+def read_file():
+    """Read a file with one float per line, and return as a list of floats.
+
+    The path comes from the 'infile' environment variable. Blank lines are
+    skipped; any other unparsable line raises ValueError from float().
+    """
+
+    path = os.environ['infile']
+    nums = []
+    # The with-block closes the file even when a line fails to parse.
+    with open(path) as f:
+        for line in f:
+            num_str = line.strip()
+            if not num_str:
+                continue
+            nums.append(float(num_str))
+    return nums
+
+def median(nums):
+    """Return median of all numbers; nums must be non-empty."""
+    sorted_nums = sorted(nums)
+    n = len(sorted_nums)
+    m = n - 1
+    # Floor division keeps the indices ints on Python 3 as well as Python 2.
+    return (sorted_nums[n // 2] + sorted_nums[m // 2]) / 2.0
+
+def main():
+ """Main function to run everything."""
+ nums = read_file()
+ print('%s' % median(nums))
+ return 0
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/gtests/net/tcp/bbr/nsperf/nsperf.py b/gtests/net/tcp/bbr/nsperf/nsperf.py
new file mode 100755
index 000000000000..c8b5f697f0a8
--- /dev/null
+++ b/gtests/net/tcp/bbr/nsperf/nsperf.py
@@ -0,0 +1,540 @@
+#!/usr/bin/python
+#
+# Use netem, network namespaces, and veth virtual NICs
+# to run a multi-flow TCP test on a single Linux machine.
+#
+# There is one network namespace for each emulated host.
+# The emulated hosts are as follows:
+#
+# srv: server (sender)
+# srt: server router
+# mid: middle host to emulate delays and bandwidth constraints
+# crt: client router
+# cli: client (receiver)
+#
+# Most hosts have both a left ("l") and right ("r") virtual NIC.
+# The server has only an "r" NIC and the client has only an "l" NIC.
+#
+# The topology is as follows:
+#
+# +-------+ +-------+ +-------+ +-------+ +-------+
+# | srv | | srt | | mid | | crt | | cli |
+# | r +-+ l r +-+ l r +-+ l r +-+ l |
+# +-------+ +-------+ +-------+ +-------+ +-------+
+#
+# Authors:
+# Neal Cardwell
+# Soheil Hassas Yeganeh
+# Kevin (Yudong) Yang
+# Arjun Roy
+
+import os
+import os.path
+import socket
+import sys
+import threading
+import time
+
+HOSTS = ['cli', 'crt', 'mid', 'srt', 'srv']
+IP_MODE = socket.AF_INET6
+SS_INTERVAL_SECONDS = 0.1 # gather 'ss' stats each X seconds
+FIRST_PORT = 10000 # first TCP port to use
+
+# On Ubuntu 18.04.2 LTS, there are issues with the iproute2 binaries:
+# (1) the 'tc' binary has a bug and cannot parse netem random loss rates
+# (2) the 'ss' tool is missing recent socket stats
+# So to use this testing tool you may need to build your own iproute2 tools
+# from the latest iproute2 sources:
+# sudo su -
+# apt install pkg-config bison flex
+# mkdir -p /root/iproute2/
+# cd /root/iproute2
+# git clone git://git.kernel.org/pub/scm/network/iproute2/iproute2.git
+# cd iproute2/
+# ./configure
+# make
+SS_PATH = '/root/iproute2/iproute2/misc/ss'
+TC_PATH = '/root/iproute2/iproute2/tc/tc'
+
+def netperf():
+ if os.path.isfile('./netperf'):
+ return './netperf'
+ else:
+ return '/usr/bin/netperf'
+
+def netserver():
+ if os.path.isfile('./netserver'):
+ return './netserver'
+ else:
+ return '/usr/bin/netserver'
+
+def log_dir():
+ return '/tmp/'
+
+def run(cmd, verbose=True):
+ if verbose:
+ print('running: |%s|' % (cmd))
+ status = os.system(cmd)
+ if status != 0:
+ sys.stderr.write('error %d executing: %s' % (status, cmd))
+
+def cleanup():
+ """Delete all veth pairs and all network namespaces."""
+ for host in HOSTS:
+ run('( ip netns exec %(host)s ip link del dev %(host)s.l; '
+ ' ip netns exec %(host)s ip link del dev %(host)s.r; '
+ ' ip netns del %(host)s ) 2> /dev/null' % {'host' : host})
+
+def setup_logging():
+ """Set up all logging."""
+ # Zero out /var/log/kern-debug.log so that we only get our test logs.
+ run('logrotate -f /etc/logrotate.conf')
+ # Set up BBR to log with printk to /var/log/kern-debug.log.
+ run('echo Y > /sys/module/tcp_bbr2/parameters/debug_with_printk')
+ run('echo 3 > /sys/module/tcp_bbr2/parameters/flags')
+
+def setup_namespaces():
+ """Set up all network namespaces."""
+ for host in HOSTS:
+ run('ip netns add %(host)s' % {'host' : host})
+
+def setup_loopback():
+ """Set up loopback devices for all namespaces."""
+ for host in HOSTS:
+ run('ip netns exec %(host)s ifconfig lo up' % {'host' : host})
+
+def setup_veth():
+    """Create the veth pairs, move them into their namespaces, address them."""
+    c = ''
+    c += 'ip link add srv.r type veth peer name srt.l\n'
+    c += 'ip link add srt.r type veth peer name mid.l\n'
+    c += 'ip link add mid.r type veth peer name crt.l\n'
+    c += 'ip link add crt.r type veth peer name cli.l\n'
+
+    c += 'ip link set dev srv.r netns srv\n'
+    c += 'ip link set dev srt.r netns srt\n'
+    c += 'ip link set dev srt.l netns srt\n'
+    c += 'ip link set dev mid.r netns mid\n'
+    c += 'ip link set dev mid.l netns mid\n'
+    c += 'ip link set dev crt.l netns crt\n'
+    c += 'ip link set dev crt.r netns crt\n'
+    c += 'ip link set dev cli.l netns cli\n'
+
+    c += 'ip netns exec srv ip link set srv.r up\n'
+    c += 'ip netns exec srt ip link set srt.r up\n'
+    c += 'ip netns exec srt ip link set srt.l up\n'
+    c += 'ip netns exec mid ip link set mid.r up\n'
+    c += 'ip netns exec mid ip link set mid.l up\n'
+    c += 'ip netns exec crt ip link set crt.r up\n'
+    c += 'ip netns exec crt ip link set crt.l up\n'
+    c += 'ip netns exec cli ip link set cli.l up\n'
+
+    # Disable TSO, GSO, GRO (in the namespace that owns each device), or else
+    # netem limit is interpreted per multi-MSS skb, not per emulated-wire packet.
+    c += 'ip netns exec srt ethtool -K srt.r tso off gso off gro off\n'
+    c += 'ip netns exec mid ethtool -K mid.l tso off gso off gro off\n'
+    c += 'ip netns exec mid ethtool -K mid.r tso off gso off gro off\n'
+    c += 'ip netns exec crt ethtool -K crt.l tso off gso off gro off\n'
+
+    # server
+    c += 'ip netns exec srv ip addr add 192.168.0.1/24 dev srv.r\n'
+
+    # server router
+    c += 'ip netns exec srt ip addr add 192.168.0.100/24 dev srt.l\n'
+    c += 'ip netns exec srt ip addr add 192.168.1.1/24 dev srt.r\n'
+
+    # mid
+    c += 'ip netns exec mid ip addr add 192.168.1.100/24 dev mid.l\n'
+    c += 'ip netns exec mid ip addr add 192.168.2.1/24 dev mid.r\n'
+
+    # client router
+    c += 'ip netns exec crt ip addr add 192.168.2.100/24 dev crt.l\n'
+    c += 'ip netns exec crt ip addr add 192.168.3.1/24 dev crt.r\n'
+
+    # client
+    c += 'ip netns exec cli ip addr add 192.168.3.100/24 dev cli.l\n'
+
+    run(c)
+
+def setup_routes():
+ """Set up all routes."""
+ c = ''
+
+ # server
+ c += 'h=srv\n'
+ c += 'ip netns exec $h tc qdisc add dev $h.r root fq\n'
+ c += 'ip netns exec $h ip route add default via 192.168.0.100 dev $h.r\n'
+
+ # server router
+ c += 'h=srt\n'
+ c += 'ip netns exec $h ip route add default via 192.168.1.100 dev $h.r\n'
+
+ # mid
+ c += 'h=mid\n'
+ c += 'ip netns exec $h ip route add 192.168.3.0/24 via 192.168.2.100\n'
+ c += 'ip netns exec $h ip route add default via 192.168.1.1 dev $h.l\n'
+
+ # client router
+ c += 'h=crt\n'
+ c += 'ip netns exec $h ip route add default via 192.168.2.1 dev $h.l\n'
+
+ # cli
+ c += 'h=cli\n'
+ c += 'ip netns exec $h ip route add default via 192.168.3.1 dev $h.l\n'
+
+ run(c)
+
+def setup_forwarding():
+ """Enable forwarding in each namespace."""
+ for host in HOSTS:
+ run('ip netns exec %(host)s sysctl -q -w '
+ 'net.ipv4.ip_forward=1 '
+ 'net.ipv6.conf.all.forwarding=1' % {'host' : host})
+
+def netem_limit(rate, delay, buf):
+    """Return the netem queue limit, in packets.
+
+    The limit covers the packets in flight in the emulated pipe (the
+    bandwidth-delay product of rate Mbit/sec and delay ms, counted in
+    1500-byte packets) plus buf packets of emulated buffer.
+    """
+    pipe_bits = (rate * 1000000.0) * (delay / 1000.0)
+    pipe_pkts = int(pipe_bits / 8.0 / 1500.0)
+    return pipe_pkts + buf
+
+# Expand a CC spec such as 'cubic:1,bbr:2' into one list entry per flow:
+# ['cubic', 'bbr', 'bbr']
+def parse_cc_param(param_string):
+    """Return a list holding each congestion control name count times."""
+    flows = []
+    for spec in param_string.split(','):
+        # Each spec must be exactly 'name:count', else unpacking raises.
+        name, num = spec.split(':')
+        repeat = int(num)
+        flows.extend([name] * repeat)
+    return flows
+
+def get_params():
+ # Invocations of this tool should set the following parameters as
+ # environment variables.
+ params = {
+ 'bw': -1, # input bottleneck bw in Mbit/sec; required
+ 'rtt': -1, # RTT in ms; required
+ 'buf': -1, # input bottleneck buffer in packets; required
+ 'loss': 0, # input bottleneck loss rate in percent; optional
+ 'policer': 0, # input bottleneck policer rate, Mbit/sec; optional
+ 'cc': '', # congestion control algorithm: required
+ 'interval': 0, # interval between flow starts, in secs; optional
+ 'dur': -1, # length of test in secs: required
+ 'outdir': '', # output directory for results
+ 'qdisc': '', # qdisc at downstream bottleneck (empty for FIFO)
+ 'cmd': '', # command to run (e.g. set sysctl values)
+ 'pcap': 0, # bytes per packet to capture; 0 for no tracing
+ }
+
+ for key in params.keys():
+ print('parsing key %s' % key)
+ if key in os.environ:
+ print('looking at env var with key %s, val %s' % (key, os.environ[key]))
+ else:
+ print('no env var with key %s' % (key))
+ if key not in os.environ:
+ if params[key] != 0:
+ sys.stderr.write('missing %s in environment variables\n' % key)
+ sys.exit(1)
+ elif key == 'cc':
+ params[key] = parse_cc_param(os.environ[key])
+ elif type(params[key]) == str:
+ params[key] = os.environ[key]
+ else:
+ params[key] = float(os.environ[key])
+
+ print(params)
+ params['netperf'] = netperf()
+ params['receiver_ip'] = '192.168.3.100'
+ # 10Gbit/sec * 100ms is 125MBytes, so to tolerate
+ # high loss rates and lots of SACKed data, we use
+ # 512MByte socket send and receive buffers:
+ params['mem'] = 536870912
+ return params
+
+# Put bandwidth rate limiting using HTB, tied to user-specified
+# queuing discipline at that bottleneck, on traffic coming in the cli.l device.
+def setup_htb_and_qdisc(d):
+ """Set up HTB for rate limiting, and user-specified qdisc for the queue."""
+
+ c = ''
+
+ # First load the necessary modules.
+ c += ('rmmod ifb\n'
+ 'modprobe ifb numifbs=10\n'
+ 'modprobe act_mirred\n')
+
+ # Clear old queuing disciplines (qdisc) on the interfaces
+ d['ext'] = 'cli.l'
+ d['ext_ingress'] = 'cli.ifb0'
+ d['host'] = 'cli'
+ c += ('ip netns exec %(host)s '
+ '%(tc)s qdisc del dev %(ext)s root\n') % d
+ c += ('ip netns exec %(host)s '
+ '%(tc)s qdisc del dev %(ext)s ingress\n') % d
+ c += ('ip netns exec %(host)s '
+ '%(tc)s qdisc del dev %(ext_ingress)s root\n') % d
+ c += ('ip netns exec %(host)s '
+ '%(tc)s qdisc del dev %(ext_ingress)s ingress\n') % d
+
+ # Create ingress ifb0 on client interface.
+ c += ('ip netns exec %(host)s '
+ '%(tc)s qdisc add dev %(ext)s handle ffff: ingress\n') % d
+ c += ('ip netns exec %(host)s '
+ 'ip link add %(ext_ingress)s type ifb\n') % d
+ c += ('ip netns exec %(host)s '
+ 'ip link set dev %(ext_ingress)s up\n') % d
+ c += ('ip netns exec %(host)s '
+ 'ifconfig %(ext_ingress)s txqueuelen 128000\n') % d
+ c += ('ip netns exec %(host)s '
+ 'ifconfig %(ext_ingress)s\n') % d
+
+ # Forward all ingress traffic to the IFB device.
+ c += ('ip netns exec %(host)s '
+ '%(tc)s filter add dev %(ext)s parent ffff: protocol all u32 '
+ 'match u32 0 0 action mirred egress redirect '
+ 'dev %(ext_ingress)s\n') % d
+
+ # Create an egress filter on the IFB device.
+ c += ('ip netns exec %(host)s '
+ '%(tc)s qdisc add dev %(ext_ingress)s root handle 1: '
+ 'htb default 11\n') % d
+
+ # Add root class HTB with rate limiting.
+ c += ('ip netns exec %(host)s '
+ '%(tc)s class add dev %(ext_ingress)s parent 1: classid 1:11 '
+ ' htb rate %(IRATE)sMbit ceil %(IRATE)sMbit\n') % d
+
+ # Add qdisc for downstream bottleneck.
+ c += ('ip netns exec %(host)s '
+ '%(tc)s qdisc add dev %(ext_ingress)s parent 1:11 handle 20: '
+ '%(QDISC)s\n') % d
+
+ c += ('ip netns exec %(host)s %(tc)s -stat qdisc show\n') % d
+
+ return c
+
+def setup_netem(params):
+ """Set up netem on the crt (client router) host."""
+
+ d = {}
+
+ # Parameters for data direction.
+ d['IRATE'] = params['bw'] # Mbit/sec
+ d['IDELAY'] = params['rtt'] / 2 # ms
+ d['IBUF'] = params['buf'] # packets
+ d['ILOSS'] = params['loss']
+ d['IREO'] = 0 # TODO: not implemented yet
+ d['ILIMIT'] = netem_limit(rate=d['IRATE'], delay=d['IDELAY'], buf=d['IBUF'])
+ d['POLICER'] = params['policer'] # Mbit/sec
+ d['QDISC'] = params['qdisc']
+
+ # Parameters for ACK direction.
+ d['ORATE'] = 1000 # Mbit/sec; TODO: not implemented yet
+ d['ODELAY'] = params['rtt'] / 2 # ms
+ d['OBUF'] = 1000 # packets; TODO: not implemented yet
+ d['OLOSS'] = 0 # TODO: not implemented yet
+ d['OREO'] = 0 # TODO: not implemented yet
+ d['OLIMIT'] = netem_limit(rate=d['ORATE'], delay=d['ODELAY'], buf=d['OBUF'])
+
+ d['tc'] = TC_PATH
+
+ c = ''
+
+ # TODO: fix the policer mechanism to actually work...
+ if params['policer'] > 0:
+ d['host'] = 'mid'
+ c = ('ip netns exec %(host)s '
+ '%(tc)s filter list dev %(host)s.r\n'%
+ d)
+ run(c)
+
+ c = ('ip netns exec %(host)s '
+ '%(tc)s qdisc add dev %(host)s.l ingress\n' %
+ d)
+ run(c)
+
+ c = ('ip netns exec %(host)s '
+ '%(tc)s filter add dev %(host)s.l '
+ 'parent 1: protocol ip prio 10 u32 '
+ 'match ip src 192.168.0.1/32 flowid 1:2 '
+ 'action police rate %(POLICER)sMbit burst 100k drop\n' %
+ d)
+ run(c)
+ c = ''
+
+ if d['QDISC'] == '':
+ # If the user doesn't need a fancy qdisc, and FIFO will do,
+ # then use netem for rate limiting and buffering,
+ # since netem seems more accurate than HTB.
+ d['INETEM_RATE'] = 'rate %(IRATE)sMbit' % d
+ else:
+ d['INETEM_RATE'] = ''
+ d['ILIMIT'] = '%d' % (2*1000*1000*1000) # buffer is in user's qdisc
+
+ # Inbound from sender -> receiver. Downstream rate limiting is on cli.l.
+ d['host'] = 'crt'
+ c += ('ip netns exec %(host)s '
+ '%(tc)s qdisc add dev %(host)s.r root netem '
+ 'limit %(ILIMIT)s delay %(IDELAY)sms %(IREO)sms '
+ 'loss random %(ILOSS)s%% %(INETEM_RATE)s\n') % d
+
+ # Outbound from receiver -> sender.
+ d['host'] = 'crt'
+ c += ('ip netns exec %(host)s '
+ '%(tc)s qdisc add dev %(host)s.l root netem '
+ 'limit %(OLIMIT)s delay %(ODELAY)sms %(OREO)sms '
+ 'loss random %(OLOSS)s%% '
+ 'rate %(ORATE)sMbit\n') % d
+
+ c += ('ip netns exec %(host)s %(tc)s -stat qdisc show\n') % d
+
+ if (d['QDISC'] != ''):
+ c += setup_htb_and_qdisc(d)
+
+ run(c)
+
+def ss_log_thread(params):
+ """Repeatedly run ss command and append log to file."""
+ dur = params['dur']
+ outdir = params['outdir']
+ ss_log_path = os.path.join(outdir, 'ss.log')
+ receiver_ip = params['receiver_ip']
+ num_conns = len(params['cc'])
+
+ t0 = time.time()
+ t = t0
+ port_cnt = num_conns
+ f = open(ss_log_path, 'w')
+ f.truncate()
+ f.close()
+ if IP_MODE == socket.AF_INET6:
+ ss_ip = '[%s]'
+ else:
+ ss_ip = '%s'
+ ss_ip %= receiver_ip
+ ss_cmd = ('ip netns exec srv '
+ '%s -tinm "dport >= :%d and dport < :%d and dst %s" >> %s' % (
+ SS_PATH,
+ FIRST_PORT, FIRST_PORT + port_cnt, ss_ip, ss_log_path))
+
+ while t < t0 + dur:
+ f = open(ss_log_path, 'a')
+ f.write('# %f\n' % (time.time(),))
+ f.close()
+ run(ss_cmd, verbose=False)
+ t += SS_INTERVAL_SECONDS
+ to_sleep = t - time.time()
+ if to_sleep > 0:
+ time.sleep(to_sleep)
+
+def launch_ss(params):
+ t = threading.Thread(target=ss_log_thread, args=(params,))
+ t.start()
+ return t
+
+def run_test(params):
+    """Run one test case: 'stream' or 'rr', as selected by sys.argv[1]."""
+    print('command: %s' % (sys.argv))
+    run('uname -a; date; uptime')
+    run('grep . /sys/module/tcp_bbr2/parameters/*')
+    run('sysctl net.ipv4.tcp_ecn')
+
+    # Configure sender namespaces.
+    run('ip netns exec srv bash -c "%s"' % params['cmd'])
+
+    # Configure receiver namespace.
+    run('ip netns exec cli bash -c "%s"' % params['cmd'])
+
+    # Set up receiver process.
+    run('pkill -f netserver')
+    run('ip netns exec cli %s -N' % (netserver()))
+
+    # Set up output directory.
+    outdir = params['outdir']
+    run('mkdir -p %s' % outdir)
+
+    # Set up sender-side packet capture.
+    if params['pcap'] > 0:
+        snaplen = params['pcap']
+        path = os.path.join(outdir, 'out.pcap')
+        run('ip netns exec srv tcpdump -i srv.r -s %(snaplen)d -w %(path)s &' %
+            {'path': path, 'snaplen': snaplen})
+
+    # Set up periodic sender-side 'ss' stat capture.
+    ss_thread = launch_ss(params)
+
+    if sys.argv[1] == 'stream':
+        num_conns = len(params['cc'])
+        print('num_conns = %d' % (num_conns))
+        t = time.time()
+        for i in range(0, num_conns):
+            conn_params = params.copy()
+            if i != num_conns - 1:
+                conn_params['bg'] = '&'  # all but the last in the background
+            else:
+                conn_params['bg'] = ''
+            conn_params['cc'] = params['cc'][i]
+            conn_params['port'] = FIRST_PORT + i
+            conn_params['outfile'] = '%s/netperf.out.%d.txt' % (outdir, i)
+            run('ip netns exec srv %(netperf)s '
+                '-l %(dur)d -H %(receiver_ip)s -- -k THROUGHPUT '
+                '-s %(mem)s,%(mem)s -S %(mem)s,%(mem)s '
+                '-K %(cc)s -P %(port)s '
+                '> %(outfile)s '
+                '%(bg)s' % conn_params)
+            t += params['interval']
+            to_sleep = t - time.time()
+            if to_sleep > 0:
+                time.sleep(to_sleep)
+    elif sys.argv[1] == 'rr':
+        params['request_size'] = (10 + 20 + 40 + 80 + 160) * 1448
+        params['test'] = sys.argv[2]
+        # params['cc'] is a list of per-flow CC names; rr runs a single flow.
+        rr_params = dict(params, port=FIRST_PORT, cc=params['cc'][0])
+        run('ip netns exec srv %(netperf)s '
+            ' -P 0 -t %(test)s -H %(receiver_ip)s -- '
+            '-K %(cc)s -P %(port)s '
+            '-r %(request_size)d,1 '
+            '-o P50_LATENCY,P90_LATENCY,P99_LATENCY,MAX_LATENCY,'
+            'TRANSACTION_RATE,'
+            'LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS' % rr_params)
+    else:
+        sys.stderr.write('unknown test type argument: %s\n' % sys.argv[1])
+        sys.exit(1)
+
+    ss_thread.join()
+    run('killall tcpdump')
+
+    run('ls -l /tmp/*.gz')
+    run('cp -af /var/log/kern-debug.log ' + outdir)
+    run('rm -f ' + outdir + '/*.gz')
+    run('ls -l /tmp/*.gz')
+    run('gzip ' + outdir + '/kern-debug.log')
+    run('gzip ' + outdir + '/out.pcap')
+    run('ls -l /tmp/*gz')
+
+def main():
+ """Main function to run everything."""
+ params = get_params()
+ cleanup()
+ setup_logging()
+ setup_namespaces()
+ setup_loopback()
+ setup_veth()
+ setup_routes()
+ setup_forwarding()
+ setup_netem(params)
+ run_test(params)
+ cleanup()
+ return 0
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/gtests/net/tcp/bbr/nsperf/run_tests.sh b/gtests/net/tcp/bbr/nsperf/run_tests.sh
new file mode 100755
index 000000000000..31fd028d1319
--- /dev/null
+++ b/gtests/net/tcp/bbr/nsperf/run_tests.sh
@@ -0,0 +1,201 @@
+#!/bin/bash
+#
+# Run a set of tests with bbr2, bbr, cubic, dctcp.
+# By default, runs all tests:
+# ./run_tests.sh
+# But you can also run a subset of tests by setting the "tests"
+# environment variable:
+# tests="coexist shallow" ./run_tests.sh
+#
+
+# By default run all tests.
+# To run a subset of tests, set the environment variable: tests="foo bar".
+if [ "$tests" = "" ]; then
+ tests="coexist random_loss shallow bufferbloat ecn_bulk"
+fi
+
+# Module parameters for the alpha research release of bbr2 are here:
+MOD_PARAM_DIR=/sys/module/tcp_bbr2/parameters/
+
+# Disable ECN support:
+function disable_bbr_ecn() {
+ echo 0 > $MOD_PARAM_DIR/ecn_enable
+ egrep . $MOD_PARAM_DIR/* | grep ecn_enable
+ echo 5000 > $MOD_PARAM_DIR/ecn_max_rtt_us
+ egrep . $MOD_PARAM_DIR/* | grep ecn_max_rtt_us
+}
+
+# Enable ECN support, with the understanding that all ECN signals we get
+# here will be DCTCP/L4S ECN signals:
+function enable_bbr_ecn() {
+ echo 1 > $MOD_PARAM_DIR/ecn_enable
+ egrep . $MOD_PARAM_DIR/* | grep ecn_enable
+ echo 0 > $MOD_PARAM_DIR/ecn_max_rtt_us
+ egrep . $MOD_PARAM_DIR/* | grep ecn_max_rtt_us
+}
+
+# Make sure send and receive buffers can grow quite large, e.g. for
+# bw=1G, rtt=100ms or larger.
+sysctl -w net.core.rmem_max=250000000 net.ipv4.tcp_rmem='4096 131072 250000000'
+sysctl -w net.core.wmem_max=250000000 net.ipv4.tcp_wmem='4096 16384 250000000'
+disable_bbr_ecn
+
+function get_buf_pkts() {
+ buf_pkts=`echo | awk -v bw=$bw -v rtt=$rtt -v bdp_of_buf=$bdp_of_buf '{bdp_pkts = int(bw*1000*1000*rtt/1000.0 / (1514 * 8) * bdp_of_buf); print bdp_pkts;}'`
+}
+
+if [[ $tests == *"coexist"* ]]; then
+ # show acceptable coexistence w/ cubic:
+ # graph tput of 1 cubic, 1 bbr2 at a range of buffer depths:
+ # (bw=50M, rtt=30ms, buf={...}xBDP)
+ # [run for a very long time, 10minutes, to find convergence...]
+ for cc_combo in cubic:1,bbr:1 cubic:1,bbr2:1; do
+ for bdp_of_buf in 0.1 1 2 4 8 16; do
+ cmd=""
+ cc=$cc_combo # mix of CCs in this experiment
+ interval=2 # interval between flow starts, in secs
+ bw=50 # Mbit/sec
+ rtt=30 # ms
+ qdisc='' # use netem FIFO
+ loss=0 # loss in percent
+ dur=180 # test duration in secs
+ outdir="out/coexist/${cc}/$bdp_of_buf/"
+ # Create output directory:
+ mkdir -p $outdir
+ get_buf_pkts
+ set +e
+ cc=$cc bw=$bw rtt=$rtt buf=$buf_pkts qdisc=$qdisc loss=$loss \
+ dur=$dur cmd=$cmd outdir=$outdir interval=$interval \
+ ./nsperf.py stream | tee ${outdir}/nsperf.out.txt
+ set -e
+ done
+ done
+fi
+
+if [[ $tests == *"random_loss"* ]]; then
+ # show high throughput with random loss up to design parameter:
+ # graph tput of cubic, bbr2 at a range of random loss rates
+    # (bw=1G, rtt=100ms, loss={....})
+ for rep in `seq 1 10`; do
+ for cc_name in cubic bbr2 bbr; do
+ loss_rates="0.00001 0.0001 0.001 0.01 0.1 0.2 0.5 1 2 3 10 15 20"
+ for loss_rate in $loss_rates; do
+ cmd=""
+ cc=${cc_name}:1 # 1 flow
+ interval=0 # interval between flow starts, in secs
+ bw=1000 # Mbit/sec
+ rtt=100 # ms
+ bdp_of_buf=1 # buffer = 100% of BDP, or 100ms
+ qdisc='' # use netem FIFO
+ loss=$loss_rate # loss in percent
+ dur=60 # test duration in secs
+ outdir="out/random_loss/${cc}/${loss}/rep-${rep}/"
+ # Create output directory:
+ mkdir -p $outdir
+ get_buf_pkts
+ set +e
+ cc=$cc bw=$bw rtt=$rtt buf=$buf_pkts qdisc=$qdisc loss=$loss \
+ dur=$dur cmd=$cmd outdir=$outdir interval=$interval \
+ ./nsperf.py stream | tee ${outdir}/nsperf.out.txt
+ set -e
+ done
+ done
+ done
+fi
+
+if [[ $tests == *"shallow"* ]]; then
+ # show reasonably low loss rates in shallow buffers:
+ # graph retransmit rate for range of flow counts
+    # (bw=1G, rtt=100ms, buf=2ms, num_flows={...})
+ # BDP is 1G*100ms = 8256 packets
+ for cc_name in cubic bbr2 bbr; do
+ for num_flows in 1 10 30 60 100; do
+ cmd=""
+ cc=${cc_name}:${num_flows} # all flows bbr2
+ interval=.139 # interval between flow starts, in secs
+ bw=1000 # Mbit/sec
+ rtt=100 # ms
+ bdp_of_buf=0.02 # buffer = 2% of BDP, or 2ms
+ qdisc='' # use netem FIFO
+ loss=0 # loss in percent
+ dur=300 # test duration in secs
+ outdir="out/shallow/${cc}/${num_flows}/"
+ # Create output directory:
+ mkdir -p $outdir
+ get_buf_pkts
+ set +e
+ cc=$cc bw=$bw rtt=$rtt buf=$buf_pkts qdisc=$qdisc loss=$loss \
+ dur=$dur cmd=$cmd outdir=$outdir interval=$interval \
+ ./nsperf.py stream | tee ${outdir}/nsperf.out.txt
+ set -e
+ done
+ done
+fi
+
+if [[ $tests == *"bufferbloat"* ]]; then
+ # show low delay in deep buffers, even without ECN signal:
+ # graph p50 RTT for two flows using either cubic or bbr2,
+ # at a range of buffer depths.
+ # (bw=50M, rtt=30ms, buf={...}xBDP)
+ for cc_name in cubic bbr2 bbr; do
+ for bdp_of_buf in 1 10 50 100; do
+ cmd=""
+ cc=${cc_name}:2 # 2 flows
+ interval=2 # interval between flow starts, in secs
+ bw=50 # Mbit/sec
+ rtt=30 # ms
+ qdisc='' # use netem FIFO
+ loss=0 # loss in percent
+ dur=120 # test duration in secs
+ outdir="out/bufferbloat/${cc}/${bdp_of_buf}/"
+ # Create output directory:
+ mkdir -p $outdir
+ get_buf_pkts
+ set +e
+ cc=$cc bw=$bw rtt=$rtt buf=$buf_pkts qdisc=$qdisc loss=$loss \
+ dur=$dur cmd=$cmd outdir=$outdir interval=$interval \
+ ./nsperf.py stream | tee ${outdir}/nsperf.out.txt
+ set -e
+ done
+ done
+fi
+
+
+if [[ $tests == *"ecn_bulk"* ]]; then
+ # show ECN support can keep queues very low:
+ # graph p50 and p95 RTT (and retx, tput, fairness) for range of flow counts
+ # (bw=1G, rtt=1ms, num_flows={...})
+ enable_bbr_ecn
+ for rep in `seq 1 10`; do
+ for cc_name in dctcp bbr2 bbr; do
+ for num_flows in 1 4 10 40 100; do
+ # Inside the child/test namespaces, enable ECN for
+ # both active and passive connections:
+ cmd='sysctl net.ipv4.tcp_ecn=1'
+ cc=${cc_name}:${num_flows} # all flows bbr2
+ interval=.005 # interval between flow starts, in secs
+ bw=1000 # Mbit/sec
+ rtt=1 # ms
+ buf_pkts=0 # not using netem buffer
+ # We set the limit to 1000 packets, or 12ms at 1Gbit/sec.
+ # We configure the target to be far higher, to disable
+ # Codel-based drops.
+ qdisc='codel ce_threshold 242us limit 1000 target 100ms'
+ loss=0 # loss in percent
+ dur=10 # test duration in secs
+ outdir="out/ecn_bulk/${cc_name}/${num_flows}/rep-${rep}/"
+ # Create output directory:
+ mkdir -p $outdir
+ get_buf_pkts
+ set +e
+ cc=$cc bw=$bw rtt=$rtt buf=$buf_pkts qdisc=$qdisc loss=$loss \
+ dur=$dur cmd=$cmd outdir=$outdir interval=$interval \
+ ./nsperf.py stream | tee ${outdir}/nsperf.out.txt
+ set -e
+ done
+ done
+ done
+ disable_bbr_ecn
+fi
+
+echo "done running all tests: $tests"
diff --git a/gtests/net/tcp/bbr/nsperf/ss_log_parser.py b/gtests/net/tcp/bbr/nsperf/ss_log_parser.py
new file mode 100755
index 000000000000..3717a9fc7c23
--- /dev/null
+++ b/gtests/net/tcp/bbr/nsperf/ss_log_parser.py
@@ -0,0 +1,193 @@
+#!/usr/bin/python
+#
+# Parse ss.log textual output written by ss_log_thread() in nsperf.py.
+# Usage:
+# infile=foo/ss.log outdir=out/ ss_log_parser.py
+#
+# Author:
+# Neal Cardwell
+# Based on code by:
+# Kevin (Yudong) Yang
+# Soheil Hassas Yeganeh
+
+import os
+import socket
+import sys
+import time
+
+DEBUG = False # enable debugging output?
+
+def debug(s):  # Print s with a 'DEBUG: ' prefix, but only when DEBUG is true.
+ if DEBUG:
+ print('DEBUG: %s' % s)
+
+def median(nums):
+    """Return the median of nums as a float (0 for an empty sequence)."""
+    if len(nums) == 0:
+        return 0
+    sorted_nums = sorted(nums)
+    n = len(sorted_nums)
+    m = n - 1
+    # Floor division keeps the indices integral under Python 2 and 3 alike.
+    return (sorted_nums[n // 2] + sorted_nums[m // 2]) / 2.0
+
+def read_file():
+ """Parse the ss.log named by $infile into {time: {port: flow_data}}."""
+ all_data = {} # data for all time: <time>: time_data
+ time_data = {} # data for the current timestamp: <port>: { field: value }
+ time_secs = -1
+ ss_log_path = os.environ['infile']
+ debug('reading path: %s' % (ss_log_path))
+ f = open(ss_log_path)
+
+ # Read a timestamp line, or per-flow tuple line, or EOF.
+ line = f.readline()
+ debug('readline 1 => %s' % (line))
+ while True:
+ debug('line => %s' % (line))
+
+ # If the file is done or data for current time is done, save time data.
+ if not line or line.startswith('# ') and len(time_data):
+ debug('all_data time %d => time_data %s' %
+ (time_secs, time_data))
+ all_data[time_secs] = time_data
+ time_data = {}
+
+ if not line:
+ return all_data
+
+ # Check to see if we have data for a new point in time
+ if line.startswith('# '):
+ time_secs = float(line[2:])
+ assert time_secs > 0, time_secs
+ debug('time_secs = %s' % (time_secs))
+ # Read ss column headers ("State...")
+ line = f.readline()
+ debug('readline column headers => %s' % (line))
+ # Read next line
+ line = f.readline()
+ continue
+
+ # Parse line with 4-tuple
+ debug('readline for 4-tuple => %s' % (line))
+ if not line or line.startswith('# '):
+ continue # No live sockets with ports matching the ss query...
+ if len(line.split()) != 5:
+ sys.stderr.write('unable to find 4-tuple in: %s' % (line))
+ #print('unable to find 4-tuple in: %s' % (line))
+ sys.exit()
+ flow_data = {}
+ port = line.strip()
+ port = int(port[port.rfind(':') + 1:])
+ flow_data['port'] = port
+
+ # Read line with flow stats
+ line = f.readline()
+ debug('readline flow stats => %s' % (line))
+ assert line, 'expected flow stats for port %d' % (port)
+ stats = line.strip().split()
+ debug('stats: %s' % (stats))
+ for item in stats:
+ if item.startswith('cwnd:'):
+ flow_data['cwnd'] = int(item[item.rfind(':') + 1:])
+ elif item.startswith('bytes_acked:'):
+ flow_data['bytes_acked'] = int(item[item.rfind(':') + 1:])
+ elif item.startswith('retrans:'):
+ flow_data['retrans'] = int(item[item.rfind('/') + 1:])
+ elif item.startswith('data_segs_out:'):
+ flow_data['data_segs_out'] = int(item[item.rfind(':') + 1:])
+ elif item.startswith('rtt:'):
+ flow_data['rtt'] = (
+ float(item[item.find(':') + 1:item.rfind('/')]) / 1000 # ms -> secs
+ )
+ elif item.startswith('unacked:'):
+ flow_data['unacked'] = int(item[item.find(':') + 1:])
+ debug('time_data for time %s port %d: %s' %
+ (time_secs, port, flow_data))
+ if not 'cwnd' in flow_data:
+ sys.stderr.write('unable to find cwnd in: %s' % (line))
+ #print('unable to find cwnd in: %s' % (line))
+ sys.exit()
+ time_data[port] = flow_data
+ # Move on to the next line:
+ line = f.readline()
+
+def log_retrans_rate(all_data):
+    """Write per-flow and overall retransmit rates (percent) into outdir."""
+    outdir = os.environ['outdir']
+
+    last_data_segs_out = {}  # last data_segs_out per port
+    last_retrans = {}  # last retransmitted packet count per port
+    retrans_rates = {}  # maps port number to retrans rate
+    for t in sorted(all_data.keys()):
+        time_data = all_data[t]
+        for port, flow_data in time_data.items():
+            debug('port %d flow_data %s' % (port, flow_data))
+            last_data_segs_out[port] = flow_data.get('data_segs_out', 0)
+            debug('port %d last_data_segs_out=%s' %
+                  (port, last_data_segs_out[port]))
+            last_retrans[port] = flow_data.get('retrans', 0)
+            debug('port %d last_retrans=%s' % (port, last_retrans[port]))
+
+    total_retrans = 0
+    total_data_segs_out = 0
+    for port in sorted(last_data_segs_out):
+        if last_data_segs_out[port] == 0:
+            sys.stderr.write('outdir=%s port %d: last_data_segs_out==0\n' %
+                             (outdir, port))
+            retrans = 0
+        else:
+            retrans = float(last_retrans[port]) / float(last_data_segs_out[port])
+        retrans_rates[port] = retrans
+        total_retrans += last_retrans[port]
+        total_data_segs_out += last_data_segs_out[port]
+    if total_data_segs_out == 0:
+        sys.stderr.write('outdir=%s total_data_segs_out==0\n' % (outdir))
+        total_retrans_rate = 0
+    else:
+        total_retrans_rate = float(total_retrans) / float(total_data_segs_out)
+
+    # Write average retx rate for each flow, in percent.
+    i = 0
+    for port, retrans_rate in retrans_rates.items():
+        filename = 'retrans.out.%d.txt' % (i)
+        f = open(os.path.join(outdir, filename), 'w')
+        f.write('%.5f\n' % (retrans_rate * 100.0))
+        f.close()
+        i += 1
+
+    # Write average retx rate across all flows, in percent.
+    filename = 'retrans.out.total.txt'
+    f = open(os.path.join(outdir, filename), 'w')
+    f.write('%.5f\n' % (total_retrans_rate * 100.0))
+    f.close()
+
+def log_rtt(all_data):
+ """Log median srtt for all srtt samples we took from periodic ss dumps."""
+ rtts = []
+ for t in sorted(all_data.keys()):
+ time_data = all_data[t]
+ for port, flow_data in time_data.items():
+ debug('port %d flow_data %s' % (port, flow_data))
+ if 'rtt' in flow_data:
+ rtt = flow_data['rtt']
+ rtts.append(rtt)
+
+ p50_rtt = median(rtts)
+ p50_rtt = p50_rtt * 1000.0 # convert to ms
+ # Write p50 srtt sample (in ms) we took across all flows.
+ outdir = os.environ['outdir']
+ filename = 'rtt_p50.out.total.txt'
+ f = open(os.path.join(outdir, filename), 'w')
+ f.write('%s\n' % p50_rtt) # RTT in ms
+ f.close()
+
+def main():
+ """Parse the ss log, then write the retransmit-rate and median-RTT files."""
+ all_data = read_file()
+ log_retrans_rate(all_data)
+ log_rtt(all_data)
+ return 0
+
+if __name__ == '__main__':
+ sys.exit(main())
--
2.34.1
From 04fe6a14db3068391b7b8599e6edf3a47a0b2294 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Mon, 22 Jul 2019 23:18:56 -0400
Subject: [PATCH 18/27] net-tcp_bbr: v2: add a README.md for TCP BBR v2 alpha
release
Change-Id: I35a8c984e299d2af6e78c3d4b3aade5627678306
---
README.md | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 169 insertions(+)
create mode 100644 README.md
diff --git a/README.md b/README.md
new file mode 100644
index 000000000000..340d7c472bca
--- /dev/null
+++ b/README.md
@@ -0,0 +1,169 @@
+# TCP BBR v2 Alpha/Preview Release
+
+This document gives a quick overview of Google's TCP BBR v2
+alpha/preview release for Linux, and how to download, build, install,
+and test it.
+
+The TCP BBR v2 alpha/preview release is intended to enable research
+collaboration and wider testing. We encourage researchers to dive in
+and help evaluate/improve the BBR v2 algorithm and code. We welcome
+patches with good solutions to issues.
+
+This document shows how to download, build, install, and test
+a Linux kernel running TCP BBR v2 alpha.
+
+## License
+
+Like Linux TCP BBR v1, the v2 code is dual-licensed as both GPLv2.0 (like the
+Linux kernel) and BSD. You may use it under either license.
+
+## Viewing the TCP BBR v2 alpha sources
+
+You can view the current sources here:
+[tcp_bbr2.c](https://github.com/google/bbr/blob/v2alpha/net/ipv4/tcp_bbr2.c)
+
+## Obtaining kernel sources with TCP BBR v2 alpha
+
+There are two main options for downloading the code:
+
+1. To create a new git repo starting from a Linux kernel with TCP BBR v2 alpha,
+you can run:
+
+```
+git clone -o google-bbr -b v2alpha https://github.com/google/bbr.git
+cd bbr/
+```
+
+2. To download the code into an existing git repo, you can use:
+
+```
+git remote add google-bbr https://github.com/google/bbr.git
+git fetch google-bbr
+git checkout google-bbr/v2alpha
+```
+
+Note that if you already have a git repo that has imported the Linux source
+tree, then the second option will be much faster and use much less space, since
+it will only need to download the small deltas relative to the mainline Linux
+source distribution.
+
+## Building and installing the kernel
+
+To build a Linux kernel with TCP BBR v2 support, copy that kernel to a target
+(Debian or Ubuntu) test machine (bare metal or GCE), and reboot that machine,
+you can use the following script, included in the TCP BBR v2 distribution:
+
+```
+./gce-install.sh -m ${HOST}
+```
+
+## Checking the kernel installation
+
+Once the target test machine has finished rebooting, then ssh to the target
+test machine and become root with sudo or equivalent. First check that the
+machine booted the kernel you built above:
+
+```
+uname -a
+```
+
+You should see the branch name, SHA1 hash, and build time stamp from the kernel
+you built above.
+
+
+Then check what congestion control modules are available with:
+```
+sysctl net.ipv4.tcp_available_congestion_control
+```
+
+You should see something like:
+```
+net.ipv4.tcp_available_congestion_control = reno bbr bbr2 cubic dctcp
+```
+
+## Install test dependencies
+
+Next, copy the test scripts to the target test machine with:
+
+```
+scp -r gtests/net/tcp/bbr/nsperf/ ${HOST}:/tmp/
+```
+
+Before running the tests for the first time, as a one-time step you'll need to
+install the dependencies on the test machine, as root:
+
+```
+mv /tmp/nsperf /root/
+apt-get install --yes python netperf gnuplot5-nox
+```
+
+The 'tc' and 'ss' binaries on some prominent distributions, including Ubuntu 18
+LTS, are out of date and buggy. To run the TCP BBR v2 test scripts, you will
+probably need to download and use the latest versions:
+
+```
+apt-get install pkg-config bison flex
+mkdir -p /root/iproute2/
+cd /root/iproute2
+git clone git://git.kernel.org/pub/scm/network/iproute2/iproute2.git
+cd iproute2/
+./configure
+make
+```
+
+## Running TCP BBR v2 tests and generating graphs
+
+To run the tests, ssh to the target test machine and become root with sudo or
+equivalent. Then run the tests and generate graphs with:
+
+```
+cd /root/nsperf
+./run_tests.sh
+./graph_tests.sh
+```
+
+This will run for hours, and place the graphs in the ./graphs/ directory.
+
+You can run and graph a subset of the tests by specifying the test by name as
+an environment variable. For example:
+
+```
+cd /root/nsperf
+tests=random_loss ./run_tests.sh
+tests=random_loss ./graph_tests.sh
+```
+
+Enjoy!
+
+## Release Notes and Details
+
+### Enabling ECN support
+
+For lab testing, researchers can enable BBRv2 ECN support with the following
+commands. This is for use when you know that any ECN marks that the connections
+experience will be DCTCP/L4S-style ECN marks, rather than RFC3168 ECN marks:
+```
+# negotiate TCP ECN for active and passive connections:
+sysctl net.ipv4.tcp_ecn=1
+# enable BBRv2 ECN response:
+echo 1 > /sys/module/tcp_bbr2/parameters/ecn_enable
+# enable BBRv2 ECN response at any RTT:
+echo 0 > /sys/module/tcp_bbr2/parameters/ecn_max_rtt_us
+```
+Production use of the BBRv2 ECN functionality depends on negotiation or
+configuration that is outside the scope of the BBRv2 alpha release.
+
+### Enabling experimental pacing approach discussed at IETF 106 ICCRG session
+
+To try the experimental pacing approach described in our IETF 106 presentation,
+you can check out the `v2alpha-experimental-pacing` branch from the Google
+BBR github repository:
+```
+git remote add google-bbr https://github.com/google/bbr.git
+git fetch google-bbr
+git checkout google-bbr/v2alpha-experimental-pacing
+```
+
+## FAQ
+
+If you have questions about BBR, check the [BBR FAQ](https://github.com/google/bbr/blob/master/Documentation/bbr-faq.md).
--
2.34.1
From 1ebc5cc0a0055ee0ec5fb9b0ceef5d4fbed36d77 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Thu, 21 Nov 2019 15:28:01 -0500
Subject: [PATCH 19/27] net-tcp_bbr: v2: remove unnecessary rs.delivered_ce
logic upon loss
There is no reason to compute rs.delivered_ce upon loss.
In fact, we specifically do not want to compute rs.delivered_ce upon loss.
Two issues:
(1) This would be the wrong thing to do, in behavior terms. With
RACK's dynamic reordering window, losses can be marked long after
the sequence hole appears in the ACK/SACK stream. We want to
catch the ECN mark rate rising too high as quickly as possible,
which means we want to check for high ECN mark rates at ACK time
(as BBRv2 currently does) and not loss marking time.
(2) This is dead code. The ECN mark rate cannot be detected as too
high because the check needs rs->delivered to be > 0 as well:
if (rs->delivered_ce > 0 && rs->delivered > 0 &&
Since we are not setting rs->delivered upon loss, this check
cannot succeed, so setting delivered_ce is pointless.
This dead and wrong line was discovered by Randall Stewart at Netflix
as he was reading the BBRv2 code.
Change-Id: I37f83f418a259ec31d8f82de986db071b364b76a
---
net/ipv4/tcp_bbr2.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/net/ipv4/tcp_bbr2.c b/net/ipv4/tcp_bbr2.c
index a6959b70e51d..e00b47850dce 100644
--- a/net/ipv4/tcp_bbr2.c
+++ b/net/ipv4/tcp_bbr2.c
@@ -2508,7 +2508,6 @@ static void bbr2_skb_marked_lost(struct sock *sk, const struct sk_buff *skb)
memset(&rs, 0, sizeof(rs));
rs.tx_in_flight = scb->tx.in_flight;
rs.lost = tp->lost - scb->tx.lost;
- rs.delivered_ce = tp->delivered_ce - scb->tx.delivered_ce;
rs.is_app_limited = scb->tx.is_app_limited;
if (bbr2_is_inflight_too_high(sk, &rs)) {
rs.tx_in_flight = bbr2_inflight_hi_from_lost_skb(sk, &rs, skb);
--
2.34.1
From f862d14080d2f53983f5417678140aaf5b51ef8b Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Sat, 16 Nov 2019 16:54:59 -0500
Subject: [PATCH 20/27] net-gbuild: add Gconfig.bbr2 to gbuild kernel with
CONFIG_TCP_CONG_BBR2=y
Change-Id: I6edbb4240c8af97e6248d85591f86d5548586d2b
---
net/ipv4/Gconfig.bbr2 | 1 +
1 file changed, 1 insertion(+)
create mode 100644 net/ipv4/Gconfig.bbr2
diff --git a/net/ipv4/Gconfig.bbr2 b/net/ipv4/Gconfig.bbr2
new file mode 100644
index 000000000000..1cdc3953f2be
--- /dev/null
+++ b/net/ipv4/Gconfig.bbr2
@@ -0,0 +1 @@
+CONFIG_TCP_CONG_BBR2=y
--
2.34.1
From aa5718aaea0df59629a98736cdbf5e4c16c27e8a Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Mon, 17 Aug 2020 19:08:41 -0400
Subject: [PATCH 21/27] net-tcp_bbr: v2: remove field bw_rtts that is unused in
BBRv2
Change-Id: I58e3346c707748a6f316f3ed060d2da84c32a79b
---
net/ipv4/tcp_bbr2.c | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/net/ipv4/tcp_bbr2.c b/net/ipv4/tcp_bbr2.c
index e00b47850dce..6121cd46f81a 100644
--- a/net/ipv4/tcp_bbr2.c
+++ b/net/ipv4/tcp_bbr2.c
@@ -179,9 +179,8 @@ struct bbr {
min_rtt_win_sec:5, /* max allowed value: 31 */
probe_rtt_mode_ms:9, /* max allowed value: 511 */
full_bw_cnt:3, /* max allowed value: 7 */
- bw_rtts:5, /* max allowed value: 31 */
cwnd_tso_budget:1, /* allowed values: {0, 1} */
- unused3:1,
+ unused3:6,
drain_to_target:1, /* boolean */
precise_ece_ack:1, /* boolean */
extra_acked_in_startup:1, /* allowed values: {0, 1} */
@@ -237,8 +236,6 @@ struct bbr_context {
u32 log:1;
};
-/* Window length of bw filter (in rounds). Max allowed value is 31 (0x1F) */
-static int bbr_bw_rtts = CYCLE_LEN + 2;
/* Window length of min_rtt filter (in sec). Max allowed value is 31 (0x1F) */
static u32 bbr_min_rtt_win_sec = 10;
/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode.
@@ -390,7 +387,6 @@ static bool bbr_usage_based_cwnd; /* default: disabled */
*/
static bool bbr_ecn_enable = false;
-module_param_named(bw_rtts, bbr_bw_rtts, int, 0644);
module_param_named(min_tso_rate, bbr_min_tso_rate, int, 0644);
module_param_named(tso_rtt_shift, bbr_tso_rtt_shift, int, 0644);
module_param_named(high_gain, bbr_high_gain, int, 0644);
@@ -1247,7 +1243,6 @@ static void bbr_init(struct sock *sk)
bbr->params.min_rtt_win_sec = min(0x1FU, bbr_min_rtt_win_sec);
bbr->params.probe_rtt_mode_ms = min(0x1FFU, bbr_probe_rtt_mode_ms);
bbr->params.full_bw_cnt = min(0x7U, bbr_full_bw_cnt);
- bbr->params.bw_rtts = min(0x1F, bbr_bw_rtts);
bbr->params.full_bw_thresh = min(0x3FFU, bbr_full_bw_thresh);
bbr->params.extra_acked_gain = min(0x7FF, bbr_extra_acked_gain);
bbr->params.extra_acked_win_rtts = min(0x1FU, bbr_extra_acked_win_rtts);
--
2.34.1
From 416700eb80d584802cfe40c4199f914485832fa0 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Mon, 17 Aug 2020 19:10:21 -0400
Subject: [PATCH 22/27] net-tcp_bbr: v2: remove cycle_rand parameter that is
unused in BBRv2
Change-Id: Iee1df7e41e42de199068d7c89131ed3d228327c0
---
net/ipv4/tcp_bbr2.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/net/ipv4/tcp_bbr2.c b/net/ipv4/tcp_bbr2.c
index 6121cd46f81a..57b4abebb275 100644
--- a/net/ipv4/tcp_bbr2.c
+++ b/net/ipv4/tcp_bbr2.c
@@ -303,8 +303,6 @@ static int bbr_pacing_gain[] = {
BBR_UNIT, BBR_UNIT, BBR_UNIT, /* cruise at 1.0*bw to utilize pipe, */
BBR_UNIT, BBR_UNIT, BBR_UNIT /* without creating excess queue... */
};
-/* Randomize the starting gain cycling phase over N phases: */
-static u32 bbr_cycle_rand = 7;
/* Try to keep at least this many packets in flight, if things go smoothly. For
* smooth functioning, a sliding window protocol ACKing every other packet
@@ -395,7 +393,6 @@ module_param_named(startup_cwnd_gain, bbr_startup_cwnd_gain, int, 0644);
module_param_named(cwnd_gain, bbr_cwnd_gain, int, 0644);
module_param_array_named(pacing_gain, bbr_pacing_gain, int,
&bbr_pacing_gain_size, 0644);
-module_param_named(cycle_rand, bbr_cycle_rand, uint, 0644);
module_param_named(cwnd_min_target, bbr_cwnd_min_target, uint, 0644);
module_param_named(probe_rtt_cwnd_gain,
bbr_probe_rtt_cwnd_gain, uint, 0664);
--
2.34.1
From 2547331871679f48054cd712131b519792d7cef6 Mon Sep 17 00:00:00 2001
From: Jason Xing <kerneljasonxing@gmail.com>
Date: Thu, 10 Sep 2020 09:02:18 -0400
Subject: [PATCH 23/27] net-test: use crt namespace when nsperf disables crt.l
TSO/GSO/GRO
Changing the 'srt' to 'crt' is the right way to disable the features of
the crt.l.
Signed-off-by: Jason Xing <kerneljasonxing@gmail.com>
---
gtests/net/tcp/bbr/nsperf/nsperf.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gtests/net/tcp/bbr/nsperf/nsperf.py b/gtests/net/tcp/bbr/nsperf/nsperf.py
index c8b5f697f0a8..ed107c2fa245 100755
--- a/gtests/net/tcp/bbr/nsperf/nsperf.py
+++ b/gtests/net/tcp/bbr/nsperf/nsperf.py
@@ -134,7 +134,7 @@ def setup_veth():
c += 'ip netns exec srt ethtool -K srt.r tso off gso off gro off\n'
c += 'ip netns exec mid ethtool -K mid.l tso off gso off gro off\n'
c += 'ip netns exec mid ethtool -K mid.r tso off gso off gro off\n'
- c += 'ip netns exec srt ethtool -K crt.l tso off gso off gro off\n'
+ c += 'ip netns exec crt ethtool -K crt.l tso off gso off gro off\n'
# server
c += 'ip netns exec srv ip addr add 192.168.0.1/24 dev srv.r\n'
--
2.34.1
From 2955af01a0000c8e29e3202d401acadc0b6e6409 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Mon, 28 Dec 2020 19:23:09 -0500
Subject: [PATCH 24/27] net-tcp_bbr: v2: don't assume prior_cwnd was set
entering CA_Loss
Fix WARN_ON_ONCE() warnings that were firing and pointing to a
bbr->prior_cwnd of 0 when exiting CA_Loss and transitioning to
CA_Open.
The issue was that tcp_simple_retransmit() calls:
tcp_set_ca_state(sk, TCP_CA_Loss);
without first calling icsk_ca_ops->ssthresh(sk) (because
tcp_simple_retransmit() is dealing with losses due to MTU issues and
not congestion). The lack of this callback means that BBR did not get
a chance to set bbr->prior_cwnd, and thus upon exiting CA_Loss in such
cases the WARN_ON_ONCE() would fire due to a zero bbr->prior_cwnd.
This commit removes that warning, since a bbr->prior_cwnd of 0 is a
valid situation in this state transition.
For setting inflight_lo upon entering CA_Loss, to avoid setting an
inflight_lo of 0 in this case, this commit switches to taking the max
of cwnd and prior_cwnd. We plan to remove that line of code when we
switch to cautious (PRR-style) recovery, so that awkwardness will go
away.
Change-Id: I575dce871c2f20e91e3e9449e1706f42a07b8118
---
net/ipv4/tcp_bbr2.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/net/ipv4/tcp_bbr2.c b/net/ipv4/tcp_bbr2.c
index 57b4abebb275..5510adc92bbb 100644
--- a/net/ipv4/tcp_bbr2.c
+++ b/net/ipv4/tcp_bbr2.c
@@ -2617,15 +2617,12 @@ static void bbr2_set_state(struct sock *sk, u8 new_state)
/* bbr_adapt_lower_bounds() needs cwnd before
* we suffered an RTO, to update inflight_lo:
*/
- WARN_ON_ONCE(bbr->prior_cwnd == 0);
- WARN_ON_ONCE(bbr->prior_cwnd == ~0U);
- bbr->inflight_lo = bbr->prior_cwnd;
+ bbr->inflight_lo =
+ max(tp->snd_cwnd, bbr->prior_cwnd);
}
bbr_debug(sk, 0, &rs, &ctx);
} else if (bbr->prev_ca_state == TCP_CA_Loss &&
new_state != TCP_CA_Loss) {
- WARN_ON_ONCE(bbr->prior_cwnd == 0);
- WARN_ON_ONCE(bbr->prior_cwnd == ~0U);
tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd);
bbr->try_fast_path = 0; /* bound cwnd using latest model */
}
--
2.34.1
From 81f6b1d79f9bccdfb2c10cea7930ccf6e18ce30f Mon Sep 17 00:00:00 2001
From: Adithya Abraham Philip <abrahamphilip@google.com>
Date: Fri, 11 Jun 2021 21:56:10 +0000
Subject: [PATCH 25/27] net-tcp_bbr: v2: Fix missing ECT markings on
retransmits for BBRv2
Adds a new flag TCP_ECN_ECT_PERMANENT that is used by CCAs to
indicate that retransmitted packets and pure ACKs must have the
ECT bit set. This is a necessary fix for BBRv2, which when using
ECN expects ECT to be set even on retransmitted packets and ACKs.
Currently CCAs like BBRv2 which can use ECN but don't "need" it
do not have a way to indicate that ECT should be set on
retransmissions/ACKs.
Signed-off-by: Adithya Abraham Philip <abrahamphilip@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
---
include/net/tcp.h | 1 +
net/ipv4/tcp_bbr2.c | 3 +++
net/ipv4/tcp_output.c | 3 ++-
3 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b65268d3a3c2..a8868e515e13 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -375,6 +375,7 @@ static inline void tcp_dec_quickack_mode(struct sock *sk,
#define TCP_ECN_QUEUE_CWR 2
#define TCP_ECN_DEMAND_CWR 4
#define TCP_ECN_SEEN 8
+#define TCP_ECN_ECT_PERMANENT 16
enum tcp_tw_status {
TCP_TW_SUCCESS = 0,
diff --git a/net/ipv4/tcp_bbr2.c b/net/ipv4/tcp_bbr2.c
index 5510adc92bbb..fa49e17c47ca 100644
--- a/net/ipv4/tcp_bbr2.c
+++ b/net/ipv4/tcp_bbr2.c
@@ -2471,6 +2471,9 @@ static void bbr2_init(struct sock *sk)
bbr->alpha_last_delivered_ce = 0;
tp->fast_ack_mode = min_t(u32, 0x2U, bbr_fast_ack_mode);
+
+ if ((tp->ecn_flags & TCP_ECN_OK) && bbr_ecn_enable)
+ tp->ecn_flags |= TCP_ECN_ECT_PERMANENT;
}
/* Core TCP stack informs us that the given skb was just marked lost. */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5284249f4689..a9ceec2702b2 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -375,7 +375,8 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
th->cwr = 1;
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
}
- } else if (!tcp_ca_needs_ecn(sk)) {
+ } else if (!(tp->ecn_flags & TCP_ECN_ECT_PERMANENT) &&
+ !tcp_ca_needs_ecn(sk)) {
/* ACK or retransmitted segment: clear ECT|CE */
INET_ECN_dontxmit(sk);
}
--
2.34.1
From 5d6b1c62321349f5ec6361f4f698f3c1e03e8f20 Mon Sep 17 00:00:00 2001
From: Mubashir Adnan Qureshi <mubashirq@google.com>
Date: Wed, 20 Jul 2022 00:11:26 +0000
Subject: [PATCH 26/27] net-tcp_bbr: v2: add support for PLB in TCP and BBRv2
PLB (Protective Load Balancing) is a host based mechanism for load
balancing across switch links. It leverages congestion signals (e.g. ECN)
from transport layer to randomly change the path of the connection
experiencing congestion. PLB changes the path of the connection by
changing the outgoing IPv6 flow label for IPv6 connections (implemented
in Linux by calling sk_rethink_txhash()). Because of this implementation
mechanism, PLB can currently only work for IPv6 traffic. For more
information, see the SIGCOMM 2022 paper:
https://doi.org/10.1145/3544216.3544226
Congestion control algorithms track PLB state and cause the connection
to trigger a path change when either of the 2 conditions is satisfied:
- No packets are in flight and (# consecutive congested rounds >=
sysctl_tcp_plb_idle_rehash_rounds)
- (# consecutive congested rounds >= sysctl_tcp_plb_rehash_rounds)
A round (RTT) is marked as congested when congestion signal
(ECN ce_ratio) over an RTT is greater than sysctl_tcp_plb_cong_thresh.
In the event of RTO, PLB (via tcp_write_timeout()) triggers a path
change and disables congestion-triggered path changes for random time
between (sysctl_tcp_plb_suspend_rto_sec, 2*sysctl_tcp_plb_suspend_rto_sec)
to avoid hopping onto the "connectivity blackhole". RTO-triggered
path changes can still happen during this cool-off period.
Change-Id: I5d0fb3ab55b27b506b0cf32bc93df892b5336c2c
---
Documentation/networking/ip-sysctl.rst | 58 ++++++++++++++
include/linux/tcp.h | 3 +
include/net/inet_connection_sock.h | 2 +-
include/net/netns/ipv4.h | 5 ++
include/net/tcp.h | 17 +++++
include/uapi/linux/snmp.h | 1 +
net/ipv4/Makefile | 2 +-
net/ipv4/proc.c | 1 +
net/ipv4/sysctl_net_ipv4.c | 43 +++++++++++
net/ipv4/tcp_bbr2.c | 28 +++++--
net/ipv4/tcp_ipv4.c | 7 ++
net/ipv4/tcp_plb.c | 100 +++++++++++++++++++++++++
12 files changed, 260 insertions(+), 7 deletions(-)
create mode 100644 net/ipv4/tcp_plb.c
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index e7b3fa7bb3f7..03a85332d5d1 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -1069,6 +1069,64 @@ tcp_child_ehash_entries - INTEGER
Default: 0
+tcp_plb_enabled - BOOLEAN
+ If set, TCP PLB (Protective Load Balancing) is enabled. PLB is
+ described in the following paper:
+ https://doi.org/10.1145/3544216.3544226. Based on PLB parameters,
+ upon sensing sustained congestion, TCP triggers a change in
+ flow label field for outgoing IPv6 packets. A change in flow label
+ field potentially changes the path of outgoing packets for switches
+ that use ECMP/WCMP for routing.
+
+ Default: 0
+
+tcp_plb_cong_thresh - INTEGER
+ Fraction of packets marked with congestion over a round (RTT) to
+ tag that round as congested. This is referred to as K in the PLB paper:
+ https://doi.org/10.1145/3544216.3544226.
+
+ The 0-1 fraction range is mapped to 0-256 range to avoid floating
+ point operations. For example, 128 means that if at least 50% of
+ the packets in a round were marked as congested then the round
+ will be tagged as congested.
+
+ Possible Values: 0 - 256
+
+ Default: 128
+
+tcp_plb_idle_rehash_rounds - INTEGER
+ Number of consecutive congested rounds (RTT) seen after which
+ a rehash can be performed, given there are no packets in flight.
+ This is referred to as M in the PLB paper:
+ https://doi.org/10.1145/3544216.3544226.
+
+ Possible Values: 0 - 31
+
+ Default: 3
+
+tcp_plb_rehash_rounds - INTEGER
+ Number of consecutive congested rounds (RTT) seen after which
+ a forced rehash can be performed. Be careful when setting this
+ parameter, as a small value increases the risk of retransmissions.
+ This is referred to as N in the PLB paper:
+ https://doi.org/10.1145/3544216.3544226.
+
+ Possible Values: 0 - 31
+
+ Default: 12
+
+tcp_plb_suspend_rto_sec - INTEGER
+ Time, in seconds, to suspend PLB in the event of an RTO. In order to avoid
+ having PLB repath onto a connectivity "black hole", after an RTO a TCP
+ connection suspends PLB repathing for a random duration between 1x and
+ 2x of this parameter. Randomness is added to avoid concurrent rehashing
+ of multiple TCP connections. This should be set corresponding to the
+ amount of time it takes to repair a failed link.
+
+ Possible Values: 0 - 255
+
+ Default: 60
+
UDP variables
=============
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index d8f94ef1a297..85a47c049662 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -450,6 +450,9 @@ struct tcp_sock {
*/
struct request_sock __rcu *fastopen_rsk;
struct saved_syn *saved_syn;
+
+/* Rerouting information */
+ u16 ecn_rehash; /* PLB triggered rehash attempts */
};
enum tsq_enum {
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 1387e67a017f..30cc3b859a06 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -136,7 +136,7 @@ struct inet_connection_sock {
u32 icsk_user_timeout;
/* XXX inflated by temporary internal debugging info */
-#define ICSK_CA_PRIV_SIZE (216)
+#define ICSK_CA_PRIV_SIZE (224)
u64 icsk_ca_priv[ICSK_CA_PRIV_SIZE / sizeof(u64)];
};
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 1b8004679445..78d7ced36f94 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -183,6 +183,11 @@ struct netns_ipv4 {
unsigned long tfo_active_disable_stamp;
u32 tcp_challenge_timestamp;
u32 tcp_challenge_count;
+ u8 sysctl_tcp_plb_enabled;
+ int sysctl_tcp_plb_cong_thresh;
+ u8 sysctl_tcp_plb_idle_rehash_rounds;
+ u8 sysctl_tcp_plb_rehash_rounds;
+ u8 sysctl_tcp_plb_suspend_rto_sec;
int sysctl_udp_wmem_min;
int sysctl_udp_rmem_min;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a8868e515e13..46e4b0da345d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2171,6 +2171,23 @@ extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
extern void tcp_rack_reo_timeout(struct sock *sk);
extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs);
+/* tcp_plb.c */
+
+#define TCP_PLB_SCALE 8 /* scaling factor for fractions in PLB (e.g. ce_ratio) */
+
+/* State for PLB (Protective Load Balancing) for a single TCP connection. */
+struct tcp_plb_state {
+ u8 consec_cong_rounds:5, /* consecutive congested rounds */
+ enabled:1, /* Check if PLB is enabled */
+ unused:2;
+ u32 pause_until; /* jiffies32 when PLB can resume repathing */
+};
+
+void tcp_plb_update_state(const struct sock *sk, struct tcp_plb_state *plb,
+ const int cong_ratio);
+void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb);
+void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb);
+
/* At how many usecs into the future should the RTO fire? */
static inline s64 tcp_rto_delta_us(const struct sock *sk)
{
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 4d7470036a8b..8ce035f1c874 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -292,6 +292,7 @@ enum
LINUX_MIB_TCPDSACKIGNOREDDUBIOUS, /* TCPDSACKIgnoredDubious */
LINUX_MIB_TCPMIGRATEREQSUCCESS, /* TCPMigrateReqSuccess */
LINUX_MIB_TCPMIGRATEREQFAILURE, /* TCPMigrateReqFailure */
+ LINUX_MIB_TCPECNREHASH, /* TCPECNRehash */
__LINUX_MIB_MAX
};
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 8dee1547d820..e7a86a50838a 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,7 +10,7 @@ obj-y := route.o inetpeer.o protocol.o \
tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \
tcp_rate.o tcp_recovery.o tcp_ulp.o \
- tcp_offload.o datagram.o raw.o udp.o udplite.o \
+ tcp_offload.o tcp_plb.o datagram.o raw.o udp.o udplite.o \
udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \
inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 5386f460bd20..2c33c19b2423 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -297,6 +297,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPDSACKIgnoredDubious", LINUX_MIB_TCPDSACKIGNOREDDUBIOUS),
SNMP_MIB_ITEM("TCPMigrateReqSuccess", LINUX_MIB_TCPMIGRATEREQSUCCESS),
SNMP_MIB_ITEM("TCPMigrateReqFailure", LINUX_MIB_TCPMIGRATEREQFAILURE),
+ SNMP_MIB_ITEM("TCPECNRehash", LINUX_MIB_TCPECNREHASH),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 9b8a6db7a66b..74a2e916175e 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -40,6 +40,8 @@ static int one_day_secs = 24 * 3600;
static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
static unsigned int tcp_child_ehash_entries_max = 16 * 1024 * 1024;
+static int tcp_plb_max_rounds = 31;
+static int tcp_plb_max_cong_thresh = 256;
/* obsolete */
static int sysctl_tcp_low_latency __read_mostly;
@@ -1384,6 +1386,47 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
+ {
+ .procname = "tcp_plb_enabled",
+ .data = &init_net.ipv4.sysctl_tcp_plb_enabled,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "tcp_plb_cong_thresh",
+ .data = &init_net.ipv4.sysctl_tcp_plb_cong_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &tcp_plb_max_cong_thresh,
+ },
+ {
+ .procname = "tcp_plb_idle_rehash_rounds",
+ .data = &init_net.ipv4.sysctl_tcp_plb_idle_rehash_rounds,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra2 = &tcp_plb_max_rounds,
+ },
+ {
+ .procname = "tcp_plb_rehash_rounds",
+ .data = &init_net.ipv4.sysctl_tcp_plb_rehash_rounds,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra2 = &tcp_plb_max_rounds,
+ },
+ {
+ .procname = "tcp_plb_suspend_rto_sec",
+ .data = &init_net.ipv4.sysctl_tcp_plb_suspend_rto_sec,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ },
{ }
};
diff --git a/net/ipv4/tcp_bbr2.c b/net/ipv4/tcp_bbr2.c
index fa49e17c47ca..09319695da82 100644
--- a/net/ipv4/tcp_bbr2.c
+++ b/net/ipv4/tcp_bbr2.c
@@ -167,6 +167,7 @@ struct bbr {
initialized:1; /* has bbr_init() been called? */
u32 alpha_last_delivered; /* tp->delivered at alpha update */
u32 alpha_last_delivered_ce; /* tp->delivered_ce at alpha update */
+ struct tcp_plb_state plb;
/* Params configurable using setsockopt. Refer to correspoding
* module param for detailed description of params.
@@ -733,7 +734,11 @@ static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- if (event == CA_EVENT_TX_START && tp->app_limited) {
+ if (event == CA_EVENT_TX_START) {
+ tcp_plb_check_rehash(sk, &bbr->plb);
+
+ if (!tp->app_limited)
+ return;
bbr->idle_restart = 1;
bbr->ack_epoch_mstamp = tp->tcp_mstamp;
bbr->ack_epoch_acked = 0;
@@ -1389,7 +1394,7 @@ static void bbr2_check_ecn_too_high_in_startup(struct sock *sk, u32 ce_ratio)
}
}
-static void bbr2_update_ecn_alpha(struct sock *sk)
+static int bbr2_update_ecn_alpha(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
@@ -1398,14 +1403,14 @@ static void bbr2_update_ecn_alpha(struct sock *sk)
u32 gain;
if (bbr->params.ecn_factor == 0)
- return;
+ return -1;
delivered = tp->delivered - bbr->alpha_last_delivered;
delivered_ce = tp->delivered_ce - bbr->alpha_last_delivered_ce;
if (delivered == 0 || /* avoid divide by zero */
WARN_ON_ONCE(delivered < 0 || delivered_ce < 0)) /* backwards? */
- return;
+ return -1;
/* See if we should use ECN sender logic for this connection. */
if (!bbr->ecn_eligible && bbr_ecn_enable &&
@@ -1424,6 +1429,7 @@ static void bbr2_update_ecn_alpha(struct sock *sk)
bbr->alpha_last_delivered_ce = tp->delivered_ce;
bbr2_check_ecn_too_high_in_startup(sk, ce_ratio);
+ return (int)ce_ratio;
}
/* Each round trip of BBR_BW_PROBE_UP, double volume of probing data. */
@@ -2238,6 +2244,7 @@ static void bbr2_main(struct sock *sk, const struct rate_sample *rs)
struct bbr_context ctx = { 0 };
bool update_model = true;
u32 bw;
+ int ce_ratio = -1;
bbr->debug.event = '.'; /* init to default NOP (no event yet) */
@@ -2245,7 +2252,9 @@ static void bbr2_main(struct sock *sk, const struct rate_sample *rs)
if (bbr->round_start) {
bbr->rounds_since_probe =
min_t(s32, bbr->rounds_since_probe + 1, 0xFF);
- bbr2_update_ecn_alpha(sk);
+ ce_ratio = bbr2_update_ecn_alpha(sk);
+ tcp_plb_update_state(sk, &bbr->plb, ce_ratio);
+ tcp_plb_check_rehash(sk, &bbr->plb);
}
bbr->ecn_in_round |= rs->is_ece;
@@ -2408,6 +2417,7 @@ static void bbr2_init(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
+ const struct net *net = sock_net(sk);
bbr_init(sk); /* run shared init code for v1 and v2 */
@@ -2470,6 +2480,13 @@ static void bbr2_init(struct sock *sk)
bbr->alpha_last_delivered = 0;
bbr->alpha_last_delivered_ce = 0;
+ bbr->plb.enabled = 0;
+ bbr->plb.consec_cong_rounds = 0;
+ bbr->plb.pause_until = 0;
+ if ((tp->ecn_flags & TCP_ECN_OK) &&
+ net->ipv4.sysctl_tcp_plb_enabled)
+ bbr->plb.enabled = 1;
+
tp->fast_ack_mode = min_t(u32, 0x2U, bbr_fast_ack_mode);
if ((tp->ecn_flags & TCP_ECN_OK) && bbr_ecn_enable)
@@ -2614,6 +2631,7 @@ static void bbr2_set_state(struct sock *sk, u8 new_state)
struct rate_sample rs = { .losses = 1 };
struct bbr_context ctx = { 0 };
+ tcp_plb_update_state_upon_rto(sk, &bbr->plb);
bbr->prev_ca_state = TCP_CA_Loss;
bbr->full_bw = 0;
if (!bbr2_is_probing_bandwidth(sk) && bbr->inflight_lo == ~0U) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index da46357f501b..e05fde56cddf 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3207,6 +3207,13 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0;
atomic_set(&net->ipv4.tfo_active_disable_times, 0);
+ /* Set default values for PLB */
+ net->ipv4.sysctl_tcp_plb_enabled = 0; /* Disabled by default */
+ net->ipv4.sysctl_tcp_plb_cong_thresh = 128; /* 50% congestion */
+ net->ipv4.sysctl_tcp_plb_idle_rehash_rounds = 3;
+ net->ipv4.sysctl_tcp_plb_rehash_rounds = 12;
+ net->ipv4.sysctl_tcp_plb_suspend_rto_sec = 60;
+
/* Reno is always built in */
if (!net_eq(net, &init_net) &&
bpf_try_module_get(init_net.ipv4.tcp_congestion_control,
diff --git a/net/ipv4/tcp_plb.c b/net/ipv4/tcp_plb.c
new file mode 100644
index 000000000000..71b02c0404ce
--- /dev/null
+++ b/net/ipv4/tcp_plb.c
@@ -0,0 +1,100 @@
+/* Protective Load Balancing (PLB)
+ *
+ * PLB was designed to reduce link load imbalance across datacenter
+ * switches. PLB is a host-based optimization; it leverages congestion
+ * signals from the transport layer to randomly change the path of the
+ * connection experiencing sustained congestion. PLB prefers to repath
+ * after idle periods to minimize packet reordering. It repaths by
+ * changing the IPv6 Flow Label on the packets of a connection, which
+ * datacenter switches include as part of ECMP/WCMP hashing.
+ *
+ * PLB is described in detail in:
+ *
+ * Mubashir Adnan Qureshi, Yuchung Cheng, Qianwen Yin, Qiaobin Fu,
+ * Gautam Kumar, Masoud Moshref, Junhua Yan, Van Jacobson,
+ * David Wetherall,Abdul Kabbani:
+ * "PLB: Congestion Signals are Simple and Effective for
+ * Network Load Balancing"
+ * In ACM SIGCOMM 2022, Amsterdam Netherlands.
+ *
+ */
+
+#include <net/tcp.h>
+
+/* Called once per round-trip to update the count of consecutive congested
+ * rounds; cong_ratio < 0 means "no sample". Sysctls are read locklessly.
+ */
+void tcp_plb_update_state(const struct sock *sk, struct tcp_plb_state *plb,
+			  const int cong_ratio)
+{
+	struct net *net = sock_net(sk);
+
+	if (!plb->enabled || cong_ratio < 0)
+		return;
+
+	if (cong_ratio < READ_ONCE(net->ipv4.sysctl_tcp_plb_cong_thresh))
+		plb->consec_cong_rounds = 0;	/* streak is broken */
+	else if (plb->consec_cong_rounds <
+		 READ_ONCE(net->ipv4.sysctl_tcp_plb_rehash_rounds))
+		plb->consec_cong_rounds++;	/* capped at rehash_rounds */
+}
+EXPORT_SYMBOL_GPL(tcp_plb_update_state);
+
+/* Move the connection to another path once congestion has persisted for
+ * rehash_rounds rounds, or for idle_rehash_rounds (if non-zero) while nothing
+ * is in flight. Does nothing while a pause armed by a recent RTO is running.
+ */
+void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb)
+{
+	struct net *net = sock_net(sk);
+	bool can_idle_rehash, can_force_rehash;
+	u32 idle_rounds, max_suspend;
+
+	if (!plb->enabled)
+		return;
+
+	/* 0 means "not paused". tcp_jiffies32 can wrap, so besides an expired
+	 * deadline also drop one lying beyond the longest possible pause.
+	 */
+	max_suspend = 2 * READ_ONCE(net->ipv4.sysctl_tcp_plb_suspend_rto_sec) * HZ;
+	if (!before(tcp_jiffies32, plb->pause_until) ||
+	    before(tcp_jiffies32 + max_suspend, plb->pause_until))
+		plb->pause_until = 0;
+
+	idle_rounds = READ_ONCE(net->ipv4.sysctl_tcp_plb_idle_rehash_rounds);
+	can_idle_rehash = idle_rounds && !tcp_sk(sk)->packets_out &&
+			  plb->consec_cong_rounds >= idle_rounds;
+	can_force_rehash = plb->consec_cong_rounds >=
+			   READ_ONCE(net->ipv4.sysctl_tcp_plb_rehash_rounds);
+	if (!plb->pause_until && (can_idle_rehash || can_force_rehash)) {
+		sk_rethink_txhash(sk);
+		plb->consec_cong_rounds = 0;
+		tcp_sk(sk)->ecn_rehash++;
+		NET_INC_STATS(net, LINUX_MIB_TCPECNREHASH);
+	}
+}
+EXPORT_SYMBOL_GPL(tcp_plb_check_rehash);
+
+/* Upon RTO, disallow load balancing for a while, to avoid having load
+ * balancing decisions switch traffic to a black-holed path that was
+ * previously avoided with a sk_rethink_txhash() call at RTO time.
+ */
+void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb)
+{
+	struct net *net = sock_net(sk);
+	u32 pause;
+
+	if (!plb->enabled)
+		return;
+
+	/* Pause for suspend_rto_sec plus a random extra below that amount. */
+	pause = READ_ONCE(net->ipv4.sysctl_tcp_plb_suspend_rto_sec) * HZ;
+	pause += prandom_u32_max(pause);
+	plb->pause_until = tcp_jiffies32 + pause;
+
+	/* An RTO causes a sk_rethink_txhash() call that may switch us to a path
+	 * with completely different congestion, so restart the round count.
+	 */
+	plb->consec_cong_rounds = 0;
+}
+EXPORT_SYMBOL_GPL(tcp_plb_update_state_upon_rto);
--
2.34.1
From bba01f8fcecc39dd89274d977508dd50ba620f65 Mon Sep 17 00:00:00 2001
From: Mubashir Adnan Qureshi <mubashirq@google.com>
Date: Thu, 21 Jul 2022 19:03:30 +0000
Subject: [PATCH 27/27] net-test: tcp: plb: Add PLB tests
Add tests to check the following operations of PLB
1. Forced rehashing
2. Idle rehashing when no packets in flight
3. PLB state reset upon RTO
Change-Id: If74c7fb0107c8f9a4f740dcb2e488c73dd6cc08d
---
.../net/tcp/plb/bbr2-ecn-plb-idle-rehash.pkt | 59 +++++++
gtests/net/tcp/plb/bbr2-ecn-plb-rehash.pkt | 146 ++++++++++++++++++
.../tcp/plb/bbr2-ecn-plb-rto-suspend-off.pkt | 71 +++++++++
3 files changed, 276 insertions(+)
create mode 100644 gtests/net/tcp/plb/bbr2-ecn-plb-idle-rehash.pkt
create mode 100644 gtests/net/tcp/plb/bbr2-ecn-plb-rehash.pkt
create mode 100644 gtests/net/tcp/plb/bbr2-ecn-plb-rto-suspend-off.pkt
diff --git a/gtests/net/tcp/plb/bbr2-ecn-plb-idle-rehash.pkt b/gtests/net/tcp/plb/bbr2-ecn-plb-idle-rehash.pkt
new file mode 100644
index 000000000000..706fbf72a66c
--- /dev/null
+++ b/gtests/net/tcp/plb/bbr2-ecn-plb-idle-rehash.pkt
@@ -0,0 +1,59 @@
+// Verify BBR v2 changes flowlabel in ipv6 header when there are
+// net.ipv4.tcp_plb_idle_rehash_rounds=3 consecutive rounds with the ECN mark rate >=
+// net.ipv4.tcp_plb_cong_thresh=64(25%) and there are no packets in flight.
+
+`../common/defaults.sh
+sysctl -q net.ipv4.tcp_ecn=1 # fully enabled
+sysctl -q net.ipv4.tcp_congestion_control=bbr2
+sysctl -q net.ipv4.tcp_plb_enabled=1
+sysctl -q net.ipv4.tcp_plb_cong_thresh=64
+sysctl -q net.ipv4.tcp_plb_idle_rehash_rounds=3
+sysctl -q net.ipv4.tcp_plb_rehash_rounds=12
+echo 1 > /sys/module/tcp_bbr2/parameters/ecn_enable
+echo 0 > /sys/module/tcp_bbr2/parameters/ecn_max_rtt_us
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEW 0:0(0) ack 1 win 32792 <mss 1012,sackOK,TS val 200 ecr 0,nop,wscale 7>
+ +0 > (flowlabel 0x1) SE. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 200,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 1000 <nop,nop,TS val 200 ecr 100>
+ +0 accept(3, ..., ...) = 4
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > (flowlabel 0x1) P. 1:10001(10000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 10001 win 1000 <nop,nop,TS val 200 ecr 100>
+// no ECN mark, flowlabel won't change.
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > (flowlabel 0x1) P. 10001:20001(10000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 17001 win 1000 <nop,nop,TS val 200 ecr 100>
+ +0 < E. 1:1(0) ack 20001 win 1000 <nop,nop,TS val 200 ecr 100>
+// 3/10 packets ECN-marked, >25%, 1 congested round
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > (flowlabel 0x1) PW. 20001:30001(10000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 27001 win 1000 <nop,nop,TS val 200 ecr 100>
+ +0 < E. 1:1(0) ack 30001 win 1000 <nop,nop,TS val 200 ecr 100>
+// 3/10 packets ECN-marked, >25%, 2 consecutive congested rounds
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > (flowlabel 0x1) PW. 30001:40001(10000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 37001 win 1000 <nop,nop,TS val 200 ecr 100>
+ +0 < E. 1:1(0) ack 40001 win 1000 <nop,nop,TS val 200 ecr 100>
+// 3/10 packets ECN-marked, >25%, 3 consecutive congested rounds
+
+ +0 write(4, ..., 1000) = 1000
+ +0 > (flowlabel 0x1) PW. 40001:41001(1000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 41001 win 1000 <nop,nop,TS val 200 ecr 100>
+// This ACK started a new round. PLB observes that past three rounds were
+// congested and no packets are in flight so it changes the flowlabel.
+
+ +0 write(4, ..., 10000) = 10000
+// As expected, packets of idle write carry new flow label
+ +0 > (flowlabel 0x2) P. 41001:51001(10000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 51001 win 1000 <nop,nop,TS val 200 ecr 100>
diff --git a/gtests/net/tcp/plb/bbr2-ecn-plb-rehash.pkt b/gtests/net/tcp/plb/bbr2-ecn-plb-rehash.pkt
new file mode 100644
index 000000000000..af500ef08768
--- /dev/null
+++ b/gtests/net/tcp/plb/bbr2-ecn-plb-rehash.pkt
@@ -0,0 +1,146 @@
+// Verify BBR v2 changes flowlabel in ipv6 header if and only if there are
+// net.ipv4.tcp_plb_rehash_rounds=3 consecutive rounds with the ECN mark rate >=
+// net.ipv4.tcp_plb_cong_thresh=64(25%).
+
+`../common/defaults.sh
+sysctl -q net.ipv4.tcp_ecn=1 # fully enabled
+sysctl -q net.ipv4.tcp_congestion_control=bbr2
+sysctl -q net.ipv4.tcp_plb_enabled=1
+sysctl -q net.ipv4.tcp_plb_cong_thresh=64
+sysctl -q net.ipv4.tcp_plb_idle_rehash_rounds=0
+sysctl -q net.ipv4.tcp_plb_rehash_rounds=3
+echo 1 > /sys/module/tcp_bbr2/parameters/ecn_enable
+echo 0 > /sys/module/tcp_bbr2/parameters/ecn_max_rtt_us
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEW 0:0(0) ack 1 win 32792 <mss 1012,sackOK,TS val 200 ecr 0,nop,wscale 7>
+ +0 > (flowlabel 0x1) SE. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 200,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 1000 <nop,nop,TS val 200 ecr 100>
+ +0 accept(3, ..., ...) = 4
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > (flowlabel 0x1) P. 1:10001(10000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 10001 win 1000 <nop,nop,TS val 200 ecr 100>
+// no ECN mark, flowlabel won't change.
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > (flowlabel 0x1) P. 10001:20001(10000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 19001 win 1000 <nop,nop,TS val 200 ecr 100>
+ +0 < E. 1:1(0) ack 20001 win 1000 <nop,nop,TS val 200 ecr 100>
+// 1/10 packets ECN-marked. Not enough.
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > (flowlabel 0x1) PW. 20001:30001(10000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 28001 win 1000 <nop,nop,TS val 200 ecr 100>
+ +0 < E. 1:1(0) ack 30001 win 1000 <nop,nop,TS val 200 ecr 100>
+// 2/10 packets ECN-marked. Not enough.
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > (flowlabel 0x1) PW. 30001:40001(10000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 37001 win 1000 <nop,nop,TS val 200 ecr 100>
+ +0 < E. 1:1(0) ack 40001 win 1000 <nop,nop,TS val 200 ecr 100>
+// 3/10 packets ECN-marked, >25%, but only 1 round.
+
+ +0 write(4, ..., 1000) = 1000
+ +0 > (flowlabel 0x1) PW. 40001:41001(1000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 41001 win 1000 <nop,nop,TS val 200 ecr 100>
+// no ECN mark, flowlabel won't change.
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > (flowlabel 0x1) P. 41001:51001(10000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 48001 win 1000 <nop,nop,TS val 200 ecr 100>
+ +0 < E. 1:1(0) ack 51001 win 1000 <nop,nop,TS val 200 ecr 100>
+// 3/10 packets ECN-marked, >25%, but only 1 round.
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > (flowlabel 0x1) PW. 51001:61001(10000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 58001 win 1000 <nop,nop,TS val 200 ecr 100>
+ +0 < E. 1:1(0) ack 61001 win 1000 <nop,nop,TS val 200 ecr 100>
+// 3/10 packets ECN-marked, >25%, 2 consecutive rounds.
+
+ +0 write(4, ..., 1000) = 1000
+ +0 > (flowlabel 0x1) PW. 61001:62001(1000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 62001 win 1000 <nop,nop,TS val 200 ecr 100>
+// no ECN mark, flowlabel won't change.
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > (flowlabel 0x1) P. 62001:72001(10000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 69001 win 1000 <nop,nop,TS val 200 ecr 100>
+ +0 < E. 1:1(0) ack 72001 win 1000 <nop,nop,TS val 200 ecr 100>
+// 3/10 packets ECN-marked, >25%, but only 1 round.
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > (flowlabel 0x1) PW. 72001:82001(10000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 79001 win 1000 <nop,nop,TS val 200 ecr 100>
+ +0 < E. 1:1(0) ack 82001 win 1000 <nop,nop,TS val 200 ecr 100>
+// 3/10 packets ECN-marked, >25%, 2 consecutive rounds.
+
+ +0 write(4, ..., 1000) = 1000
+ +0 > (flowlabel 0x1) PW. 82001:83001(1000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 83001 win 1000 <nop,nop,TS val 200 ecr 100>
+// no ECN mark, flowlabel won't change.
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > (flowlabel 0x1) P. 83001:93001(10000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 90001 win 1000 <nop,nop,TS val 200 ecr 100>
+ +0 < E. 1:1(0) ack 93001 win 1000 <nop,nop,TS val 200 ecr 100>
+// 3/10 packets ECN-marked, >25%, but only 1 round.
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > (flowlabel 0x1) PW. 93001:103001(10000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 100001 win 1000 <nop,nop,TS val 200 ecr 100>
+ +0 < E. 1:1(0) ack 103001 win 1000 <nop,nop,TS val 200 ecr 100>
+// 3/10 packets ECN-marked, >25%, 2 consecutive rounds.
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > (flowlabel 0x1) PW. 103001:113001(10000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 110001 win 1000 <nop,nop,TS val 200 ecr 100>
+ +0 < E. 1:1(0) ack 113001 win 1000 <nop,nop,TS val 200 ecr 100>
+// 3/10 packets ECN-marked, >25%, 3 consecutive rounds.
+
+ +0 write(4, ..., 10000) = 10000
+// Since a new round hasn't started, flowlabel is not updated.
+ +0 > (flowlabel 0x1) PW. 113001:123001(10000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 120001 win 1000 <nop,nop,TS val 200 ecr 100>
+ +0 < E. 1:1(0) ack 123001 win 1000 <nop,nop,TS val 200 ecr 100>
+// 3/10 packets ECN-marked, >25%, 1 consecutive rounds.
+
+ +0 write(4, ..., 10000) = 10000
+// Verify the new flowlabel
+ +0 > (flowlabel 0x2) PW. 123001:133001(10000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 130001 win 1000 <nop,nop,TS val 200 ecr 100>
+ +0 < E. 1:1(0) ack 133001 win 1000 <nop,nop,TS val 200 ecr 100>
+// 3/10 packets ECN-marked, >25%, 2 consecutive rounds.
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > (flowlabel 0x2) PW. 133001:143001(10000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 140001 win 1000 <nop,nop,TS val 200 ecr 100>
+ +0 < E. 1:1(0) ack 143001 win 1000 <nop,nop,TS val 200 ecr 100>
+// 3/10 packets ECN-marked, >25%, 3 consecutive rounds. flowlabel will change in a new round.
+
+ +0 write(4, ..., 1000) = 1000
+// Since a new round hasn't started, flowlabel is not updated.
+ +0 > (flowlabel 0x2) PW. 143001:144001(1000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 144001 win 1000 <nop,nop,TS val 200 ecr 100>
+// A new round starts after the ack above, new flowlabel assigned from here.
+
+// Verify the new flowlabel
+ +0 write(4, ..., 1000) = 1000
+ +0 > (flowlabel 0x3) P. 144001:145001(1000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 145001 win 1000 <nop,nop,TS val 200 ecr 100>
+
+// Verify the new flowlabel sticks and doesn't change again
+ +0 write(4, ..., 1000) = 1000
+ +0 > (flowlabel 0x3) P. 145001:146001(1000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 146001 win 1000 <nop,nop,TS val 200 ecr 100>
+
+// Verify the new flowlabel sticks and doesn't change again
+ +0 write(4, ..., 1000) = 1000
+ +0 > (flowlabel 0x3) P. 146001:147001(1000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 147001 win 1000 <nop,nop,TS val 200 ecr 100>
diff --git a/gtests/net/tcp/plb/bbr2-ecn-plb-rto-suspend-off.pkt b/gtests/net/tcp/plb/bbr2-ecn-plb-rto-suspend-off.pkt
new file mode 100644
index 000000000000..d46e759e57e5
--- /dev/null
+++ b/gtests/net/tcp/plb/bbr2-ecn-plb-rto-suspend-off.pkt
@@ -0,0 +1,71 @@
+// Verify BBR v2 changes flowlabel in ipv6 header when there are
+// net.ipv4.tcp_plb_rehash_rounds=3 consecutive rounds with
+// the ECN mark rate >= net.ipv4.tcp_plb_cong_thresh=64(25%) after
+// an RTO resets PLB state. This test confirms that PLB state is reset
+// after an RTO. TLP is disabled for this test.
+
+`../common/defaults.sh
+sysctl -q net.ipv4.tcp_ecn=1 # fully enabled
+sysctl -q net.ipv4.tcp_congestion_control=bbr2
+sysctl -q net.ipv4.tcp_plb_enabled=1
+sysctl -q net.ipv4.tcp_plb_cong_thresh=64
+sysctl -q net.ipv4.tcp_plb_idle_rehash_rounds=0
+sysctl -q net.ipv4.tcp_plb_rehash_rounds=3
+sysctl -q net.ipv4.tcp_plb_suspend_rto_sec=0
+sysctl -q net.ipv4.tcp_early_retrans=0
+sysctl -q net.ipv4.tcp_recovery=0
+echo 1 > /sys/module/tcp_bbr2/parameters/ecn_enable
+echo 0 > /sys/module/tcp_bbr2/parameters/ecn_max_rtt_us
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEW 0:0(0) ack 1 win 32792 <mss 1012,sackOK,TS val 200 ecr 0,nop,wscale 7>
+ +0 > (flowlabel 0x1) SE. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 200,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 1000 <nop,nop,TS val 200 ecr 100>
+ +0 accept(3, ..., ...) = 4
+
+ +0 write(4, ..., 1000) = 1000
+ +0 > (flowlabel 0x1) P. 1:1001(1000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < E. 1:1(0) ack 1001 win 1000 <nop,nop,TS val 200 ecr 100>
+// ECN mark, 1 congested round
+
+ +0 write(4, ..., 1000) = 1000
+ +0 > (flowlabel 0x1) PW. 1001:2001(1000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < E. 1:1(0) ack 2001 win 1000 <nop,nop,TS val 200 ecr 100>
+// ECN mark, 2 consecutive congested rounds.
+
+ +0 write(4, ..., 1000) = 1000
+ +0 > (flowlabel 0x1) PW. 2001:3001(1000) ack 1 <nop,nop,TS val 100 ecr 200>
+// Flowlabel should be changed after kernel retransmit
++.2~+.22 > (flowlabel 0x2) P. 2001:3001(1000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < E. 1:1(0) ack 3001 win 1000 <nop,nop,TS val 200 ecr 100>
+// PLB state should reset, 1 congested round
+
+ +0 write(4, ..., 1000) = 1000
+ +0 > (flowlabel 0x2) PW. 3001:4001(1000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < E. 1:1(0) ack 4001 win 1000 <nop,nop,TS val 200 ecr 100>
+// ECN mark, 2 consecutive congested rounds.
+
+ +0 write(4, ..., 1000) = 1000
+ +0 > (flowlabel 0x2) PW. 4001:5001(1000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < E. 1:1(0) ack 5001 win 1000 <nop,nop,TS val 200 ecr 100>
+// ECN mark, 3 consecutive congested rounds. Flowlabel should change in next round.
+
+ +0 write(4, ..., 1000) = 1000
+ +0 > (flowlabel 0x3) PW. 5001:6001(1000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 6001 win 1000 <nop,nop,TS val 200 ecr 100>
+
+ +0 write(4, ..., 1000) = 1000
+// Verify new flow label
+ +0 > (flowlabel 0x3) P. 6001:7001(1000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 7001 win 1000 <nop,nop,TS val 200 ecr 100>
+
+ +0 write(4, ..., 1000) = 1000
+// Verify new flow label sticks
+ +0 > (flowlabel 0x3) P. 7001:8001(1000) ack 1 <nop,nop,TS val 100 ecr 200>
+ +0 < . 1:1(0) ack 8001 win 1000 <nop,nop,TS val 200 ecr 100>
--
2.34.1