diff --git a/6.10/target/linux/generic/hack-6.10/200-tools_portability.patch b/6.10/target/linux/generic/hack-6.10/200-tools_portability.patch deleted file mode 100644 index 5d2b20dc..00000000 --- a/6.10/target/linux/generic/hack-6.10/200-tools_portability.patch +++ /dev/null @@ -1,97 +0,0 @@ -From a7ae4ed0a3951c45d4a59ee575951b64ae4a23fb Mon Sep 17 00:00:00 2001 -From: Felix Fietkau -Date: Tue, 7 May 2024 12:22:15 +0200 -Subject: [PATCH] kernel: fix tools build breakage on macos with x86 - -Signed-off-by: Felix Fietkau ---- ---- a/tools/scripts/Makefile.include -+++ b/tools/scripts/Makefile.include -@@ -72,8 +72,6 @@ $(call allow-override,CXX,$(CROSS_COMPIL - $(call allow-override,STRIP,$(CROSS_COMPILE)strip) - endif - --CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?) -- - ifneq ($(LLVM),) - HOSTAR ?= $(LLVM_PREFIX)llvm-ar$(LLVM_SUFFIX) - HOSTCC ?= $(LLVM_PREFIX)clang$(LLVM_SUFFIX) -@@ -84,6 +82,9 @@ HOSTCC ?= gcc - HOSTLD ?= ld - endif - -+CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?) -+HOSTCC_NO_CLANG := $(shell $(HOSTCC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?) -+ - # Some tools require Clang, LLC and/or LLVM utils - CLANG ?= clang - LLC ?= llc -@@ -92,8 +93,9 @@ LLVM_OBJCOPY ?= llvm-objcopy - LLVM_STRIP ?= llvm-strip - - ifeq ($(CC_NO_CLANG), 1) --EXTRA_WARNINGS += -Wstrict-aliasing=3 -- -+ ifeq ($(HOSTCC_NO_CLANG), 1) -+ EXTRA_WARNINGS += -Wstrict-aliasing=3 -+ endif - else ifneq ($(CROSS_COMPILE),) - # Allow userspace to override CLANG_CROSS_FLAGS to specify their own - # sysroots and flags or to avoid the GCC call in pure Clang builds. ---- a/tools/include/linux/types.h -+++ b/tools/include/linux/types.h -@@ -56,6 +56,7 @@ typedef __s8 s8; - #define __user - #endif - #define __must_check -+#undef __cold - #define __cold - - typedef __u16 __bitwise __le16; ---- a/tools/objtool/include/objtool/objtool.h -+++ b/tools/objtool/include/objtool/objtool.h -@@ -12,6 +12,7 @@ - - #include - -+#undef __weak - #define __weak __attribute__((weak)) - - struct pv_state { ---- a/tools/include/asm-generic/bitops/fls.h -+++ b/tools/include/asm-generic/bitops/fls.h -@@ -2,6 +2,8 @@ - #ifndef _ASM_GENERIC_BITOPS_FLS_H_ - #define _ASM_GENERIC_BITOPS_FLS_H_ - -+#include -+ - /** - * fls - find last (most-significant) bit set - * @x: the word to search -@@ -10,6 +12,7 @@ - * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. - */ - -+#define fls __linux_fls - static __always_inline int fls(unsigned int x) - { - int r = 32; ---- a/tools/lib/string.c -+++ b/tools/lib/string.c -@@ -96,6 +96,7 @@ int strtobool(const char *s, bool *res) - * If libc has strlcpy() then that version will override this - * implementation: - */ -+#ifndef __APPLE__ - #ifdef __clang__ - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wignored-attributes" -@@ -114,6 +115,7 @@ size_t __weak strlcpy(char *dest, const - #ifdef __clang__ - #pragma clang diagnostic pop - #endif -+#endif - - /** - * skip_spaces - Removes leading whitespace from @str. 
diff --git a/6.10/target/linux/generic/hack-6.10/420-mtd-support-OpenWrt-s-MTD_ROOTFS_ROOT_DEV.patch b/6.10/target/linux/generic/hack-6.10/420-mtd-support-OpenWrt-s-MTD_ROOTFS_ROOT_DEV.patch deleted file mode 100644 index c32d8ec1..00000000 --- a/6.10/target/linux/generic/hack-6.10/420-mtd-support-OpenWrt-s-MTD_ROOTFS_ROOT_DEV.patch +++ /dev/null @@ -1,24 +0,0 @@ -From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= -Date: Mon, 7 Nov 2022 23:48:24 +0100 -Subject: [PATCH] mtd: support OpenWrt's MTD_ROOTFS_ROOT_DEV -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This allows setting ROOT_DEV to MTD partition named "rootfs". - -Signed-off-by: Rafał Miłecki ---- - ---- a/drivers/mtd/mtdcore.c -+++ b/drivers/mtd/mtdcore.c -@@ -801,7 +801,8 @@ int add_mtd_device(struct mtd_info *mtd) - - mutex_unlock(&mtd_table_mutex); - -- if (of_property_read_bool(mtd_get_of_node(mtd), "linux,rootfs")) { -+ if (of_property_read_bool(mtd_get_of_node(mtd), "linux,rootfs") || -+ (IS_ENABLED(CONFIG_MTD_ROOTFS_ROOT_DEV) && !strcmp(mtd->name, "rootfs") && ROOT_DEV == 0)) { - if (IS_BUILTIN(CONFIG_MTD)) { - pr_info("mtd: setting mtd%d (%s) as root device\n", mtd->index, mtd->name); - ROOT_DEV = MKDEV(MTD_BLOCK_MAJOR, mtd->index); diff --git a/6.10/target/linux/generic/hack-6.10/700-swconfig_switch_drivers.patch b/6.10/target/linux/generic/hack-6.10/700-swconfig_switch_drivers.patch deleted file mode 100644 index 4428ebbb..00000000 --- a/6.10/target/linux/generic/hack-6.10/700-swconfig_switch_drivers.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 36e516290611e613aa92996cb4339561452695b4 Mon Sep 17 00:00:00 2001 -From: Felix Fietkau -Date: Fri, 7 Jul 2017 17:24:23 +0200 -Subject: net: swconfig: adds openwrt switch layer - -Signed-off-by: Felix Fietkau ---- - drivers/net/phy/Kconfig | 83 +++++++++++++++++++++++++++++++++++++++++++++++ - drivers/net/phy/Makefile | 15 +++++++++ - include/uapi/linux/Kbuild | 1 + - 3 files changed, 99 insertions(+) - ---- a/drivers/net/phy/Kconfig -+++ b/drivers/net/phy/Kconfig -@@ -66,6 +66,80 @@ config SFP - depends on HWMON || HWMON=n - select MDIO_I2C - -+comment "Switch configuration API + drivers" -+ -+config SWCONFIG -+ tristate "Switch configuration API" -+ help -+ Switch configuration API using netlink. This allows -+ you to configure the VLAN features of certain switches. -+ -+config SWCONFIG_LEDS -+ bool "Switch LED trigger support" -+ depends on (SWCONFIG && LEDS_TRIGGERS) -+ -+config ADM6996_PHY -+ tristate "Driver for ADM6996 switches" -+ select SWCONFIG -+ help -+ Currently supports the ADM6996FC and ADM6996M switches. -+ Support for FC is very limited. -+ -+config AR8216_PHY -+ tristate "Driver for Atheros AR8216/8327 switches" -+ select SWCONFIG -+ select ETHERNET_PACKET_MANGLE -+ -+config AR8216_PHY_LEDS -+ bool "Atheros AR8216 switch LED support" -+ depends on (AR8216_PHY && LEDS_CLASS) -+ -+source "drivers/net/phy/b53/Kconfig" -+ -+config IP17XX_PHY -+ tristate "Driver for IC+ IP17xx switches" -+ select SWCONFIG -+ -+config PSB6970_PHY -+ tristate "Lantiq XWAY Tantos (PSB6970) Ethernet switch" -+ select SWCONFIG -+ -+config RTL8306_PHY -+ tristate "Driver for Realtek RTL8306S switches" -+ select SWCONFIG -+ -+config RTL8366_SMI -+ tristate "Driver for the RTL8366 SMI interface" -+ depends on GPIOLIB -+ help -+ This module implements the SMI interface protocol which is used -+ by some RTL8366 ethernet switch devices via the generic GPIO API. 
-+ -+if RTL8366_SMI -+ -+config RTL8366_SMI_DEBUG_FS -+ bool "RTL8366 SMI interface debugfs support" -+ depends on DEBUG_FS -+ default n -+ -+config RTL8366S_PHY -+ tristate "Driver for the Realtek RTL8366S switch" -+ select SWCONFIG -+ -+config RTL8366RB_PHY -+ tristate "Driver for the Realtek RTL8366RB switch" -+ select SWCONFIG -+ -+config RTL8367_PHY -+ tristate "Driver for the Realtek RTL8367R/M switches" -+ select SWCONFIG -+ -+config RTL8367B_PHY -+ tristate "Driver fot the Realtek RTL8367R-VB switch" -+ select SWCONFIG -+ -+endif # RTL8366_SMI -+ - comment "MII PHY device drivers" - - config AIR_EN8811H_PHY ---- a/drivers/net/phy/Makefile -+++ b/drivers/net/phy/Makefile -@@ -26,6 +26,21 @@ libphy-$(CONFIG_LED_TRIGGER_PHY) += phy_ - obj-$(CONFIG_PHYLINK) += phylink.o - obj-$(CONFIG_PHYLIB) += libphy.o - -+obj-$(CONFIG_SWCONFIG) += swconfig.o -+obj-$(CONFIG_ADM6996_PHY) += adm6996.o -+obj-$(CONFIG_AR8216_PHY) += ar8xxx.o -+ar8xxx-y += ar8216.o -+ar8xxx-y += ar8327.o -+obj-$(CONFIG_SWCONFIG_B53) += b53/ -+obj-$(CONFIG_IP17XX_PHY) += ip17xx.o -+obj-$(CONFIG_PSB6970_PHY) += psb6970.o -+obj-$(CONFIG_RTL8306_PHY) += rtl8306.o -+obj-$(CONFIG_RTL8366_SMI) += rtl8366_smi.o -+obj-$(CONFIG_RTL8366S_PHY) += rtl8366s.o -+obj-$(CONFIG_RTL8366RB_PHY) += rtl8366rb.o -+obj-$(CONFIG_RTL8367_PHY) += rtl8367.o -+obj-$(CONFIG_RTL8367B_PHY) += rtl8367b.o -+ - obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += mii_timestamper.o - - obj-$(CONFIG_SFP) += sfp.o ---- a/include/linux/platform_data/b53.h -+++ b/include/linux/platform_data/b53.h -@@ -29,6 +29,9 @@ struct b53_platform_data { - u32 chip_id; - u16 enabled_ports; - -+ /* allow to specify an ethX alias */ -+ const char *alias; -+ - /* only used by MMAP'd driver */ - unsigned big_endian:1; - void __iomem *regs; diff --git a/6.10/target/linux/generic/hack-6.10/721-net-add-packet-mangeling.patch b/6.10/target/linux/generic/hack-6.10/721-net-add-packet-mangeling.patch deleted file mode 100644 index e1d4367a..00000000 --- a/6.10/target/linux/generic/hack-6.10/721-net-add-packet-mangeling.patch +++ /dev/null @@ -1,167 +0,0 @@ -From ffe387740bbe88dd88bbe04d6375902708003d6e Mon Sep 17 00:00:00 2001 -From: Felix Fietkau -Date: Fri, 7 Jul 2017 17:25:00 +0200 -Subject: net: add packet mangeling - -ar8216 switches have a hardware bug, which renders normal 802.1q support -unusable. Packet mangling is required to fix up the vlan for incoming -packets. 
- -Signed-off-by: Felix Fietkau ---- - include/linux/netdevice.h | 11 +++++++++++ - include/linux/skbuff.h | 14 ++++---------- - net/Kconfig | 6 ++++++ - net/core/dev.c | 20 +++++++++++++++----- - net/core/skbuff.c | 17 +++++++++++++++++ - net/ethernet/eth.c | 6 ++++++ - 6 files changed, 59 insertions(+), 15 deletions(-) - ---- a/include/linux/netdevice.h -+++ b/include/linux/netdevice.h -@@ -1758,6 +1758,7 @@ enum netdev_priv_flags { - IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), - IFF_CHANGE_PROTO_DOWN = BIT_ULL(32), - IFF_SEE_ALL_HWTSTAMP_REQUESTS = BIT_ULL(33), -+ IFF_NO_IP_ALIGN = BIT_ULL(34), - }; - - #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN -@@ -1791,6 +1792,7 @@ enum netdev_priv_flags { - #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE - #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER - #define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR -+#define IFF_NO_IP_ALIGN IFF_NO_IP_ALIGN - - /* Specifies the type of the struct net_device::ml_priv pointer */ - enum netdev_ml_priv_type { -@@ -2183,6 +2185,11 @@ struct net_device { - const struct tlsdev_ops *tlsdev_ops; - #endif - -+#ifdef CONFIG_ETHERNET_PACKET_MANGLE -+ void (*eth_mangle_rx)(struct net_device *dev, struct sk_buff *skb); -+ struct sk_buff *(*eth_mangle_tx)(struct net_device *dev, struct sk_buff *skb); -+#endif -+ - const struct header_ops *header_ops; - - unsigned char operstate; -@@ -2256,6 +2263,10 @@ struct net_device { - struct mctp_dev __rcu *mctp_ptr; - #endif - -+#ifdef CONFIG_ETHERNET_PACKET_MANGLE -+ void *phy_ptr; /* PHY device specific data */ -+#endif -+ - /* - * Cache lines mostly used on receive path (including eth_type_trans()) - */ ---- a/include/linux/skbuff.h -+++ b/include/linux/skbuff.h -@@ -3095,6 +3095,10 @@ static inline int pskb_trim(struct sk_bu - return (len < skb->len) ? __pskb_trim(skb, len) : 0; - } - -+extern struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev, -+ unsigned int length, gfp_t gfp); -+ -+ - /** - * pskb_trim_unique - remove end from a paged unique (not cloned) buffer - * @skb: buffer to alter -@@ -3260,16 +3264,6 @@ static inline struct sk_buff *dev_alloc_ - } - - --static inline struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev, -- unsigned int length, gfp_t gfp) --{ -- struct sk_buff *skb = __netdev_alloc_skb(dev, length + NET_IP_ALIGN, gfp); -- -- if (NET_IP_ALIGN && skb) -- skb_reserve(skb, NET_IP_ALIGN); -- return skb; --} -- - static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, - unsigned int length) - { ---- a/net/Kconfig -+++ b/net/Kconfig -@@ -26,6 +26,12 @@ menuconfig NET - - if NET - -+config ETHERNET_PACKET_MANGLE -+ bool -+ help -+ This option can be selected by phy drivers that need to mangle -+ packets going in or out of an ethernet device. 
-+ - config WANT_COMPAT_NETLINK_MESSAGES - bool - help ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -3597,6 +3597,11 @@ static int xmit_one(struct sk_buff *skb, - if (dev_nit_active(dev)) - dev_queue_xmit_nit(skb, dev); - -+#ifdef CONFIG_ETHERNET_PACKET_MANGLE -+ if (dev->eth_mangle_tx && !(skb = dev->eth_mangle_tx(dev, skb))) -+ return NETDEV_TX_OK; -+#endif -+ - len = skb->len; - trace_net_dev_start_xmit(skb, dev); - rc = netdev_start_xmit(skb, dev, txq, more); ---- a/net/core/skbuff.c -+++ b/net/core/skbuff.c -@@ -62,6 +62,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -844,6 +845,22 @@ skb_fail: - } - EXPORT_SYMBOL(__napi_alloc_skb); - -+struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev, -+ unsigned int length, gfp_t gfp) -+{ -+ struct sk_buff *skb = __netdev_alloc_skb(dev, length + NET_IP_ALIGN, gfp); -+ -+#ifdef CONFIG_ETHERNET_PACKET_MANGLE -+ if (dev && (dev->priv_flags & IFF_NO_IP_ALIGN)) -+ return skb; -+#endif -+ -+ if (NET_IP_ALIGN && skb) -+ skb_reserve(skb, NET_IP_ALIGN); -+ return skb; -+} -+EXPORT_SYMBOL(__netdev_alloc_skb_ip_align); -+ - void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, - int size, unsigned int truesize) - { ---- a/net/ethernet/eth.c -+++ b/net/ethernet/eth.c -@@ -159,6 +159,12 @@ __be16 eth_type_trans(struct sk_buff *sk - const struct ethhdr *eth; - - skb->dev = dev; -+ -+#ifdef CONFIG_ETHERNET_PACKET_MANGLE -+ if (dev->eth_mangle_rx) -+ dev->eth_mangle_rx(dev, skb); -+#endif -+ - skb_reset_mac_header(skb); - - eth = (struct ethhdr *)skb->data; diff --git a/6.10/target/linux/generic/hack-6.10/765-mxl-gpy-control-LED-reg-from-DT.patch b/6.10/target/linux/generic/hack-6.10/765-mxl-gpy-control-LED-reg-from-DT.patch deleted file mode 100644 index 51a03be2..00000000 --- a/6.10/target/linux/generic/hack-6.10/765-mxl-gpy-control-LED-reg-from-DT.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 94b90966095f3fa625897e8f53d215882f6e19b3 Mon Sep 17 00:00:00 2001 -From: David Bauer -Date: Sat, 11 Mar 2023 17:00:01 +0100 -Subject: [PATCH] mxl-gpy: control LED reg from DT - -Add dynamic configuration for the LED control registers on MXL PHYs. - -This patch has been tested with MaxLinear GPY211C. It is unlikely to be -accepted upstream, as upstream plans on integrating their own framework -for handling these LEDs. - -For the time being, use this hack to configure PHY driven device-LEDs to -show the correct state. - -A possible alternative might be to expose the LEDs using the kernel LED -framework and bind it to the netdevice. This might also be upstreamable, -although it is a considerable extra amount of work. 
- -Signed-off-by: David Bauer ---- - drivers/net/phy/mxl-gpy.c | 37 ++++++++++++++++++++++++++++++++++++- - 1 file changed, 36 insertions(+), 1 deletion(-) - ---- a/drivers/net/phy/mxl-gpy.c -+++ b/drivers/net/phy/mxl-gpy.c -@@ -10,6 +10,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -38,6 +39,7 @@ - #define PHY_MIISTAT 0x18 /* MII state */ - #define PHY_IMASK 0x19 /* interrupt mask */ - #define PHY_ISTAT 0x1A /* interrupt status */ -+#define PHY_LED 0x1B /* LED control */ - #define PHY_FWV 0x1E /* firmware version */ - - #define PHY_MIISTAT_SPD_MASK GENMASK(2, 0) -@@ -61,10 +63,15 @@ - PHY_IMASK_ADSC | \ - PHY_IMASK_ANC) - -+#define PHY_LED_NUM_LEDS 4 -+ - #define PHY_FWV_REL_MASK BIT(15) - #define PHY_FWV_MAJOR_MASK GENMASK(11, 8) - #define PHY_FWV_MINOR_MASK GENMASK(7, 0) - -+/* LED */ -+#define VSPEC1_LED(x) (0x1 + x) -+ - #define PHY_PMA_MGBT_POLARITY 0x82 - #define PHY_MDI_MDI_X_MASK GENMASK(1, 0) - #define PHY_MDI_MDI_X_NORMAL 0x3 -@@ -260,6 +267,35 @@ out: - return ret; - } - -+static int gpy_led_write(struct phy_device *phydev) -+{ -+ struct device_node *node = phydev->mdio.dev.of_node; -+ u32 led_regs[PHY_LED_NUM_LEDS]; -+ int i, ret; -+ u16 val = 0xff00; -+ -+ if (!IS_ENABLED(CONFIG_OF_MDIO)) -+ return 0; -+ -+ if (of_property_read_u32_array(node, "mxl,led-config", led_regs, PHY_LED_NUM_LEDS)) -+ return 0; -+ -+ if (of_property_read_bool(node, "mxl,led-drive-vdd")) -+ val &= 0x0fff; -+ -+ /* Enable LED function handling on all ports*/ -+ phy_write(phydev, PHY_LED, val); -+ -+ /* Write LED register values */ -+ for (i = 0; i < PHY_LED_NUM_LEDS; i++) { -+ ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_LED(i), (u16)led_regs[i]); -+ if (ret < 0) -+ return ret; -+ } -+ -+ return 0; -+} -+ - static int gpy_config_init(struct phy_device *phydev) - { - int ret; -@@ -271,7 +307,10 @@ static int gpy_config_init(struct phy_de - - /* Clear all pending interrupts */ - ret = phy_read(phydev, PHY_ISTAT); -- return ret < 0 ? ret : 0; -+ if (ret < 0) -+ return ret; -+ -+ return gpy_led_write(phydev); - } - - static int gpy_probe(struct phy_device *phydev) diff --git a/6.10/target/linux/generic/hack-6.10/790-SFP-GE-T-ignore-TX_FAULT.patch b/6.10/target/linux/generic/hack-6.10/790-SFP-GE-T-ignore-TX_FAULT.patch deleted file mode 100644 index d48f382c..00000000 --- a/6.10/target/linux/generic/hack-6.10/790-SFP-GE-T-ignore-TX_FAULT.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 7cc39a6bedbd85f3ff7e16845f310e4ce8d9833f Mon Sep 17 00:00:00 2001 -From: Daniel Golle -Date: Tue, 6 Sep 2022 00:31:19 +0100 -Subject: [PATCH] net: sfp: add quirk for ATS SFP-GE-T 1000Base-TX module -To: netdev@vger.kernel.org, - linux-kernel@vger.kernel.org, - Russell King , - Andrew Lunn , - Heiner Kallweit -Cc: David S. Miller , - Eric Dumazet , - Jakub Kicinski , - Paolo Abeni , - Josef Schlehofer - -This copper module comes with broken TX_FAULT indicator which must be -ignored for it to work. Implement ignoring TX_FAULT state bit also -during reset/insertion and mute the warning telling the user that the -module indicates TX_FAULT. 
- -Co-authored-by: Josef Schlehofer -Signed-off-by: Daniel Golle ---- - drivers/net/phy/sfp.c | 14 +++++++++++--- - 1 file changed, 11 insertions(+), 3 deletions(-) - ---- a/drivers/net/phy/sfp.c -+++ b/drivers/net/phy/sfp.c -@@ -471,6 +471,9 @@ static const struct sfp_quirk sfp_quirks - // FS 2.5G Base-T - SFP_QUIRK_M("FS", "SFP-2.5G-T", sfp_quirk_oem_2_5g), - -+ // OEM SFP-GE-T is 1000Base-T module -+ SFP_QUIRK_F("OEM", "SFP-GE-T", sfp_fixup_ignore_tx_fault), -+ - // Lantech 8330-262D-E can operate at 2500base-X, but incorrectly report - // 2500MBd NRZ in their EEPROM - SFP_QUIRK_M("Lantech", "8330-262D-E", sfp_quirk_2500basex), -@@ -2587,7 +2590,8 @@ static void sfp_sm_main(struct sfp *sfp, - * or t_start_up, so assume there is a fault. - */ - sfp_sm_fault(sfp, SFP_S_INIT_TX_FAULT, -- sfp->sm_fault_retries == N_FAULT_INIT); -+ !sfp->tx_fault_ignore && -+ (sfp->sm_fault_retries == N_FAULT_INIT)); - } else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) { - init_done: - /* Create mdiobus and start trying for PHY */ -@@ -2841,10 +2845,12 @@ static void sfp_check_state(struct sfp * - mutex_lock(&sfp->st_mutex); - state = sfp_get_state(sfp); - changed = state ^ sfp->state; -- if (sfp->tx_fault_ignore) -+ if (sfp->tx_fault_ignore) { - changed &= SFP_F_PRESENT | SFP_F_LOS; -- else -+ state &= ~SFP_F_TX_FAULT; -+ } else { - changed &= SFP_F_PRESENT | SFP_F_LOS | SFP_F_TX_FAULT; -+ } - - for (i = 0; i < GPIO_MAX; i++) - if (changed & BIT(i)) diff --git a/6.10/target/linux/generic/hack-6.10/800-GPIO-add-named-gpio-exports.patch b/6.10/target/linux/generic/hack-6.10/800-GPIO-add-named-gpio-exports.patch deleted file mode 100644 index 666dcfad..00000000 --- a/6.10/target/linux/generic/hack-6.10/800-GPIO-add-named-gpio-exports.patch +++ /dev/null @@ -1,173 +0,0 @@ -From cc809a441d8f2924f785eb863dfa6aef47a25b0b Mon Sep 17 00:00:00 2001 -From: John Crispin -Date: Tue, 12 Aug 2014 20:49:27 +0200 -Subject: [PATCH 30/36] GPIO: add named gpio exports - -Signed-off-by: John Crispin ---- a/drivers/gpio/gpiolib-of.c -+++ b/drivers/gpio/gpiolib-of.c -@@ -21,6 +21,8 @@ - - #include - #include -+#include -+#include - - #include "gpiolib.h" - #include "gpiolib-of.h" -@@ -1111,3 +1113,74 @@ void of_gpiochip_remove(struct gpio_chip - { - of_node_put(dev_of_node(&chip->gpiodev->dev)); - } -+ -+#ifdef CONFIG_GPIO_SYSFS -+ -+static struct of_device_id gpio_export_ids[] = { -+ { .compatible = "gpio-export" }, -+ { /* sentinel */ } -+}; -+ -+static int of_gpio_export_probe(struct platform_device *pdev) -+{ -+ struct device_node *np = pdev->dev.of_node; -+ struct device_node *cnp; -+ u32 val; -+ int nb = 0; -+ -+ for_each_child_of_node(np, cnp) { -+ const char *name = NULL; -+ int gpio; -+ bool dmc; -+ int max_gpio = 1; -+ int i; -+ -+ of_property_read_string(cnp, "gpio-export,name", &name); -+ -+ if (!name) -+ max_gpio = of_gpio_named_count(cnp, "gpios"); -+ -+ for (i = 0; i < max_gpio; i++) { -+ struct gpio_desc *desc; -+ unsigned flags = 0; -+ enum of_gpio_flags of_flags; -+ -+ desc = of_get_named_gpiod_flags(cnp, "gpios", i, &of_flags); -+ if (IS_ERR(desc)) -+ return PTR_ERR(desc); -+ gpio = desc_to_gpio(desc); -+ -+ if (of_flags & OF_GPIO_ACTIVE_LOW) -+ flags |= GPIOF_ACTIVE_LOW; -+ -+ if (!of_property_read_u32(cnp, "gpio-export,output", &val)) -+ flags |= val ? GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW; -+ else -+ flags |= GPIOF_IN; -+ -+ if (devm_gpio_request_one(&pdev->dev, gpio, flags, name ? 
name : of_node_full_name(np))) -+ continue; -+ -+ dmc = of_property_read_bool(cnp, "gpio-export,direction_may_change"); -+ gpio_export_with_name(gpio_to_desc(gpio), dmc, name); -+ nb++; -+ } -+ } -+ -+ dev_info(&pdev->dev, "%d gpio(s) exported\n", nb); -+ -+ return 0; -+} -+ -+static struct platform_driver gpio_export_driver = { -+ .driver = { -+ .name = "gpio-export", -+ .owner = THIS_MODULE, -+ .of_match_table = of_match_ptr(gpio_export_ids), -+ }, -+ .probe = of_gpio_export_probe, -+}; -+ -+module_platform_driver(gpio_export_driver); -+ -+#endif ---- a/include/linux/gpio/consumer.h -+++ b/include/linux/gpio/consumer.h -@@ -644,7 +644,10 @@ static inline struct gpio_desc *acpi_get - - #if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_GPIO_SYSFS) - -+int __gpiod_export(struct gpio_desc *desc, bool direction_may_change, const char *name); - int gpiod_export(struct gpio_desc *desc, bool direction_may_change); -+int gpio_export_with_name(struct gpio_desc *desc, bool direction_may_change, -+ const char *name); - int gpiod_export_link(struct device *dev, const char *name, - struct gpio_desc *desc); - void gpiod_unexport(struct gpio_desc *desc); -@@ -653,11 +656,25 @@ void gpiod_unexport(struct gpio_desc *de - - #include - -+static inline int __gpiod_export(struct gpio_desc *desc, -+ bool direction_may_change, -+ const char *name) -+{ -+ return -ENOSYS; -+} -+ - static inline int gpiod_export(struct gpio_desc *desc, - bool direction_may_change) - { - return -ENOSYS; - } -+ -+static inline int gpio_export_with_name(struct gpio_desc *desc, -+ bool direction_may_change, -+ const char *name) -+{ -+ return -ENOSYS; -+} - - static inline int gpiod_export_link(struct device *dev, const char *name, - struct gpio_desc *desc) ---- a/drivers/gpio/gpiolib-sysfs.c -+++ b/drivers/gpio/gpiolib-sysfs.c -@@ -557,7 +557,7 @@ static struct class gpio_class = { - * - * Returns zero on success, else an error. 
- */ --int gpiod_export(struct gpio_desc *desc, bool direction_may_change) -+int __gpiod_export(struct gpio_desc *desc, bool direction_may_change, const char *name) - { - struct gpio_chip *chip; - struct gpio_device *gdev; -@@ -619,6 +619,8 @@ int gpiod_export(struct gpio_desc *desc, - offset = gpio_chip_hwgpio(desc); - if (chip->names && chip->names[offset]) - ioname = chip->names[offset]; -+ if (name) -+ ioname = name; - - dev = device_create_with_groups(&gpio_class, &gdev->dev, - MKDEV(0, 0), data, gpio_groups, -@@ -640,8 +642,21 @@ err_unlock: - gpiod_dbg(desc, "%s: status %d\n", __func__, status); - return status; - } -+EXPORT_SYMBOL_GPL(__gpiod_export); -+ -+int gpiod_export(struct gpio_desc *desc, bool direction_may_change) -+{ -+ return __gpiod_export(desc, direction_may_change, NULL); -+} - EXPORT_SYMBOL_GPL(gpiod_export); - -+int gpio_export_with_name(struct gpio_desc *desc, bool direction_may_change, -+ const char *name) -+{ -+ return __gpiod_export(desc, direction_may_change, name); -+} -+EXPORT_SYMBOL_GPL(gpio_export_with_name); -+ - static int match_export(struct device *dev, const void *desc) - { - struct gpiod_data *data = dev_get_drvdata(dev); diff --git a/6.10/target/linux/generic/hack-6.10/810-bcma-ssb-fallback-sprom.patch b/6.10/target/linux/generic/hack-6.10/810-bcma-ssb-fallback-sprom.patch deleted file mode 100644 index 9375a721..00000000 --- a/6.10/target/linux/generic/hack-6.10/810-bcma-ssb-fallback-sprom.patch +++ /dev/null @@ -1,187 +0,0 @@ -From e4d708702e6c98f2111e33201a264d6788564cb2 Mon Sep 17 00:00:00 2001 -From: OpenWrt community -Date: Fri, 12 May 2023 11:08:43 +0200 -Subject: [PATCH] ssb_sprom: add generic kernel support for Broadcom Fallback SPROMs - ---- - drivers/bcma/Kconfig | 4 ++++ - drivers/bcma/Makefile | 1 + - drivers/bcma/bcma_private.h | 4 ++++ - drivers/bcma/main.c | 8 ++++++++ - drivers/bcma/sprom.c | 23 ++++++++++++++--------- - drivers/ssb/Kconfig | 5 +++++ - drivers/ssb/Makefile | 1 + - drivers/ssb/main.c | 8 ++++++++ - drivers/ssb/sprom.c | 12 +++++++++++- - drivers/ssb/ssb_private.h | 4 ++++ - 10 files changed, 60 insertions(+), 10 deletions(-) - ---- a/drivers/bcma/Kconfig -+++ b/drivers/bcma/Kconfig -@@ -18,6 +18,10 @@ config BCMA_BLOCKIO - bool - default y - -+config BCMA_FALLBACK_SPROM -+ bool -+ default y -+ - config BCMA_HOST_PCI_POSSIBLE - bool - depends on PCI = y ---- a/drivers/bcma/Makefile -+++ b/drivers/bcma/Makefile -@@ -11,6 +11,7 @@ bcma-$(CONFIG_BCMA_DRIVER_PCI_HOSTMODE) - bcma-$(CONFIG_BCMA_DRIVER_MIPS) += driver_mips.o - bcma-$(CONFIG_BCMA_DRIVER_GMAC_CMN) += driver_gmac_cmn.o - bcma-$(CONFIG_BCMA_DRIVER_GPIO) += driver_gpio.o -+bcma-$(CONFIG_BCMA_FALLBACK_SPROM) += fallback-sprom.o - bcma-$(CONFIG_BCMA_HOST_PCI) += host_pci.o - bcma-$(CONFIG_BCMA_HOST_SOC) += host_soc.o - obj-$(CONFIG_BCMA) += bcma.o ---- a/drivers/bcma/bcma_private.h -+++ b/drivers/bcma/bcma_private.h -@@ -38,6 +38,10 @@ int bcma_bus_resume(struct bcma_bus *bus - void bcma_detect_chip(struct bcma_bus *bus); - int bcma_bus_scan(struct bcma_bus *bus); - -+/* fallback-sprom.c */ -+int __init bcma_fbs_register(void); -+int bcma_get_fallback_sprom(struct bcma_bus *dev, struct ssb_sprom *out); -+ - /* sprom.c */ - int bcma_sprom_get(struct bcma_bus *bus); - ---- a/drivers/bcma/main.c -+++ b/drivers/bcma/main.c -@@ -671,6 +671,14 @@ static int __init bcma_modinit(void) - { - int err; - -+#ifdef CONFIG_BCMA_FALLBACK_SPROM -+ err = bcma_fbs_register(); -+ if (err) { -+ pr_err("Fallback SPROM initialization failed\n"); -+ err = 0; -+ } -+#endif /* 
CONFIG_BCMA_FALLBACK_SPROM */ -+ - err = bcma_init_bus_register(); - if (err) - return err; ---- a/drivers/bcma/sprom.c -+++ b/drivers/bcma/sprom.c -@@ -51,21 +51,26 @@ static int bcma_fill_sprom_with_fallback - { - int err; - -- if (!get_fallback_sprom) { -+ if (get_fallback_sprom) -+ err = get_fallback_sprom(bus, out); -+ -+#ifdef CONFIG_BCMA_FALLBACK_SPROM -+ if (!get_fallback_sprom || err) -+ err = bcma_get_fallback_sprom(bus, out); -+#else -+ if (!get_fallback_sprom) - err = -ENOENT; -- goto fail; -- } -+#endif /* CONFIG_BCMA_FALLBACK_SPROM */ - -- err = get_fallback_sprom(bus, out); -- if (err) -- goto fail; -+ if (err) { -+ bcma_warn(bus, "Using fallback SPROM failed (err %d)\n", err); -+ return err; -+ } - - bcma_debug(bus, "Using SPROM revision %d provided by platform.\n", - bus->sprom.revision); -+ - return 0; --fail: -- bcma_warn(bus, "Using fallback SPROM failed (err %d)\n", err); -- return err; - } - - /************************************************** ---- a/drivers/ssb/Kconfig -+++ b/drivers/ssb/Kconfig -@@ -25,6 +25,11 @@ if SSB - config SSB_SPROM - bool - -+config SSB_FALLBACK_SPROM -+ bool -+ depends on SSB_PCIHOST -+ default y -+ - # Support for Block-I/O. SELECT this from the driver that needs it. - config SSB_BLOCKIO - bool ---- a/drivers/ssb/Makefile -+++ b/drivers/ssb/Makefile -@@ -2,6 +2,7 @@ - # core - ssb-y += main.o scan.o - ssb-$(CONFIG_SSB_EMBEDDED) += embedded.o -+ssb-$(CONFIG_SSB_FALLBACK_SPROM) += fallback-sprom.o - ssb-$(CONFIG_SSB_SPROM) += sprom.o - - # host support ---- a/drivers/ssb/main.c -+++ b/drivers/ssb/main.c -@@ -1287,6 +1287,14 @@ static int __init ssb_modinit(void) - { - int err; - -+#ifdef CONFIG_SSB_FALLBACK_SPROM -+ err = ssb_fbs_register(); -+ if (err) { -+ pr_err("Fallback SPROM initialization failed\n"); -+ err = 0; -+ } -+#endif /* CONFIG_SSB_FALLBACK_SPROM */ -+ - /* See the comment at the ssb_is_early_boot definition */ - ssb_is_early_boot = 0; - err = bus_register(&ssb_bustype); ---- a/drivers/ssb/sprom.c -+++ b/drivers/ssb/sprom.c -@@ -180,10 +180,20 @@ int ssb_arch_register_fallback_sprom(int - - int ssb_fill_sprom_with_fallback(struct ssb_bus *bus, struct ssb_sprom *out) - { -+ int err; -+ -+ if (get_fallback_sprom) -+ err = get_fallback_sprom(bus, out); -+ -+#ifdef CONFIG_SSB_FALLBACK_SPROM -+ if (!get_fallback_sprom || err) -+ err = ssb_get_fallback_sprom(bus, out); -+#else - if (!get_fallback_sprom) - return -ENOENT; -+#endif /* CONFIG_SSB_FALLBACK_SPROM */ - -- return get_fallback_sprom(bus, out); -+ return err; - } - - /* https://bcm-v4.sipsolutions.net/802.11/IsSpromAvailable */ ---- a/drivers/ssb/ssb_private.h -+++ b/drivers/ssb/ssb_private.h -@@ -143,6 +143,10 @@ extern int ssb_bus_scan(struct ssb_bus * - extern void ssb_iounmap(struct ssb_bus *ssb); - - -+/* fallback-sprom.c */ -+int __init ssb_fbs_register(void); -+int ssb_get_fallback_sprom(struct ssb_bus *dev, struct ssb_sprom *out); -+ - /* sprom.c */ - extern - ssize_t ssb_attr_sprom_show(struct ssb_bus *bus, char *buf, diff --git a/6.10/target/linux/generic/hack-6.10/902-debloat_proc.patch b/6.10/target/linux/generic/hack-6.10/902-debloat_proc.patch deleted file mode 100644 index 2a311d32..00000000 --- a/6.10/target/linux/generic/hack-6.10/902-debloat_proc.patch +++ /dev/null @@ -1,419 +0,0 @@ -From 9e3f1d0805b2d919904dd9a4ff0d956314cc3cba Mon Sep 17 00:00:00 2001 -From: Felix Fietkau -Date: Sat, 8 Jul 2017 08:20:09 +0200 -Subject: debloat: procfs - -Signed-off-by: Felix Fietkau ---- - fs/locks.c | 2 ++ - fs/proc/Kconfig | 5 +++++ - fs/proc/consoles.c | 3 +++ - 
fs/proc/proc_tty.c | 11 ++++++++++- - include/net/snmp.h | 18 +++++++++++++++++- - ipc/msg.c | 3 +++ - ipc/sem.c | 2 ++ - ipc/shm.c | 2 ++ - ipc/util.c | 3 +++ - kernel/exec_domain.c | 2 ++ - kernel/irq/proc.c | 9 +++++++++ - kernel/time/timer_list.c | 2 ++ - mm/vmalloc.c | 2 ++ - mm/vmstat.c | 8 +++++--- - net/8021q/vlanproc.c | 6 ++++++ - net/core/net-procfs.c | 18 ++++++++++++------ - net/core/sock.c | 2 ++ - net/ipv4/fib_trie.c | 18 ++++++++++++------ - net/ipv4/proc.c | 3 +++ - net/ipv4/route.c | 3 +++ - 20 files changed, 105 insertions(+), 17 deletions(-) - ---- a/fs/locks.c -+++ b/fs/locks.c -@@ -2897,6 +2897,8 @@ static const struct seq_operations locks - - static int __init proc_locks_init(void) - { -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ return 0; - proc_create_seq_private("locks", 0, NULL, &locks_seq_operations, - sizeof(struct locks_iterator), NULL); - return 0; ---- a/fs/proc/Kconfig -+++ b/fs/proc/Kconfig -@@ -101,6 +101,11 @@ config PROC_CHILDREN - Say Y if you are running any user-space software which takes benefit from - this interface. For example, rkt is such a piece of software. - -+config PROC_STRIPPED -+ default n -+ depends on EXPERT -+ bool "Strip non-essential /proc functionality to reduce code size" -+ - config PROC_PID_ARCH_STATUS - def_bool n - depends on PROC_FS ---- a/fs/proc/consoles.c -+++ b/fs/proc/consoles.c -@@ -107,6 +107,9 @@ static const struct seq_operations conso - - static int __init proc_consoles_init(void) - { -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ return 0; -+ - proc_create_seq("consoles", 0, NULL, &consoles_op); - return 0; - } ---- a/fs/proc/proc_tty.c -+++ b/fs/proc/proc_tty.c -@@ -131,7 +131,10 @@ static const struct seq_operations tty_d - void proc_tty_register_driver(struct tty_driver *driver) - { - struct proc_dir_entry *ent; -- -+ -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ return; -+ - if (!driver->driver_name || driver->proc_entry || - !driver->ops->proc_show) - return; -@@ -148,6 +151,9 @@ void proc_tty_unregister_driver(struct t - { - struct proc_dir_entry *ent; - -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ return; -+ - ent = driver->proc_entry; - if (!ent) - return; -@@ -162,6 +168,9 @@ void proc_tty_unregister_driver(struct t - */ - void __init proc_tty_init(void) - { -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ return; -+ - if (!proc_mkdir("tty", NULL)) - return; - proc_mkdir("tty/ldisc", NULL); /* Preserved: it's userspace visible */ ---- a/include/net/snmp.h -+++ b/include/net/snmp.h -@@ -124,6 +124,21 @@ struct linux_tls_mib { - #define DECLARE_SNMP_STAT(type, name) \ - extern __typeof__(type) __percpu *name - -+#ifdef CONFIG_PROC_STRIPPED -+#define __SNMP_STATS_DUMMY(mib) \ -+ do { (void) mib->mibs[0]; } while(0) -+ -+#define __SNMP_INC_STATS(mib, field) __SNMP_STATS_DUMMY(mib) -+#define SNMP_INC_STATS_ATOMIC_LONG(mib, field) __SNMP_STATS_DUMMY(mib) -+#define SNMP_INC_STATS(mib, field) __SNMP_STATS_DUMMY(mib) -+#define SNMP_DEC_STATS(mib, field) __SNMP_STATS_DUMMY(mib) -+#define __SNMP_ADD_STATS(mib, field, addend) __SNMP_STATS_DUMMY(mib) -+#define SNMP_ADD_STATS(mib, field, addend) __SNMP_STATS_DUMMY(mib) -+#define SNMP_UPD_PO_STATS(mib, basefield, addend) __SNMP_STATS_DUMMY(mib) -+#define __SNMP_UPD_PO_STATS(mib, basefield, addend) __SNMP_STATS_DUMMY(mib) -+ -+#else -+ - #define __SNMP_INC_STATS(mib, field) \ - __this_cpu_inc(mib->mibs[field]) - -@@ -154,8 +169,9 @@ struct linux_tls_mib { - __this_cpu_add(ptr[basefield##OCTETS], addend); \ - } while (0) - -+#endif - --#if BITS_PER_LONG==32 -+#if (BITS_PER_LONG==32) && 
!defined(CONFIG_PROC_STRIPPED) - - #define __SNMP_ADD_STATS64(mib, field, addend) \ - do { \ ---- a/ipc/msg.c -+++ b/ipc/msg.c -@@ -1370,6 +1370,9 @@ void __init msg_init(void) - { - msg_init_ns(&init_ipc_ns); - -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ return; -+ - ipc_init_proc_interface("sysvipc/msg", - " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", - IPC_MSG_IDS, sysvipc_msg_proc_show); ---- a/ipc/sem.c -+++ b/ipc/sem.c -@@ -268,6 +268,8 @@ void sem_exit_ns(struct ipc_namespace *n - void __init sem_init(void) - { - sem_init_ns(&init_ipc_ns); -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ return; - ipc_init_proc_interface("sysvipc/sem", - " key semid perms nsems uid gid cuid cgid otime ctime\n", - IPC_SEM_IDS, sysvipc_sem_proc_show); ---- a/ipc/shm.c -+++ b/ipc/shm.c -@@ -154,6 +154,8 @@ pure_initcall(ipc_ns_init); - - void __init shm_init(void) - { -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ return; - ipc_init_proc_interface("sysvipc/shm", - #if BITS_PER_LONG <= 32 - " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", ---- a/ipc/util.c -+++ b/ipc/util.c -@@ -141,6 +141,9 @@ void __init ipc_init_proc_interface(cons - struct proc_dir_entry *pde; - struct ipc_proc_iface *iface; - -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ return; -+ - iface = kmalloc(sizeof(*iface), GFP_KERNEL); - if (!iface) - return; ---- a/kernel/exec_domain.c -+++ b/kernel/exec_domain.c -@@ -29,6 +29,8 @@ static int execdomains_proc_show(struct - - static int __init proc_execdomains_init(void) - { -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ return 0; - proc_create_single("execdomains", 0, NULL, execdomains_proc_show); - return 0; - } ---- a/kernel/irq/proc.c -+++ b/kernel/irq/proc.c -@@ -341,6 +341,9 @@ void register_irq_proc(unsigned int irq, - void __maybe_unused *irqp = (void *)(unsigned long) irq; - char name [MAX_NAMELEN]; - -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED) && !IS_ENABLED(CONFIG_SMP)) -+ return; -+ - if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip)) - return; - -@@ -394,6 +397,9 @@ void unregister_irq_proc(unsigned int ir - { - char name [MAX_NAMELEN]; - -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED) && !IS_ENABLED(CONFIG_SMP)) -+ return; -+ - if (!root_irq_dir || !desc->dir) - return; - #ifdef CONFIG_SMP -@@ -432,6 +438,9 @@ void init_irq_proc(void) - unsigned int irq; - struct irq_desc *desc; - -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED) && !IS_ENABLED(CONFIG_SMP)) -+ return; -+ - /* create /proc/irq */ - root_irq_dir = proc_mkdir("irq", NULL); - if (!root_irq_dir) ---- a/kernel/time/timer_list.c -+++ b/kernel/time/timer_list.c -@@ -350,6 +350,8 @@ static int __init init_timer_list_procfs - { - struct proc_dir_entry *pe; - -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ return 0; - pe = proc_create_seq_private("timer_list", 0400, NULL, &timer_list_sops, - sizeof(struct timer_list_iter), NULL); - if (!pe) ---- a/mm/vmalloc.c -+++ b/mm/vmalloc.c -@@ -4439,6 +4439,8 @@ static const struct seq_operations vmall - - static int __init proc_vmalloc_init(void) - { -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ return 0; - if (IS_ENABLED(CONFIG_NUMA)) - proc_create_seq_private("vmallocinfo", 0400, NULL, - &vmalloc_op, ---- a/mm/vmstat.c -+++ b/mm/vmstat.c -@@ -2135,10 +2135,12 @@ void __init init_mm_internals(void) - start_shepherd_timer(); - #endif - #ifdef CONFIG_PROC_FS -- proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op); -- proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op); -+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) { -+ 
proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op); -+ proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op); -+ proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op); -+ } - proc_create_seq("vmstat", 0444, NULL, &vmstat_op); -- proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op); - #endif - } - ---- a/net/8021q/vlanproc.c -+++ b/net/8021q/vlanproc.c -@@ -93,6 +93,9 @@ void vlan_proc_cleanup(struct net *net) - { - struct vlan_net *vn = net_generic(net, vlan_net_id); - -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ return; -+ - if (vn->proc_vlan_conf) - remove_proc_entry(name_conf, vn->proc_vlan_dir); - -@@ -112,6 +115,9 @@ int __net_init vlan_proc_init(struct net - { - struct vlan_net *vn = net_generic(net, vlan_net_id); - -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ return 0; -+ - vn->proc_vlan_dir = proc_net_mkdir(net, name_root, net->proc_net); - if (!vn->proc_vlan_dir) - goto err; ---- a/net/core/net-procfs.c -+++ b/net/core/net-procfs.c -@@ -327,10 +327,12 @@ static int __net_init dev_proc_net_init( - if (!proc_create_net("dev", 0444, net->proc_net, &dev_seq_ops, - sizeof(struct seq_net_private))) - goto out; -- if (!proc_create_seq("softnet_stat", 0444, net->proc_net, -+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) && -+ !proc_create_seq("softnet_stat", 0444, net->proc_net, - &softnet_seq_ops)) - goto out_dev; -- if (!proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops, -+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) && -+ !proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops, - sizeof(struct seq_net_private))) - goto out_softnet; - -@@ -340,9 +342,11 @@ static int __net_init dev_proc_net_init( - out: - return rc; - out_ptype: -- remove_proc_entry("ptype", net->proc_net); -+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ remove_proc_entry("ptype", net->proc_net); - out_softnet: -- remove_proc_entry("softnet_stat", net->proc_net); -+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ remove_proc_entry("softnet_stat", net->proc_net); - out_dev: - remove_proc_entry("dev", net->proc_net); - goto out; -@@ -352,8 +356,10 @@ static void __net_exit dev_proc_net_exit - { - wext_proc_exit(net); - -- remove_proc_entry("ptype", net->proc_net); -- remove_proc_entry("softnet_stat", net->proc_net); -+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) { -+ remove_proc_entry("ptype", net->proc_net); -+ remove_proc_entry("softnet_stat", net->proc_net); -+ } - remove_proc_entry("dev", net->proc_net); - } - ---- a/net/core/sock.c -+++ b/net/core/sock.c -@@ -4145,6 +4145,8 @@ static __net_initdata struct pernet_oper - - static int __init proto_init(void) - { -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ return 0; - return register_pernet_subsys(&proto_net_ops); - } - ---- a/net/ipv4/fib_trie.c -+++ b/net/ipv4/fib_trie.c -@@ -3036,11 +3036,13 @@ static const struct seq_operations fib_r - - int __net_init fib_proc_init(struct net *net) - { -- if (!proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops, -+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) && -+ !proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops, - sizeof(struct fib_trie_iter))) - goto out1; - -- if (!proc_create_net_single("fib_triestat", 0444, net->proc_net, -+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) && -+ !proc_create_net_single("fib_triestat", 0444, net->proc_net, - fib_triestat_seq_show, NULL)) - goto out2; - -@@ -3051,17 +3053,21 @@ int __net_init fib_proc_init(struct net - return 0; - - out3: -- remove_proc_entry("fib_triestat", net->proc_net); -+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ remove_proc_entry("fib_triestat", 
net->proc_net); - out2: -- remove_proc_entry("fib_trie", net->proc_net); -+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ remove_proc_entry("fib_trie", net->proc_net); - out1: - return -ENOMEM; - } - - void __net_exit fib_proc_exit(struct net *net) - { -- remove_proc_entry("fib_trie", net->proc_net); -- remove_proc_entry("fib_triestat", net->proc_net); -+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) { -+ remove_proc_entry("fib_trie", net->proc_net); -+ remove_proc_entry("fib_triestat", net->proc_net); -+ } - remove_proc_entry("route", net->proc_net); - } - ---- a/net/ipv4/proc.c -+++ b/net/ipv4/proc.c -@@ -557,5 +557,8 @@ static __net_initdata struct pernet_oper - - int __init ip_misc_proc_init(void) - { -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ return 0; -+ - return register_pernet_subsys(&ip_proc_ops); - } ---- a/net/ipv4/route.c -+++ b/net/ipv4/route.c -@@ -380,6 +380,9 @@ static struct pernet_operations ip_rt_pr - - static int __init ip_rt_proc_init(void) - { -+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) -+ return 0; -+ - return register_pernet_subsys(&ip_rt_proc_ops); - } - ---- a/net/ipv4/inet_timewait_sock.c -+++ b/net/ipv4/inet_timewait_sock.c -@@ -266,7 +266,7 @@ void __inet_twsk_schedule(struct inet_ti - */ - - if (!rearm) { -- bool kill = timeo <= 4*HZ; -+ bool __maybe_unused kill = timeo <= 4*HZ; - - __NET_INC_STATS(twsk_net(tw), kill ? LINUX_MIB_TIMEWAITKILLED : - LINUX_MIB_TIMEWAITED); diff --git a/6.10/target/linux/generic/hack-6.10/910-kobject_uevent.patch b/6.10/target/linux/generic/hack-6.10/910-kobject_uevent.patch deleted file mode 100644 index c4c41ca4..00000000 --- a/6.10/target/linux/generic/hack-6.10/910-kobject_uevent.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 0d37e6edc09c99e683dd91ca0e83bbc0df8477b3 Mon Sep 17 00:00:00 2001 -From: Felix Fietkau -Date: Sun, 16 Jul 2017 16:56:10 +0200 -Subject: lib: add uevent_next_seqnum() - -Signed-off-by: Felix Fietkau ---- - include/linux/kobject.h | 5 +++++ - lib/kobject_uevent.c | 37 +++++++++++++++++++++++++++++++++++++ - 2 files changed, 42 insertions(+) - ---- a/lib/kobject_uevent.c -+++ b/lib/kobject_uevent.c -@@ -179,6 +179,18 @@ out: - return r; - } - -+u64 uevent_next_seqnum(void) -+{ -+ u64 seq; -+ -+ mutex_lock(&uevent_sock_mutex); -+ seq = ++uevent_seqnum; -+ mutex_unlock(&uevent_sock_mutex); -+ -+ return seq; -+} -+EXPORT_SYMBOL_GPL(uevent_next_seqnum); -+ - /** - * kobject_synth_uevent - send synthetic uevent with arguments - * diff --git a/6.10/target/linux/generic/hack-6.10/911-kobject_add_broadcast_uevent.patch b/6.10/target/linux/generic/hack-6.10/911-kobject_add_broadcast_uevent.patch deleted file mode 100644 index 7a21e73d..00000000 --- a/6.10/target/linux/generic/hack-6.10/911-kobject_add_broadcast_uevent.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 0d37e6edc09c99e683dd91ca0e83bbc0df8477b3 Mon Sep 17 00:00:00 2001 -From: Felix Fietkau -Date: Sun, 16 Jul 2017 16:56:10 +0200 -Subject: lib: add uevent_next_seqnum() - -Signed-off-by: Felix Fietkau ---- - include/linux/kobject.h | 5 +++++ - lib/kobject_uevent.c | 37 +++++++++++++++++++++++++++++++++++++ - 2 files changed, 42 insertions(+) - ---- a/include/linux/kobject.h -+++ b/include/linux/kobject.h -@@ -32,6 +32,8 @@ - #define UEVENT_NUM_ENVP 64 /* number of env pointers */ - #define UEVENT_BUFFER_SIZE 2048 /* buffer for the variables */ - -+struct sk_buff; -+ - #ifdef CONFIG_UEVENT_HELPER - /* path to the userspace helper executed on an event */ - extern char uevent_helper[]; -@@ -219,4 +221,7 @@ int kobject_synth_uevent(struct kobject - __printf(2, 3) - int 
add_uevent_var(struct kobj_uevent_env *env, const char *format, ...); - -+int broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group, -+ gfp_t allocation); -+ - #endif /* _KOBJECT_H_ */ ---- a/lib/kobject_uevent.c -+++ b/lib/kobject_uevent.c -@@ -691,6 +691,43 @@ int add_uevent_var(struct kobj_uevent_en - EXPORT_SYMBOL_GPL(add_uevent_var); - - #if defined(CONFIG_NET) -+int broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group, -+ gfp_t allocation) -+{ -+ struct uevent_sock *ue_sk; -+ int err = 0; -+ -+ /* send netlink message */ -+ mutex_lock(&uevent_sock_mutex); -+ list_for_each_entry(ue_sk, &uevent_sock_list, list) { -+ struct sock *uevent_sock = ue_sk->sk; -+ struct sk_buff *skb2; -+ -+ skb2 = skb_clone(skb, allocation); -+ if (!skb2) -+ break; -+ -+ err = netlink_broadcast(uevent_sock, skb2, pid, group, -+ allocation); -+ if (err) -+ break; -+ } -+ mutex_unlock(&uevent_sock_mutex); -+ -+ kfree_skb(skb); -+ return err; -+} -+#else -+int broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group, -+ gfp_t allocation) -+{ -+ kfree_skb(skb); -+ return 0; -+} -+#endif -+EXPORT_SYMBOL_GPL(broadcast_uevent); -+ -+#if defined(CONFIG_NET) - static int uevent_net_broadcast(struct sock *usk, struct sk_buff *skb, - struct netlink_ext_ack *extack) - { diff --git a/6.10/target/linux/generic/hack-6.10/930-Revert-Revert-Revert-driver-core-Set-fw_devlink-on-b.patch b/6.10/target/linux/generic/hack-6.10/930-Revert-Revert-Revert-driver-core-Set-fw_devlink-on-b.patch deleted file mode 100644 index 1c5fb11f..00000000 --- a/6.10/target/linux/generic/hack-6.10/930-Revert-Revert-Revert-driver-core-Set-fw_devlink-on-b.patch +++ /dev/null @@ -1,30 +0,0 @@ -From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= -Date: Tue, 19 Jul 2022 06:17:48 +0200 -Subject: [PATCH] Revert "Revert "Revert "driver core: Set fw_devlink=on by - default""" -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This reverts commit ea718c699055c8566eb64432388a04974c43b2ea. - -With of_platform_populate() called for MTD partitions that commit breaks -probing devices which reference MTD in device tree. - -Link: https://lore.kernel.org/all/696cb2da-20b9-b3dd-46d9-de4bf91a1506@gmail.com/T/#u -Signed-off-by: Rafał Miłecki ---- - drivers/base/core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/drivers/base/core.c -+++ b/drivers/base/core.c -@@ -1657,7 +1657,7 @@ static void device_links_purge(struct de - #define FW_DEVLINK_FLAGS_RPM (FW_DEVLINK_FLAGS_ON | \ - DL_FLAG_PM_RUNTIME) - --static u32 fw_devlink_flags = FW_DEVLINK_FLAGS_ON; -+static u32 fw_devlink_flags = FW_DEVLINK_FLAGS_PERMISSIVE; - static int __init fw_devlink_setup(char *arg) - { - if (!arg) diff --git a/6.10/target/linux/generic/hack-6.10/997-BBRv3.patch b/6.10/target/linux/generic/hack-6.10/997-BBRv3.patch deleted file mode 100644 index 3b21125e..00000000 --- a/6.10/target/linux/generic/hack-6.10/997-BBRv3.patch +++ /dev/null @@ -1,4011 +0,0 @@ -From eb92cc2649fa5e8b31fe0577a7d2f6820699a9cc Mon Sep 17 00:00:00 2001 -From: Neal Cardwell -Date: Tue, 11 Jun 2019 12:26:55 -0400 -Subject: [PATCH 01/18] net-tcp_bbr: broaden app-limited rate sample detection - -This commit is a bug fix for the Linux TCP app-limited -(application-limited) logic that is used for collecting rate -(bandwidth) samples. - -Previously the app-limited logic only looked for "bubbles" of -silence in between application writes, by checking at the start -of each sendmsg. But "bubbles" of silence can also happen before -retransmits: e.g. 
bubbles can happen between an application write -and a retransmit, or between two retransmits. - -Retransmits are triggered by ACKs or timers. So this commit checks -for bubbles of app-limited silence upon ACKs or timers. - -Why does this commit check for app-limited state at the start of -ACKs and timer handling? Because at that point we know whether -inflight was fully using the cwnd. During processing the ACK or -timer event we often change the cwnd; after changing the cwnd we -can't know whether inflight was fully using the old cwnd. - -Origin-9xx-SHA1: 3fe9b53291e018407780fb8c356adb5666722cbc -Change-Id: I37221506f5166877c2b110753d39bb0757985e68 -Signed-off-by: Alexandre Frade ---- - net/ipv4/tcp_input.c | 1 + - net/ipv4/tcp_timer.c | 1 + - 2 files changed, 2 insertions(+) - ---- a/net/ipv4/tcp_input.c -+++ b/net/ipv4/tcp_input.c -@@ -3841,6 +3841,7 @@ static int tcp_ack(struct sock *sk, cons - - prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una; - rs.prior_in_flight = tcp_packets_in_flight(tp); -+ tcp_rate_check_app_limited(sk); - - /* ts_recent update must be made after we are sure that the packet - * is in window. ---- a/net/ipv4/tcp_timer.c -+++ b/net/ipv4/tcp_timer.c -@@ -653,6 +653,7 @@ void tcp_write_timer_handler(struct sock - return; - } - -+ tcp_rate_check_app_limited(sk); - tcp_mstamp_refresh(tcp_sk(sk)); - event = icsk->icsk_pending; - -From 94abfc4e52198e003444ef5139df915514b8c207 Mon Sep 17 00:00:00 2001 -From: Neal Cardwell -Date: Sun, 24 Jun 2018 21:55:59 -0400 -Subject: [PATCH 02/18] net-tcp_bbr: v2: shrink delivered_mstamp, - first_tx_mstamp to u32 to free up 8 bytes - -Free up some space for tracking inflight and losses for each -bw sample, in upcoming commits. - -These timestamps are in microseconds, and are now stored in 32 -bits. So they can only hold time intervals up to roughly 2^12 = 4096 -seconds. But Linux TCP RTT and RTO tracking has the same 32-bit -microsecond implementation approach and resulting deployment -limitations. So this is not introducing a new limit. And these should -not be a limitation for the foreseeable future. 
- -Effort: net-tcp_bbr -Origin-9xx-SHA1: 238a7e6b5d51625fef1ce7769826a7b21b02ae55 -Change-Id: I3b779603797263b52a61ad57c565eb91fe42680c -Signed-off-by: Alexandre Frade ---- - include/net/tcp.h | 9 +++++++-- - net/ipv4/tcp_rate.c | 7 ++++--- - 2 files changed, 11 insertions(+), 5 deletions(-) - ---- a/include/net/tcp.h -+++ b/include/net/tcp.h -@@ -821,6 +821,11 @@ static inline u32 tcp_stamp_us_delta(u64 - return max_t(s64, t1 - t0, 0); - } - -+static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0) -+{ -+ return max_t(s32, t1 - t0, 0); -+} -+ - static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) - { - return tcp_ns_to_ts(skb->skb_mstamp_ns); -@@ -896,9 +901,9 @@ struct tcp_skb_cb { - /* pkts S/ACKed so far upon tx of skb, incl retrans: */ - __u32 delivered; - /* start of send pipeline phase */ -- u64 first_tx_mstamp; -+ u32 first_tx_mstamp; - /* when we reached the "delivered" count */ -- u64 delivered_mstamp; -+ u32 delivered_mstamp; - } tx; /* only used for outgoing skbs */ - union { - struct inet_skb_parm h4; ---- a/net/ipv4/tcp_rate.c -+++ b/net/ipv4/tcp_rate.c -@@ -101,8 +101,9 @@ void tcp_rate_skb_delivered(struct sock - /* Record send time of most recently ACKed packet: */ - tp->first_tx_mstamp = tx_tstamp; - /* Find the duration of the "send phase" of this window: */ -- rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp, -- scb->tx.first_tx_mstamp); -+ rs->interval_us = tcp_stamp32_us_delta( -+ tp->first_tx_mstamp, -+ scb->tx.first_tx_mstamp); - - } - /* Mark off the skb delivered once it's sacked to avoid being -@@ -155,7 +156,7 @@ void tcp_rate_gen(struct sock *sk, u32 d - * longer phase. - */ - snd_us = rs->interval_us; /* send phase */ -- ack_us = tcp_stamp_us_delta(tp->tcp_mstamp, -+ ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp, - rs->prior_mstamp); /* ack phase */ - rs->interval_us = max(snd_us, ack_us); - -From 497c9101c33baca0207cad3e7e328ccd72e3e62c Mon Sep 17 00:00:00 2001 -From: Neal Cardwell -Date: Sat, 5 Aug 2017 11:49:50 -0400 -Subject: [PATCH 03/18] net-tcp_bbr: v2: snapshot packets in flight at transmit - time and pass in rate_sample - -CC algorithms may want to snapshot the number of packets in flight at -transmit time and pass in rate_sample, to understand the relationship -between inflight and losses or ECN signals, to try to find the highest -inflight value that has acceptable levels of loss/ECN marking. - -We split out the code to set an skb's tx.in_flight field into its own -function, so that this code can be used for the TCP_REPAIR "fake send" -code path that inserts skbs into the rtx queue without sending them. 
- -Effort: net-tcp_bbr -Origin-9xx-SHA1: b3eb4f2d20efab4ca001f32c9294739036c493ea -Origin-9xx-SHA1: e880fc907d06ea7354333f60f712748ebce9497b -Origin-9xx-SHA1: 330f825a08a6fe92cef74d799cc468864c479f63 -Change-Id: I7314047d0ff14dd261a04b1969a46dc658c8836a -Signed-off-by: Alexandre Frade ---- - include/net/tcp.h | 6 ++++++ - net/ipv4/tcp_output.c | 1 + - net/ipv4/tcp_rate.c | 20 ++++++++++++++++++++ - 3 files changed, 27 insertions(+) - ---- a/include/net/tcp.h -+++ b/include/net/tcp.h -@@ -904,6 +904,10 @@ struct tcp_skb_cb { - u32 first_tx_mstamp; - /* when we reached the "delivered" count */ - u32 delivered_mstamp; -+#define TCPCB_IN_FLIGHT_BITS 20 -+#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1) -+ u32 in_flight:20, /* packets in flight at transmit */ -+ unused2:12; - } tx; /* only used for outgoing skbs */ - union { - struct inet_skb_parm h4; -@@ -1051,6 +1055,7 @@ struct rate_sample { - u64 prior_mstamp; /* starting timestamp for interval */ - u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ - u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */ -+ u32 tx_in_flight; /* packets in flight at starting timestamp */ - s32 delivered; /* number of packets delivered over interval */ - s32 delivered_ce; /* number of packets delivered w/ CE marks*/ - long interval_us; /* time for tp->delivered to incr "delivered" */ -@@ -1173,6 +1178,7 @@ static inline void tcp_ca_event(struct s - void tcp_set_ca_state(struct sock *sk, const u8 ca_state); - - /* From tcp_rate.c */ -+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb); - void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); - void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, - struct rate_sample *rs); ---- a/net/ipv4/tcp_output.c -+++ b/net/ipv4/tcp_output.c -@@ -2701,6 +2701,7 @@ static bool tcp_write_xmit(struct sock * - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true); - list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); - tcp_init_tso_segs(skb, mss_now); -+ tcp_set_tx_in_flight(sk, skb); - goto repair; /* Skip network transmission */ - } - ---- a/net/ipv4/tcp_rate.c -+++ b/net/ipv4/tcp_rate.c -@@ -34,6 +34,24 @@ - * ready to send in the write queue. - */ - -+void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ u32 in_flight; -+ -+ /* Check, sanitize, and record packets in flight after skb was sent. */ -+ in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb); -+ if (WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX, -+ "insane in_flight %u cc %s mss %u " -+ "cwnd %u pif %u %u %u %u\n", -+ in_flight, inet_csk(sk)->icsk_ca_ops->name, -+ tp->mss_cache, tp->snd_cwnd, -+ tp->packets_out, tp->retrans_out, -+ tp->sacked_out, tp->lost_out)) -+ in_flight = TCPCB_IN_FLIGHT_MAX; -+ TCP_SKB_CB(skb)->tx.in_flight = in_flight; -+} -+ - /* Snapshot the current delivery information in the skb, to generate - * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). - */ -@@ -67,6 +85,7 @@ void tcp_rate_skb_sent(struct sock *sk, - TCP_SKB_CB(skb)->tx.delivered = tp->delivered; - TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce; - TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 
1 : 0; -+ tcp_set_tx_in_flight(sk, skb); - } - - /* When an skb is sacked or acked, we fill in the rate sample with the (prior) -@@ -96,6 +115,7 @@ void tcp_rate_skb_delivered(struct sock - rs->prior_mstamp = scb->tx.delivered_mstamp; - rs->is_app_limited = scb->tx.is_app_limited; - rs->is_retrans = scb->sacked & TCPCB_RETRANS; -+ rs->tx_in_flight = scb->tx.in_flight; - rs->last_end_seq = scb->end_seq; - - /* Record send time of most recently ACKed packet: */ -From 9e07a8f79e42db42adcc961baabc6e142dd891ed Mon Sep 17 00:00:00 2001 -From: Neal Cardwell -Date: Thu, 12 Oct 2017 23:44:27 -0400 -Subject: [PATCH 04/18] net-tcp_bbr: v2: count packets lost over TCP rate - sampling interval - -For understanding the relationship between inflight and packet loss -signals, to try to find the highest inflight value that has acceptable -levels of packet losses. - -Effort: net-tcp_bbr -Origin-9xx-SHA1: 4527e26b2bd7756a88b5b9ef1ada3da33dd609ab -Change-Id: I594c2500868d9c530770e7ddd68ffc87c57f4fd5 -Signed-off-by: Alexandre Frade ---- - include/net/tcp.h | 5 ++++- - net/ipv4/tcp_rate.c | 3 +++ - 2 files changed, 7 insertions(+), 1 deletion(-) - ---- a/include/net/tcp.h -+++ b/include/net/tcp.h -@@ -908,6 +908,7 @@ struct tcp_skb_cb { - #define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1) - u32 in_flight:20, /* packets in flight at transmit */ - unused2:12; -+ u32 lost; /* packets lost so far upon tx of skb */ - } tx; /* only used for outgoing skbs */ - union { - struct inet_skb_parm h4; -@@ -1053,11 +1054,13 @@ struct ack_sample { - */ - struct rate_sample { - u64 prior_mstamp; /* starting timestamp for interval */ -+ u32 prior_lost; /* tp->lost at "prior_mstamp" */ - u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ - u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */ - u32 tx_in_flight; /* packets in flight at starting timestamp */ -+ s32 lost; /* number of packets lost over interval */ - s32 delivered; /* number of packets delivered over interval */ -- s32 delivered_ce; /* number of packets delivered w/ CE marks*/ -+ s32 delivered_ce; /* packets delivered w/ CE mark over interval */ - long interval_us; /* time for tp->delivered to incr "delivered" */ - u32 snd_interval_us; /* snd interval for delivered packets */ - u32 rcv_interval_us; /* rcv interval for delivered packets */ ---- a/net/ipv4/tcp_rate.c -+++ b/net/ipv4/tcp_rate.c -@@ -84,6 +84,7 @@ void tcp_rate_skb_sent(struct sock *sk, - TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; - TCP_SKB_CB(skb)->tx.delivered = tp->delivered; - TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce; -+ TCP_SKB_CB(skb)->tx.lost = tp->lost; - TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 
1 : 0;
-+	tcp_set_tx_in_flight(sk, skb);
- }
-@@ -110,6 +111,7 @@ void tcp_rate_skb_delivered(struct sock
- 	if (!rs->prior_delivered ||
- 	    tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
- 			       scb->end_seq, rs->last_end_seq)) {
-+		rs->prior_lost	     = scb->tx.lost;
- 		rs->prior_delivered_ce	= scb->tx.delivered_ce;
- 		rs->prior_delivered  = scb->tx.delivered;
- 		rs->prior_mstamp     = scb->tx.delivered_mstamp;
-@@ -165,6 +167,7 @@ void tcp_rate_gen(struct sock *sk, u32 d
- 		return;
- 	}
- 	rs->delivered   = tp->delivered - rs->prior_delivered;
-+	rs->lost        = tp->lost - rs->prior_lost;
- 
- 	rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce;
- 	/* delivered_ce occupies less than 32 bits in the skb control block */
-From 5f7df19fe56d5ddaa7d3ba34ded446e26a5725a1 Mon Sep 17 00:00:00 2001
-From: Neal Cardwell
-Date: Mon, 19 Nov 2018 13:48:36 -0500
-Subject: [PATCH 05/18] net-tcp_bbr: v2: export FLAG_ECE in rate_sample.is_ece
-
-For understanding the relationship between inflight and ECN signals,
-to try to find the highest inflight value that has acceptable levels
-of ECN marking.
-
-Effort: net-tcp_bbr
-Origin-9xx-SHA1: 3eba998f2898541406c2666781182200934965a8
-Change-Id: I3a964e04cee83e11649a54507043d2dfe769a3b3
-Signed-off-by: Alexandre Frade
----
- include/net/tcp.h    | 1 +
- net/ipv4/tcp_input.c | 1 +
- 2 files changed, 2 insertions(+)
-
---- a/include/net/tcp.h
-+++ b/include/net/tcp.h
-@@ -1072,6 +1072,7 @@ struct rate_sample {
- 	bool is_app_limited;	/* is sample from packet with bubble in pipe? */
- 	bool is_retrans;	/* is sample from retransmission? */
- 	bool is_ack_delayed;	/* is this (likely) a delayed ACK? */
-+	bool is_ece;		/* did this ACK have ECN marked? */
- };
- 
- struct tcp_congestion_ops {
---- a/net/ipv4/tcp_input.c
-+++ b/net/ipv4/tcp_input.c
-@@ -3940,6 +3940,7 @@ static int tcp_ack(struct sock *sk, cons
- 	delivered = tcp_newly_delivered(sk, delivered, flag);
- 	lost = tp->lost - lost;			/* freshly marked lost */
- 	rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
-+	rs.is_ece = !!(flag & FLAG_ECE);
- 	tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
- 	tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
- 	tcp_xmit_recovery(sk, rexmit);
-From 36f2ad7500c7fd665efbf38482fa838ba070acc0 Mon Sep 17 00:00:00 2001
-From: Neal Cardwell
-Date: Tue, 7 Aug 2018 21:52:06 -0400
-Subject: [PATCH 06/18] net-tcp_bbr: v2: introduce ca_ops->skb_marked_lost() CC
- module callback API
-
-For connections experiencing reordering, RACK can mark packets lost
-long after we receive the SACKs/ACKs hinting that the packets were
-actually lost.
-
-This means that CC modules cannot easily learn the volume of inflight
-data at which packet loss happens by looking at the current inflight
-or even the packets in flight when the most recently SACKed packet was
-sent. To learn this, CC modules need to know how many packets were in
-flight at the time lost packets were sent. This new callback, combined
-with TCP_SKB_CB(skb)->tx.in_flight, allows them to learn this.
-
-This also provides a consistent callback that is invoked whether
-packets are marked lost upon ACK processing, using the RACK reordering
-timer, or at RTO time.
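As a rough illustration of how a CC module might consume the new hook
together with TCP_SKB_CB(skb)->tx.in_flight (a hypothetical sketch, not
part of this patch series; "example_ca" and "lost_at_inflight" are
invented names), a module could record the lowest in-flight level at
which it has seen losses, however the loss was detected:

	struct example_ca {
		u32 lost_at_inflight; /* lowest tx.in_flight among lost skbs */
	};

	static void example_skb_marked_lost(struct sock *sk,
					    const struct sk_buff *skb)
	{
		struct example_ca *ca = inet_csk_ca(sk);
		u32 inflight = TCP_SKB_CB(skb)->tx.in_flight;

		/* Runs for losses found by ACK/SACK processing, by the
		 * RACK reordering timer, and at RTO time alike.
		 */
		if (!ca->lost_at_inflight || inflight < ca->lost_at_inflight)
			ca->lost_at_inflight = inflight;
	}

The module would register this via the new .skb_marked_lost field of
its struct tcp_congestion_ops.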
-
-Effort: net-tcp_bbr
-Origin-9xx-SHA1: afcbebe3374e4632ac6714d39e4dc8a8455956f4
-Change-Id: I54826ab53df636be537e5d3c618a46145d12d51a
-Signed-off-by: Alexandre Frade
----
- include/net/tcp.h    | 3 +++
- net/ipv4/tcp_input.c | 5 +++++
- 2 files changed, 8 insertions(+)
-
---- a/include/net/tcp.h
-+++ b/include/net/tcp.h
-@@ -1099,6 +1099,9 @@ struct tcp_congestion_ops {
- 	/* override sysctl_tcp_min_tso_segs */
- 	u32 (*min_tso_segs)(struct sock *sk);
- 
-+	/* react to a specific lost skb (optional) */
-+	void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb);
-+
- 	/* call when packets are delivered to update cwnd and pacing rate,
- 	 * after all the ca_state processing. (optional)
- 	 */
---- a/net/ipv4/tcp_input.c
-+++ b/net/ipv4/tcp_input.c
-@@ -1096,7 +1096,12 @@ static void tcp_verify_retransmit_hint(s
-  */
- static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb)
- {
-+	struct sock *sk = (struct sock *)tp;
-+	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-+
- 	tp->lost += tcp_skb_pcount(skb);
-+	if (ca_ops->skb_marked_lost)
-+		ca_ops->skb_marked_lost(sk, skb);
- }
- 
- void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
-From 8c74d21f1869f7bc7a5c8700172181b4a6e4f04d Mon Sep 17 00:00:00 2001
-From: Neal Cardwell
-Date: Wed, 1 May 2019 20:16:33 -0400
-Subject: [PATCH 07/18] net-tcp_bbr: v2: adjust skb tx.in_flight upon merge in
- tcp_shifted_skb()
-
-When tcp_shifted_skb() updates state as adjacent SACKed skbs are
-coalesced, previously the tx.in_flight was not adjusted, so we could
-get contradictory state where the skb's recorded pcount was bigger
-than the tx.in_flight (the number of segments that were in_flight
-after sending the skb).
-
-Normally, having a SACKed skb with contradictory pcount/tx.in_flight
-would not matter. However, with SACK reneging, the SACKed bit is
-removed, and an skb once again becomes eligible for retransmitting,
-fragmenting, SACKing, etc. Packetdrill testing verified the following
-sequence is possible in a kernel that does not have this commit:
-
- - skb N is SACKed
- - skb N+1 is SACKed and combined with skb N using tcp_shifted_skb()
-   - tcp_shifted_skb() will increase the pcount of prev,
-     but leave tx.in_flight as-is
- - so prev skb can have pcount > tx.in_flight
- - RTO, tcp_timeout_mark_lost(), detect reneg,
-   remove "SACKed" bit, mark skb N as lost
-   - find pcount of skb N is greater than its tx.in_flight
-
-I suspect this issue is what caused the bbr2_inflight_hi_from_lost_skb():
-	WARN_ON_ONCE(inflight_prev < 0)
-to fire in production machines using bbr2.
-
-Effort: net-tcp_bbr
-Origin-9xx-SHA1: 1a3e997e613d2dcf32b947992882854ebe873715
-Change-Id: I1b0b75c27519953430c7db51c6f358f104c7af55
-Signed-off-by: Alexandre Frade
----
- net/ipv4/tcp_input.c | 11 +++++++++++
- 1 file changed, 11 insertions(+)
-
---- a/net/ipv4/tcp_input.c
-+++ b/net/ipv4/tcp_input.c
-@@ -1482,6 +1482,17 @@ static bool tcp_shifted_skb(struct sock
- 	WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
- 	tcp_skb_pcount_add(skb, -pcount);
- 
-+	/* Adjust tx.in_flight as pcount is shifted from skb to prev.
*/ -+ if (WARN_ONCE(TCP_SKB_CB(skb)->tx.in_flight < pcount, -+ "prev in_flight: %u skb in_flight: %u pcount: %u", -+ TCP_SKB_CB(prev)->tx.in_flight, -+ TCP_SKB_CB(skb)->tx.in_flight, -+ pcount)) -+ TCP_SKB_CB(skb)->tx.in_flight = 0; -+ else -+ TCP_SKB_CB(skb)->tx.in_flight -= pcount; -+ TCP_SKB_CB(prev)->tx.in_flight += pcount; -+ - /* When we're adding to gso_segs == 1, gso_size will be zero, - * in theory this shouldn't be necessary but as long as DSACK - * code can come after this skb later on it's better to keep -From 545f96f640c4ff7f485ef4314b990b5a380aef2e Mon Sep 17 00:00:00 2001 -From: Neal Cardwell -Date: Wed, 1 May 2019 20:16:25 -0400 -Subject: [PATCH 08/18] net-tcp_bbr: v2: adjust skb tx.in_flight upon split in - tcp_fragment() - -When we fragment an skb that has already been sent, we need to update -the tx.in_flight for the first skb in the resulting pair ("buff"). - -Because we were not updating the tx.in_flight, the tx.in_flight value -was inconsistent with the pcount of the "buff" skb (tx.in_flight would -be too high). That meant that if the "buff" skb was lost, then -bbr2_inflight_hi_from_lost_skb() would calculate an inflight_hi value -that is too high. This could result in longer queues and higher packet -loss. - -Packetdrill testing verified that without this commit, when the second -half of an skb is SACKed and then later the first half of that skb is -marked lost, the calculated inflight_hi was incorrect. - -Effort: net-tcp_bbr -Origin-9xx-SHA1: 385f1ddc610798fab2837f9f372857438b25f874 -Origin-9xx-SHA1: a0eb099690af net-tcp_bbr: v2: fix tcp_fragment() tx.in_flight recomputation [prod feb 8 2021; use as a fixup] -Origin-9xx-SHA1: 885503228153ff0c9114e net-tcp_bbr: v2: introduce tcp_skb_tx_in_flight_is_suspicious() helper for warnings -Change-Id: I617f8cab4e9be7a0b8e8d30b047bf8645393354d -Signed-off-by: Alexandre Frade ---- - include/net/tcp.h | 15 +++++++++++++++ - net/ipv4/tcp_output.c | 26 +++++++++++++++++++++++++- - 2 files changed, 40 insertions(+), 1 deletion(-) - ---- a/include/net/tcp.h -+++ b/include/net/tcp.h -@@ -1198,6 +1198,21 @@ static inline bool tcp_skb_sent_after(u6 - return t1 > t2 || (t1 == t2 && after(seq1, seq2)); - } - -+/* If a retransmit failed due to local qdisc congestion or other local issues, -+ * then we may have called tcp_set_skb_tso_segs() to increase the number of -+ * segments in the skb without increasing the tx.in_flight. In all other cases, -+ * the tx.in_flight should be at least as big as the pcount of the sk_buff. We -+ * do not have the state to know whether a retransmit failed due to local qdisc -+ * congestion or other local issues, so to avoid spurious warnings we consider -+ * that any skb marked lost may have suffered that fate. -+ */ -+static inline bool tcp_skb_tx_in_flight_is_suspicious(u32 skb_pcount, -+ u32 skb_sacked_flags, -+ u32 tx_in_flight) -+{ -+ return (skb_pcount > tx_in_flight) && !(skb_sacked_flags & TCPCB_LOST); -+} -+ - /* These functions determine how the current flow behaves in respect of SACK - * handling. SACK is negotiated with the peer, and therefore it can vary - * between different flows. 
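To make the tx.in_flight recomputation in the tcp_output.c hunk below
concrete, a worked example with assumed numbers: suppose an
already-sent skb had old_factor = 10 segments and tx.in_flight = 100,
and tcp_fragment() splits it into a 4-segment head ("skb") and a
6-segment tail ("buff"). Then:

	inflight_prev    = 100 - 10    = 90   /* in flight before this skb */
	1st tx.in_flight = 90 + 4      = 94   /* head, as if sent alone */
	2nd tx.in_flight = 90 + 4 + 6  = 100  /* tail keeps original level */

so the tail fragment keeps the in-flight level recorded at transmit
time, while the head records the smaller level that was in flight once
only its own segments had been sent, keeping each fragment's pcount
consistent with its tx.in_flight.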
---- a/net/ipv4/tcp_output.c
-+++ b/net/ipv4/tcp_output.c
-@@ -1546,7 +1546,7 @@ int tcp_fragment(struct sock *sk, enum t
- {
- 	struct tcp_sock *tp = tcp_sk(sk);
- 	struct sk_buff *buff;
--	int old_factor;
-+	int old_factor, inflight_prev;
- 	long limit;
- 	int nlen;
- 	u8 flags;
-@@ -1621,6 +1621,30 @@ int tcp_fragment(struct sock *sk, enum t
- 
- 		if (diff)
- 			tcp_adjust_pcount(sk, skb, diff);
-+
-+		inflight_prev = TCP_SKB_CB(skb)->tx.in_flight - old_factor;
-+		if (inflight_prev < 0) {
-+			WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious(
-+					  old_factor,
-+					  TCP_SKB_CB(skb)->sacked,
-+					  TCP_SKB_CB(skb)->tx.in_flight),
-+				  "inconsistent: tx.in_flight: %u "
-+				  "old_factor: %d mss: %u sacked: %u "
-+				  "1st pcount: %d 2nd pcount: %d "
-+				  "1st len: %u 2nd len: %u ",
-+				  TCP_SKB_CB(skb)->tx.in_flight, old_factor,
-+				  mss_now, TCP_SKB_CB(skb)->sacked,
-+				  tcp_skb_pcount(skb), tcp_skb_pcount(buff),
-+				  skb->len, buff->len);
-+			inflight_prev = 0;
-+		}
-+		/* Set 1st tx.in_flight as if 1st were sent by itself: */
-+		TCP_SKB_CB(skb)->tx.in_flight = inflight_prev +
-+						tcp_skb_pcount(skb);
-+		/* Set 2nd tx.in_flight with new 1st and 2nd pcounts: */
-+		TCP_SKB_CB(buff)->tx.in_flight = inflight_prev +
-+						 tcp_skb_pcount(skb) +
-+						 tcp_skb_pcount(buff);
- 	}
- 
- 	/* Link BUFF into the send queue. */
-From 4baad1b6a9c1e9f84e0e0a40d789382e0826e49a Mon Sep 17 00:00:00 2001
-From: Yousuk Seung
-Date: Wed, 23 May 2018 17:55:54 -0700
-Subject: [PATCH 09/18] net-tcp: add new ca opts flag TCP_CONG_WANTS_CE_EVENTS
-
-Add a new ca opts flag TCP_CONG_WANTS_CE_EVENTS that allows a
-congestion control module to receive CE events.
-
-Currently congestion control modules have to set the TCP_CONG_NEEDS_ECN
-bit in the opts flag to receive CE events, but this may incur changes in
-ECN behavior elsewhere. This patch adds a new bit TCP_CONG_WANTS_CE_EVENTS
-that allows congestion control modules to receive CE events
-independently of TCP_CONG_NEEDS_ECN.
-
-Effort: net-tcp
-Origin-9xx-SHA1: 9f7e14716cde760bc6c67ef8ef7e1ee48501d95b
-Change-Id: I2255506985242f376d910c6fd37daabaf4744f24
-Signed-off-by: Alexandre Frade
----
- include/net/tcp.h    | 14 +++++++++++++-
- net/ipv4/tcp_input.c |  4 ++--
- 2 files changed, 15 insertions(+), 3 deletions(-)
-
---- a/include/net/tcp.h
-+++ b/include/net/tcp.h
-@@ -1034,7 +1034,11 @@ enum tcp_ca_ack_event_flags {
- #define TCP_CONG_NON_RESTRICTED 0x1
- /* Requires ECN/ECT set on all packets */
- #define TCP_CONG_NEEDS_ECN	0x2
--#define TCP_CONG_MASK	(TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
-+/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE).
*/ -+#define TCP_CONG_WANTS_CE_EVENTS 0x4 -+#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | \ -+ TCP_CONG_NEEDS_ECN | \ -+ TCP_CONG_WANTS_CE_EVENTS) - - union tcp_cc_info; - -@@ -1166,6 +1170,14 @@ static inline char *tcp_ca_get_name_by_k - } - #endif - -+static inline bool tcp_ca_wants_ce_events(const struct sock *sk) -+{ -+ const struct inet_connection_sock *icsk = inet_csk(sk); -+ -+ return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN | -+ TCP_CONG_WANTS_CE_EVENTS); -+} -+ - static inline bool tcp_ca_needs_ecn(const struct sock *sk) - { - const struct inet_connection_sock *icsk = inet_csk(sk); ---- a/net/ipv4/tcp_input.c -+++ b/net/ipv4/tcp_input.c -@@ -371,7 +371,7 @@ static void __tcp_ecn_check_ce(struct so - tcp_enter_quickack_mode(sk, 2); - break; - case INET_ECN_CE: -- if (tcp_ca_needs_ecn(sk)) -+ if (tcp_ca_wants_ce_events(sk)) - tcp_ca_event(sk, CA_EVENT_ECN_IS_CE); - - if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { -@@ -382,7 +382,7 @@ static void __tcp_ecn_check_ce(struct so - tp->ecn_flags |= TCP_ECN_SEEN; - break; - default: -- if (tcp_ca_needs_ecn(sk)) -+ if (tcp_ca_wants_ce_events(sk)) - tcp_ca_event(sk, CA_EVENT_ECN_NO_CE); - tp->ecn_flags |= TCP_ECN_SEEN; - break; -From d703f42b8914209f615f18cd2ba296f4f25d66a3 Mon Sep 17 00:00:00 2001 -From: Neal Cardwell -Date: Fri, 27 Sep 2019 17:10:26 -0400 -Subject: [PATCH 10/18] net-tcp: re-generalize TSO sizing in TCP CC module API - -Reorganize the API for CC modules so that the CC module once again -gets complete control of the TSO sizing decision. This is how the API -was set up around 2016 and the initial BBRv1 upstreaming. Later Eric -Dumazet simplified it. But with wider testing it now seems that to -avoid CPU regressions BBR needs to have a different TSO sizing -function. - -This is necessary to handle cases where there are many flows -bottlenecked on the sender host's NIC, in which case BBR's pacing rate -is much lower than CUBIC/Reno/DCTCP's. Why does this happen? Because -BBR's pacing rate adapts to the low bandwidth share each flow sees. By -contrast, CUBIC/Reno/DCTCP see no loss or ECN, so they grow a very -large cwnd, and thus large pacing rate and large TSO burst size. - -Change-Id: Ic8ccfdbe4010ee8d4bf6a6334c48a2fceb2171ea -Signed-off-by: Alexandre Frade ---- - include/net/tcp.h | 4 ++-- - net/ipv4/tcp_bbr.c | 37 ++++++++++++++++++++++++++----------- - net/ipv4/tcp_output.c | 11 +++++------ - 3 files changed, 33 insertions(+), 19 deletions(-) - ---- a/include/net/tcp.h -+++ b/include/net/tcp.h -@@ -1100,8 +1100,8 @@ struct tcp_congestion_ops { - /* hook for packet ack accounting (optional) */ - void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); - -- /* override sysctl_tcp_min_tso_segs */ -- u32 (*min_tso_segs)(struct sock *sk); -+ /* pick target number of segments per TSO/GSO skb (optional): */ -+ u32 (*tso_segs)(struct sock *sk, unsigned int mss_now); - - /* react to a specific lost skb (optional) */ - void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb); ---- a/net/ipv4/tcp_bbr.c -+++ b/net/ipv4/tcp_bbr.c -@@ -300,20 +300,35 @@ __bpf_kfunc static u32 bbr_min_tso_segs( - return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; - } - -+/* Return the number of segments BBR would like in a TSO/GSO skb, given -+ * a particular max gso size as a constraint. -+ */ -+static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now, -+ u32 gso_max_size) -+{ -+ u32 segs; -+ u64 bytes; -+ -+ /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). 
*/ -+ bytes = sk->sk_pacing_rate >> sk->sk_pacing_shift; -+ -+ bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER); -+ segs = max_t(u32, bytes / mss_now, bbr_min_tso_segs(sk)); -+ return segs; -+} -+ -+/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */ -+static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now) -+{ -+ return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size); -+} -+ -+/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */ - static u32 bbr_tso_segs_goal(struct sock *sk) - { - struct tcp_sock *tp = tcp_sk(sk); -- u32 segs, bytes; -- -- /* Sort of tcp_tso_autosize() but ignoring -- * driver provided sk_gso_max_size. -- */ -- bytes = min_t(unsigned long, -- sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift), -- GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER); -- segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); - -- return min(segs, 0x7FU); -+ return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_MAX_SIZE); - } - - /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ -@@ -1149,7 +1164,7 @@ static struct tcp_congestion_ops tcp_bbr - .undo_cwnd = bbr_undo_cwnd, - .cwnd_event = bbr_cwnd_event, - .ssthresh = bbr_ssthresh, -- .min_tso_segs = bbr_min_tso_segs, -+ .tso_segs = bbr_tso_segs, - .get_info = bbr_get_info, - .set_state = bbr_set_state, - }; ---- a/net/ipv4/tcp_output.c -+++ b/net/ipv4/tcp_output.c -@@ -2020,13 +2020,12 @@ static u32 tcp_tso_autosize(const struct - static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) - { - const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; -- u32 min_tso, tso_segs; -+ u32 tso_segs; - -- min_tso = ca_ops->min_tso_segs ? -- ca_ops->min_tso_segs(sk) : -- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); -- -- tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); -+ tso_segs = ca_ops->tso_segs ? -+ ca_ops->tso_segs(sk, mss_now) : -+ tcp_tso_autosize(sk, mss_now, -+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); - return min_t(u32, tso_segs, sk->sk_gso_max_segs); - } - -From d40e81b25a4ebe05eee16c335e9698c09a9dfb14 Mon Sep 17 00:00:00 2001 -From: Neal Cardwell -Date: Sat, 16 Nov 2019 13:16:25 -0500 -Subject: [PATCH 11/18] net-tcp: add fast_ack_mode=1: skip rwin check in - tcp_fast_ack_mode__tcp_ack_snd_check() - -Add logic for an experimental TCP connection behavior, enabled with -tp->fast_ack_mode = 1, which disables checking the receive window -before sending an ack in __tcp_ack_snd_check(). If this behavior is -enabled, the data receiver sends an ACK if the amount of data is > -RCV.MSS. - -Change-Id: Iaa0a0fd7108221f883137a79d5bfa724f1b096d4 -Signed-off-by: Alexandre Frade ---- - include/linux/tcp.h | 3 ++- - net/ipv4/tcp.c | 1 + - net/ipv4/tcp_cong.c | 1 + - net/ipv4/tcp_input.c | 5 +++-- - 4 files changed, 7 insertions(+), 3 deletions(-) - ---- a/include/linux/tcp.h -+++ b/include/linux/tcp.h -@@ -257,7 +257,8 @@ struct tcp_sock { - u8 compressed_ack; - u8 dup_ack_counter:2, - tlp_retrans:1, /* TLP is a retransmission */ -- unused:5; -+ fast_ack_mode:2, /* which fast ack mode ? 
*/ -+ unused:3; - u32 chrono_start; /* Start time in jiffies of a TCP chrono */ - u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ - u8 chrono_type:2, /* current chronograph type */ ---- a/net/ipv4/tcp.c -+++ b/net/ipv4/tcp.c -@@ -3090,6 +3090,7 @@ int tcp_disconnect(struct sock *sk, int - tp->rx_opt.dsack = 0; - tp->rx_opt.num_sacks = 0; - tp->rcv_ooopack = 0; -+ tp->fast_ack_mode = 0; - - - /* Clean up fastopen related fields */ ---- a/net/ipv4/tcp_cong.c -+++ b/net/ipv4/tcp_cong.c -@@ -241,6 +241,7 @@ void tcp_init_congestion_control(struct - struct inet_connection_sock *icsk = inet_csk(sk); - - tcp_sk(sk)->prior_ssthresh = 0; -+ tcp_sk(sk)->fast_ack_mode = 0; - if (icsk->icsk_ca_ops->init) - icsk->icsk_ca_ops->init(sk); - if (tcp_ca_needs_ecn(sk)) ---- a/net/ipv4/tcp_input.c -+++ b/net/ipv4/tcp_input.c -@@ -5578,13 +5578,14 @@ static void __tcp_ack_snd_check(struct s - - /* More than one full frame received... */ - if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && -+ (tp->fast_ack_mode == 1 || - /* ... and right edge of window advances far enough. - * (tcp_recvmsg() will send ACK otherwise). - * If application uses SO_RCVLOWAT, we want send ack now if - * we have not received enough bytes to satisfy the condition. - */ -- (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || -- __tcp_select_window(sk) >= tp->rcv_wnd)) || -+ (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || -+ __tcp_select_window(sk) >= tp->rcv_wnd))) || - /* We ACK each frame or... */ - tcp_in_quickack_mode(sk) || - /* Protocol state mandates a one-time immediate ACK */ -From f59611bea9c062fb44ecb9040f074cac359e2993 Mon Sep 17 00:00:00 2001 -From: Jianfeng Wang -Date: Fri, 19 Jun 2020 17:33:45 +0000 -Subject: [PATCH 12/18] net-tcp_bbr: v2: record app-limited status of - TLP-repaired flight - -When sending a TLP retransmit, record whether the outstanding flight -of data is application limited. This is important for congestion -control modules that want to respond to losses repaired by TLP -retransmits. This is important because the following scenarios convey -very different information: - (1) a packet loss with a small number of packets in flight; - (2) a packet loss with the maximum amount of data in flight allowed - by the CC module; - -Effort: net-tcp_bbr -Change-Id: Ic8ae567caa4e4bfd5fd82c3d4be12a5d9171655e -Signed-off-by: Alexandre Frade ---- - include/linux/tcp.h | 3 ++- - net/ipv4/tcp_output.c | 1 + - 2 files changed, 3 insertions(+), 1 deletion(-) - ---- a/include/linux/tcp.h -+++ b/include/linux/tcp.h -@@ -258,7 +258,8 @@ struct tcp_sock { - u8 dup_ack_counter:2, - tlp_retrans:1, /* TLP is a retransmission */ - fast_ack_mode:2, /* which fast ack mode ? */ -- unused:3; -+ tlp_orig_data_app_limited:1, /* app-limited before TLP rtx? 
*/ -+ unused:2; - u32 chrono_start; /* Start time in jiffies of a TCP chrono */ - u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ - u8 chrono_type:2, /* current chronograph type */ ---- a/net/ipv4/tcp_output.c -+++ b/net/ipv4/tcp_output.c -@@ -2938,6 +2938,7 @@ void tcp_send_loss_probe(struct sock *sk - if (WARN_ON(!skb || !tcp_skb_pcount(skb))) - goto rearm_timer; - -+ tp->tlp_orig_data_app_limited = TCP_SKB_CB(skb)->tx.is_app_limited; - if (__tcp_retransmit_skb(sk, skb, 1)) - goto rearm_timer; - -From 46024649ecd254cf303f2104406c6ffff3dce343 Mon Sep 17 00:00:00 2001 -From: Jianfeng Wang -Date: Tue, 16 Jun 2020 17:41:19 +0000 -Subject: [PATCH 13/18] net-tcp_bbr: v2: inform CC module of losses repaired by - TLP probe - -Before this commit, when there is a packet loss that creates a sequence -hole that is filled by a TLP loss probe, then tcp_process_tlp_ack() -only informs the congestion control (CC) module via a back-to-back entry -and exit of CWR. But some congestion control modules (e.g. BBR) do not -respond to CWR events. - -This commit adds a new CA event with which the core TCP stack notifies -the CC module when a loss is repaired by a TLP. This will allow CC -modules that do not use the CWR mechanism to have a custom handler for -such TLP recoveries. - -Effort: net-tcp_bbr -Change-Id: Ieba72332b401b329bff5a641d2b2043a3fb8f632 -Signed-off-by: Alexandre Frade ---- - include/net/tcp.h | 1 + - net/ipv4/tcp_input.c | 1 + - 2 files changed, 2 insertions(+) - ---- a/include/net/tcp.h -+++ b/include/net/tcp.h -@@ -1012,6 +1012,7 @@ enum tcp_ca_event { - CA_EVENT_LOSS, /* loss timeout */ - CA_EVENT_ECN_NO_CE, /* ECT set, but not CE marked */ - CA_EVENT_ECN_IS_CE, /* received CE marked IP packet */ -+ CA_EVENT_TLP_RECOVERY, /* a lost segment was repaired by TLP probe */ - }; - - /* Information about inbound ACK, passed to cong_ops->in_ack_event() */ ---- a/net/ipv4/tcp_input.c -+++ b/net/ipv4/tcp_input.c -@@ -3739,6 +3739,7 @@ static void tcp_process_tlp_ack(struct s - /* ACK advances: there was a loss, so reduce cwnd. Reset - * tlp_high_seq in tcp_init_cwnd_reduction() - */ -+ tcp_ca_event(sk, CA_EVENT_TLP_RECOVERY); - tcp_init_cwnd_reduction(sk); - tcp_set_ca_state(sk, TCP_CA_CWR); - tcp_end_cwnd_reduction(sk); -From 3996591ce3544d9defec725579123f5d4867524c Mon Sep 17 00:00:00 2001 -From: Neal Cardwell -Date: Mon, 21 Sep 2020 14:46:26 -0400 -Subject: [PATCH 14/18] net-tcp_bbr: v2: introduce is_acking_tlp_retrans_seq - into rate_sample - -Introduce is_acking_tlp_retrans_seq into rate_sample. This bool will -export to the CC module the knowledge of whether the current ACK -matched a TLP retransmit. - -Note that when this bool is true, we cannot yet tell (in general) whether -this ACK is for the original or the TLP retransmit. - -Effort: net-tcp_bbr -Change-Id: I2e6494332167e75efcbdc99bd5c119034e9c39b4 -Signed-off-by: Alexandre Frade ---- - include/net/tcp.h | 1 + - net/ipv4/tcp_input.c | 12 +++++++++--- - 2 files changed, 10 insertions(+), 3 deletions(-) - ---- a/include/net/tcp.h -+++ b/include/net/tcp.h -@@ -1076,6 +1076,7 @@ struct rate_sample { - u32 last_end_seq; /* end_seq of most recently ACKed packet */ - bool is_app_limited; /* is sample from packet with bubble in pipe? */ - bool is_retrans; /* is sample from retransmission? */ -+ bool is_acking_tlp_retrans_seq; /* ACKed a TLP retransmit sequence? */ - bool is_ack_delayed; /* is this (likely) a delayed ACK? */ - bool is_ece; /* did this ACK have ECN marked? 
*/ - }; ---- a/net/ipv4/tcp_input.c -+++ b/net/ipv4/tcp_input.c -@@ -3722,7 +3722,8 @@ static void tcp_replace_ts_recent(struct - /* This routine deals with acks during a TLP episode and ends an episode by - * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack - */ --static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) -+static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag, -+ struct rate_sample *rs) - { - struct tcp_sock *tp = tcp_sk(sk); - -@@ -3750,6 +3751,11 @@ static void tcp_process_tlp_ack(struct s - FLAG_NOT_DUP | FLAG_DATA_SACKED))) { - /* Pure dupack: original and TLP probe arrived; no loss */ - tp->tlp_high_seq = 0; -+ } else { -+ /* This ACK matches a TLP retransmit. We cannot yet tell if -+ * this ACK is for the original or the TLP retransmit. -+ */ -+ rs->is_acking_tlp_retrans_seq = 1; - } - } - -@@ -3933,7 +3939,7 @@ static int tcp_ack(struct sock *sk, cons - tcp_rack_update_reo_wnd(sk, &rs); - - if (tp->tlp_high_seq) -- tcp_process_tlp_ack(sk, ack, flag); -+ tcp_process_tlp_ack(sk, ack, flag, &rs); - - if (tcp_ack_is_dubious(sk, flag)) { - if (!(flag & (FLAG_SND_UNA_ADVANCED | -@@ -3977,7 +3983,7 @@ no_queue: - tcp_ack_probe(sk); - - if (tp->tlp_high_seq) -- tcp_process_tlp_ack(sk, ack, flag); -+ tcp_process_tlp_ack(sk, ack, flag, &rs); - return 1; - - old_ack: -From bcdadb3893c94dfde67954ec71eb983b6bdb08c1 Mon Sep 17 00:00:00 2001 -From: David Morley -Date: Fri, 14 Jul 2023 11:07:56 -0400 -Subject: [PATCH 15/18] tcp: introduce per-route feature RTAX_FEATURE_ECN_LOW - -Define and implement a new per-route feature, RTAX_FEATURE_ECN_LOW. - -This feature indicates that the given destination network is a -low-latency ECN environment, meaning both that ECN CE marks are -applied by the network using a low-latency marking threshold and also -that TCP endpoints provide precise per-data-segment ECN feedback in -ACKs (where the ACK ECE flag echoes the received CE status of all -newly-acknowledged data segments). This feature indication can be used -by congestion control algorithms to decide how to interpret ECN -signals over the given destination network. - -This feature is appropriate for datacenter-style ECN marking, such as -the ECN marking approach expected by DCTCP or BBR congestion control -modules. 
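For example, with an iproute2 that knows about this feature bit (the
"ecn_low" keyword is an assumption about a correspondingly patched
iproute2, not something this kernel patch provides), the feature could
be enabled for a datacenter subnet along these lines:

	ip route change 10.0.0.0/8 via 10.0.0.1 dev eth0 features ecn_low

TCP then picks the feature up per connection via
dst_feature(dst, RTAX_FEATURE_ECN_LOW), as in the tcp.h hunk below.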
-
-Signed-off-by: David Morley
-Signed-off-by: Neal Cardwell
-Signed-off-by: Yuchung Cheng
-Tested-by: David Morley
-Change-Id: I6bc06e9c6cb426fbae7243fc71c9a8c18175f5d3
-Signed-off-by: Alexandre Frade
----
- include/net/tcp.h              | 10 ++++++++++
- include/uapi/linux/rtnetlink.h |  4 +++-
- net/ipv4/tcp_minisocks.c       |  2 ++
- net/ipv4/tcp_output.c          |  6 ++++--
- 4 files changed, 19 insertions(+), 3 deletions(-)
-
---- a/include/net/tcp.h
-+++ b/include/net/tcp.h
-@@ -372,6 +372,7 @@ static inline void tcp_dec_quickack_mode
- #define	TCP_ECN_QUEUE_CWR	2
- #define	TCP_ECN_DEMAND_CWR	4
- #define	TCP_ECN_SEEN		8
-+#define	TCP_ECN_LOW		16
- 
- enum tcp_tw_status {
- 	TCP_TW_SUCCESS = 0,
-@@ -725,6 +726,15 @@ static inline void tcp_fast_path_check(s
- 
- u32 tcp_delack_max(const struct sock *sk);
- 
-+static inline void tcp_set_ecn_low_from_dst(struct sock *sk,
-+					    const struct dst_entry *dst)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+
-+	if (dst_feature(dst, RTAX_FEATURE_ECN_LOW))
-+		tp->ecn_flags |= TCP_ECN_LOW;
-+}
-+
- /* Compute the actual rto_min value */
- static inline u32 tcp_rto_min(struct sock *sk)
- {
---- a/include/uapi/linux/rtnetlink.h
-+++ b/include/uapi/linux/rtnetlink.h
-@@ -506,9 +506,11 @@ enum {
- #define RTAX_FEATURE_SACK	(1 << 1)
- #define RTAX_FEATURE_TIMESTAMP	(1 << 2)
- #define RTAX_FEATURE_ALLFRAG	(1 << 3)
-+#define RTAX_FEATURE_ECN_LOW	(1 << 4)
- 
- #define RTAX_FEATURE_MASK	(RTAX_FEATURE_ECN | RTAX_FEATURE_SACK | \
--				 RTAX_FEATURE_TIMESTAMP | RTAX_FEATURE_ALLFRAG)
-+				 RTAX_FEATURE_TIMESTAMP | RTAX_FEATURE_ALLFRAG \
-+				 | RTAX_FEATURE_ECN_LOW)
- 
- struct rta_session {
- 	__u8	proto;
---- a/net/ipv4/tcp_minisocks.c
-+++ b/net/ipv4/tcp_minisocks.c
-@@ -439,6 +439,8 @@ void tcp_ca_openreq_child(struct sock *s
- 	u32 ca_key = dst_metric(dst, RTAX_CC_ALGO);
- 	bool ca_got_dst = false;
- 
-+	tcp_set_ecn_low_from_dst(sk, dst);
-+
- 	if (ca_key != TCP_CA_UNSPEC) {
- 		const struct tcp_congestion_ops *ca;
- 
---- a/net/ipv4/tcp_output.c
-+++ b/net/ipv4/tcp_output.c
-@@ -332,10 +332,9 @@ static void tcp_ecn_send_syn(struct sock
- 	bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
- 	bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
- 		tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
-+	const struct dst_entry *dst = __sk_dst_get(sk);
- 
- 	if (!use_ecn) {
--		const struct dst_entry *dst = __sk_dst_get(sk);
--
- 		if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
- 			use_ecn = true;
- 	}
-@@ -347,6 +346,9 @@ static void tcp_ecn_send_syn(struct sock
- 		tp->ecn_flags = TCP_ECN_OK;
- 		if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
- 			INET_ECN_xmit(sk);
-+
-+		if (dst)
-+			tcp_set_ecn_low_from_dst(sk, dst);
- 	}
- }
- 
-From 6caa9d6b181856844e351866f186a1da3321c2b3 Mon Sep 17 00:00:00 2001
-From: Neal Cardwell
-Date: Tue, 11 Jun 2019 12:54:22 -0400
-Subject: [PATCH 16/18] net-tcp_bbr: v3: update TCP "bbr" congestion control
- module to BBRv3
-
-BBR v3 is an enhancement to the BBR v1 algorithm. It's designed to aim for lower
-queues, lower loss, and better Reno/CUBIC coexistence than BBR v1.
-
-BBR v3 maintains the core of BBR v1: an explicit model of the network
-path that is two-dimensional, adapting to estimate the (a) maximum
-available bandwidth and (b) maximum safe volume of data a flow can
-keep in-flight in the network. It maintains the estimated BDP as a
-core guide for estimating an appropriate level of in-flight data.
-
-BBR v3 makes several key enhancements:
-
-o Its bandwidth-probing time scale is adapted, within bounds, to allow improved
-coexistence with Reno and CUBIC.
The bandwidth-probing time scale is (a)
-extended dynamically based on estimated BDP to improve coexistence with
-Reno/CUBIC; (b) bounded by an interactive wall-clock time-scale to be more
-scalable and responsive than Reno and CUBIC.
-
-o Rather than being largely agnostic to loss and ECN marks, it explicitly uses
-loss and (DCTCP-style) ECN signals to maintain its model.
-
-o It aims for lower losses than v1 by adjusting its model to attempt to stay
-within loss rate and ECN mark rate bounds (loss_thresh and ecn_thresh,
-respectively).
-
-o It adapts to loss/ECN signals even when the application is running out of
-data ("application-limited"), in case the "application-limited" flow is also
-"network-limited" (the bw and/or inflight available to this flow is lower than
-previously estimated when the flow ran out of data).
-
-o It has a three-part model: the model explicitly tracks three operating
-points, where an operating point is a tuple: (bandwidth, inflight). The three
-operating points are:
-
-  o latest:	 the latest measurement from the current round trip
-  o upper bound: robust, optimistic, long-term upper bound
-  o lower bound: robust, conservative, short-term lower bound
-
-These are stored in the following state variables:
-
-  o latest:  bw_latest, inflight_latest
-  o lo:      bw_lo, inflight_lo
-  o hi:      bw_hi[2], inflight_hi
-
-To gain intuition about the meaning of the three operating points, it
-may help to consider the analogs in CUBIC, which has a somewhat
-analogous three-part model used by its probing state machine:
-
-  BBR param        CUBIC param
-  -----------      -------------
-  latest        ~  cwnd
-  lo            ~  ssthresh
-  hi            ~  last_max_cwnd
-
-The analogy is only a loose one, though, since the BBR operating
-points are calculated differently, and are 2-dimensional (bw,inflight)
-rather than CUBIC's one-dimensional notion of operating point
-(inflight).
-
-o It uses the three-part model to adapt the magnitude of its bandwidth
-to match the estimated space available in the buffer, rather than (as
-in BBR v1) assuming that it was always acceptable to place 0.25*BDP in
-the bottleneck buffer when probing (commodity datacenter switches
-commonly do not have that much buffer for WAN flows). When BBR v3
-estimates it hit a buffer limit during probing, its bandwidth probing
-then starts gently in case little space is still available in the
-buffer, and then accelerates, slowly at first and then rapidly if it
-can grow inflight without seeing congestion signals. In such cases,
-probing is bounded by inflight_hi + inflight_probe, where
-inflight_probe grows as: [0, 1, 2, 4, 8, 16,...]. This allows BBR to
-keep losses low and bounded if a bottleneck remains congested, while
-rapidly/scalably utilizing free bandwidth when it becomes available.
-
-o It has a slightly revised state machine, to achieve the goals above.
-  BBR_BW_PROBE_UP:     pushes up inflight to probe for bw/vol
-  BBR_BW_PROBE_DOWN:   drain excess inflight from the queue
-  BBR_BW_PROBE_CRUISE: use pipe, w/ headroom in queue/pipe
-  BBR_BW_PROBE_REFILL: try to refill the pipe again to 100%, leaving queue empty
-
-o The estimated BDP: BBR v3 continues to maintain an estimate of the
-path's two-way propagation delay, by tracking a windowed min_rtt, and
-coordinating (on an as-needed basis) to try to expose the two-way
-propagation delay by draining the bottleneck queue.
-
-BBR v3 continues to use its min_rtt and (currently-applicable) bandwidth
-estimate to estimate the current bandwidth-delay product.
The estimated BDP
-still provides one important guideline for bounding inflight data. However,
-because any min-filtered RTT and max-filtered bw inherently tend to both
-overestimate, the estimated BDP is often too high; in this case loss or ECN
-marks can ensue, in which case BBR v3 adjusts inflight_hi and inflight_lo to
-adapt its sending rate and inflight down to match the available capacity of the
-path.
-
-o Space: Note that ICSK_CA_PRIV_SIZE increased. This is because BBR v3
-requires more space. Note that much of the space is due to support for
-per-socket parameterization and debugging in this release, for research
-purposes. With that state removed, the full "struct bbr" is 140
-bytes, or 144 with padding. This is an increase of 40 bytes over the
-existing ca_priv space.
-
-o Code: BBR v3 reuses many pieces from BBR v1. But it omits the following
-  significant pieces:
-
-  o "packet conservation" (bbr_set_cwnd_to_recover_or_restore(),
-    bbr_can_grow_inflight())
-  o long-term bandwidth estimator ("policer mode")
-
-  The code layout tries to keep BBR v3 code near the bottom of the
-  file, so that v1-applicable code in the top does not accidentally
-  refer to v3 code.
-
-o Docs:
-  See the following docs for more details and diagrams describing the BBR v3
-  algorithm:
-  https://datatracker.ietf.org/meeting/104/materials/slides-104-iccrg-an-update-on-bbr-00
-  https://datatracker.ietf.org/meeting/102/materials/slides-102-iccrg-an-update-on-bbr-work-at-google-00
-
-o Internal notes:
-  For this upstream rebase, Neal started from:
-	git show fed518041ac6:net/ipv4/tcp_bbr.c > net/ipv4/tcp_bbr.c
-  then removed dev instrumentation (dynamic get/set for parameters)
-  and code that was only used by BBRv1
-
-Effort: net-tcp_bbr
-Origin-9xx-SHA1: 2c84098e60bed6d67dde23cd7538c51dee273102
-Change-Id: I125cf26ba2a7a686f2fa5e87f4c2afceb65f7a05
-Signed-off-by: Alexandre Frade
----
- include/net/inet_connection_sock.h |    4 +-
- include/net/tcp.h                  |    2 +-
- include/uapi/linux/inet_diag.h     |   23 +
- net/ipv4/Kconfig                   |   21 +-
- net/ipv4/tcp_bbr.c                 | 2217 +++++++++++++++++++++-------
- 5 files changed, 1742 insertions(+), 525 deletions(-)
-
---- a/include/net/inet_connection_sock.h
-+++ b/include/net/inet_connection_sock.h
-@@ -135,8 +135,8 @@ struct inet_connection_sock {
- 	u32			  icsk_probes_tstamp;
- 	u32			  icsk_user_timeout;
- 
--	u64			  icsk_ca_priv[104 / sizeof(u64)];
--#define ICSK_CA_PRIV_SIZE sizeof_field(struct inet_connection_sock, icsk_ca_priv)
-+#define ICSK_CA_PRIV_SIZE	  (144)
-+	u64			  icsk_ca_priv[ICSK_CA_PRIV_SIZE / sizeof(u64)];
- };
- 
- #define ICSK_TIME_RETRANS	1	/* Retransmit timer */
---- a/include/net/tcp.h
-+++ b/include/net/tcp.h
-@@ -2267,7 +2267,7 @@ struct tcp_plb_state {
- 	u8	consec_cong_rounds:5, /* consecutive congested rounds */
- 		unused:3;
- 	u32	pause_until; /* jiffies32 when PLB can resume rerouting */
--};
-+} __attribute__ ((__packed__));
- 
- static inline void tcp_plb_init(const struct sock *sk,
- 				struct tcp_plb_state *plb)
---- a/include/uapi/linux/inet_diag.h
-+++ b/include/uapi/linux/inet_diag.h
-@@ -229,6 +229,29 @@ struct tcp_bbr_info {
- 	__u32	bbr_min_rtt;		/* min-filtered RTT in uSec */
- 	__u32	bbr_pacing_gain;	/* pacing gain shifted left 8 bits */
- 	__u32	bbr_cwnd_gain;		/* cwnd gain shifted left 8 bits */
-+	__u32	bbr_bw_hi_lsb;		/* lower 32 bits of bw_hi */
-+	__u32	bbr_bw_hi_msb;		/* upper 32 bits of bw_hi */
-+	__u32	bbr_bw_lo_lsb;		/* lower 32 bits of bw_lo */
-+	__u32	bbr_bw_lo_msb;		/* upper 32 bits of bw_lo */
-+	__u8	bbr_mode;		/* current bbr_mode in state machine */
-+	__u8
bbr_phase; /* current state machine phase */ -+ __u8 unused1; /* alignment padding; not used yet */ -+ __u8 bbr_version; /* BBR algorithm version */ -+ __u32 bbr_inflight_lo; /* lower short-term data volume bound */ -+ __u32 bbr_inflight_hi; /* higher long-term data volume bound */ -+ __u32 bbr_extra_acked; /* max excess packets ACKed in epoch */ -+}; -+ -+/* TCP BBR congestion control bbr_phase as reported in netlink/ss stats. */ -+enum tcp_bbr_phase { -+ BBR_PHASE_INVALID = 0, -+ BBR_PHASE_STARTUP = 1, -+ BBR_PHASE_DRAIN = 2, -+ BBR_PHASE_PROBE_RTT = 3, -+ BBR_PHASE_PROBE_BW_UP = 4, -+ BBR_PHASE_PROBE_BW_DOWN = 5, -+ BBR_PHASE_PROBE_BW_CRUISE = 6, -+ BBR_PHASE_PROBE_BW_REFILL = 7, - }; - - union tcp_cc_info { ---- a/net/ipv4/Kconfig -+++ b/net/ipv4/Kconfig -@@ -668,15 +668,18 @@ config TCP_CONG_BBR - default n - help - -- BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to -- maximize network utilization and minimize queues. It builds an explicit -- model of the bottleneck delivery rate and path round-trip propagation -- delay. It tolerates packet loss and delay unrelated to congestion. It -- can operate over LAN, WAN, cellular, wifi, or cable modem links. It can -- coexist with flows that use loss-based congestion control, and can -- operate with shallow buffers, deep buffers, bufferbloat, policers, or -- AQM schemes that do not provide a delay signal. It requires the fq -- ("Fair Queue") pacing packet scheduler. -+ BBR (Bottleneck Bandwidth and RTT) TCP congestion control is a -+ model-based congestion control algorithm that aims to maximize -+ network utilization, keep queues and retransmit rates low, and to be -+ able to coexist with Reno/CUBIC in common scenarios. It builds an -+ explicit model of the network path. It tolerates a targeted degree -+ of random packet loss and delay. It can operate over LAN, WAN, -+ cellular, wifi, or cable modem links, and can use shallow-threshold -+ ECN signals. It can coexist to some degree with flows that use -+ loss-based congestion control, and can operate with shallow buffers, -+ deep buffers, bufferbloat, policers, or AQM schemes that do not -+ provide a delay signal. It requires pacing, using either TCP internal -+ pacing or the fq ("Fair Queue") pacing packet scheduler. - - choice - prompt "Default TCP congestion control" ---- a/net/ipv4/tcp_bbr.c -+++ b/net/ipv4/tcp_bbr.c -@@ -1,18 +1,19 @@ --/* Bottleneck Bandwidth and RTT (BBR) congestion control -+/* BBR (Bottleneck Bandwidth and RTT) congestion control - * -- * BBR congestion control computes the sending rate based on the delivery -- * rate (throughput) estimated from ACKs. In a nutshell: -+ * BBR is a model-based congestion control algorithm that aims for low queues, -+ * low loss, and (bounded) Reno/CUBIC coexistence. To maintain a model of the -+ * network path, it uses measurements of bandwidth and RTT, as well as (if they -+ * occur) packet loss and/or shallow-threshold ECN signals. Note that although -+ * it can use ECN or loss signals explicitly, it does not require either; it -+ * can bound its in-flight data based on its estimate of the BDP. 
- * -- * On each ACK, update our model of the network path: -- * bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips) -- * min_rtt = windowed_min(rtt, 10 seconds) -- * pacing_rate = pacing_gain * bottleneck_bandwidth -- * cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4) -- * -- * The core algorithm does not react directly to packet losses or delays, -- * although BBR may adjust the size of next send per ACK when loss is -- * observed, or adjust the sending rate if it estimates there is a -- * traffic policer, in order to keep the drop rate reasonable. -+ * The model has both higher and lower bounds for the operating range: -+ * lo: bw_lo, inflight_lo: conservative short-term lower bound -+ * hi: bw_hi, inflight_hi: robust long-term upper bound -+ * The bandwidth-probing time scale is (a) extended dynamically based on -+ * estimated BDP to improve coexistence with Reno/CUBIC; (b) bounded by -+ * an interactive wall-clock time-scale to be more scalable and responsive -+ * than Reno and CUBIC. - * - * Here is a state transition diagram for BBR: - * -@@ -65,6 +66,13 @@ - #include - #include - -+#include -+#include "tcp_dctcp.h" -+ -+#define BBR_VERSION 3 -+ -+#define bbr_param(sk,name) (bbr_ ## name) -+ - /* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth - * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps. - * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32. -@@ -85,36 +93,41 @@ enum bbr_mode { - BBR_PROBE_RTT, /* cut inflight to min to probe min_rtt */ - }; - -+/* How does the incoming ACK stream relate to our bandwidth probing? */ -+enum bbr_ack_phase { -+ BBR_ACKS_INIT, /* not probing; not getting probe feedback */ -+ BBR_ACKS_REFILLING, /* sending at est. bw to fill pipe */ -+ BBR_ACKS_PROBE_STARTING, /* inflight rising to probe bw */ -+ BBR_ACKS_PROBE_FEEDBACK, /* getting feedback from bw probing */ -+ BBR_ACKS_PROBE_STOPPING, /* stopped probing; still getting feedback */ -+}; -+ - /* BBR congestion control block */ - struct bbr { - u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */ - u32 min_rtt_stamp; /* timestamp of min_rtt_us */ - u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */ -- struct minmax bw; /* Max recent delivery rate in pkts/uS << 24 */ -- u32 rtt_cnt; /* count of packet-timed rounds elapsed */ -+ u32 probe_rtt_min_us; /* min RTT in probe_rtt_win_ms win */ -+ u32 probe_rtt_min_stamp; /* timestamp of probe_rtt_min_us*/ - u32 next_rtt_delivered; /* scb->tx.delivered at end of round */ - u64 cycle_mstamp; /* time of this cycle phase start */ -- u32 mode:3, /* current bbr_mode in state machine */ -+ u32 mode:2, /* current bbr_mode in state machine */ - prev_ca_state:3, /* CA state on previous ACK */ -- packet_conservation:1, /* use packet conservation? */ - round_start:1, /* start of packet-timed tx->ack round? */ -+ ce_state:1, /* If most recent data has CE bit set */ -+ bw_probe_up_rounds:5, /* cwnd-limited rounds in PROBE_UP */ -+ try_fast_path:1, /* can we take fast path? */ - idle_restart:1, /* restarting after idle? */ - probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ -- unused:13, -- lt_is_sampling:1, /* taking long-term ("LT") samples now? */ -- lt_rtt_cnt:7, /* round trips in long-term interval */ -- lt_use_bw:1; /* use lt_bw as our bw estimate? 
*/ -- u32 lt_bw; /* LT est delivery rate in pkts/uS << 24 */ -- u32 lt_last_delivered; /* LT intvl start: tp->delivered */ -- u32 lt_last_stamp; /* LT intvl start: tp->delivered_mstamp */ -- u32 lt_last_lost; /* LT intvl start: tp->lost */ -+ init_cwnd:7, /* initial cwnd */ -+ unused_1:10; - u32 pacing_gain:10, /* current gain for setting pacing rate */ - cwnd_gain:10, /* current gain for setting cwnd */ - full_bw_reached:1, /* reached full bw in Startup? */ - full_bw_cnt:2, /* number of rounds without large bw gains */ -- cycle_idx:3, /* current index in pacing_gain cycle array */ -+ cycle_idx:2, /* current index in pacing_gain cycle array */ - has_seen_rtt:1, /* have we seen an RTT sample yet? */ -- unused_b:5; -+ unused_2:6; - u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ - u32 full_bw; /* recent bw, to estimate if pipe is full */ - -@@ -124,19 +137,67 @@ struct bbr { - u32 ack_epoch_acked:20, /* packets (S)ACKed in sampling epoch */ - extra_acked_win_rtts:5, /* age of extra_acked, in round trips */ - extra_acked_win_idx:1, /* current index in extra_acked array */ -- unused_c:6; -+ /* BBR v3 state: */ -+ full_bw_now:1, /* recently reached full bw plateau? */ -+ startup_ecn_rounds:2, /* consecutive hi ECN STARTUP rounds */ -+ loss_in_cycle:1, /* packet loss in this cycle? */ -+ ecn_in_cycle:1, /* ECN in this cycle? */ -+ unused_3:1; -+ u32 loss_round_delivered; /* scb->tx.delivered ending loss round */ -+ u32 undo_bw_lo; /* bw_lo before latest losses */ -+ u32 undo_inflight_lo; /* inflight_lo before latest losses */ -+ u32 undo_inflight_hi; /* inflight_hi before latest losses */ -+ u32 bw_latest; /* max delivered bw in last round trip */ -+ u32 bw_lo; /* lower bound on sending bandwidth */ -+ u32 bw_hi[2]; /* max recent measured bw sample */ -+ u32 inflight_latest; /* max delivered data in last round trip */ -+ u32 inflight_lo; /* lower bound of inflight data range */ -+ u32 inflight_hi; /* upper bound of inflight data range */ -+ u32 bw_probe_up_cnt; /* packets delivered per inflight_hi incr */ -+ u32 bw_probe_up_acks; /* packets (S)ACKed since inflight_hi incr */ -+ u32 probe_wait_us; /* PROBE_DOWN until next clock-driven probe */ -+ u32 prior_rcv_nxt; /* tp->rcv_nxt when CE state last changed */ -+ u32 ecn_eligible:1, /* sender can use ECN (RTT, handshake)? */ -+ ecn_alpha:9, /* EWMA delivered_ce/delivered; 0..256 */ -+ bw_probe_samples:1, /* rate samples reflect bw probing? */ -+ prev_probe_too_high:1, /* did last PROBE_UP go too high? */ -+ stopped_risky_probe:1, /* last PROBE_UP stopped due to risk? */ -+ rounds_since_probe:8, /* packet-timed rounds since probed bw */ -+ loss_round_start:1, /* loss_round_delivered round trip? */ -+ loss_in_round:1, /* loss marked in this round trip? */ -+ ecn_in_round:1, /* ECN marked in this round trip? */ -+ ack_phase:3, /* bbr_ack_phase: meaning of ACKs */ -+ loss_events_in_round:4,/* losses in STARTUP round */ -+ initialized:1; /* has bbr_init() been called? 
*/ -+ u32 alpha_last_delivered; /* tp->delivered at alpha update */ -+ u32 alpha_last_delivered_ce; /* tp->delivered_ce at alpha update */ -+ -+ u8 unused_4; /* to preserve alignment */ -+ struct tcp_plb_state plb; - }; - --#define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */ -+struct bbr_context { -+ u32 sample_bw; -+}; - --/* Window length of bw filter (in rounds): */ --static const int bbr_bw_rtts = CYCLE_LEN + 2; - /* Window length of min_rtt filter (in sec): */ - static const u32 bbr_min_rtt_win_sec = 10; - /* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */ - static const u32 bbr_probe_rtt_mode_ms = 200; --/* Skip TSO below the following bandwidth (bits/sec): */ --static const int bbr_min_tso_rate = 1200000; -+/* Window length of probe_rtt_min_us filter (in ms), and consequently the -+ * typical interval between PROBE_RTT mode entries. The default is 5000ms. -+ * Note that bbr_probe_rtt_win_ms must be <= bbr_min_rtt_win_sec * MSEC_PER_SEC -+ */ -+static const u32 bbr_probe_rtt_win_ms = 5000; -+/* Proportion of cwnd to estimated BDP in PROBE_RTT, in units of BBR_UNIT: */ -+static const u32 bbr_probe_rtt_cwnd_gain = BBR_UNIT * 1 / 2; -+ -+/* Use min_rtt to help adapt TSO burst size, with smaller min_rtt resulting -+ * in bigger TSO bursts. We cut the RTT-based allowance in half -+ * for every 2^9 usec (aka 512 us) of RTT, so that the RTT-based allowance -+ * is below 1500 bytes after 6 * ~500 usec = 3ms. -+ */ -+static const u32 bbr_tso_rtt_shift = 9; - - /* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck. - * In order to help drive the network toward lower queues and low latency while -@@ -146,13 +207,15 @@ static const int bbr_min_tso_rate = 1200 - */ - static const int bbr_pacing_margin_percent = 1; - --/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain -+/* We use a startup_pacing_gain of 4*ln(2) because it's the smallest value - * that will allow a smoothly increasing pacing rate that will double each RTT - * and send the same number of packets per RTT that an un-paced, slow-starting - * Reno or CUBIC flow would: - */ --static const int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1; --/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain -+static const int bbr_startup_pacing_gain = BBR_UNIT * 277 / 100 + 1; -+/* The gain for deriving startup cwnd: */ -+static const int bbr_startup_cwnd_gain = BBR_UNIT * 2; -+/* The pacing gain in BBR_DRAIN is calculated to typically drain - * the queue created in BBR_STARTUP in a single round: - */ - static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885; -@@ -160,13 +223,17 @@ static const int bbr_drain_gain = BBR_UN - static const int bbr_cwnd_gain = BBR_UNIT * 2; - /* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */ - static const int bbr_pacing_gain[] = { -- BBR_UNIT * 5 / 4, /* probe for more available bw */ -- BBR_UNIT * 3 / 4, /* drain queue and/or yield bw to other flows */ -- BBR_UNIT, BBR_UNIT, BBR_UNIT, /* cruise at 1.0*bw to utilize pipe, */ -- BBR_UNIT, BBR_UNIT, BBR_UNIT /* without creating excess queue... 
*/ -+ BBR_UNIT * 5 / 4, /* UP: probe for more available bw */ -+ BBR_UNIT * 91 / 100, /* DOWN: drain queue and/or yield bw */ -+ BBR_UNIT, /* CRUISE: try to use pipe w/ some headroom */ -+ BBR_UNIT, /* REFILL: refill pipe to estimated 100% */ -+}; -+enum bbr_pacing_gain_phase { -+ BBR_BW_PROBE_UP = 0, /* push up inflight to probe for bw/vol */ -+ BBR_BW_PROBE_DOWN = 1, /* drain excess inflight from the queue */ -+ BBR_BW_PROBE_CRUISE = 2, /* use pipe, w/ headroom in queue/pipe */ -+ BBR_BW_PROBE_REFILL = 3, /* v2: refill the pipe again to 100% */ - }; --/* Randomize the starting gain cycling phase over N phases: */ --static const u32 bbr_cycle_rand = 7; - - /* Try to keep at least this many packets in flight, if things go smoothly. For - * smooth functioning, a sliding window protocol ACKing every other packet -@@ -174,24 +241,12 @@ static const u32 bbr_cycle_rand = 7; - */ - static const u32 bbr_cwnd_min_target = 4; - --/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */ -+/* To estimate if BBR_STARTUP or BBR_BW_PROBE_UP has filled pipe... */ - /* If bw has increased significantly (1.25x), there may be more bw available: */ - static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4; - /* But after 3 rounds w/o significant bw growth, estimate pipe is full: */ - static const u32 bbr_full_bw_cnt = 3; - --/* "long-term" ("LT") bandwidth estimator parameters... */ --/* The minimum number of rounds in an LT bw sampling interval: */ --static const u32 bbr_lt_intvl_min_rtts = 4; --/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */ --static const u32 bbr_lt_loss_thresh = 50; --/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */ --static const u32 bbr_lt_bw_ratio = BBR_UNIT / 8; --/* If 2 intervals have a bw diff <= 4 Kbit/sec their bw is "consistent": */ --static const u32 bbr_lt_bw_diff = 4000 / 8; --/* If we estimate we're policed, use lt_bw for this many round trips: */ --static const u32 bbr_lt_bw_max_rtts = 48; -- - /* Gain factor for adding extra_acked to target cwnd: */ - static const int bbr_extra_acked_gain = BBR_UNIT; - /* Window length of extra_acked window. */ -@@ -201,8 +256,121 @@ static const u32 bbr_ack_epoch_acked_res - /* Time period for clamping cwnd increment due to ack aggregation */ - static const u32 bbr_extra_acked_max_us = 100 * 1000; - -+/* Flags to control BBR ECN-related behavior... */ -+ -+/* Ensure ACKs only ACK packets with consistent ECN CE status? */ -+static const bool bbr_precise_ece_ack = true; -+ -+/* Max RTT (in usec) at which to use sender-side ECN logic. -+ * Disabled when 0 (ECN allowed at any RTT). -+ */ -+static const u32 bbr_ecn_max_rtt_us = 5000; -+ -+/* On losses, scale down inflight and pacing rate by beta scaled by BBR_SCALE. -+ * No loss response when 0. -+ */ -+static const u32 bbr_beta = BBR_UNIT * 30 / 100; -+ -+/* Gain factor for ECN mark ratio samples, scaled by BBR_SCALE (1/16 = 6.25%) */ -+static const u32 bbr_ecn_alpha_gain = BBR_UNIT * 1 / 16; -+ -+/* The initial value for ecn_alpha; 1.0 allows a flow to respond quickly -+ * to congestion if the bottleneck is congested when the flow starts up. -+ */ -+static const u32 bbr_ecn_alpha_init = BBR_UNIT; -+ -+/* On ECN, cut inflight_lo to (1 - ecn_factor * ecn_alpha) scaled by BBR_SCALE. -+ * No ECN based bounding when 0. -+ */ -+static const u32 bbr_ecn_factor = BBR_UNIT * 1 / 3; /* 1/3 = 33% */ -+ -+/* Estimate bw probing has gone too far if CE ratio exceeds this threshold. -+ * Scaled by BBR_SCALE. Disabled when 0. 
-+ */ -+static const u32 bbr_ecn_thresh = BBR_UNIT * 1 / 2; /* 1/2 = 50% */ -+ -+/* If non-zero, if in a cycle with no losses but some ECN marks, after ECN -+ * clears then make the first round's increment to inflight_hi the following -+ * fraction of inflight_hi. -+ */ -+static const u32 bbr_ecn_reprobe_gain = BBR_UNIT * 1 / 2; -+ -+/* Estimate bw probing has gone too far if loss rate exceeds this level. */ -+static const u32 bbr_loss_thresh = BBR_UNIT * 2 / 100; /* 2% loss */ -+ -+/* Slow down for a packet loss recovered by TLP? */ -+static const bool bbr_loss_probe_recovery = true; -+ -+/* Exit STARTUP if number of loss marking events in a Recovery round is >= N, -+ * and loss rate is higher than bbr_loss_thresh. -+ * Disabled if 0. -+ */ -+static const u32 bbr_full_loss_cnt = 6; -+ -+/* Exit STARTUP if number of round trips with ECN mark rate above ecn_thresh -+ * meets this count. -+ */ -+static const u32 bbr_full_ecn_cnt = 2; -+ -+/* Fraction of unutilized headroom to try to leave in path upon high loss. */ -+static const u32 bbr_inflight_headroom = BBR_UNIT * 15 / 100; -+ -+/* How much do we increase cwnd_gain when probing for bandwidth in -+ * BBR_BW_PROBE_UP? This specifies the increment in units of -+ * BBR_UNIT/4. The default is 1, meaning 0.25. -+ * The min value is 0 (meaning 0.0); max is 3 (meaning 0.75). -+ */ -+static const u32 bbr_bw_probe_cwnd_gain = 1; -+ -+/* Max number of packet-timed rounds to wait before probing for bandwidth. If -+ * we want to tolerate 1% random loss per round, and not have this cut our -+ * inflight too much, we must probe for bw periodically on roughly this scale. -+ * If low, limits Reno/CUBIC coexistence; if high, limits loss tolerance. -+ * We aim to be fair with Reno/CUBIC up to a BDP of at least: -+ * BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets -+ */ -+static const u32 bbr_bw_probe_max_rounds = 63; -+ -+/* Max amount of randomness to inject in round counting for Reno-coexistence. -+ */ -+static const u32 bbr_bw_probe_rand_rounds = 2; -+ -+/* Use BBR-native probe time scale starting at this many usec. -+ * We aim to be fair with Reno/CUBIC up to an inter-loss time epoch of at least: -+ * BDP*RTT = 25Mbps * .030sec /(1514bytes) * 0.030sec = 1.9 secs -+ */ -+static const u32 bbr_bw_probe_base_us = 2 * USEC_PER_SEC; /* 2 secs */ -+ -+/* Use BBR-native probes spread over this many usec: */ -+static const u32 bbr_bw_probe_rand_us = 1 * USEC_PER_SEC; /* 1 secs */ -+ -+/* Use fast path if app-limited, no loss/ECN, and target cwnd was reached? */ -+static const bool bbr_fast_path = true; -+ -+/* Use fast ack mode? 
*/ -+static const bool bbr_fast_ack_mode = true; -+ -+static u32 bbr_max_bw(const struct sock *sk); -+static u32 bbr_bw(const struct sock *sk); -+static void bbr_exit_probe_rtt(struct sock *sk); -+static void bbr_reset_congestion_signals(struct sock *sk); -+static void bbr_run_loss_probe_recovery(struct sock *sk); -+ - static void bbr_check_probe_rtt_done(struct sock *sk); - -+/* This connection can use ECN if both endpoints have signaled ECN support in -+ * the handshake and the per-route settings indicated this is a -+ * shallow-threshold ECN environment, meaning both: -+ * (a) ECN CE marks indicate low-latency/shallow-threshold congestion, and -+ * (b) TCP endpoints provide precise ACKs that only ACK data segments -+ * with consistent ECN CE status -+ */ -+static bool bbr_can_use_ecn(const struct sock *sk) -+{ -+ return (tcp_sk(sk)->ecn_flags & TCP_ECN_OK) && -+ (tcp_sk(sk)->ecn_flags & TCP_ECN_LOW); -+} -+ - /* Do we estimate that STARTUP filled the pipe? */ - static bool bbr_full_bw_reached(const struct sock *sk) - { -@@ -214,17 +382,17 @@ static bool bbr_full_bw_reached(const st - /* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ - static u32 bbr_max_bw(const struct sock *sk) - { -- struct bbr *bbr = inet_csk_ca(sk); -+ const struct bbr *bbr = inet_csk_ca(sk); - -- return minmax_get(&bbr->bw); -+ return max(bbr->bw_hi[0], bbr->bw_hi[1]); - } - - /* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */ - static u32 bbr_bw(const struct sock *sk) - { -- struct bbr *bbr = inet_csk_ca(sk); -+ const struct bbr *bbr = inet_csk_ca(sk); - -- return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk); -+ return min(bbr_max_bw(sk), bbr->bw_lo); - } - - /* Return maximum extra acked in past k-2k round trips, -@@ -241,15 +409,23 @@ static u16 bbr_extra_acked(const struct - * The order here is chosen carefully to avoid overflow of u64. This should - * work for input rates of up to 2.9Tbit/sec and gain of 2.89x. - */ --static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) -+static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain, -+ int margin) - { - unsigned int mss = tcp_sk(sk)->mss_cache; - - rate *= mss; - rate *= gain; - rate >>= BBR_SCALE; -- rate *= USEC_PER_SEC / 100 * (100 - bbr_pacing_margin_percent); -- return rate >> BW_SCALE; -+ rate *= USEC_PER_SEC / 100 * (100 - margin); -+ rate >>= BW_SCALE; -+ rate = max(rate, 1ULL); -+ return rate; -+} -+ -+static u64 bbr_bw_bytes_per_sec(struct sock *sk, u64 rate) -+{ -+ return bbr_rate_bytes_per_sec(sk, rate, BBR_UNIT, 0); - } - - /* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */ -@@ -257,12 +433,13 @@ static unsigned long bbr_bw_to_pacing_ra - { - u64 rate = bw; - -- rate = bbr_rate_bytes_per_sec(sk, rate, gain); -+ rate = bbr_rate_bytes_per_sec(sk, rate, gain, -+ bbr_pacing_margin_percent); - rate = min_t(u64, rate, sk->sk_max_pacing_rate); - return rate; - } - --/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ -+/* Initialize pacing rate to: startup_pacing_gain * init_cwnd / RTT. */ - static void bbr_init_pacing_rate_from_rtt(struct sock *sk) - { - struct tcp_sock *tp = tcp_sk(sk); -@@ -278,7 +455,8 @@ static void bbr_init_pacing_rate_from_rt - } - bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT; - do_div(bw, rtt_us); -- sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain); -+ sk->sk_pacing_rate = -+ bbr_bw_to_pacing_rate(sk, bw, bbr_param(sk, startup_pacing_gain)); - } - - /* Pace using current bw estimate and a gain factor. 
*/ -@@ -294,31 +472,38 @@ static void bbr_set_pacing_rate(struct s - sk->sk_pacing_rate = rate; - } - --/* override sysctl_tcp_min_tso_segs */ --__bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk) --{ -- return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; --} -- --/* Return the number of segments BBR would like in a TSO/GSO skb, given -- * a particular max gso size as a constraint. -+/* Return the number of segments BBR would like in a TSO/GSO skb, given a -+ * particular max gso size as a constraint. TODO: make this simpler and more -+ * consistent by switching bbr to just call tcp_tso_autosize(). - */ - static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now, - u32 gso_max_size) - { -- u32 segs; -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 segs, r; - u64 bytes; - - /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */ - bytes = sk->sk_pacing_rate >> sk->sk_pacing_shift; - -+ /* Budget a TSO/GSO burst size allowance based on min_rtt. For every -+ * K = 2^tso_rtt_shift microseconds of min_rtt, halve the burst. -+ * The min_rtt-based burst allowance is: 64 KBytes / 2^(min_rtt/K) -+ */ -+ if (bbr_param(sk, tso_rtt_shift)) { -+ r = bbr->min_rtt_us >> bbr_param(sk, tso_rtt_shift); -+ if (r < BITS_PER_TYPE(u32)) /* prevent undefined behavior */ -+ bytes += GSO_LEGACY_MAX_SIZE >> r; -+ } -+ - bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER); -- segs = max_t(u32, bytes / mss_now, bbr_min_tso_segs(sk)); -+ segs = max_t(u32, div_u64(bytes, mss_now), -+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); - return segs; - } - - /* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */ --static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now) -+__bpf_kfunc static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now) - { - return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size); - } -@@ -328,7 +513,7 @@ static u32 bbr_tso_segs_goal(struct sock - { - struct tcp_sock *tp = tcp_sk(sk); - -- return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_MAX_SIZE); -+ return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_LEGACY_MAX_SIZE); - } - - /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ -@@ -348,7 +533,9 @@ __bpf_kfunc static void bbr_cwnd_event(s - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - -- if (event == CA_EVENT_TX_START && tp->app_limited) { -+ if (event == CA_EVENT_TX_START) { -+ if (!tp->app_limited) -+ return; - bbr->idle_restart = 1; - bbr->ack_epoch_mstamp = tp->tcp_mstamp; - bbr->ack_epoch_acked = 0; -@@ -359,6 +546,16 @@ __bpf_kfunc static void bbr_cwnd_event(s - bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); - else if (bbr->mode == BBR_PROBE_RTT) - bbr_check_probe_rtt_done(sk); -+ } else if ((event == CA_EVENT_ECN_IS_CE || -+ event == CA_EVENT_ECN_NO_CE) && -+ bbr_can_use_ecn(sk) && -+ bbr_param(sk, precise_ece_ack)) { -+ u32 state = bbr->ce_state; -+ dctcp_ece_ack_update(sk, event, &bbr->prior_rcv_nxt, &state); -+ bbr->ce_state = state; -+ } else if (event == CA_EVENT_TLP_RECOVERY && -+ bbr_param(sk, loss_probe_recovery)) { -+ bbr_run_loss_probe_recovery(sk); - } - } - -@@ -381,10 +578,10 @@ static u32 bbr_bdp(struct sock *sk, u32 - * default. This should only happen when the connection is not using TCP - * timestamps and has retransmitted all of the SYN/SYNACK/data packets - * ACKed so far. In this case, an RTO can cut cwnd to 1, in which -- * case we need to slow-start up toward something safe: TCP_INIT_CWND. 
-+ * case we need to slow-start up toward something safe: initial cwnd. - */ - if (unlikely(bbr->min_rtt_us == ~0U)) /* no valid RTT samples yet? */ -- return TCP_INIT_CWND; /* be safe: cap at default initial cwnd*/ -+ return bbr->init_cwnd; /* be safe: cap at initial cwnd */ - - w = (u64)bw * bbr->min_rtt_us; - -@@ -401,23 +598,23 @@ static u32 bbr_bdp(struct sock *sk, u32 - * - one skb in sending host Qdisc, - * - one skb in sending host TSO/GSO engine - * - one skb being received by receiver host LRO/GRO/delayed-ACK engine -- * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because -- * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, -+ * Don't worry, at low rates this won't bloat cwnd because -+ * in such cases tso_segs_goal is small. The minimum cwnd is 4 packets, - * which allows 2 outstanding 2-packet sequences, to try to keep pipe - * full even with ACK-every-other-packet delayed ACKs. - */ - static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd) - { - struct bbr *bbr = inet_csk_ca(sk); -+ u32 tso_segs_goal; - -- /* Allow enough full-sized skbs in flight to utilize end systems. */ -- cwnd += 3 * bbr_tso_segs_goal(sk); -- -- /* Reduce delayed ACKs by rounding up cwnd to the next even number. */ -- cwnd = (cwnd + 1) & ~1U; -+ tso_segs_goal = 3 * bbr_tso_segs_goal(sk); - -+ /* Allow enough full-sized skbs in flight to utilize end systems. */ -+ cwnd = max_t(u32, cwnd, tso_segs_goal); -+ cwnd = max_t(u32, cwnd, bbr_param(sk, cwnd_min_target)); - /* Ensure gain cycling gets inflight above BDP even for small BDPs. */ -- if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == 0) -+ if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP) - cwnd += 2; - - return cwnd; -@@ -472,10 +669,10 @@ static u32 bbr_ack_aggregation_cwnd(stru - { - u32 max_aggr_cwnd, aggr_cwnd = 0; - -- if (bbr_extra_acked_gain && bbr_full_bw_reached(sk)) { -+ if (bbr_param(sk, extra_acked_gain)) { - max_aggr_cwnd = ((u64)bbr_bw(sk) * bbr_extra_acked_max_us) - / BW_UNIT; -- aggr_cwnd = (bbr_extra_acked_gain * bbr_extra_acked(sk)) -+ aggr_cwnd = (bbr_param(sk, extra_acked_gain) * bbr_extra_acked(sk)) - >> BBR_SCALE; - aggr_cwnd = min(aggr_cwnd, max_aggr_cwnd); - } -@@ -483,66 +680,27 @@ static u32 bbr_ack_aggregation_cwnd(stru - return aggr_cwnd; - } - --/* An optimization in BBR to reduce losses: On the first round of recovery, we -- * follow the packet conservation principle: send P packets per P packets acked. -- * After that, we slow-start and send at most 2*P packets per P packets acked. -- * After recovery finishes, or upon undo, we restore the cwnd we had when -- * recovery started (capped by the target cwnd based on estimated BDP). -- * -- * TODO(ycheng/ncardwell): implement a rate-based approach. -- */ --static bool bbr_set_cwnd_to_recover_or_restore( -- struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd) -+/* Returns the cwnd for PROBE_RTT mode. */ -+static u32 bbr_probe_rtt_cwnd(struct sock *sk) - { -- struct tcp_sock *tp = tcp_sk(sk); -- struct bbr *bbr = inet_csk_ca(sk); -- u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state; -- u32 cwnd = tcp_snd_cwnd(tp); -- -- /* An ACK for P pkts should release at most 2*P packets. We do this -- * in two steps. First, here we deduct the number of lost packets. -- * Then, in bbr_set_cwnd() we slow start up toward the target cwnd. 
-- */ -- if (rs->losses > 0) -- cwnd = max_t(s32, cwnd - rs->losses, 1); -- -- if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) { -- /* Starting 1st round of Recovery, so do packet conservation. */ -- bbr->packet_conservation = 1; -- bbr->next_rtt_delivered = tp->delivered; /* start round now */ -- /* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */ -- cwnd = tcp_packets_in_flight(tp) + acked; -- } else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) { -- /* Exiting loss recovery; restore cwnd saved before recovery. */ -- cwnd = max(cwnd, bbr->prior_cwnd); -- bbr->packet_conservation = 0; -- } -- bbr->prev_ca_state = state; -- -- if (bbr->packet_conservation) { -- *new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked); -- return true; /* yes, using packet conservation */ -- } -- *new_cwnd = cwnd; -- return false; -+ return max_t(u32, bbr_param(sk, cwnd_min_target), -+ bbr_bdp(sk, bbr_bw(sk), bbr_param(sk, probe_rtt_cwnd_gain))); - } - - /* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss - * has drawn us down below target), or snap down to target if we're above it. - */ - static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, -- u32 acked, u32 bw, int gain) -+ u32 acked, u32 bw, int gain, u32 cwnd, -+ struct bbr_context *ctx) - { - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); -- u32 cwnd = tcp_snd_cwnd(tp), target_cwnd = 0; -+ u32 target_cwnd = 0; - - if (!acked) - goto done; /* no packet fully ACKed; just apply caps */ - -- if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd)) -- goto done; -- - target_cwnd = bbr_bdp(sk, bw, gain); - - /* Increment the cwnd to account for excess ACKed data that seems -@@ -551,74 +709,26 @@ static void bbr_set_cwnd(struct sock *sk - target_cwnd += bbr_ack_aggregation_cwnd(sk); - target_cwnd = bbr_quantization_budget(sk, target_cwnd); - -- /* If we're below target cwnd, slow start cwnd toward target cwnd. */ -- if (bbr_full_bw_reached(sk)) /* only cut cwnd if we filled the pipe */ -- cwnd = min(cwnd + acked, target_cwnd); -- else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND) -- cwnd = cwnd + acked; -- cwnd = max(cwnd, bbr_cwnd_min_target); -+ /* Update cwnd and enable fast path if cwnd reaches target_cwnd. */ -+ bbr->try_fast_path = 0; -+ if (bbr_full_bw_reached(sk)) { /* only cut cwnd if we filled the pipe */ -+ cwnd += acked; -+ if (cwnd >= target_cwnd) { -+ cwnd = target_cwnd; -+ bbr->try_fast_path = 1; -+ } -+ } else if (cwnd < target_cwnd || cwnd < 2 * bbr->init_cwnd) { -+ cwnd += acked; -+ } else { -+ bbr->try_fast_path = 1; -+ } - -+ cwnd = max_t(u32, cwnd, bbr_param(sk, cwnd_min_target)); - done: -- tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* apply global cap */ -+ tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* global cap */ - if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */ -- tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), bbr_cwnd_min_target)); --} -- --/* End cycle phase if it's time and/or we hit the phase's in-flight target. */ --static bool bbr_is_next_cycle_phase(struct sock *sk, -- const struct rate_sample *rs) --{ -- struct tcp_sock *tp = tcp_sk(sk); -- struct bbr *bbr = inet_csk_ca(sk); -- bool is_full_length = -- tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) > -- bbr->min_rtt_us; -- u32 inflight, bw; -- -- /* The pacing_gain of 1.0 paces at the estimated bw to try to fully -- * use the pipe without increasing the queue. 
-- */ -- if (bbr->pacing_gain == BBR_UNIT) -- return is_full_length; /* just use wall clock time */ -- -- inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight); -- bw = bbr_max_bw(sk); -- -- /* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at -- * least pacing_gain*BDP; this may take more than min_rtt if min_rtt is -- * small (e.g. on a LAN). We do not persist if packets are lost, since -- * a path with small buffers may not hold that much. -- */ -- if (bbr->pacing_gain > BBR_UNIT) -- return is_full_length && -- (rs->losses || /* perhaps pacing_gain*BDP won't fit */ -- inflight >= bbr_inflight(sk, bw, bbr->pacing_gain)); -- -- /* A pacing_gain < 1.0 tries to drain extra queue we added if bw -- * probing didn't find more bw. If inflight falls to match BDP then we -- * estimate queue is drained; persisting would underutilize the pipe. -- */ -- return is_full_length || -- inflight <= bbr_inflight(sk, bw, BBR_UNIT); --} -- --static void bbr_advance_cycle_phase(struct sock *sk) --{ -- struct tcp_sock *tp = tcp_sk(sk); -- struct bbr *bbr = inet_csk_ca(sk); -- -- bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); -- bbr->cycle_mstamp = tp->delivered_mstamp; --} -- --/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */ --static void bbr_update_cycle_phase(struct sock *sk, -- const struct rate_sample *rs) --{ -- struct bbr *bbr = inet_csk_ca(sk); -- -- if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs)) -- bbr_advance_cycle_phase(sk); -+ tcp_snd_cwnd_set(tp, min_t(u32, tcp_snd_cwnd(tp), -+ bbr_probe_rtt_cwnd(sk))); - } - - static void bbr_reset_startup_mode(struct sock *sk) -@@ -628,191 +738,49 @@ static void bbr_reset_startup_mode(struc - bbr->mode = BBR_STARTUP; - } - --static void bbr_reset_probe_bw_mode(struct sock *sk) --{ -- struct bbr *bbr = inet_csk_ca(sk); -- -- bbr->mode = BBR_PROBE_BW; -- bbr->cycle_idx = CYCLE_LEN - 1 - get_random_u32_below(bbr_cycle_rand); -- bbr_advance_cycle_phase(sk); /* flip to next phase of gain cycle */ --} -- --static void bbr_reset_mode(struct sock *sk) --{ -- if (!bbr_full_bw_reached(sk)) -- bbr_reset_startup_mode(sk); -- else -- bbr_reset_probe_bw_mode(sk); --} -- --/* Start a new long-term sampling interval. */ --static void bbr_reset_lt_bw_sampling_interval(struct sock *sk) --{ -- struct tcp_sock *tp = tcp_sk(sk); -- struct bbr *bbr = inet_csk_ca(sk); -- -- bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC); -- bbr->lt_last_delivered = tp->delivered; -- bbr->lt_last_lost = tp->lost; -- bbr->lt_rtt_cnt = 0; --} -- --/* Completely reset long-term bandwidth sampling. */ --static void bbr_reset_lt_bw_sampling(struct sock *sk) --{ -- struct bbr *bbr = inet_csk_ca(sk); -- -- bbr->lt_bw = 0; -- bbr->lt_use_bw = 0; -- bbr->lt_is_sampling = false; -- bbr_reset_lt_bw_sampling_interval(sk); --} -- --/* Long-term bw sampling interval is done. Estimate whether we're policed. */ --static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw) --{ -- struct bbr *bbr = inet_csk_ca(sk); -- u32 diff; -- -- if (bbr->lt_bw) { /* do we have bw from a previous interval? */ -- /* Is new bw close to the lt_bw from the previous interval? */ -- diff = abs(bw - bbr->lt_bw); -- if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) || -- (bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <= -- bbr_lt_bw_diff)) { -- /* All criteria are met; estimate we're policed. 
*/ -- bbr->lt_bw = (bw + bbr->lt_bw) >> 1; /* avg 2 intvls */ -- bbr->lt_use_bw = 1; -- bbr->pacing_gain = BBR_UNIT; /* try to avoid drops */ -- bbr->lt_rtt_cnt = 0; -- return; -- } -- } -- bbr->lt_bw = bw; -- bbr_reset_lt_bw_sampling_interval(sk); --} -- --/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of -- * Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and -- * explicitly models their policed rate, to reduce unnecessary losses. We -- * estimate that we're policed if we see 2 consecutive sampling intervals with -- * consistent throughput and high packet loss. If we think we're being policed, -- * set lt_bw to the "long-term" average delivery rate from those 2 intervals. -+/* See if we have reached next round trip. Upon start of the new round, -+ * returns packets delivered since previous round start plus this ACK. - */ --static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs) -+static u32 bbr_update_round_start(struct sock *sk, -+ const struct rate_sample *rs, struct bbr_context *ctx) - { - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); -- u32 lost, delivered; -- u64 bw; -- u32 t; -- -- if (bbr->lt_use_bw) { /* already using long-term rate, lt_bw? */ -- if (bbr->mode == BBR_PROBE_BW && bbr->round_start && -- ++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) { -- bbr_reset_lt_bw_sampling(sk); /* stop using lt_bw */ -- bbr_reset_probe_bw_mode(sk); /* restart gain cycling */ -- } -- return; -- } -- -- /* Wait for the first loss before sampling, to let the policer exhaust -- * its tokens and estimate the steady-state rate allowed by the policer. -- * Starting samples earlier includes bursts that over-estimate the bw. -- */ -- if (!bbr->lt_is_sampling) { -- if (!rs->losses) -- return; -- bbr_reset_lt_bw_sampling_interval(sk); -- bbr->lt_is_sampling = true; -- } -- -- /* To avoid underestimates, reset sampling if we run out of data. */ -- if (rs->is_app_limited) { -- bbr_reset_lt_bw_sampling(sk); -- return; -- } -- -- if (bbr->round_start) -- bbr->lt_rtt_cnt++; /* count round trips in this interval */ -- if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts) -- return; /* sampling interval needs to be longer */ -- if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) { -- bbr_reset_lt_bw_sampling(sk); /* interval is too long */ -- return; -- } -- -- /* End sampling interval when a packet is lost, so we estimate the -- * policer tokens were exhausted. Stopping the sampling before the -- * tokens are exhausted under-estimates the policed rate. -- */ -- if (!rs->losses) -- return; -- -- /* Calculate packets lost and delivered in sampling interval. */ -- lost = tp->lost - bbr->lt_last_lost; -- delivered = tp->delivered - bbr->lt_last_delivered; -- /* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */ -- if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered) -- return; -- -- /* Find average delivery rate in this sampling interval. 
*/ -- t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp; -- if ((s32)t < 1) -- return; /* interval is less than one ms, so wait */ -- /* Check if can multiply without overflow */ -- if (t >= ~0U / USEC_PER_MSEC) { -- bbr_reset_lt_bw_sampling(sk); /* interval too long; reset */ -- return; -- } -- t *= USEC_PER_MSEC; -- bw = (u64)delivered * BW_UNIT; -- do_div(bw, t); -- bbr_lt_bw_interval_done(sk, bw); --} -- --/* Estimate the bandwidth based on how fast packets are delivered */ --static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) --{ -- struct tcp_sock *tp = tcp_sk(sk); -- struct bbr *bbr = inet_csk_ca(sk); -- u64 bw; -+ u32 round_delivered = 0; - - bbr->round_start = 0; -- if (rs->delivered < 0 || rs->interval_us <= 0) -- return; /* Not a valid observation */ - - /* See if we've reached the next RTT */ -- if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) { -+ if (rs->interval_us > 0 && -+ !before(rs->prior_delivered, bbr->next_rtt_delivered)) { -+ round_delivered = tp->delivered - bbr->next_rtt_delivered; - bbr->next_rtt_delivered = tp->delivered; -- bbr->rtt_cnt++; - bbr->round_start = 1; -- bbr->packet_conservation = 0; - } -+ return round_delivered; -+} - -- bbr_lt_bw_sampling(sk, rs); -+/* Calculate the bandwidth based on how fast packets are delivered */ -+static void bbr_calculate_bw_sample(struct sock *sk, -+ const struct rate_sample *rs, struct bbr_context *ctx) -+{ -+ u64 bw = 0; - - /* Divide delivered by the interval to find a (lower bound) bottleneck - * bandwidth sample. Delivered is in packets and interval_us in uS and - * ratio will be <<1 for most connections. So delivered is first scaled. -+ * Round up to allow growth at low rates, even with integer division. - */ -- bw = div64_long((u64)rs->delivered * BW_UNIT, rs->interval_us); -+ if (rs->interval_us > 0) { -+ if (WARN_ONCE(rs->delivered < 0, -+ "negative delivered: %d interval_us: %ld\n", -+ rs->delivered, rs->interval_us)) -+ return; - -- /* If this sample is application-limited, it is likely to have a very -- * low delivered count that represents application behavior rather than -- * the available network rate. Such a sample could drag down estimated -- * bw, causing needless slow-down. Thus, to continue to send at the -- * last measured network rate, we filter out app-limited samples unless -- * they describe the path bw at least as well as our bw model. -- * -- * So the goal during app-limited phase is to proceed with the best -- * network rate no matter how long. We automatically leave this -- * phase when app writes faster than the network can deliver :) -- */ -- if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) { -- /* Incorporate new sample into our max bw filter. */ -- minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw); -+ bw = DIV_ROUND_UP_ULL((u64)rs->delivered * BW_UNIT, rs->interval_us); - } -+ -+ ctx->sample_bw = bw; - } - - /* Estimates the windowed max degree of ack aggregation. -@@ -826,7 +794,7 @@ static void bbr_update_bw(struct sock *s - * - * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us (100 ms). - * Max filter is an approximate sliding window of 5-10 (packet timed) round -- * trips. -+ * trips for non-startup phase, and 1-2 round trips for startup. 
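-+ * (The max filter below keeps two alternating windows of
-+ * extra_acked_win_rtts rounds each, shortened to one round until full bw
-+ * is reached, so a stale aggregation episode ages out after at most two
-+ * window lengths.)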
- */ - static void bbr_update_ack_aggregation(struct sock *sk, - const struct rate_sample *rs) -@@ -834,15 +802,19 @@ static void bbr_update_ack_aggregation(s - u32 epoch_us, expected_acked, extra_acked; - struct bbr *bbr = inet_csk_ca(sk); - struct tcp_sock *tp = tcp_sk(sk); -+ u32 extra_acked_win_rtts_thresh = bbr_param(sk, extra_acked_win_rtts); - -- if (!bbr_extra_acked_gain || rs->acked_sacked <= 0 || -+ if (!bbr_param(sk, extra_acked_gain) || rs->acked_sacked <= 0 || - rs->delivered < 0 || rs->interval_us <= 0) - return; - - if (bbr->round_start) { - bbr->extra_acked_win_rtts = min(0x1F, - bbr->extra_acked_win_rtts + 1); -- if (bbr->extra_acked_win_rtts >= bbr_extra_acked_win_rtts) { -+ if (!bbr_full_bw_reached(sk)) -+ extra_acked_win_rtts_thresh = 1; -+ if (bbr->extra_acked_win_rtts >= -+ extra_acked_win_rtts_thresh) { - bbr->extra_acked_win_rtts = 0; - bbr->extra_acked_win_idx = bbr->extra_acked_win_idx ? - 0 : 1; -@@ -876,49 +848,6 @@ static void bbr_update_ack_aggregation(s - bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked; - } - --/* Estimate when the pipe is full, using the change in delivery rate: BBR -- * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by -- * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited -- * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the -- * higher rwin, 3: we get higher delivery rate samples. Or transient -- * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar -- * design goal, but uses delay and inter-ACK spacing instead of bandwidth. -- */ --static void bbr_check_full_bw_reached(struct sock *sk, -- const struct rate_sample *rs) --{ -- struct bbr *bbr = inet_csk_ca(sk); -- u32 bw_thresh; -- -- if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited) -- return; -- -- bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE; -- if (bbr_max_bw(sk) >= bw_thresh) { -- bbr->full_bw = bbr_max_bw(sk); -- bbr->full_bw_cnt = 0; -- return; -- } -- ++bbr->full_bw_cnt; -- bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt; --} -- --/* If pipe is probably full, drain the queue and then enter steady-state. 
*/ --static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) --{ -- struct bbr *bbr = inet_csk_ca(sk); -- -- if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { -- bbr->mode = BBR_DRAIN; /* drain queue we created */ -- tcp_sk(sk)->snd_ssthresh = -- bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); -- } /* fall through to check if in-flight is already small: */ -- if (bbr->mode == BBR_DRAIN && -- bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= -- bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) -- bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */ --} -- - static void bbr_check_probe_rtt_done(struct sock *sk) - { - struct tcp_sock *tp = tcp_sk(sk); -@@ -928,9 +857,9 @@ static void bbr_check_probe_rtt_done(str - after(tcp_jiffies32, bbr->probe_rtt_done_stamp))) - return; - -- bbr->min_rtt_stamp = tcp_jiffies32; /* wait a while until PROBE_RTT */ -+ bbr->probe_rtt_min_stamp = tcp_jiffies32; /* schedule next PROBE_RTT */ - tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd)); -- bbr_reset_mode(sk); -+ bbr_exit_probe_rtt(sk); - } - - /* The goal of PROBE_RTT mode is to have BBR flows cooperatively and -@@ -956,23 +885,35 @@ static void bbr_update_min_rtt(struct so - { - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); -- bool filter_expired; -+ bool probe_rtt_expired, min_rtt_expired; -+ u32 expire; - -- /* Track min RTT seen in the min_rtt_win_sec filter window: */ -- filter_expired = after(tcp_jiffies32, -- bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); -+ /* Track min RTT in probe_rtt_win_ms to time next PROBE_RTT state. */ -+ expire = bbr->probe_rtt_min_stamp + -+ msecs_to_jiffies(bbr_param(sk, probe_rtt_win_ms)); -+ probe_rtt_expired = after(tcp_jiffies32, expire); - if (rs->rtt_us >= 0 && -- (rs->rtt_us < bbr->min_rtt_us || -- (filter_expired && !rs->is_ack_delayed))) { -- bbr->min_rtt_us = rs->rtt_us; -- bbr->min_rtt_stamp = tcp_jiffies32; -+ (rs->rtt_us < bbr->probe_rtt_min_us || -+ (probe_rtt_expired && !rs->is_ack_delayed))) { -+ bbr->probe_rtt_min_us = rs->rtt_us; -+ bbr->probe_rtt_min_stamp = tcp_jiffies32; -+ } -+ /* Track min RTT seen in the min_rtt_win_sec filter window: */ -+ expire = bbr->min_rtt_stamp + bbr_param(sk, min_rtt_win_sec) * HZ; -+ min_rtt_expired = after(tcp_jiffies32, expire); -+ if (bbr->probe_rtt_min_us <= bbr->min_rtt_us || -+ min_rtt_expired) { -+ bbr->min_rtt_us = bbr->probe_rtt_min_us; -+ bbr->min_rtt_stamp = bbr->probe_rtt_min_stamp; - } - -- if (bbr_probe_rtt_mode_ms > 0 && filter_expired && -+ if (bbr_param(sk, probe_rtt_mode_ms) > 0 && probe_rtt_expired && - !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) { - bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */ - bbr_save_cwnd(sk); /* note cwnd so we can restore it */ - bbr->probe_rtt_done_stamp = 0; -+ bbr->ack_phase = BBR_ACKS_PROBE_STOPPING; -+ bbr->next_rtt_delivered = tp->delivered; - } - - if (bbr->mode == BBR_PROBE_RTT) { -@@ -981,9 +922,9 @@ static void bbr_update_min_rtt(struct so - (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; - /* Maintain min packets in flight for max(200 ms, 1 round). 
*/ - if (!bbr->probe_rtt_done_stamp && -- tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) { -+ tcp_packets_in_flight(tp) <= bbr_probe_rtt_cwnd(sk)) { - bbr->probe_rtt_done_stamp = tcp_jiffies32 + -- msecs_to_jiffies(bbr_probe_rtt_mode_ms); -+ msecs_to_jiffies(bbr_param(sk, probe_rtt_mode_ms)); - bbr->probe_rtt_round_done = 0; - bbr->next_rtt_delivered = tp->delivered; - } else if (bbr->probe_rtt_done_stamp) { -@@ -1004,18 +945,20 @@ static void bbr_update_gains(struct sock - - switch (bbr->mode) { - case BBR_STARTUP: -- bbr->pacing_gain = bbr_high_gain; -- bbr->cwnd_gain = bbr_high_gain; -+ bbr->pacing_gain = bbr_param(sk, startup_pacing_gain); -+ bbr->cwnd_gain = bbr_param(sk, startup_cwnd_gain); - break; - case BBR_DRAIN: -- bbr->pacing_gain = bbr_drain_gain; /* slow, to drain */ -- bbr->cwnd_gain = bbr_high_gain; /* keep cwnd */ -+ bbr->pacing_gain = bbr_param(sk, drain_gain); /* slow, to drain */ -+ bbr->cwnd_gain = bbr_param(sk, startup_cwnd_gain); /* keep cwnd */ - break; - case BBR_PROBE_BW: -- bbr->pacing_gain = (bbr->lt_use_bw ? -- BBR_UNIT : -- bbr_pacing_gain[bbr->cycle_idx]); -- bbr->cwnd_gain = bbr_cwnd_gain; -+ bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx]; -+ bbr->cwnd_gain = bbr_param(sk, cwnd_gain); -+ if (bbr_param(sk, bw_probe_cwnd_gain) && -+ bbr->cycle_idx == BBR_BW_PROBE_UP) -+ bbr->cwnd_gain += -+ BBR_UNIT * bbr_param(sk, bw_probe_cwnd_gain) / 4; - break; - case BBR_PROBE_RTT: - bbr->pacing_gain = BBR_UNIT; -@@ -1027,27 +970,1108 @@ static void bbr_update_gains(struct sock - } - } - --static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) -+__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk) -+{ -+ /* Provision 3 * cwnd since BBR may slow-start even during recovery. */ -+ return 3; -+} -+ -+/* Incorporate a new bw sample into the current window of our max filter. */ -+static void bbr_take_max_bw_sample(struct sock *sk, u32 bw) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->bw_hi[1] = max(bw, bbr->bw_hi[1]); -+} -+ -+/* Keep max of last 1-2 cycles. Each PROBE_BW cycle, flip filter window. */ -+static void bbr_advance_max_bw_filter(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (!bbr->bw_hi[1]) -+ return; /* no samples in this window; remember old window */ -+ bbr->bw_hi[0] = bbr->bw_hi[1]; -+ bbr->bw_hi[1] = 0; -+} -+ -+/* Reset the estimator for reaching full bandwidth based on bw plateau. */ -+static void bbr_reset_full_bw(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->full_bw = 0; -+ bbr->full_bw_cnt = 0; -+ bbr->full_bw_now = 0; -+} -+ -+/* How much do we want in flight? Our BDP, unless congestion cut cwnd. */ -+static u32 bbr_target_inflight(struct sock *sk) -+{ -+ u32 bdp = bbr_inflight(sk, bbr_bw(sk), BBR_UNIT); -+ -+ return min(bdp, tcp_sk(sk)->snd_cwnd); -+} -+ -+static bool bbr_is_probing_bandwidth(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ return (bbr->mode == BBR_STARTUP) || -+ (bbr->mode == BBR_PROBE_BW && -+ (bbr->cycle_idx == BBR_BW_PROBE_REFILL || -+ bbr->cycle_idx == BBR_BW_PROBE_UP)); -+} -+ -+/* Has the given amount of time elapsed since we marked the phase start? 
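-+ * For example, bbr_check_time_to_probe_bw() uses this helper with
-+ * bbr->probe_wait_us to decide whether the wall clock timer for the next
-+ * bandwidth probe has expired.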
*/ -+static bool bbr_has_elapsed_in_phase(const struct sock *sk, u32 interval_us) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ const struct bbr *bbr = inet_csk_ca(sk); -+ -+ return tcp_stamp_us_delta(tp->tcp_mstamp, -+ bbr->cycle_mstamp + interval_us) > 0; -+} -+ -+static void bbr_handle_queue_too_high_in_startup(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 bdp; /* estimated BDP in packets, with quantization budget */ -+ -+ bbr->full_bw_reached = 1; -+ -+ bdp = bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); -+ bbr->inflight_hi = max(bdp, bbr->inflight_latest); -+} -+ -+/* Exit STARTUP upon N consecutive rounds with ECN mark rate > ecn_thresh. */ -+static void bbr_check_ecn_too_high_in_startup(struct sock *sk, u32 ce_ratio) - { -- bbr_update_bw(sk, rs); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr_full_bw_reached(sk) || !bbr->ecn_eligible || -+ !bbr_param(sk, full_ecn_cnt) || !bbr_param(sk, ecn_thresh)) -+ return; -+ -+ if (ce_ratio >= bbr_param(sk, ecn_thresh)) -+ bbr->startup_ecn_rounds++; -+ else -+ bbr->startup_ecn_rounds = 0; -+ -+ if (bbr->startup_ecn_rounds >= bbr_param(sk, full_ecn_cnt)) { -+ bbr_handle_queue_too_high_in_startup(sk); -+ return; -+ } -+} -+ -+/* Updates ecn_alpha and returns ce_ratio. -1 if not available. */ -+static int bbr_update_ecn_alpha(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct net *net = sock_net(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ s32 delivered, delivered_ce; -+ u64 alpha, ce_ratio; -+ u32 gain; -+ bool want_ecn_alpha; -+ -+ /* See if we should use ECN sender logic for this connection. */ -+ if (!bbr->ecn_eligible && bbr_can_use_ecn(sk) && -+ bbr_param(sk, ecn_factor) && -+ (bbr->min_rtt_us <= bbr_ecn_max_rtt_us || -+ !bbr_ecn_max_rtt_us)) -+ bbr->ecn_eligible = 1; -+ -+ /* Skip updating alpha only if not ECN-eligible and PLB is disabled. */ -+ want_ecn_alpha = (bbr->ecn_eligible || -+ (bbr_can_use_ecn(sk) && -+ READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled))); -+ if (!want_ecn_alpha) -+ return -1; -+ -+ delivered = tp->delivered - bbr->alpha_last_delivered; -+ delivered_ce = tp->delivered_ce - bbr->alpha_last_delivered_ce; -+ -+ if (delivered == 0 || /* avoid divide by zero */ -+ WARN_ON_ONCE(delivered < 0 || delivered_ce < 0)) /* backwards? */ -+ return -1; -+ -+ BUILD_BUG_ON(BBR_SCALE != TCP_PLB_SCALE); -+ ce_ratio = (u64)delivered_ce << BBR_SCALE; -+ do_div(ce_ratio, delivered); -+ -+ gain = bbr_param(sk, ecn_alpha_gain); -+ alpha = ((BBR_UNIT - gain) * bbr->ecn_alpha) >> BBR_SCALE; -+ alpha += (gain * ce_ratio) >> BBR_SCALE; -+ bbr->ecn_alpha = min_t(u32, alpha, BBR_UNIT); -+ -+ bbr->alpha_last_delivered = tp->delivered; -+ bbr->alpha_last_delivered_ce = tp->delivered_ce; -+ -+ bbr_check_ecn_too_high_in_startup(sk, ce_ratio); -+ return (int)ce_ratio; -+} -+ -+/* Protective Load Balancing (PLB). PLB rehashes outgoing data (to a new IPv6 -+ * flow label) if it encounters sustained congestion in the form of ECN marks. -+ */ -+static void bbr_plb(struct sock *sk, const struct rate_sample *rs, int ce_ratio) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr->round_start && ce_ratio >= 0) -+ tcp_plb_update_state(sk, &bbr->plb, ce_ratio); -+ -+ tcp_plb_check_rehash(sk, &bbr->plb); -+} -+ -+/* Each round trip of BBR_BW_PROBE_UP, double volume of probing data. 
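-+ * As a hypothetical walk-through of the code below (illustrative numbers
-+ * only): with tcp_snd_cwnd(tp) == 100, successive rounds compute
-+ * growth_this_round = 1, 2, 4, ... and bw_probe_up_cnt = 100, 50, 25, ...,
-+ * so inflight_hi grows by roughly 1, 2, 4, ... packets per round: gentle
-+ * at first, then exponentially more aggressive while no congestion appears.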
*/ -+static void bbr_raise_inflight_hi_slope(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 growth_this_round, cnt; -+ -+ /* Calculate "slope": packets S/Acked per inflight_hi increment. */ -+ growth_this_round = 1 << bbr->bw_probe_up_rounds; -+ bbr->bw_probe_up_rounds = min(bbr->bw_probe_up_rounds + 1, 30); -+ cnt = tcp_snd_cwnd(tp) / growth_this_round; -+ cnt = max(cnt, 1U); -+ bbr->bw_probe_up_cnt = cnt; -+} -+ -+/* In BBR_BW_PROBE_UP, not seeing high loss/ECN/queue, so raise inflight_hi. */ -+static void bbr_probe_inflight_hi_upward(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 delta; -+ -+ if (!tp->is_cwnd_limited || tcp_snd_cwnd(tp) < bbr->inflight_hi) -+ return; /* not fully using inflight_hi, so don't grow it */ -+ -+ /* For each bw_probe_up_cnt packets ACKed, increase inflight_hi by 1. */ -+ bbr->bw_probe_up_acks += rs->acked_sacked; -+ if (bbr->bw_probe_up_acks >= bbr->bw_probe_up_cnt) { -+ delta = bbr->bw_probe_up_acks / bbr->bw_probe_up_cnt; -+ bbr->bw_probe_up_acks -= delta * bbr->bw_probe_up_cnt; -+ bbr->inflight_hi += delta; -+ bbr->try_fast_path = 0; /* Need to update cwnd */ -+ } -+ -+ if (bbr->round_start) -+ bbr_raise_inflight_hi_slope(sk); -+} -+ -+/* Does loss/ECN rate for this sample say inflight is "too high"? -+ * This is used by both the bbr_check_loss_too_high_in_startup() function, -+ * which can be used in either v1 or v2, and the PROBE_UP phase of v2, which -+ * uses it to notice when loss/ECN rates suggest inflight is too high. -+ */ -+static bool bbr_is_inflight_too_high(const struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ const struct bbr *bbr = inet_csk_ca(sk); -+ u32 loss_thresh, ecn_thresh; -+ -+ if (rs->lost > 0 && rs->tx_in_flight) { -+ loss_thresh = (u64)rs->tx_in_flight * bbr_param(sk, loss_thresh) >> -+ BBR_SCALE; -+ if (rs->lost > loss_thresh) { -+ return true; -+ } -+ } -+ -+ if (rs->delivered_ce > 0 && rs->delivered > 0 && -+ bbr->ecn_eligible && bbr_param(sk, ecn_thresh)) { -+ ecn_thresh = (u64)rs->delivered * bbr_param(sk, ecn_thresh) >> -+ BBR_SCALE; -+ if (rs->delivered_ce > ecn_thresh) { -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+/* Calculate the tx_in_flight level that corresponded to excessive loss. -+ * We find "lost_prefix" segs of the skb where loss rate went too high, -+ * by solving for "lost_prefix" in the following equation: -+ * lost / inflight >= loss_thresh -+ * (lost_prev + lost_prefix) / (inflight_prev + lost_prefix) >= loss_thresh -+ * Then we take that equation, convert it to fixed point, and -+ * round up to the nearest packet. -+ */ -+static u32 bbr_inflight_hi_from_lost_skb(const struct sock *sk, -+ const struct rate_sample *rs, -+ const struct sk_buff *skb) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ u32 loss_thresh = bbr_param(sk, loss_thresh); -+ u32 pcount, divisor, inflight_hi; -+ s32 inflight_prev, lost_prev; -+ u64 loss_budget, lost_prefix; -+ -+ pcount = tcp_skb_pcount(skb); -+ -+ /* How much data was in flight before this skb? */ -+ inflight_prev = rs->tx_in_flight - pcount; -+ if (inflight_prev < 0) { -+ WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious( -+ pcount, -+ TCP_SKB_CB(skb)->sacked, -+ rs->tx_in_flight), -+ "tx_in_flight: %u pcount: %u reneg: %u", -+ rs->tx_in_flight, pcount, tcp_sk(sk)->is_sack_reneg); -+ return ~0U; -+ } -+ -+ /* How much inflight data was marked lost before this skb? 
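-+ * (rs->lost already includes this skb's pcount, so subtracting it
-+ * recovers the losses charged before this skb was marked lost.)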
*/
-+ lost_prev = rs->lost - pcount;
-+ if (WARN_ONCE(lost_prev < 0,
-+ "cwnd: %u ca: %d out: %u lost: %u pif: %u "
-+ "tx_in_flight: %u tx.lost: %u tp->lost: %u rs->lost: %d "
-+ "lost_prev: %d pcount: %d seq: %u end_seq: %u reneg: %u",
-+ tcp_snd_cwnd(tp), inet_csk(sk)->icsk_ca_state,
-+ tp->packets_out, tp->lost_out, tcp_packets_in_flight(tp),
-+ rs->tx_in_flight, TCP_SKB_CB(skb)->tx.lost, tp->lost,
-+ rs->lost, lost_prev, pcount,
-+ TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
-+ tp->is_sack_reneg))
-+ return ~0U;
-+
-+ /* At what prefix of this lost skb did the loss rate exceed loss_thresh? */
-+ loss_budget = (u64)inflight_prev * loss_thresh + BBR_UNIT - 1;
-+ loss_budget >>= BBR_SCALE;
-+ if (lost_prev >= loss_budget) {
-+ lost_prefix = 0; /* previous losses crossed loss_thresh */
-+ } else {
-+ lost_prefix = loss_budget - lost_prev;
-+ lost_prefix <<= BBR_SCALE;
-+ divisor = BBR_UNIT - loss_thresh;
-+ if (WARN_ON_ONCE(!divisor)) /* loss_thresh is 8 bits */
-+ return ~0U;
-+ do_div(lost_prefix, divisor);
-+ }
-+
-+ inflight_hi = inflight_prev + lost_prefix;
-+ return inflight_hi;
-+}
-+
-+/* If loss/ECN rates during probing indicated we may have overfilled a
-+ * buffer, return an operating point that tries to leave unutilized headroom in
-+ * the path for other flows, for fairness convergence and lower RTTs and loss.
-+ */
-+static u32 bbr_inflight_with_headroom(const struct sock *sk)
-+{
-+ struct bbr *bbr = inet_csk_ca(sk);
-+ u32 headroom, headroom_fraction;
-+
-+ if (bbr->inflight_hi == ~0U)
-+ return ~0U;
-+
-+ headroom_fraction = bbr_param(sk, inflight_headroom);
-+ headroom = ((u64)bbr->inflight_hi * headroom_fraction) >> BBR_SCALE;
-+ headroom = max(headroom, 1U);
-+ return max_t(s32, bbr->inflight_hi - headroom,
-+ bbr_param(sk, cwnd_min_target));
-+}
-+
-+/* Bound cwnd to a sensible level, based on our current probing state
-+ * machine phase and model of a good inflight level (inflight_lo, inflight_hi).
-+ */
-+static void bbr_bound_cwnd_for_inflight_model(struct sock *sk)
-+{
-+ struct tcp_sock *tp = tcp_sk(sk);
-+ struct bbr *bbr = inet_csk_ca(sk);
-+ u32 cap;
-+
-+ /* tcp_rcv_synsent_state_process() currently calls tcp_ack()
-+ * and thus cong_control() without first initializing us(!).
-+ */
-+ if (!bbr->initialized)
-+ return;
-+
-+ cap = ~0U;
-+ if (bbr->mode == BBR_PROBE_BW &&
-+ bbr->cycle_idx != BBR_BW_PROBE_CRUISE) {
-+ /* Probe to see if more packets fit in the path. */
-+ cap = bbr->inflight_hi;
-+ } else {
-+ if (bbr->mode == BBR_PROBE_RTT ||
-+ (bbr->mode == BBR_PROBE_BW &&
-+ bbr->cycle_idx == BBR_BW_PROBE_CRUISE))
-+ cap = bbr_inflight_with_headroom(sk);
-+ }
-+ /* Adapt to any loss/ECN since our last bw probe. */
-+ cap = min(cap, bbr->inflight_lo);
-+
-+ cap = max_t(u32, cap, bbr_param(sk, cwnd_min_target));
-+ tcp_snd_cwnd_set(tp, min(cap, tcp_snd_cwnd(tp)));
-+}
-+
-+/* How should we multiplicatively cut bw or inflight limits based on ECN? */
-+u32 bbr_ecn_cut(struct sock *sk)
-+{
-+ struct bbr *bbr = inet_csk_ca(sk);
-+
-+ return BBR_UNIT -
-+ ((bbr->ecn_alpha * bbr_param(sk, ecn_factor)) >> BBR_SCALE);
-+}
-+
-+/* Init lower bounds if they have not been initialized yet. */
-+static void bbr_init_lower_bounds(struct sock *sk, bool init_bw)
-+{
-+ struct tcp_sock *tp = tcp_sk(sk);
-+ struct bbr *bbr = inet_csk_ca(sk);
-+
-+ if (init_bw && bbr->bw_lo == ~0U)
-+ bbr->bw_lo = bbr_max_bw(sk);
-+ if (bbr->inflight_lo == ~0U)
-+ bbr->inflight_lo = tcp_snd_cwnd(tp);
-+}
-+
-+/* Reduce bw and inflight to (1 - beta). 
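-+ * As a hypothetical example: if the beta parameter were 30% of BBR_UNIT,
-+ * loss_cut would be 0.7, so a round with loss cuts the bounds to
-+ * bw_lo = max(bw_latest, 0.7 * bw_lo) and
-+ * inflight_lo = max(inflight_latest, 0.7 * inflight_lo), i.e. a
-+ * multiplicative decrease floored at what this round actually delivered.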
*/
-+static void bbr_loss_lower_bounds(struct sock *sk, u32 *bw, u32 *inflight)
-+{
-+ struct bbr *bbr = inet_csk_ca(sk);
-+ u32 loss_cut = BBR_UNIT - bbr_param(sk, beta);
-+
-+ *bw = max_t(u32, bbr->bw_latest,
-+ (u64)bbr->bw_lo * loss_cut >> BBR_SCALE);
-+ *inflight = max_t(u32, bbr->inflight_latest,
-+ (u64)bbr->inflight_lo * loss_cut >> BBR_SCALE);
-+}
-+
-+/* Reduce inflight to (1 - alpha*ecn_factor). */
-+static void bbr_ecn_lower_bounds(struct sock *sk, u32 *inflight)
-+{
-+ struct bbr *bbr = inet_csk_ca(sk);
-+ u32 ecn_cut = bbr_ecn_cut(sk);
-+
-+ *inflight = (u64)bbr->inflight_lo * ecn_cut >> BBR_SCALE;
-+}
-+
-+/* Estimate a short-term lower bound on the capacity available now, based
-+ * on measurements of the current delivery process and recent history. When we
-+ * are seeing loss/ECN at times when we are not probing bw, then conservatively
-+ * move toward flow balance by multiplicatively cutting our short-term
-+ * estimated safe rate and volume of data (bw_lo and inflight_lo). We use a
-+ * multiplicative decrease in order to converge to a lower capacity in time
-+ * logarithmic in the magnitude of the decrease.
-+ *
-+ * However, we do not cut our short-term estimates lower than the current rate
-+ * and volume of delivered data from this round trip, since from the current
-+ * delivery process we can estimate the measured capacity available now.
-+ *
-+ * Anything faster than that approach would knowingly risk high loss, which can
-+ * cause low bw for Reno/CUBIC and high loss recovery latency for
-+ * request/response flows using any congestion control.
-+ */
-+static void bbr_adapt_lower_bounds(struct sock *sk,
-+ const struct rate_sample *rs)
-+{
-+ struct bbr *bbr = inet_csk_ca(sk);
-+ u32 ecn_inflight_lo = ~0U;
-+
-+ /* We only use lower-bound estimates when not probing bw.
-+ * When probing we need to push inflight higher to probe bw.
-+ */
-+ if (bbr_is_probing_bandwidth(sk))
-+ return;
-+
-+ /* ECN response. */
-+ if (bbr->ecn_in_round && bbr_param(sk, ecn_factor)) {
-+ bbr_init_lower_bounds(sk, false);
-+ bbr_ecn_lower_bounds(sk, &ecn_inflight_lo);
-+ }
-+
-+ /* Loss response. */
-+ if (bbr->loss_in_round) {
-+ bbr_init_lower_bounds(sk, true);
-+ bbr_loss_lower_bounds(sk, &bbr->bw_lo, &bbr->inflight_lo);
-+ }
-+
-+ /* Adjust to the lower of the levels implied by loss/ECN. */
-+ bbr->inflight_lo = min(bbr->inflight_lo, ecn_inflight_lo);
-+ bbr->bw_lo = max(1U, bbr->bw_lo);
-+}
-+
-+/* Reset any short-term lower-bound adaptation to congestion, so that we can
-+ * push our inflight up.
-+ */
-+static void bbr_reset_lower_bounds(struct sock *sk)
-+{
-+ struct bbr *bbr = inet_csk_ca(sk);
-+
-+ bbr->bw_lo = ~0U;
-+ bbr->inflight_lo = ~0U;
-+}
-+
-+/* After bw probing (STARTUP/PROBE_UP), reset signals before entering a state
-+ * machine phase where we adapt our lower bound based on congestion signals.
-+ */
-+static void bbr_reset_congestion_signals(struct sock *sk)
-+{
-+ struct bbr *bbr = inet_csk_ca(sk);
-+
-+ bbr->loss_in_round = 0;
-+ bbr->ecn_in_round = 0;
-+ bbr->loss_in_cycle = 0;
-+ bbr->ecn_in_cycle = 0;
-+ bbr->bw_latest = 0;
-+ bbr->inflight_latest = 0;
-+}
-+
-+static void bbr_exit_loss_recovery(struct sock *sk)
-+{
-+ struct tcp_sock *tp = tcp_sk(sk);
-+ struct bbr *bbr = inet_csk_ca(sk);
-+
-+ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd));
-+ bbr->try_fast_path = 0; /* bound cwnd using latest model */
-+}
-+
-+/* Update rate and volume of delivered data from latest round trip. 
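-+ * (bw_latest and inflight_latest are round-trip maxima of the per-ACK
-+ * bandwidth sample and rs->delivered; bbr_adapt_lower_bounds() above uses
-+ * them as the floor below which the multiplicative loss cuts may not go.)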
*/ -+static void bbr_update_latest_delivery_signals( -+ struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr->loss_round_start = 0; -+ if (rs->interval_us <= 0 || !rs->acked_sacked) -+ return; /* Not a valid observation */ -+ -+ bbr->bw_latest = max_t(u32, bbr->bw_latest, ctx->sample_bw); -+ bbr->inflight_latest = max_t(u32, bbr->inflight_latest, rs->delivered); -+ -+ if (!before(rs->prior_delivered, bbr->loss_round_delivered)) { -+ bbr->loss_round_delivered = tp->delivered; -+ bbr->loss_round_start = 1; /* mark start of new round trip */ -+ } -+} -+ -+/* Once per round, reset filter for latest rate and volume of delivered data. */ -+static void bbr_advance_latest_delivery_signals( -+ struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ /* If ACK matches a TLP retransmit, persist the filter. If we detect -+ * that a TLP retransmit plugged a tail loss, we'll want to remember -+ * how much data the path delivered before the tail loss. -+ */ -+ if (bbr->loss_round_start && !rs->is_acking_tlp_retrans_seq) { -+ bbr->bw_latest = ctx->sample_bw; -+ bbr->inflight_latest = rs->delivered; -+ } -+} -+ -+/* Update (most of) our congestion signals: track the recent rate and volume of -+ * delivered data, presence of loss, and EWMA degree of ECN marking. -+ */ -+static void bbr_update_congestion_signals( -+ struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u64 bw; -+ -+ if (rs->interval_us <= 0 || !rs->acked_sacked) -+ return; /* Not a valid observation */ -+ bw = ctx->sample_bw; -+ -+ if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) -+ bbr_take_max_bw_sample(sk, bw); -+ -+ bbr->loss_in_round |= (rs->losses > 0); -+ -+ if (!bbr->loss_round_start) -+ return; /* skip the per-round-trip updates */ -+ /* Now do per-round-trip updates. */ -+ bbr_adapt_lower_bounds(sk, rs); -+ -+ bbr->loss_in_round = 0; -+ bbr->ecn_in_round = 0; -+} -+ -+/* Bandwidth probing can cause loss. To help coexistence with loss-based -+ * congestion control we spread out our probing in a Reno-conscious way. Due to -+ * the shape of the Reno sawtooth, the time required between loss epochs for an -+ * idealized Reno flow is a number of round trips that is the BDP of that -+ * flow. We count packet-timed round trips directly, since measured RTT can -+ * vary widely, and Reno is driven by packet-timed round trips. -+ */ -+static bool bbr_is_reno_coexistence_probe_time(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 rounds; -+ -+ /* Random loss can shave some small percentage off of our inflight -+ * in each round. To survive this, flows need robust periodic probes. -+ */ -+ rounds = min_t(u32, bbr_param(sk, bw_probe_max_rounds), bbr_target_inflight(sk)); -+ return bbr->rounds_since_probe >= rounds; -+} -+ -+/* How long do we want to wait before probing for bandwidth (and risking -+ * loss)? We randomize the wait, for better mixing and fairness convergence. -+ * -+ * We bound the Reno-coexistence inter-bw-probe time to be 62-63 round trips. 
-+ * This is calculated to allow fairness with a 25Mbps, 30ms Reno flow
-+ * (e.g. 4K video to a broadband user):
-+ *   BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets
-+ *
-+ * We bound the BBR-native inter-bw-probe wall clock time to be:
-+ *  (a) higher than 2 sec: to try to avoid causing loss for a long enough time
-+ *      to allow Reno at 30ms to get 4K video bw, the inter-bw-probe time must
-+ *      be at least: 25Mbps * .030sec / (1514bytes) * 0.030sec = 1.9 secs
-+ *  (b) lower than 3 sec: to ensure flows can start probing in a reasonable
-+ *      amount of time to discover unutilized bw on human-scale interactive
-+ *      time-scales (e.g. perhaps traffic from a web page download that we
-+ *      were competing with is now complete).
-+ */
-+static void bbr_pick_probe_wait(struct sock *sk)
-+{
-+ struct bbr *bbr = inet_csk_ca(sk);
-+
-+ /* Decide the random round-trip bound for wait until probe: */
-+ bbr->rounds_since_probe =
-+ get_random_u32_below(bbr_param(sk, bw_probe_rand_rounds));
-+ /* Decide the random wall clock bound for wait until probe: */
-+ bbr->probe_wait_us = bbr_param(sk, bw_probe_base_us) +
-+ get_random_u32_below(bbr_param(sk, bw_probe_rand_us));
-+}
-+
-+static void bbr_set_cycle_idx(struct sock *sk, int cycle_idx)
-+{
-+ struct bbr *bbr = inet_csk_ca(sk);
-+
-+ bbr->cycle_idx = cycle_idx;
-+ /* New phase, so need to update cwnd and pacing rate. */
-+ bbr->try_fast_path = 0;
-+}
-+
-+/* Send at estimated bw to fill the pipe, but not queue. We need this phase
-+ * before PROBE_UP, because as soon as we send faster than the available bw
-+ * we will start building a queue, and if the buffer is shallow we can cause
-+ * loss. If we do not fill the pipe before we cause this loss, our bw_hi and
-+ * inflight_hi estimates will underestimate.
-+ */
-+static void bbr_start_bw_probe_refill(struct sock *sk, u32 bw_probe_up_rounds)
-+{
-+ struct tcp_sock *tp = tcp_sk(sk);
-+ struct bbr *bbr = inet_csk_ca(sk);
-+
-+ bbr_reset_lower_bounds(sk);
-+ bbr->bw_probe_up_rounds = bw_probe_up_rounds;
-+ bbr->bw_probe_up_acks = 0;
-+ bbr->stopped_risky_probe = 0;
-+ bbr->ack_phase = BBR_ACKS_REFILLING;
-+ bbr->next_rtt_delivered = tp->delivered;
-+ bbr_set_cycle_idx(sk, BBR_BW_PROBE_REFILL);
-+}
-+
-+/* Now probe max deliverable data rate and volume. */
-+static void bbr_start_bw_probe_up(struct sock *sk, struct bbr_context *ctx)
-+{
-+ struct tcp_sock *tp = tcp_sk(sk);
-+ struct bbr *bbr = inet_csk_ca(sk);
-+
-+ bbr->ack_phase = BBR_ACKS_PROBE_STARTING;
-+ bbr->next_rtt_delivered = tp->delivered;
-+ bbr->cycle_mstamp = tp->tcp_mstamp;
-+ bbr_reset_full_bw(sk);
-+ bbr->full_bw = ctx->sample_bw;
-+ bbr_set_cycle_idx(sk, BBR_BW_PROBE_UP);
-+ bbr_raise_inflight_hi_slope(sk);
-+}
-+
-+/* Start a new PROBE_BW probing cycle of some wall clock length. Pick a wall
-+ * clock time at which to probe beyond an inflight that we think is
-+ * safe. This will knowingly risk packet loss, so we want to do this rarely, to
-+ * keep packet loss rates low. Also start a round-trip counter, to probe faster
-+ * if we estimate a Reno flow at our BDP would probe faster. 
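-+ *
-+ * Tying the two bounds to the code below: bbr_pick_probe_wait() arms a
-+ * wall clock wait of bw_probe_base_us plus a random value below
-+ * bw_probe_rand_us, i.e. roughly 2-3 seconds, while the round-trip bound
-+ * is min(bw_probe_max_rounds, bbr_target_inflight()) packet-timed rounds,
-+ * randomized by up to bw_probe_rand_rounds; whichever bound is hit first
-+ * triggers the next probe.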
-+ */ -+static void bbr_start_bw_probe_down(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr_reset_congestion_signals(sk); -+ bbr->bw_probe_up_cnt = ~0U; /* not growing inflight_hi any more */ -+ bbr_pick_probe_wait(sk); -+ bbr->cycle_mstamp = tp->tcp_mstamp; /* start wall clock */ -+ bbr->ack_phase = BBR_ACKS_PROBE_STOPPING; -+ bbr->next_rtt_delivered = tp->delivered; -+ bbr_set_cycle_idx(sk, BBR_BW_PROBE_DOWN); -+} -+ -+/* Cruise: maintain what we estimate to be a neutral, conservative -+ * operating point, without attempting to probe up for bandwidth or down for -+ * RTT, and only reducing inflight in response to loss/ECN signals. -+ */ -+static void bbr_start_bw_probe_cruise(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr->inflight_lo != ~0U) -+ bbr->inflight_lo = min(bbr->inflight_lo, bbr->inflight_hi); -+ -+ bbr_set_cycle_idx(sk, BBR_BW_PROBE_CRUISE); -+} -+ -+/* Loss and/or ECN rate is too high while probing. -+ * Adapt (once per bw probe) by cutting inflight_hi and then restarting cycle. -+ */ -+static void bbr_handle_inflight_too_high(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ const u32 beta = bbr_param(sk, beta); -+ -+ bbr->prev_probe_too_high = 1; -+ bbr->bw_probe_samples = 0; /* only react once per probe */ -+ /* If we are app-limited then we are not robustly -+ * probing the max volume of inflight data we think -+ * might be safe (analogous to how app-limited bw -+ * samples are not known to be robustly probing bw). -+ */ -+ if (!rs->is_app_limited) { -+ bbr->inflight_hi = max_t(u32, rs->tx_in_flight, -+ (u64)bbr_target_inflight(sk) * -+ (BBR_UNIT - beta) >> BBR_SCALE); -+ } -+ if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP) -+ bbr_start_bw_probe_down(sk); -+} -+ -+/* If we're seeing bw and loss samples reflecting our bw probing, adapt -+ * using the signals we see. If loss or ECN mark rate gets too high, then adapt -+ * inflight_hi downward. If we're able to push inflight higher without such -+ * signals, push higher: adapt inflight_hi upward. -+ */ -+static bool bbr_adapt_upper_bounds(struct sock *sk, -+ const struct rate_sample *rs, -+ struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ /* Track when we'll see bw/loss samples resulting from our bw probes. */ -+ if (bbr->ack_phase == BBR_ACKS_PROBE_STARTING && bbr->round_start) -+ bbr->ack_phase = BBR_ACKS_PROBE_FEEDBACK; -+ if (bbr->ack_phase == BBR_ACKS_PROBE_STOPPING && bbr->round_start) { -+ /* End of samples from bw probing phase. */ -+ bbr->bw_probe_samples = 0; -+ bbr->ack_phase = BBR_ACKS_INIT; -+ /* At this point in the cycle, our current bw sample is also -+ * our best recent chance at finding the highest available bw -+ * for this flow. So now is the best time to forget the bw -+ * samples from the previous cycle, by advancing the window. -+ */ -+ if (bbr->mode == BBR_PROBE_BW && !rs->is_app_limited) -+ bbr_advance_max_bw_filter(sk); -+ /* If we had an inflight_hi, then probed and pushed inflight all -+ * the way up to hit that inflight_hi without seeing any -+ * high loss/ECN in all the resulting ACKs from that probing, -+ * then probe up again, this time letting inflight persist at -+ * inflight_hi for a round trip, then accelerating beyond. 
-+ */ -+ if (bbr->mode == BBR_PROBE_BW && -+ bbr->stopped_risky_probe && !bbr->prev_probe_too_high) { -+ bbr_start_bw_probe_refill(sk, 0); -+ return true; /* yes, decided state transition */ -+ } -+ } -+ if (bbr_is_inflight_too_high(sk, rs)) { -+ if (bbr->bw_probe_samples) /* sample is from bw probing? */ -+ bbr_handle_inflight_too_high(sk, rs); -+ } else { -+ /* Loss/ECN rate is declared safe. Adjust upper bound upward. */ -+ -+ if (bbr->inflight_hi == ~0U) -+ return false; /* no excess queue signals yet */ -+ -+ /* To be resilient to random loss, we must raise bw/inflight_hi -+ * if we observe in any phase that a higher level is safe. -+ */ -+ if (rs->tx_in_flight > bbr->inflight_hi) { -+ bbr->inflight_hi = rs->tx_in_flight; -+ } -+ -+ if (bbr->mode == BBR_PROBE_BW && -+ bbr->cycle_idx == BBR_BW_PROBE_UP) -+ bbr_probe_inflight_hi_upward(sk, rs); -+ } -+ -+ return false; -+} -+ -+/* Check if it's time to probe for bandwidth now, and if so, kick it off. */ -+static bool bbr_check_time_to_probe_bw(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 n; -+ -+ /* If we seem to be at an operating point where we are not seeing loss -+ * but we are seeing ECN marks, then when the ECN marks cease we reprobe -+ * quickly (in case cross-traffic has ceased and freed up bw). -+ */ -+ if (bbr_param(sk, ecn_reprobe_gain) && bbr->ecn_eligible && -+ bbr->ecn_in_cycle && !bbr->loss_in_cycle && -+ inet_csk(sk)->icsk_ca_state == TCP_CA_Open) { -+ /* Calculate n so that when bbr_raise_inflight_hi_slope() -+ * computes growth_this_round as 2^n it will be roughly the -+ * desired volume of data (inflight_hi*ecn_reprobe_gain). -+ */ -+ n = ilog2((((u64)bbr->inflight_hi * -+ bbr_param(sk, ecn_reprobe_gain)) >> BBR_SCALE)); -+ bbr_start_bw_probe_refill(sk, n); -+ return true; -+ } -+ -+ if (bbr_has_elapsed_in_phase(sk, bbr->probe_wait_us) || -+ bbr_is_reno_coexistence_probe_time(sk)) { -+ bbr_start_bw_probe_refill(sk, 0); -+ return true; -+ } -+ return false; -+} -+ -+/* Is it time to transition from PROBE_DOWN to PROBE_CRUISE? */ -+static bool bbr_check_time_to_cruise(struct sock *sk, u32 inflight, u32 bw) -+{ -+ /* Always need to pull inflight down to leave headroom in queue. */ -+ if (inflight > bbr_inflight_with_headroom(sk)) -+ return false; -+ -+ return inflight <= bbr_inflight(sk, bw, BBR_UNIT); -+} -+ -+/* PROBE_BW state machine: cruise, refill, probe for bw, or drain? */ -+static void bbr_update_cycle_phase(struct sock *sk, -+ const struct rate_sample *rs, -+ struct bbr_context *ctx) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ bool is_bw_probe_done = false; -+ u32 inflight, bw; -+ -+ if (!bbr_full_bw_reached(sk)) -+ return; -+ -+ /* In DRAIN, PROBE_BW, or PROBE_RTT, adjust upper bounds. */ -+ if (bbr_adapt_upper_bounds(sk, rs, ctx)) -+ return; /* already decided state transition */ -+ -+ if (bbr->mode != BBR_PROBE_BW) -+ return; -+ -+ inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight); -+ bw = bbr_max_bw(sk); -+ -+ switch (bbr->cycle_idx) { -+ /* First we spend most of our time cruising with a pacing_gain of 1.0, -+ * which paces at the estimated bw, to try to fully use the pipe -+ * without building queue. If we encounter loss/ECN marks, we adapt -+ * by slowing down. 
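-+ * While cruising, cwnd is additionally capped via
-+ * bbr_inflight_with_headroom(), which leaves the inflight_headroom
-+ * fraction (15%, defined above) of inflight_hi unused so that other
-+ * flows can claim a share of the path.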
-+ */
-+ case BBR_BW_PROBE_CRUISE:
-+ if (bbr_check_time_to_probe_bw(sk, rs))
-+ return; /* already decided state transition */
-+ break;
-+
-+ /* After cruising, when it's time to probe, we first "refill": we send
-+ * at the estimated bw to fill the pipe, before probing higher and
-+ * knowingly risking overflowing the bottleneck buffer (causing loss).
-+ */
-+ case BBR_BW_PROBE_REFILL:
-+ if (bbr->round_start) {
-+ /* After one full round trip of sending in REFILL, we
-+ * start to see bw samples reflecting our REFILL, which
-+ * may be putting too much data in flight.
-+ */
-+ bbr->bw_probe_samples = 1;
-+ bbr_start_bw_probe_up(sk, ctx);
-+ }
-+ break;
-+
-+ /* After we refill the pipe, we probe by using a pacing_gain > 1.0, to
-+ * probe for bw. If we have not seen loss/ECN, we try to raise inflight
-+ * to at least pacing_gain*BDP; note that this may take more than
-+ * min_rtt if min_rtt is small (e.g. on a LAN).
-+ *
-+ * We terminate PROBE_UP bandwidth probing upon any of the following:
-+ *
-+ * (1) We've pushed inflight up to hit the inflight_hi target set in the
-+ * most recent previous bw probe phase. Thus we want to start
-+ * draining the queue immediately because it's very likely the most
-+ * recently sent packets will fill the queue and cause drops.
-+ * (2) inflight_hi has not limited bandwidth growth recently, yet
-+ * delivered bandwidth has not increased much recently
-+ * (bbr->full_bw_now).
-+ * (3) Loss filter says loss rate is "too high".
-+ * (4) ECN filter says ECN mark rate is "too high".
-+ *
-+ * (1) and (2) are checked here; (3) and (4) are checked in
-+ * bbr_is_inflight_too_high().
-+ */
-+ case BBR_BW_PROBE_UP:
-+ if (bbr->prev_probe_too_high &&
-+ inflight >= bbr->inflight_hi) {
-+ bbr->stopped_risky_probe = 1;
-+ is_bw_probe_done = true;
-+ } else {
-+ if (tp->is_cwnd_limited &&
-+ tcp_snd_cwnd(tp) >= bbr->inflight_hi) {
-+ /* inflight_hi is limiting bw growth */
-+ bbr_reset_full_bw(sk);
-+ bbr->full_bw = ctx->sample_bw;
-+ } else if (bbr->full_bw_now) {
-+ /* Plateau in estimated bw. Pipe looks full. */
-+ is_bw_probe_done = true;
-+ }
-+ }
-+ if (is_bw_probe_done) {
-+ bbr->prev_probe_too_high = 0; /* no loss/ECN (yet) */
-+ bbr_start_bw_probe_down(sk); /* restart w/ down */
-+ }
-+ break;
-+
-+ /* After probing in PROBE_UP, we have usually accumulated some data in
-+ * the bottleneck buffer (if bw probing didn't find more bw). We next
-+ * enter PROBE_DOWN to try to drain any excess data from the queue. To
-+ * do this, we use a pacing_gain < 1.0. We hold this pacing gain until
-+ * our inflight is less than that target cruising point, which is the
-+ * minimum of (a) the amount needed to leave headroom, and (b) the
-+ * estimated BDP. Once inflight falls to match the target, we estimate
-+ * the queue is drained; persisting would underutilize the pipe.
-+ */
-+ case BBR_BW_PROBE_DOWN:
-+ if (bbr_check_time_to_probe_bw(sk, rs))
-+ return; /* already decided state transition */
-+ if (bbr_check_time_to_cruise(sk, inflight, bw))
-+ bbr_start_bw_probe_cruise(sk);
-+ break;
-+
-+ default:
-+ WARN_ONCE(1, "BBR invalid cycle index %u\n", bbr->cycle_idx);
-+ }
-+}
-+
-+/* Exiting PROBE_RTT, so return to bandwidth probing in STARTUP or PROBE_BW. 
*/ -+static void bbr_exit_probe_rtt(struct sock *sk) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ bbr_reset_lower_bounds(sk); -+ if (bbr_full_bw_reached(sk)) { -+ bbr->mode = BBR_PROBE_BW; -+ /* Raising inflight after PROBE_RTT may cause loss, so reset -+ * the PROBE_BW clock and schedule the next bandwidth probe for -+ * a friendly and randomized future point in time. -+ */ -+ bbr_start_bw_probe_down(sk); -+ /* Since we are exiting PROBE_RTT, we know inflight is -+ * below our estimated BDP, so it is reasonable to cruise. -+ */ -+ bbr_start_bw_probe_cruise(sk); -+ } else { -+ bbr->mode = BBR_STARTUP; -+ } -+} -+ -+/* Exit STARTUP based on loss rate > 1% and loss gaps in round >= N. Wait until -+ * the end of the round in recovery to get a good estimate of how many packets -+ * have been lost, and how many we need to drain with a low pacing rate. -+ */ -+static void bbr_check_loss_too_high_in_startup(struct sock *sk, -+ const struct rate_sample *rs) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ if (bbr_full_bw_reached(sk)) -+ return; -+ -+ /* For STARTUP exit, check the loss rate at the end of each round trip -+ * of Recovery episodes in STARTUP. We check the loss rate at the end -+ * of the round trip to filter out noisy/low loss and have a better -+ * sense of inflight (extent of loss), so we can drain more accurately. -+ */ -+ if (rs->losses && bbr->loss_events_in_round < 0xf) -+ bbr->loss_events_in_round++; /* update saturating counter */ -+ if (bbr_param(sk, full_loss_cnt) && bbr->loss_round_start && -+ inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery && -+ bbr->loss_events_in_round >= bbr_param(sk, full_loss_cnt) && -+ bbr_is_inflight_too_high(sk, rs)) { -+ bbr_handle_queue_too_high_in_startup(sk); -+ return; -+ } -+ if (bbr->loss_round_start) -+ bbr->loss_events_in_round = 0; -+} -+ -+/* Estimate when the pipe is full, using the change in delivery rate: BBR -+ * estimates bw probing filled the pipe if the estimated bw hasn't changed by -+ * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited -+ * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the -+ * higher rwin, 3: we get higher delivery rate samples. Or transient -+ * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar -+ * design goal, but uses delay and inter-ACK spacing instead of bandwidth. -+ */ -+static void bbr_check_full_bw_reached(struct sock *sk, -+ const struct rate_sample *rs, -+ struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 bw_thresh, full_cnt, thresh; -+ -+ if (bbr->full_bw_now || rs->is_app_limited) -+ return; -+ -+ thresh = bbr_param(sk, full_bw_thresh); -+ full_cnt = bbr_param(sk, full_bw_cnt); -+ bw_thresh = (u64)bbr->full_bw * thresh >> BBR_SCALE; -+ if (ctx->sample_bw >= bw_thresh) { -+ bbr_reset_full_bw(sk); -+ bbr->full_bw = ctx->sample_bw; -+ return; -+ } -+ if (!bbr->round_start) -+ return; -+ ++bbr->full_bw_cnt; -+ bbr->full_bw_now = bbr->full_bw_cnt >= full_cnt; -+ bbr->full_bw_reached |= bbr->full_bw_now; -+} -+ -+/* If pipe is probably full, drain the queue and then enter steady-state. 
*/
-+static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs,
-+			    struct bbr_context *ctx)
-+{
-+	struct bbr *bbr = inet_csk_ca(sk);
-+
-+	if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
-+		bbr->mode = BBR_DRAIN;	/* drain queue we created */
-+		/* Set ssthresh to export purely for monitoring, to signal
-+		 * completion of initial STARTUP by setting to a non-
-+		 * TCP_INFINITE_SSTHRESH value (ssthresh is not used by BBR).
-+		 */
-+		tcp_sk(sk)->snd_ssthresh =
-+				bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT);
-+		bbr_reset_congestion_signals(sk);
-+	}	/* fall through to check if in-flight is already small: */
-+	if (bbr->mode == BBR_DRAIN &&
-+	    bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <=
-+	    bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) {
-+		bbr->mode = BBR_PROBE_BW;
-+		bbr_start_bw_probe_down(sk);
-+	}
-+}
-+
-+static void bbr_update_model(struct sock *sk, const struct rate_sample *rs,
-+			     struct bbr_context *ctx)
-+{
-+	bbr_update_congestion_signals(sk, rs, ctx);
- 	bbr_update_ack_aggregation(sk, rs);
--	bbr_update_cycle_phase(sk, rs);
--	bbr_check_full_bw_reached(sk, rs);
--	bbr_check_drain(sk, rs);
-+	bbr_check_loss_too_high_in_startup(sk, rs);
-+	bbr_check_full_bw_reached(sk, rs, ctx);
-+	bbr_check_drain(sk, rs, ctx);
-+	bbr_update_cycle_phase(sk, rs, ctx);
- 	bbr_update_min_rtt(sk, rs);
--	bbr_update_gains(sk);
- }
- 
--__bpf_kfunc static void bbr_main(struct sock *sk, const struct rate_sample *rs)
-+/* Fast path for app-limited case.
-+ *
-+ * On each ack, we execute bbr state machine, which primarily consists of:
-+ * 1) update model based on new rate sample, and
-+ * 2) update control based on updated model or state change.
-+ *
-+ * There are certain workload/scenarios, e.g. app-limited case, where
-+ * either we can skip updating the model or we can skip updating both the
-+ * model and the control. This provides significant softirq cpu savings for
-+ * processing incoming acks.
-+ *
-+ * In case of app-limited, if there is no congestion (loss/ecn) and
-+ * if observed bw sample is less than current estimated bw, then we can
-+ * skip some of the computation in bbr state processing:
-+ *
-+ * - if there is no rtt/mode/phase change: In this case, since all the
-+ *   parameters of the network model are constant, we can skip the model
-+ *   as well as the control update.
-+ *
-+ * - else we can skip the rest of the model update. But we still need to
-+ *   update the control to account for the new rtt/mode/phase.
-+ *
-+ * Returns whether we can take fast path or not.
-+ */ -+static bool bbr_run_fast_path(struct sock *sk, bool *update_model, -+ const struct rate_sample *rs, struct bbr_context *ctx) -+{ -+ struct bbr *bbr = inet_csk_ca(sk); -+ u32 prev_min_rtt_us, prev_mode; -+ -+ if (bbr_param(sk, fast_path) && bbr->try_fast_path && -+ rs->is_app_limited && ctx->sample_bw < bbr_max_bw(sk) && -+ !bbr->loss_in_round && !bbr->ecn_in_round ) { -+ prev_mode = bbr->mode; -+ prev_min_rtt_us = bbr->min_rtt_us; -+ bbr_check_drain(sk, rs, ctx); -+ bbr_update_cycle_phase(sk, rs, ctx); -+ bbr_update_min_rtt(sk, rs); -+ -+ if (bbr->mode == prev_mode && -+ bbr->min_rtt_us == prev_min_rtt_us && -+ bbr->try_fast_path) { -+ return true; -+ } -+ -+ /* Skip model update, but control still needs to be updated */ -+ *update_model = false; -+ } -+ return false; -+} -+ -+__bpf_kfunc void bbr_main(struct sock *sk, const struct rate_sample *rs) - { -+ struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); -- u32 bw; -+ struct bbr_context ctx = { 0 }; -+ bool update_model = true; -+ u32 bw, round_delivered; -+ int ce_ratio = -1; -+ -+ round_delivered = bbr_update_round_start(sk, rs, &ctx); -+ if (bbr->round_start) { -+ bbr->rounds_since_probe = -+ min_t(s32, bbr->rounds_since_probe + 1, 0xFF); -+ ce_ratio = bbr_update_ecn_alpha(sk); -+ } -+ bbr_plb(sk, rs, ce_ratio); -+ -+ bbr->ecn_in_round |= (bbr->ecn_eligible && rs->is_ece); -+ bbr_calculate_bw_sample(sk, rs, &ctx); -+ bbr_update_latest_delivery_signals(sk, rs, &ctx); - -- bbr_update_model(sk, rs); -+ if (bbr_run_fast_path(sk, &update_model, rs, &ctx)) -+ goto out; - -+ if (update_model) -+ bbr_update_model(sk, rs, &ctx); -+ -+ bbr_update_gains(sk); - bw = bbr_bw(sk); - bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); -- bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain); -+ bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain, -+ tcp_snd_cwnd(tp), &ctx); -+ bbr_bound_cwnd_for_inflight_model(sk); -+ -+out: -+ bbr_advance_latest_delivery_signals(sk, rs, &ctx); -+ bbr->prev_ca_state = inet_csk(sk)->icsk_ca_state; -+ bbr->loss_in_cycle |= rs->lost > 0; -+ bbr->ecn_in_cycle |= rs->delivered_ce > 0; - } - - __bpf_kfunc static void bbr_init(struct sock *sk) -@@ -1055,20 +2079,21 @@ __bpf_kfunc static void bbr_init(struct - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - -- bbr->prior_cwnd = 0; -+ bbr->initialized = 1; -+ -+ bbr->init_cwnd = min(0x7FU, tcp_snd_cwnd(tp)); -+ bbr->prior_cwnd = tp->prior_cwnd; - tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; -- bbr->rtt_cnt = 0; - bbr->next_rtt_delivered = tp->delivered; - bbr->prev_ca_state = TCP_CA_Open; -- bbr->packet_conservation = 0; - - bbr->probe_rtt_done_stamp = 0; - bbr->probe_rtt_round_done = 0; -+ bbr->probe_rtt_min_us = tcp_min_rtt(tp); -+ bbr->probe_rtt_min_stamp = tcp_jiffies32; - bbr->min_rtt_us = tcp_min_rtt(tp); - bbr->min_rtt_stamp = tcp_jiffies32; - -- minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ -- - bbr->has_seen_rtt = 0; - bbr_init_pacing_rate_from_rtt(sk); - -@@ -1079,7 +2104,7 @@ __bpf_kfunc static void bbr_init(struct - bbr->full_bw_cnt = 0; - bbr->cycle_mstamp = 0; - bbr->cycle_idx = 0; -- bbr_reset_lt_bw_sampling(sk); -+ - bbr_reset_startup_mode(sk); - - bbr->ack_epoch_mstamp = tp->tcp_mstamp; -@@ -1089,78 +2114,236 @@ __bpf_kfunc static void bbr_init(struct - bbr->extra_acked[0] = 0; - bbr->extra_acked[1] = 0; - -+ bbr->ce_state = 0; -+ bbr->prior_rcv_nxt = tp->rcv_nxt; -+ bbr->try_fast_path = 0; -+ - cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); -+ -+ /* Start sampling ECN 
mark rate after first full flight is ACKed: */ -+ bbr->loss_round_delivered = tp->delivered + 1; -+ bbr->loss_round_start = 0; -+ bbr->undo_bw_lo = 0; -+ bbr->undo_inflight_lo = 0; -+ bbr->undo_inflight_hi = 0; -+ bbr->loss_events_in_round = 0; -+ bbr->startup_ecn_rounds = 0; -+ bbr_reset_congestion_signals(sk); -+ bbr->bw_lo = ~0U; -+ bbr->bw_hi[0] = 0; -+ bbr->bw_hi[1] = 0; -+ bbr->inflight_lo = ~0U; -+ bbr->inflight_hi = ~0U; -+ bbr_reset_full_bw(sk); -+ bbr->bw_probe_up_cnt = ~0U; -+ bbr->bw_probe_up_acks = 0; -+ bbr->bw_probe_up_rounds = 0; -+ bbr->probe_wait_us = 0; -+ bbr->stopped_risky_probe = 0; -+ bbr->ack_phase = BBR_ACKS_INIT; -+ bbr->rounds_since_probe = 0; -+ bbr->bw_probe_samples = 0; -+ bbr->prev_probe_too_high = 0; -+ bbr->ecn_eligible = 0; -+ bbr->ecn_alpha = bbr_param(sk, ecn_alpha_init); -+ bbr->alpha_last_delivered = 0; -+ bbr->alpha_last_delivered_ce = 0; -+ bbr->plb.pause_until = 0; -+ -+ tp->fast_ack_mode = bbr_fast_ack_mode ? 1 : 0; - } - --__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk) -+/* BBR marks the current round trip as a loss round. */ -+static void bbr_note_loss(struct sock *sk) - { -- /* Provision 3 * cwnd since BBR may slow-start even during recovery. */ -- return 3; -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ -+ /* Capture "current" data over the full round trip of loss, to -+ * have a better chance of observing the full capacity of the path. -+ */ -+ if (!bbr->loss_in_round) /* first loss in this round trip? */ -+ bbr->loss_round_delivered = tp->delivered; /* set round trip */ -+ bbr->loss_in_round = 1; -+ bbr->loss_in_cycle = 1; - } - --/* In theory BBR does not need to undo the cwnd since it does not -- * always reduce cwnd on losses (see bbr_main()). Keep it for now. -- */ -+/* Core TCP stack informs us that the given skb was just marked lost. */ -+__bpf_kfunc static void bbr_skb_marked_lost(struct sock *sk, -+ const struct sk_buff *skb) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ struct tcp_skb_cb *scb = TCP_SKB_CB(skb); -+ struct rate_sample rs = {}; -+ -+ bbr_note_loss(sk); -+ -+ if (!bbr->bw_probe_samples) -+ return; /* not an skb sent while probing for bandwidth */ -+ if (unlikely(!scb->tx.delivered_mstamp)) -+ return; /* skb was SACKed, reneged, marked lost; ignore it */ -+ /* We are probing for bandwidth. Construct a rate sample that -+ * estimates what happened in the flight leading up to this lost skb, -+ * then see if the loss rate went too high, and if so at which packet. -+ */ -+ rs.tx_in_flight = scb->tx.in_flight; -+ rs.lost = tp->lost - scb->tx.lost; -+ rs.is_app_limited = scb->tx.is_app_limited; -+ if (bbr_is_inflight_too_high(sk, &rs)) { -+ rs.tx_in_flight = bbr_inflight_hi_from_lost_skb(sk, &rs, skb); -+ bbr_handle_inflight_too_high(sk, &rs); -+ } -+} -+ -+static void bbr_run_loss_probe_recovery(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct bbr *bbr = inet_csk_ca(sk); -+ struct rate_sample rs = {0}; -+ -+ bbr_note_loss(sk); -+ -+ if (!bbr->bw_probe_samples) -+ return; /* not sent while probing for bandwidth */ -+ /* We are probing for bandwidth. Construct a rate sample that -+ * estimates what happened in the flight leading up to this -+ * loss, then see if the loss rate went too high. 
-+ */ -+ rs.lost = 1; /* TLP probe repaired loss of a single segment */ -+ rs.tx_in_flight = bbr->inflight_latest + rs.lost; -+ rs.is_app_limited = tp->tlp_orig_data_app_limited; -+ if (bbr_is_inflight_too_high(sk, &rs)) -+ bbr_handle_inflight_too_high(sk, &rs); -+} -+ -+/* Revert short-term model if current loss recovery event was spurious. */ - __bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk) - { - struct bbr *bbr = inet_csk_ca(sk); - -- bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */ -- bbr->full_bw_cnt = 0; -- bbr_reset_lt_bw_sampling(sk); -- return tcp_snd_cwnd(tcp_sk(sk)); -+ bbr_reset_full_bw(sk); /* spurious slow-down; reset full bw detector */ -+ bbr->loss_in_round = 0; -+ -+ /* Revert to cwnd and other state saved before loss episode. */ -+ bbr->bw_lo = max(bbr->bw_lo, bbr->undo_bw_lo); -+ bbr->inflight_lo = max(bbr->inflight_lo, bbr->undo_inflight_lo); -+ bbr->inflight_hi = max(bbr->inflight_hi, bbr->undo_inflight_hi); -+ bbr->try_fast_path = 0; /* take slow path to set proper cwnd, pacing */ -+ return bbr->prior_cwnd; - } - --/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */ -+/* Entering loss recovery, so save state for when we undo recovery. */ - __bpf_kfunc static u32 bbr_ssthresh(struct sock *sk) - { -+ struct bbr *bbr = inet_csk_ca(sk); -+ - bbr_save_cwnd(sk); -+ /* For undo, save state that adapts based on loss signal. */ -+ bbr->undo_bw_lo = bbr->bw_lo; -+ bbr->undo_inflight_lo = bbr->inflight_lo; -+ bbr->undo_inflight_hi = bbr->inflight_hi; - return tcp_sk(sk)->snd_ssthresh; - } - -+static enum tcp_bbr_phase bbr_get_phase(struct bbr *bbr) -+{ -+ switch (bbr->mode) { -+ case BBR_STARTUP: -+ return BBR_PHASE_STARTUP; -+ case BBR_DRAIN: -+ return BBR_PHASE_DRAIN; -+ case BBR_PROBE_BW: -+ break; -+ case BBR_PROBE_RTT: -+ return BBR_PHASE_PROBE_RTT; -+ default: -+ return BBR_PHASE_INVALID; -+ } -+ switch (bbr->cycle_idx) { -+ case BBR_BW_PROBE_UP: -+ return BBR_PHASE_PROBE_BW_UP; -+ case BBR_BW_PROBE_DOWN: -+ return BBR_PHASE_PROBE_BW_DOWN; -+ case BBR_BW_PROBE_CRUISE: -+ return BBR_PHASE_PROBE_BW_CRUISE; -+ case BBR_BW_PROBE_REFILL: -+ return BBR_PHASE_PROBE_BW_REFILL; -+ default: -+ return BBR_PHASE_INVALID; -+ } -+} -+ - static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr, -- union tcp_cc_info *info) -+ union tcp_cc_info *info) - { - if (ext & (1 << (INET_DIAG_BBRINFO - 1)) || - ext & (1 << (INET_DIAG_VEGASINFO - 1))) { -- struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); -- u64 bw = bbr_bw(sk); -- -- bw = bw * tp->mss_cache * USEC_PER_SEC >> BW_SCALE; -- memset(&info->bbr, 0, sizeof(info->bbr)); -- info->bbr.bbr_bw_lo = (u32)bw; -- info->bbr.bbr_bw_hi = (u32)(bw >> 32); -- info->bbr.bbr_min_rtt = bbr->min_rtt_us; -- info->bbr.bbr_pacing_gain = bbr->pacing_gain; -- info->bbr.bbr_cwnd_gain = bbr->cwnd_gain; -+ u64 bw = bbr_bw_bytes_per_sec(sk, bbr_bw(sk)); -+ u64 bw_hi = bbr_bw_bytes_per_sec(sk, bbr_max_bw(sk)); -+ u64 bw_lo = bbr->bw_lo == ~0U ? 
-+ ~0ULL : bbr_bw_bytes_per_sec(sk, bbr->bw_lo); -+ struct tcp_bbr_info *bbr_info = &info->bbr; -+ -+ memset(bbr_info, 0, sizeof(*bbr_info)); -+ bbr_info->bbr_bw_lo = (u32)bw; -+ bbr_info->bbr_bw_hi = (u32)(bw >> 32); -+ bbr_info->bbr_min_rtt = bbr->min_rtt_us; -+ bbr_info->bbr_pacing_gain = bbr->pacing_gain; -+ bbr_info->bbr_cwnd_gain = bbr->cwnd_gain; -+ bbr_info->bbr_bw_hi_lsb = (u32)bw_hi; -+ bbr_info->bbr_bw_hi_msb = (u32)(bw_hi >> 32); -+ bbr_info->bbr_bw_lo_lsb = (u32)bw_lo; -+ bbr_info->bbr_bw_lo_msb = (u32)(bw_lo >> 32); -+ bbr_info->bbr_mode = bbr->mode; -+ bbr_info->bbr_phase = (__u8)bbr_get_phase(bbr); -+ bbr_info->bbr_version = (__u8)BBR_VERSION; -+ bbr_info->bbr_inflight_lo = bbr->inflight_lo; -+ bbr_info->bbr_inflight_hi = bbr->inflight_hi; -+ bbr_info->bbr_extra_acked = bbr_extra_acked(sk); - *attr = INET_DIAG_BBRINFO; -- return sizeof(info->bbr); -+ return sizeof(*bbr_info); - } - return 0; - } - - __bpf_kfunc static void bbr_set_state(struct sock *sk, u8 new_state) - { -+ struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - - if (new_state == TCP_CA_Loss) { -- struct rate_sample rs = { .losses = 1 }; - - bbr->prev_ca_state = TCP_CA_Loss; -- bbr->full_bw = 0; -- bbr->round_start = 1; /* treat RTO like end of a round */ -- bbr_lt_bw_sampling(sk, &rs); -+ tcp_plb_update_state_upon_rto(sk, &bbr->plb); -+ /* The tcp_write_timeout() call to sk_rethink_txhash() likely -+ * repathed this flow, so re-learn the min network RTT on the -+ * new path: -+ */ -+ bbr_reset_full_bw(sk); -+ if (!bbr_is_probing_bandwidth(sk) && bbr->inflight_lo == ~0U) { -+ /* bbr_adapt_lower_bounds() needs cwnd before -+ * we suffered an RTO, to update inflight_lo: -+ */ -+ bbr->inflight_lo = -+ max(tcp_snd_cwnd(tp), bbr->prior_cwnd); -+ } -+ } else if (bbr->prev_ca_state == TCP_CA_Loss && -+ new_state != TCP_CA_Loss) { -+ bbr_exit_loss_recovery(sk); - } - } - -+ - static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { -- .flags = TCP_CONG_NON_RESTRICTED, -+ .flags = TCP_CONG_NON_RESTRICTED | TCP_CONG_WANTS_CE_EVENTS, - .name = "bbr", - .owner = THIS_MODULE, - .init = bbr_init, - .cong_control = bbr_main, - .sndbuf_expand = bbr_sndbuf_expand, -+ .skb_marked_lost = bbr_skb_marked_lost, - .undo_cwnd = bbr_undo_cwnd, - .cwnd_event = bbr_cwnd_event, - .ssthresh = bbr_ssthresh, -@@ -1175,10 +2358,11 @@ BTF_SET8_START(tcp_bbr_check_kfunc_ids) - BTF_ID_FLAGS(func, bbr_init) - BTF_ID_FLAGS(func, bbr_main) - BTF_ID_FLAGS(func, bbr_sndbuf_expand) -+BTF_ID_FLAGS(func, bbr_skb_marked_lost) - BTF_ID_FLAGS(func, bbr_undo_cwnd) - BTF_ID_FLAGS(func, bbr_cwnd_event) - BTF_ID_FLAGS(func, bbr_ssthresh) --BTF_ID_FLAGS(func, bbr_min_tso_segs) -+BTF_ID_FLAGS(func, bbr_tso_segs) - BTF_ID_FLAGS(func, bbr_set_state) - #endif - #endif -@@ -1213,5 +2397,12 @@ MODULE_AUTHOR("Van Jacobson "); - MODULE_AUTHOR("Yuchung Cheng "); - MODULE_AUTHOR("Soheil Hassas Yeganeh "); -+MODULE_AUTHOR("Priyaranjan Jha "); -+MODULE_AUTHOR("Yousuk Seung "); -+MODULE_AUTHOR("Kevin Yang "); -+MODULE_AUTHOR("Arjun Roy "); -+MODULE_AUTHOR("David Morley "); -+ - MODULE_LICENSE("Dual BSD/GPL"); - MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)"); -+MODULE_VERSION(__stringify(BBR_VERSION)); -From 17d1b4acccbfb50826774eda03ac802aeb81c49e Mon Sep 17 00:00:00 2001 -From: Adithya Abraham Philip -Date: Fri, 11 Jun 2021 21:56:10 +0000 -Subject: [PATCH 17/18] net-tcp_bbr: v3: ensure ECN-enabled BBR flows set ECT - on retransmits - -Adds a new flag TCP_ECN_ECT_PERMANENT that is used by CCAs to -indicate that retransmitted 
packets and pure ACKs must have the -ECT bit set. This is necessary for BBR, which when using -ECN expects ECT to be set even on retransmitted packets and ACKs. - -Previous to this addition of TCP_ECN_ECT_PERMANENT, CCAs which can use -ECN but don't "need" it did not have a way to indicate that ECT should -be set on retransmissions/ACKs. - -Signed-off-by: Adithya Abraham Philip -Signed-off-by: Neal Cardwell -Change-Id: I8b048eaab35e136fe6501ef6cd89fd9faa15e6d2 -Signed-off-by: Alexandre Frade ---- - include/net/tcp.h | 1 + - net/ipv4/tcp_bbr.c | 3 +++ - net/ipv4/tcp_output.c | 3 ++- - 3 files changed, 6 insertions(+), 1 deletion(-) - ---- a/include/net/tcp.h -+++ b/include/net/tcp.h -@@ -373,6 +373,7 @@ static inline void tcp_dec_quickack_mode - #define TCP_ECN_DEMAND_CWR 4 - #define TCP_ECN_SEEN 8 - #define TCP_ECN_LOW 16 -+#define TCP_ECN_ECT_PERMANENT 32 - - enum tcp_tw_status { - TCP_TW_SUCCESS = 0, ---- a/net/ipv4/tcp_bbr.c -+++ b/net/ipv4/tcp_bbr.c -@@ -2151,6 +2151,9 @@ __bpf_kfunc static void bbr_init(struct - bbr->plb.pause_until = 0; - - tp->fast_ack_mode = bbr_fast_ack_mode ? 1 : 0; -+ -+ if (bbr_can_use_ecn(sk)) -+ tp->ecn_flags |= TCP_ECN_ECT_PERMANENT; - } - - /* BBR marks the current round trip as a loss round. */ ---- a/net/ipv4/tcp_output.c -+++ b/net/ipv4/tcp_output.c -@@ -386,7 +386,8 @@ static void tcp_ecn_send(struct sock *sk - th->cwr = 1; - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - } -- } else if (!tcp_ca_needs_ecn(sk)) { -+ } else if (!(tp->ecn_flags & TCP_ECN_ECT_PERMANENT) && -+ !tcp_ca_needs_ecn(sk)) { - /* ACK or retransmitted segment: clear ECT|CE */ - INET_ECN_dontxmit(sk); - } -From 54488b0d0855fc0e772fcdf4d7a2756219e3757c Mon Sep 17 00:00:00 2001 -From: Neal Cardwell -Date: Sun, 23 Jul 2023 23:25:34 -0400 -Subject: [PATCH 18/18] tcp: export TCPI_OPT_ECN_LOW in tcp_info tcpi_options - field - -Analogous to other important ECN information, export TCPI_OPT_ECN_LOW -in tcp_info tcpi_options field. 
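For illustration, userspace can consume the exported bit with a plain
TCP_INFO query. A minimal sketch, assuming only the standard socket API;
the TCPI_OPT_ECN_LOW value of 64 mirrors the uapi hunk below:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

#ifndef TCPI_OPT_ECN_LOW
#define TCPI_OPT_ECN_LOW 64	/* from the include/uapi/linux/tcp.h hunk below */
#endif

/* Return 1 if low-latency ECN was configured for this connection,
 * 0 if not, -1 on getsockopt() failure.
 */
static int tcp_uses_low_ecn(int fd)
{
	struct tcp_info info;
	socklen_t len = sizeof(info);

	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &info, &len) < 0)
		return -1;
	return !!(info.tcpi_options & TCPI_OPT_ECN_LOW);
}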
- -Signed-off-by: Neal Cardwell -Change-Id: I08d8d8c7e8780e6e37df54038ee50301ac5a0320 -Signed-off-by: Alexandre Frade ---- - include/uapi/linux/tcp.h | 1 + - net/ipv4/tcp.c | 2 ++ - 2 files changed, 3 insertions(+) - ---- a/include/uapi/linux/tcp.h -+++ b/include/uapi/linux/tcp.h -@@ -170,6 +170,7 @@ enum tcp_fastopen_client_fail { - #define TCPI_OPT_ECN 8 /* ECN was negociated at TCP session init */ - #define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */ - #define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */ -+#define TCPI_OPT_ECN_LOW 64 /* Low-latency ECN configured at init */ - - /* - * Sender's congestion state indicating normal or abnormal situations ---- a/net/ipv4/tcp.c -+++ b/net/ipv4/tcp.c -@@ -3782,6 +3782,8 @@ void tcp_get_info(struct sock *sk, struc - info->tcpi_options |= TCPI_OPT_ECN; - if (tp->ecn_flags & TCP_ECN_SEEN) - info->tcpi_options |= TCPI_OPT_ECN_SEEN; -+ if (tp->ecn_flags & TCP_ECN_LOW) -+ info->tcpi_options |= TCPI_OPT_ECN_LOW; - if (tp->syn_data_acked) - info->tcpi_options |= TCPI_OPT_SYN_DATA; - diff --git a/6.10/target/linux/generic/hack-6.10/999-mptcp-bpf.patch b/6.10/target/linux/generic/hack-6.10/999-mptcp-bpf.patch deleted file mode 100644 index 73cdec3a..00000000 --- a/6.10/target/linux/generic/hack-6.10/999-mptcp-bpf.patch +++ /dev/null @@ -1,427 +0,0 @@ -From f1dfe037fdf0c300f38bab0bb8f256d4195d45e8 Mon Sep 17 00:00:00 2001 -From: Geliang Tang -Date: Tue, 19 Dec 2023 13:27:59 +0100 -Subject: [PATCH] mptcp: add sched_data helpers - -Add a new helper mptcp_sched_data_set_contexts() to set the subflow -pointers array in struct mptcp_sched_data. Add a new helper -mptcp_subflow_ctx_by_pos() to get the given pos subflow from the -contexts array in struct mptcp_sched_data. They will be invoked by -the BPF schedulers to export the subflow pointers to the BPF contexts. 
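To make the consumer side concrete: a BPF scheduler declares these helpers
as __ksym externs and calls them from its get_subflow hook. A minimal
sketch modeled on a pick-the-first-subflow policy; the program name is
illustrative, the extern signatures mirror the declarations in this patch,
and the SEC()/BPF_PROG wrappers follow the usual struct_ops conventions:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

extern struct mptcp_subflow_context *
bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data,
			     unsigned int pos) __ksym;
extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
					bool scheduled) __ksym;

char _license[] SEC("license") = "GPL";

SEC("struct_ops/mptcp_sched_first_get_subflow")
int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk,
	     struct mptcp_sched_data *data)
{
	/* Always transmit on the first subflow exported by the kernel. */
	struct mptcp_subflow_context *subflow;

	subflow = bpf_mptcp_subflow_ctx_by_pos(data, 0);
	if (!subflow)
		return -1;

	mptcp_subflow_set_scheduled(subflow, true);
	return 0;
}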
- -Signed-off-by: Geliang Tang -Reviewed-by: Mat Martineau ---- - net/mptcp/bpf.c | 14 ++++++++++++++ - net/mptcp/protocol.h | 2 ++ - net/mptcp/sched.c | 22 ++++++++++++++++++++++ - 3 files changed, 38 insertions(+) - -diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c -index 8a16672b94e23..c3d62535eb0cf 100644 ---- a/net/mptcp/bpf.c -+++ b/net/mptcp/bpf.c -@@ -29,6 +29,20 @@ static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = { - .set = &bpf_mptcp_fmodret_ids, - }; - -+__diag_push(); -+__diag_ignore_all("-Wmissing-prototypes", -+ "kfuncs which will be used in BPF programs"); -+ -+__bpf_kfunc struct mptcp_subflow_context * -+bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) -+{ -+ if (pos >= MPTCP_SUBFLOWS_MAX) -+ return NULL; -+ return data->contexts[pos]; -+} -+ -+__diag_pop(); -+ - static int __init bpf_mptcp_kfunc_init(void) - { - return register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set); -diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h -index 3517f2d24a226..7cf5d2de74419 100644 ---- a/net/mptcp/protocol.h -+++ b/net/mptcp/protocol.h -@@ -636,6 +636,8 @@ void __mptcp_subflow_send_ack(struct sock *ssk); - void mptcp_subflow_reset(struct sock *ssk); - void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); - void mptcp_sock_graft(struct sock *sk, struct socket *parent); -+struct mptcp_subflow_context * -+bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos); - struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk); - bool __mptcp_close(struct sock *sk, long timeout); - void mptcp_cancel_work(struct sock *sk); -diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c -index 4ab0693c069c0..a7e1c10b19848 100644 ---- a/net/mptcp/sched.c -+++ b/net/mptcp/sched.c -@@ -121,6 +121,26 @@ void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, - WRITE_ONCE(subflow->scheduled, scheduled); - } - -+static void mptcp_sched_data_set_contexts(const struct mptcp_sock *msk, -+ struct mptcp_sched_data *data) -+{ -+ struct mptcp_subflow_context *subflow; -+ int i = 0; -+ -+ mptcp_for_each_subflow(msk, subflow) { -+ if (i == MPTCP_SUBFLOWS_MAX) { -+ pr_warn_once("too many subflows"); -+ break; -+ } -+ mptcp_subflow_set_scheduled(subflow, false); -+ data->contexts[i++] = subflow; -+ } -+ data->subflows = i; -+ -+ for (; i < MPTCP_SUBFLOWS_MAX; i++) -+ data->contexts[i] = NULL; -+} -+ - int mptcp_sched_get_send(struct mptcp_sock *msk) - { - struct mptcp_subflow_context *subflow; -@@ -147,6 +167,7 @@ int mptcp_sched_get_send(struct mptcp_sock *msk) - data.reinject = false; - if (msk->sched == &mptcp_sched_default || !msk->sched) - return mptcp_sched_default_get_subflow(msk, &data); -+ mptcp_sched_data_set_contexts(msk, &data); - return msk->sched->get_subflow(msk, &data); - } - -@@ -169,5 +190,6 @@ int mptcp_sched_get_retrans(struct mptcp_sock *msk) - data.reinject = true; - if (msk->sched == &mptcp_sched_default || !msk->sched) - return mptcp_sched_default_get_subflow(msk, &data); -+ mptcp_sched_data_set_contexts(msk, &data); - return msk->sched->get_subflow(msk, &data); - } -From 229208a99e76be925541e898fd9a272984b5958c Mon Sep 17 00:00:00 2001 -From: Geliang Tang -Date: Tue, 19 Dec 2023 13:28:00 +0100 -Subject: [PATCH] bpf: Add bpf_mptcp_sched_ops - -This patch implements a new struct bpf_struct_ops: bpf_mptcp_sched_ops. -Register and unregister the bpf scheduler in .reg and .unreg. - -Add write access for the scheduled flag of struct mptcp_subflow_context -in .btf_struct_access. 
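The .reg/.unreg hooks below fire when a scheduler object is attached or
detached. On the BPF side, the object being attached is a struct_ops map;
continuing the sketch above (names illustrative, init/release as empty
stubs):

SEC("struct_ops/mptcp_sched_first_init")
void BPF_PROG(mptcp_sched_first_init, struct mptcp_sock *msk)
{
}

SEC("struct_ops/mptcp_sched_first_release")
void BPF_PROG(mptcp_sched_first_release, struct mptcp_sock *msk)
{
}

SEC(".struct_ops")
struct mptcp_sched_ops first = {
	.init		= (void *)mptcp_sched_first_init,
	.release	= (void *)mptcp_sched_first_release,
	.get_subflow	= (void *)bpf_first_get_subflow,
	.name		= "bpf_first",
};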
- -This MPTCP BPF scheduler implementation is similar to BPF TCP CC. And -net/ipv4/bpf_tcp_ca.c is a frame of reference for this patch. - -Acked-by: Paolo Abeni -Reviewed-by: Mat Martineau -Co-developed-by: Matthieu Baerts -Signed-off-by: Matthieu Baerts -Signed-off-by: Geliang Tang ---- - kernel/bpf/bpf_struct_ops_types.h | 4 + - net/mptcp/bpf.c | 146 ++++++++++++++++++++++++++++++ - 2 files changed, 150 insertions(+) - -diff --git a/kernel/bpf/bpf_struct_ops_types.h b/kernel/bpf/bpf_struct_ops_types.h -index 5678a9ddf8178..5a6b0c0d8d3db 100644 ---- a/kernel/bpf/bpf_struct_ops_types.h -+++ b/kernel/bpf/bpf_struct_ops_types.h -@@ -8,5 +8,9 @@ BPF_STRUCT_OPS_TYPE(bpf_dummy_ops) - #ifdef CONFIG_INET - #include - BPF_STRUCT_OPS_TYPE(tcp_congestion_ops) -+#ifdef CONFIG_MPTCP -+#include -+BPF_STRUCT_OPS_TYPE(mptcp_sched_ops) -+#endif - #endif - #endif -diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c -index c3d62535eb0cf..dfcaaf0e07dd5 100644 ---- a/net/mptcp/bpf.c -+++ b/net/mptcp/bpf.c -@@ -10,8 +10,153 @@ - #define pr_fmt(fmt) "MPTCP: " fmt - - #include -+#include -+#include -+#include -+#include - #include "protocol.h" - -+#ifdef CONFIG_BPF_JIT -+extern struct bpf_struct_ops bpf_mptcp_sched_ops; -+static const struct btf_type *mptcp_sock_type, *mptcp_subflow_type __read_mostly; -+static u32 mptcp_sock_id, mptcp_subflow_id; -+ -+static const struct bpf_func_proto * -+bpf_mptcp_sched_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) -+{ -+ switch (func_id) { -+ case BPF_FUNC_sk_storage_get: -+ return &bpf_sk_storage_get_proto; -+ case BPF_FUNC_sk_storage_delete: -+ return &bpf_sk_storage_delete_proto; -+ case BPF_FUNC_skc_to_tcp6_sock: -+ return &bpf_skc_to_tcp6_sock_proto; -+ case BPF_FUNC_skc_to_tcp_sock: -+ return &bpf_skc_to_tcp_sock_proto; -+ default: -+ return bpf_base_func_proto(func_id); -+ } -+} -+ -+static int bpf_mptcp_sched_btf_struct_access(struct bpf_verifier_log *log, -+ const struct bpf_reg_state *reg, -+ int off, int size) -+{ -+ const struct btf_type *t; -+ size_t end; -+ -+ t = btf_type_by_id(reg->btf, reg->btf_id); -+ if (t != mptcp_sock_type && t != mptcp_subflow_type) { -+ bpf_log(log, "only access to mptcp sock or subflow is supported\n"); -+ return -EACCES; -+ } -+ -+ switch (off) { -+ case offsetof(struct mptcp_sock, snd_burst): -+ end = offsetofend(struct mptcp_sock, snd_burst); -+ break; -+ case offsetof(struct mptcp_subflow_context, scheduled): -+ end = offsetofend(struct mptcp_subflow_context, scheduled); -+ break; -+ case offsetof(struct mptcp_subflow_context, avg_pacing_rate): -+ end = offsetofend(struct mptcp_subflow_context, avg_pacing_rate); -+ break; -+ default: -+ bpf_log(log, "no write support to %s at off %d\n", -+ t == mptcp_sock_type ? "mptcp_sock" : "mptcp_subflow_context", off); -+ return -EACCES; -+ } -+ -+ if (off + size > end) { -+ bpf_log(log, "access beyond %s at off %u size %u ended at %zu", -+ t == mptcp_sock_type ? 
"mptcp_sock" : "mptcp_subflow_context", -+ off, size, end); -+ return -EACCES; -+ } -+ -+ return NOT_INIT; -+} -+ -+static const struct bpf_verifier_ops bpf_mptcp_sched_verifier_ops = { -+ .get_func_proto = bpf_mptcp_sched_get_func_proto, -+ .is_valid_access = bpf_tracing_btf_ctx_access, -+ .btf_struct_access = bpf_mptcp_sched_btf_struct_access, -+}; -+ -+static int bpf_mptcp_sched_reg(void *kdata) -+{ -+ return mptcp_register_scheduler(kdata); -+} -+ -+static void bpf_mptcp_sched_unreg(void *kdata) -+{ -+ mptcp_unregister_scheduler(kdata); -+} -+ -+static int bpf_mptcp_sched_check_member(const struct btf_type *t, -+ const struct btf_member *member, -+ const struct bpf_prog *prog) -+{ -+ return 0; -+} -+ -+static int bpf_mptcp_sched_init_member(const struct btf_type *t, -+ const struct btf_member *member, -+ void *kdata, const void *udata) -+{ -+ const struct mptcp_sched_ops *usched; -+ struct mptcp_sched_ops *sched; -+ u32 moff; -+ -+ usched = (const struct mptcp_sched_ops *)udata; -+ sched = (struct mptcp_sched_ops *)kdata; -+ -+ moff = __btf_member_bit_offset(t, member) / 8; -+ switch (moff) { -+ case offsetof(struct mptcp_sched_ops, name): -+ if (bpf_obj_name_cpy(sched->name, usched->name, -+ sizeof(sched->name)) <= 0) -+ return -EINVAL; -+ if (mptcp_sched_find(usched->name)) -+ return -EEXIST; -+ return 1; -+ } -+ -+ return 0; -+} -+ -+static int bpf_mptcp_sched_init(struct btf *btf) -+{ -+ s32 type_id; -+ -+ type_id = btf_find_by_name_kind(btf, "mptcp_sock", -+ BTF_KIND_STRUCT); -+ if (type_id < 0) -+ return -EINVAL; -+ mptcp_sock_id = type_id; -+ mptcp_sock_type = btf_type_by_id(btf, mptcp_sock_id); -+ -+ type_id = btf_find_by_name_kind(btf, "mptcp_subflow_context", -+ BTF_KIND_STRUCT); -+ if (type_id < 0) -+ return -EINVAL; -+ mptcp_subflow_id = type_id; -+ mptcp_subflow_type = btf_type_by_id(btf, mptcp_subflow_id); -+ -+ return 0; -+} -+ -+struct bpf_struct_ops bpf_mptcp_sched_ops = { -+ .verifier_ops = &bpf_mptcp_sched_verifier_ops, -+ .reg = bpf_mptcp_sched_reg, -+ .unreg = bpf_mptcp_sched_unreg, -+ .check_member = bpf_mptcp_sched_check_member, -+ .init_member = bpf_mptcp_sched_init_member, -+ .init = bpf_mptcp_sched_init, -+ .name = "mptcp_sched_ops", -+}; -+#endif /* CONFIG_BPF_JIT */ -+ - struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk) - { - if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk)) -From c128adc086aa390e8dba43bcad604fe223e50bf4 Mon Sep 17 00:00:00 2001 -From: Geliang Tang -Date: Tue, 19 Dec 2023 13:28:01 +0100 -Subject: [PATCH] bpf: Add bpf_mptcp_sched_kfunc_set - -This patch adds a new struct btf_kfunc_id_set for MPTCP scheduler. Add -mptcp_subflow_set_scheduled() and mptcp_sched_data_set_contexts() helpers -into this id_set, and register it in bpf_mptcp_kfunc_init() to make sure -these helpers can be accessed from the BPF context. 
-
-Reviewed-by: Mat Martineau
-Signed-off-by: Geliang Tang
----
- net/mptcp/bpf.c | 16 +++++++++++++++-
- 1 file changed, 15 insertions(+), 1 deletion(-)
-
-diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
-index dfcaaf0e07dd..aec9515888f7 100644
---- a/net/mptcp/bpf.c
-+++ b/net/mptcp/bpf.c
-@@ -189,8 +189,22 @@ bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int p
- 
- __diag_pop();
- 
-+BTF_SET8_START(bpf_mptcp_sched_kfunc_ids)
-+BTF_ID_FLAGS(func, mptcp_subflow_set_scheduled)
-+BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx_by_pos)
-+BTF_SET8_END(bpf_mptcp_sched_kfunc_ids)
-+
-+static const struct btf_kfunc_id_set bpf_mptcp_sched_kfunc_set = {
-+	.owner	= THIS_MODULE,
-+	.set	= &bpf_mptcp_sched_kfunc_ids,
-+};
-+
- static int __init bpf_mptcp_kfunc_init(void)
- {
--	return register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set);
-+	int ret;
-+
-+	ret = register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set);
-+	return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
-+						&bpf_mptcp_sched_kfunc_set);
- }
- late_initcall(bpf_mptcp_kfunc_init);
-From f322294a8f32ddf7e40021d94c19665c302dbd79 Mon Sep 17 00:00:00 2001
-From: Geliang Tang
-Date: Tue, 19 Dec 2023 13:28:12 +0100
-Subject: [PATCH] bpf: Export more bpf_burst related functions
-
-sk_stream_memory_free() and tcp_rtx_and_write_queues_empty() need to be
-exported into the BPF context for the bpf_burst scheduler, but these two
-functions are inline. This patch therefore adds two wrappers for them,
-and exports the wrappers to the BPF context.
-
-Add more bpf_burst related functions into bpf_mptcp_sched_kfunc_set to make
-sure these helpers can be accessed from the BPF context.
-
-Signed-off-by: Geliang Tang
-Reviewed-by: Mat Martineau
----
- net/mptcp/bpf.c | 11 +++++++++++
- net/mptcp/protocol.c | 4 ++--
- net/mptcp/protocol.h | 3 +++
- 3 files changed, 16 insertions(+), 2 deletions(-)
-
-diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
-index aec9515888f7..007c2034db65 100644
---- a/net/mptcp/bpf.c
-+++ b/net/mptcp/bpf.c
-@@ -187,11 +187,22 @@ bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int p
- 	return data->contexts[pos];
- }
- 
-+__bpf_kfunc bool bpf_mptcp_subflow_queues_empty(struct sock *sk)
-+{
-+	return tcp_rtx_queue_empty(sk);
-+}
-+
- __diag_pop();
- 
- BTF_SET8_START(bpf_mptcp_sched_kfunc_ids)
- BTF_ID_FLAGS(func, mptcp_subflow_set_scheduled)
- BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx_by_pos)
-+BTF_ID_FLAGS(func, mptcp_subflow_active)
-+BTF_ID_FLAGS(func, mptcp_set_timeout)
-+BTF_ID_FLAGS(func, mptcp_wnd_end)
-+BTF_ID_FLAGS(func, tcp_stream_memory_free)
-+BTF_ID_FLAGS(func, bpf_mptcp_subflow_queues_empty)
-+BTF_ID_FLAGS(func, mptcp_pm_subflow_chk_stale)
- BTF_SET8_END(bpf_mptcp_sched_kfunc_ids)
- 
- static const struct btf_kfunc_id_set bpf_mptcp_sched_kfunc_set = {
-diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
-index 8bfd266f2754..c12bf17691d7 100644
---- a/net/mptcp/protocol.c
-+++ b/net/mptcp/protocol.c
-@@ -50,7 +50,7 @@ DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
- static struct net_device mptcp_napi_dev;
- 
- /* Returns end sequence number of the receiver's advertised window */
--static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
-+u64 mptcp_wnd_end(const struct mptcp_sock *msk)
- {
- 	return READ_ONCE(msk->wnd_end);
- }
-@@ -485,7 +485,7 @@ static long mptcp_timeout_from_subflow(const struct mptcp_subflow_context *subfl
- 	inet_csk(ssk)->icsk_timeout - jiffies : 0;
- }
- 
--static void mptcp_set_timeout(struct sock *sk)
-+void mptcp_set_timeout(struct sock *sk)
- {
- 	struct mptcp_subflow_context *subflow;
- 	long tout = 0;
-diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
-index 7cf5d2de7441..f7b9c1b995df 100644
---- a/net/mptcp/protocol.h
-+++ b/net/mptcp/protocol.h
-@@ -636,6 +636,9 @@ void __mptcp_subflow_send_ack(struct sock *ssk);
- void mptcp_subflow_reset(struct sock *ssk);
- void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
- void mptcp_sock_graft(struct sock *sk, struct socket *parent);
-+u64 mptcp_wnd_end(const struct mptcp_sock *msk);
-+void mptcp_set_timeout(struct sock *sk);
-+bool bpf_mptcp_subflow_queues_empty(struct sock *sk);
- struct mptcp_subflow_context *
- bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos);
- struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk);
diff --git a/6.10/target/linux/generic/pending-6.10/191-rtc-rs5c372-let_the_alarm_to_be_used_as_wakeup_source.patch b/6.10/target/linux/generic/pending-6.10/191-rtc-rs5c372-let_the_alarm_to_be_used_as_wakeup_source.patch
deleted file mode 100644
index a29c548b..00000000
--- a/6.10/target/linux/generic/pending-6.10/191-rtc-rs5c372-let_the_alarm_to_be_used_as_wakeup_source.patch
+++ /dev/null
@@ -1,71 +0,0 @@
-From: Daniel González Cabanelas
-Subject: [PATCH 2/2] rtc: rs5c372: let the alarm to be used as wakeup source
-
-Currently there is no use for the interrupts on the rs5c372 RTC and the
-wakealarm isn't enabled. There are some devices like NASes which use this
-RTC to wake up from the power off state when the INTR pin is activated by
-the alarm clock.
-
-Enable the alarm and let it be used as a wakeup source.
-
-Tested on a Buffalo LS421DE NAS.
-
-Signed-off-by: Daniel González Cabanelas
---
- drivers/rtc/rtc-rs5c372.c | 16 ++++++++++++++++
- 1 file changed, 16 insertions(+)
-
---- a/drivers/rtc/rtc-rs5c372.c
-+++ b/drivers/rtc/rtc-rs5c372.c
-@@ -832,6 +832,7 @@ static int rs5c372_probe(struct i2c_clie
- 	int err = 0;
- 	int smbus_mode = 0;
- 	struct rs5c372 *rs5c372;
-+	bool rs5c372_can_wakeup_device = false;
- 
- 	dev_dbg(&client->dev, "%s\n", __func__);
- 
-@@ -868,6 +869,12 @@ static int rs5c372_probe(struct i2c_clie
- 		rs5c372->type = id->driver_data;
- 	}
- 
-+#ifdef CONFIG_OF
-+	if(of_property_read_bool(client->dev.of_node,
-+				 "wakeup-source"))
-+		rs5c372_can_wakeup_device = true;
-+#endif
-+
- 	/* we read registers 0x0f then 0x00-0x0f; skip the first one */
- 	rs5c372->regs = &rs5c372->buf[1];
- 	rs5c372->smbus = smbus_mode;
-@@ -901,6 +908,8 @@ static int rs5c372_probe(struct i2c_clie
- 		goto exit;
- 	}
- 
-+	rs5c372->has_irq = 1;
-+
- 	/* if the oscillator lost power and no other software (like
- 	 * the bootloader) set it up, do it here.
- 	 *
-@@ -927,6 +936,10 @@ static int rs5c372_probe(struct i2c_clie
- 	);
- 
- 	/* REVISIT use client->irq to register alarm irq ...
*/ -+ if (rs5c372_can_wakeup_device) { -+ device_init_wakeup(&client->dev, true); -+ } -+ - rs5c372->rtc = devm_rtc_device_register(&client->dev, - rs5c372_driver.driver.name, - &rs5c372_rtc_ops, THIS_MODULE); -@@ -940,6 +953,10 @@ static int rs5c372_probe(struct i2c_clie - if (err) - goto exit; - -+ /* the rs5c372 alarm only supports a minute accuracy */ -+ set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rs5c372->rtc->features); -+ clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rs5c372->rtc->features); -+ - return 0; - - exit: diff --git a/6.10/target/linux/generic/pending-6.10/270-platform-mikrotik-build-bits.patch b/6.10/target/linux/generic/pending-6.10/270-platform-mikrotik-build-bits.patch deleted file mode 100644 index 7ca84e04..00000000 --- a/6.10/target/linux/generic/pending-6.10/270-platform-mikrotik-build-bits.patch +++ /dev/null @@ -1,31 +0,0 @@ -From c2deb5ef01a0ef09088832744cbace9e239a6ee0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Thibaut=20VAR=C3=88NE?= -Date: Sat, 28 Mar 2020 12:11:50 +0100 -Subject: [PATCH] generic: platform/mikrotik build bits (5.4) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This patch adds platform/mikrotik kernel build bits - -Signed-off-by: Thibaut VARÈNE ---- - drivers/platform/Kconfig | 2 ++ - drivers/platform/Makefile | 1 + - 2 files changed, 3 insertions(+) - ---- a/drivers/platform/Kconfig -+++ b/drivers/platform/Kconfig -@@ -14,3 +14,5 @@ source "drivers/platform/olpc/Kconfig" - source "drivers/platform/surface/Kconfig" - - source "drivers/platform/x86/Kconfig" -+ -+source "drivers/platform/mikrotik/Kconfig" ---- a/drivers/platform/Makefile -+++ b/drivers/platform/Makefile -@@ -11,3 +11,4 @@ obj-$(CONFIG_OLPC_EC) += olpc/ - obj-$(CONFIG_GOLDFISH) += goldfish/ - obj-$(CONFIG_CHROME_PLATFORMS) += chrome/ - obj-$(CONFIG_SURFACE_PLATFORMS) += surface/ -+obj-$(CONFIG_MIKROTIK) += mikrotik/ diff --git a/6.10/target/linux/generic/pending-6.10/305-mips_module_reloc.patch b/6.10/target/linux/generic/pending-6.10/305-mips_module_reloc.patch deleted file mode 100644 index 6d13574b..00000000 --- a/6.10/target/linux/generic/pending-6.10/305-mips_module_reloc.patch +++ /dev/null @@ -1,370 +0,0 @@ -From: Felix Fietkau -Subject: mips: replace -mlong-calls with -mno-long-calls to make function calls faster in kernel modules to achieve this, try to - -lede-commit: 3b3d64743ba2a874df9d70cd19e242205b0a788c -Signed-off-by: Felix Fietkau ---- - arch/mips/Makefile | 5 + - arch/mips/include/asm/module.h | 5 + - arch/mips/kernel/module.c | 279 ++++++++++++++++++++++++++++++++++++++++- - 3 files changed, 284 insertions(+), 5 deletions(-) - ---- a/arch/mips/Makefile -+++ b/arch/mips/Makefile -@@ -97,8 +97,18 @@ all-$(CONFIG_SYS_SUPPORTS_ZBOOT)+= vmlin - cflags-y += -G 0 -mno-abicalls -fno-pic -pipe -mno-branch-likely - cflags-y += -msoft-float -Wa,-msoft-float - LDFLAGS_vmlinux += -G 0 -static -n -nostdlib -+ifdef CONFIG_64BIT - KBUILD_AFLAGS_MODULE += -mlong-calls - KBUILD_CFLAGS_MODULE += -mlong-calls -+else -+ ifdef CONFIG_DYNAMIC_FTRACE -+ KBUILD_AFLAGS_MODULE += -mlong-calls -+ KBUILD_CFLAGS_MODULE += -mlong-calls -+ else -+ KBUILD_AFLAGS_MODULE += -mno-long-calls -+ KBUILD_CFLAGS_MODULE += -mno-long-calls -+ endif -+endif - - ifeq ($(CONFIG_RELOCATABLE),y) - LDFLAGS_vmlinux += --emit-relocs ---- a/arch/mips/include/asm/module.h -+++ b/arch/mips/include/asm/module.h -@@ -12,6 +12,11 @@ struct mod_arch_specific { - const struct exception_table_entry *dbe_start; - const struct exception_table_entry *dbe_end; - struct mips_hi16 
*r_mips_hi16_list; -+ -+ void *phys_plt_tbl; -+ void *virt_plt_tbl; -+ unsigned int phys_plt_offset; -+ unsigned int virt_plt_offset; - }; - - typedef uint8_t Elf64_Byte; /* Type for a 8-bit quantity. */ ---- a/arch/mips/kernel/module.c -+++ b/arch/mips/kernel/module.c -@@ -32,23 +32,261 @@ struct mips_hi16 { - static LIST_HEAD(dbe_list); - static DEFINE_SPINLOCK(dbe_lock); - --#ifdef MODULE_START -+/* -+ * Get the potential max trampolines size required of the init and -+ * non-init sections. Only used if we cannot find enough contiguous -+ * physically mapped memory to put the module into. -+ */ -+static unsigned int -+get_plt_size(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, -+ const char *secstrings, unsigned int symindex, bool is_init) -+{ -+ unsigned long ret = 0; -+ unsigned int i, j; -+ Elf_Sym *syms; -+ -+ /* Everything marked ALLOC (this includes the exported symbols) */ -+ for (i = 1; i < hdr->e_shnum; ++i) { -+ unsigned int info = sechdrs[i].sh_info; -+ -+ if (sechdrs[i].sh_type != SHT_REL -+ && sechdrs[i].sh_type != SHT_RELA) -+ continue; -+ -+ /* Not a valid relocation section? */ -+ if (info >= hdr->e_shnum) -+ continue; -+ -+ /* Don't bother with non-allocated sections */ -+ if (!(sechdrs[info].sh_flags & SHF_ALLOC)) -+ continue; -+ -+ /* If it's called *.init*, and we're not init, we're -+ not interested */ -+ if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0) -+ != is_init) -+ continue; -+ -+ syms = (Elf_Sym *) sechdrs[symindex].sh_addr; -+ if (sechdrs[i].sh_type == SHT_REL) { -+ Elf_Mips_Rel *rel = (void *) sechdrs[i].sh_addr; -+ unsigned int size = sechdrs[i].sh_size / sizeof(*rel); -+ -+ for (j = 0; j < size; ++j) { -+ Elf_Sym *sym; -+ -+ if (ELF_MIPS_R_TYPE(rel[j]) != R_MIPS_26) -+ continue; -+ -+ sym = syms + ELF_MIPS_R_SYM(rel[j]); -+ if (!is_init && sym->st_shndx != SHN_UNDEF) -+ continue; -+ -+ ret += 4 * sizeof(int); -+ } -+ } else { -+ Elf_Mips_Rela *rela = (void *) sechdrs[i].sh_addr; -+ unsigned int size = sechdrs[i].sh_size / sizeof(*rela); -+ -+ for (j = 0; j < size; ++j) { -+ Elf_Sym *sym; -+ -+ if (ELF_MIPS_R_TYPE(rela[j]) != R_MIPS_26) -+ continue; -+ -+ sym = syms + ELF_MIPS_R_SYM(rela[j]); -+ if (!is_init && sym->st_shndx != SHN_UNDEF) -+ continue; -+ -+ ret += 4 * sizeof(int); -+ } -+ } -+ } -+ -+ return ret; -+} -+ -+#ifndef MODULE_START -+static void *alloc_phys(unsigned long size) -+{ -+ unsigned order; -+ struct page *page; -+ struct page *p; -+ -+ size = PAGE_ALIGN(size); -+ order = get_order(size); -+ -+ page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN | -+ __GFP_THISNODE, order); -+ if (!page) -+ return NULL; -+ -+ split_page(page, order); -+ -+ /* mark all pages except for the last one */ -+ for (p = page; p + 1 < page + (size >> PAGE_SHIFT); ++p) -+ set_bit(PG_owner_priv_1, &p->flags); -+ -+ for (p = page + (size >> PAGE_SHIFT); p < page + (1 << order); ++p) -+ __free_page(p); -+ -+ return page_address(page); -+} -+#endif -+ -+static void free_phys(void *ptr) -+{ -+ struct page *page; -+ bool free; -+ -+ page = virt_to_page(ptr); -+ do { -+ free = test_and_clear_bit(PG_owner_priv_1, &page->flags); -+ __free_page(page); -+ page++; -+ } while (free); -+} -+ -+ - void *module_alloc(unsigned long size) - { -+#ifdef MODULE_START - return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END, - GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); -+#else -+ void *ptr; -+ -+ if (size == 0) -+ return NULL; -+ -+ ptr = alloc_phys(size); -+ -+ /* If we failed to allocate physically contiguous memory, -+ * fall 
back to regular vmalloc. The module loader code will -+ * create jump tables to handle long jumps */ -+ if (!ptr) -+ return vmalloc(size); -+ -+ return ptr; -+#endif - } -+ -+static inline bool is_phys_addr(void *ptr) -+{ -+#ifdef CONFIG_64BIT -+ return (KSEGX((unsigned long)ptr) == CKSEG0); -+#else -+ return (KSEGX(ptr) == KSEG0); - #endif -+} -+ -+/* Free memory returned from module_alloc */ -+void module_memfree(void *module_region) -+{ -+ if (is_phys_addr(module_region)) -+ free_phys(module_region); -+ else -+ vfree(module_region); -+} -+ -+static void *__module_alloc(int size, bool phys) -+{ -+ void *ptr; -+ -+ if (phys) -+ ptr = kmalloc(size, GFP_KERNEL); -+ else -+ ptr = vmalloc(size); -+ return ptr; -+} -+ -+static void __module_free(void *ptr) -+{ -+ if (is_phys_addr(ptr)) -+ kfree(ptr); -+ else -+ vfree(ptr); -+} -+ -+int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, -+ char *secstrings, struct module *mod) -+{ -+ unsigned int symindex = 0; -+ unsigned int core_size, init_size; -+ int i; -+ -+ mod->arch.phys_plt_offset = 0; -+ mod->arch.virt_plt_offset = 0; -+ mod->arch.phys_plt_tbl = NULL; -+ mod->arch.virt_plt_tbl = NULL; -+ -+ if (IS_ENABLED(CONFIG_64BIT)) -+ return 0; -+ -+ for (i = 1; i < hdr->e_shnum; i++) -+ if (sechdrs[i].sh_type == SHT_SYMTAB) -+ symindex = i; -+ -+ core_size = get_plt_size(hdr, sechdrs, secstrings, symindex, false); -+ init_size = get_plt_size(hdr, sechdrs, secstrings, symindex, true); -+ -+ if ((core_size + init_size) == 0) -+ return 0; -+ -+ mod->arch.phys_plt_tbl = __module_alloc(core_size + init_size, 1); -+ if (!mod->arch.phys_plt_tbl) -+ return -ENOMEM; -+ -+ mod->arch.virt_plt_tbl = __module_alloc(core_size + init_size, 0); -+ if (!mod->arch.virt_plt_tbl) { -+ __module_free(mod->arch.phys_plt_tbl); -+ mod->arch.phys_plt_tbl = NULL; -+ return -ENOMEM; -+ } -+ -+ return 0; -+} - - static void apply_r_mips_32(u32 *location, u32 base, Elf_Addr v) - { - *location = base + v; - } - -+static Elf_Addr add_plt_entry_to(unsigned *plt_offset, -+ void *start, Elf_Addr v) -+{ -+ unsigned *tramp = start + *plt_offset; -+ *plt_offset += 4 * sizeof(int); -+ -+ /* adjust carry for addiu */ -+ if (v & 0x00008000) -+ v += 0x10000; -+ -+ tramp[0] = 0x3c190000 | (v >> 16); /* lui t9, hi16 */ -+ tramp[1] = 0x27390000 | (v & 0xffff); /* addiu t9, t9, lo16 */ -+ tramp[2] = 0x03200008; /* jr t9 */ -+ tramp[3] = 0x00000000; /* nop */ -+ -+ return (Elf_Addr) tramp; -+} -+ -+static Elf_Addr add_plt_entry(struct module *me, void *location, Elf_Addr v) -+{ -+ if (is_phys_addr(location)) -+ return add_plt_entry_to(&me->arch.phys_plt_offset, -+ me->arch.phys_plt_tbl, v); -+ else -+ return add_plt_entry_to(&me->arch.virt_plt_offset, -+ me->arch.virt_plt_tbl, v); -+ -+} -+ - static int apply_r_mips_26(struct module *me, u32 *location, u32 base, - Elf_Addr v) - { -+ u32 ofs = base & 0x03ffffff; -+ - if (v % 4) { - pr_err("module %s: dangerous R_MIPS_26 relocation\n", - me->name); -@@ -56,13 +294,17 @@ static int apply_r_mips_26(struct module - } - - if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) { -- pr_err("module %s: relocation overflow\n", -- me->name); -- return -ENOEXEC; -+ v = add_plt_entry(me, location, v + (ofs << 2)); -+ if (!v) { -+ pr_err("module %s: relocation overflow\n", -+ me->name); -+ return -ENOEXEC; -+ } -+ ofs = 0; - } - - *location = (*location & ~0x03ffffff) | -- ((base + (v >> 2)) & 0x03ffffff); -+ ((ofs + (v >> 2)) & 0x03ffffff); - - return 0; - } -@@ -442,9 +684,36 @@ int module_finalize(const Elf_Ehdr *hdr, - 
list_add(&me->arch.dbe_list, &dbe_list); - spin_unlock_irq(&dbe_lock); - } -+ -+ /* Get rid of the fixup trampoline if we're running the module -+ * from physically mapped address space */ -+ if (me->arch.phys_plt_offset == 0) { -+ __module_free(me->arch.phys_plt_tbl); -+ me->arch.phys_plt_tbl = NULL; -+ } -+ if (me->arch.virt_plt_offset == 0) { -+ __module_free(me->arch.virt_plt_tbl); -+ me->arch.virt_plt_tbl = NULL; -+ } -+ - return 0; - } - -+void module_arch_freeing_init(struct module *mod) -+{ -+ if (mod->state == MODULE_STATE_LIVE) -+ return; -+ -+ if (mod->arch.phys_plt_tbl) { -+ __module_free(mod->arch.phys_plt_tbl); -+ mod->arch.phys_plt_tbl = NULL; -+ } -+ if (mod->arch.virt_plt_tbl) { -+ __module_free(mod->arch.virt_plt_tbl); -+ mod->arch.virt_plt_tbl = NULL; -+ } -+} -+ - void module_arch_cleanup(struct module *mod) - { - spin_lock_irq(&dbe_lock); diff --git a/6.10/target/linux/generic/pending-6.10/402-mtd-spi-nor-write-support-for-minor-aligned-partitions.patch b/6.10/target/linux/generic/pending-6.10/402-mtd-spi-nor-write-support-for-minor-aligned-partitions.patch deleted file mode 100644 index 5a812b86..00000000 --- a/6.10/target/linux/generic/pending-6.10/402-mtd-spi-nor-write-support-for-minor-aligned-partitions.patch +++ /dev/null @@ -1,245 +0,0 @@ -From acacdac272927ae1d96e0bca51eb82899671eaea Mon Sep 17 00:00:00 2001 -From: John Thomson -Date: Fri, 25 Dec 2020 18:50:08 +1000 -Subject: [PATCH] mtd: spi-nor: write support for minor aligned partitions -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Do not prevent writing to mtd partitions where a partition boundary sits -on a minor erasesize boundary. -This addresses a FIXME that has been present since the start of the -linux git history: -/* Doesn't start on a boundary of major erase size */ -/* FIXME: Let it be writable if it is on a boundary of - * _minor_ erase size though */ - -Allow a uniform erase region spi-nor device to be configured -to use the non-uniform erase regions code path for an erase with: -CONFIG_MTD_SPI_NOR_USE_VARIABLE_ERASE=y - -On supporting hardware (SECT_4K: majority of current SPI-NOR device) -provide the facility for an erase to use the least number -of SPI-NOR operations, as well as access to 4K erase without -requiring CONFIG_MTD_SPI_NOR_USE_4K_SECTORS - -Introduce erasesize_minor to the mtd struct, -the smallest erasesize supported by the device - -On existing devices, this is useful where write support is wanted -for data on a 4K partition, such as some u-boot-env partitions, -or RouterBoot soft_config, while still netting the performance -benefits of using 64K sectors - -Performance: -time mtd erase firmware -OpenWrt 5.10 ramips MT7621 w25q128jv 0xfc0000 partition length - -Without this patch -MTD_SPI_NOR_USE_4K_SECTORS=y |n -real 2m 11.66s |0m 50.86s -user 0m 0.00s |0m 0.00s -sys 1m 56.20s |0m 50.80s - -With this patch -MTD_SPI_NOR_USE_VARIABLE_ERASE=n|y |4K_SECTORS=y -real 0m 51.68s |0m 50.85s |2m 12.89s -user 0m 0.00s |0m 0.00s |0m 0.01s -sys 0m 46.94s |0m 50.38s |2m 12.46s - -Signed-off-by: John Thomson -Signed-off-by: Thibaut VARÈNE - ---- - -checkpatch does not like the printk(KERN_WARNING -these should be changed separately beforehand? - -Changes v1 -> v2: -Added mtdcore sysfs for erasesize_minor -Removed finding minor erasesize for variable erase regions device, -as untested and no responses regarding it. 
-Moved IF_ENABLED for SPINOR variable erase to guard setting
-erasesize_minor in spi-nor/core.c
-Removed setting erasesize to minor where partition boundaries require
-minor erase to be writable
-Simplified minor boundary check by relying on minor being a factor of
-major
-
-Changes RFC -> v1:
-Fix uninitialized variable smatch warning
-Reported-by: kernel test robot
-Reported-by: Dan Carpenter
----
- drivers/mtd/mtdcore.c | 10 ++++++++++
- drivers/mtd/mtdpart.c | 35 +++++++++++++++++++++++++----------
- drivers/mtd/spi-nor/Kconfig | 10 ++++++++++
- drivers/mtd/spi-nor/core.c | 11 +++++++++--
- include/linux/mtd/mtd.h | 2 ++
- 5 files changed, 56 insertions(+), 12 deletions(-)
-
---- a/drivers/mtd/mtdcore.c
-+++ b/drivers/mtd/mtdcore.c
-@@ -198,6 +198,15 @@ static ssize_t mtd_erasesize_show(struct
- }
- MTD_DEVICE_ATTR_RO(erasesize);
-
-+static ssize_t mtd_erasesize_minor_show(struct device *dev,
-+ struct device_attribute *attr, char *buf)
-+{
-+ struct mtd_info *mtd = dev_get_drvdata(dev);
-+
-+ return sysfs_emit(buf, "%lu\n", (unsigned long)mtd->erasesize_minor);
-+}
-+MTD_DEVICE_ATTR_RO(erasesize_minor);
-+
- static ssize_t mtd_writesize_show(struct device *dev,
- struct device_attribute *attr, char *buf)
- {
-@@ -343,6 +352,7 @@ static struct attribute *mtd_attrs[] = {
- &dev_attr_flags.attr,
- &dev_attr_size.attr,
- &dev_attr_erasesize.attr,
-+ &dev_attr_erasesize_minor.attr,
- &dev_attr_writesize.attr,
- &dev_attr_subpagesize.attr,
- &dev_attr_oobsize.attr,
---- a/drivers/mtd/mtdpart.c
-+++ b/drivers/mtd/mtdpart.c
-@@ -47,6 +47,7 @@ static struct mtd_info *allocate_partiti
- struct mtd_info *master = mtd_get_master(parent);
- int wr_alignment = (parent->flags & MTD_NO_ERASE) ?
- master->writesize : master->erasesize;
-+ int wr_alignment_minor = 0;
- u64 parent_size = mtd_is_partition(parent) ?
- parent->part.size : parent->size;
- struct mtd_info *child;
-@@ -171,6 +172,7 @@ static struct mtd_info *allocate_partiti
- } else {
- /* Single erase size */
- child->erasesize = master->erasesize;
-+ child->erasesize_minor = master->erasesize_minor;
- }
-
- /*
-@@ -178,26 +180,39 @@ static struct mtd_info *allocate_partiti
- * exposes several regions with different erasesize. Adjust
- * wr_alignment accordingly.
- */
-- if (!(child->flags & MTD_NO_ERASE))
-+ if (!(child->flags & MTD_NO_ERASE)) {
- wr_alignment = child->erasesize;
-+ wr_alignment_minor = child->erasesize_minor;
-+ }
-
- tmp = mtd_get_master_ofs(child, 0);
- remainder = do_div(tmp, wr_alignment);
- if ((child->flags & MTD_WRITEABLE) && remainder) {
-- /* Doesn't start on a boundary of major erase size */
-- /* FIXME: Let it be writable if it is on a boundary of
-- * _minor_ erase size though */
-- child->flags &= ~MTD_WRITEABLE;
-- printk(KERN_WARNING"mtd: partition \"%s\" doesn't start on an erase/write block boundary -- force read-only\n",
-- part->name);
-+ if (wr_alignment_minor) {
-+ /* rely on minor being a factor of major erasesize */
-+ tmp = remainder;
-+ remainder = do_div(tmp, wr_alignment_minor);
-+ }
-+ if (remainder) {
-+ child->flags &= ~MTD_WRITEABLE;
-+ printk(KERN_WARNING"mtd: partition \"%s\" doesn't start on an erase/write block boundary -- force read-only\n",
-+ part->name);
-+ }
- }
-
- tmp = mtd_get_master_ofs(child, 0) + child->part.size;
- remainder = do_div(tmp, wr_alignment);
- if ((child->flags & MTD_WRITEABLE) && remainder) {
-- child->flags &= ~MTD_WRITEABLE;
-- printk(KERN_WARNING"mtd: partition \"%s\" doesn't end on an erase/write block -- force read-only\n",
-- part->name);
-+ if (wr_alignment_minor) {
-+ tmp = remainder;
-+ remainder = do_div(tmp, wr_alignment_minor);
-+ }
-+
-+ if (remainder) {
-+ child->flags &= ~MTD_WRITEABLE;
-+ printk(KERN_WARNING"mtd: partition \"%s\" doesn't end on an erase/write block -- force read-only\n",
-+ part->name);
-+ }
- }
-
- child->size = child->part.size;
---- a/drivers/mtd/spi-nor/Kconfig
-+++ b/drivers/mtd/spi-nor/Kconfig
-@@ -10,6 +10,16 @@ menuconfig MTD_SPI_NOR
-
- if MTD_SPI_NOR
-
-+config MTD_SPI_NOR_USE_VARIABLE_ERASE
-+ bool "Disable uniform_erase to allow use of all hardware supported erasesizes"
-+ depends on !MTD_SPI_NOR_USE_4K_SECTORS
-+ default n
-+ help
-+ Allow mixed use of all hardware supported erasesizes,
-+ by forcing spi_nor to use the multiple eraseregions code path.
-+ For example: A 68K erase will use one 64K erase, and one 4K erase
-+ on supporting hardware.
-+
- config MTD_SPI_NOR_USE_4K_SECTORS
- bool "Use small 4096 B erase sectors"
- default y
---- a/drivers/mtd/spi-nor/core.c
-+++ b/drivers/mtd/spi-nor/core.c
-@@ -1150,6 +1150,8 @@ static u8 spi_nor_convert_3to4_erase(u8
-
- static bool spi_nor_has_uniform_erase(const struct spi_nor *nor)
- {
-+ if (IS_ENABLED(CONFIG_MTD_SPI_NOR_USE_VARIABLE_ERASE))
-+ return false;
- return !!nor->params->erase_map.uniform_erase_type;
- }
-
-@@ -2582,6 +2584,7 @@ static int spi_nor_select_erase(struct s
- {
- struct spi_nor_erase_map *map = &nor->params->erase_map;
- const struct spi_nor_erase_type *erase = NULL;
-+ const struct spi_nor_erase_type *erase_minor = NULL;
- struct mtd_info *mtd = &nor->mtd;
- u32 wanted_size = nor->info->sector_size;
- int i;
-@@ -2614,8 +2617,9 @@ static int spi_nor_select_erase(struct s
- */
- for (i = SNOR_ERASE_TYPE_MAX - 1; i >= 0; i--) {
- if (map->erase_type[i].size) {
-- erase = &map->erase_type[i];
-- break;
-+ if (!erase)
-+ erase = &map->erase_type[i];
-+ erase_minor = &map->erase_type[i];
- }
- }
-
-@@ -2623,6 +2627,9 @@ static int spi_nor_select_erase(struct s
- return -EINVAL;
-
- mtd->erasesize = erase->size;
-+ if (IS_ENABLED(CONFIG_MTD_SPI_NOR_USE_VARIABLE_ERASE) &&
-+ erase_minor && erase_minor->size < erase->size)
-+ mtd->erasesize_minor = erase_minor->size;
- return 0;
- }
-
---- a/include/linux/mtd/mtd.h
-+++ b/include/linux/mtd/mtd.h
-@@ -245,6 +245,8 @@ struct mtd_info {
- * information below if they desire
- */
- uint32_t erasesize;
-+ /* "Minor" (smallest) erase size supported by the whole device */
-+ uint32_t erasesize_minor;
- /* Minimal writable flash unit size. In case of NOR flash it is 1 (even
- * though individual bits can be cleared), in case of NAND flash it is
- * one NAND page (or half, or one-fourths of it), in case of ECC-ed NOR
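The 68K example in the Kconfig help above decomposes exactly as the variable-erase path would: one aligned 64K erase followed by one 4K erase. Here is a minimal standalone sketch of that greedy largest-fit selection (plain userspace C, not kernel code; the 4K/32K/64K erase set is an assumed example, not taken from a real erase map):

#include <stdio.h>

int main(void)
{
	/* hypothetical hardware erase types, largest first */
	const unsigned int erase_sizes[] = { 64 * 1024, 32 * 1024, 4 * 1024 };
	unsigned int offset = 0, len = 68 * 1024;	/* the 68K example */

	while (len) {
		unsigned int i;

		for (i = 0; i < 3; i++) {
			unsigned int es = erase_sizes[i];

			/* pick the largest type that is aligned and fits */
			if (!(offset % es) && len >= es) {
				printf("erase %3uK at 0x%05x\n",
				       es / 1024, offset);
				offset += es;
				len -= es;
				break;
			}
		}
	}
	return 0;
}

This prints one 64K erase at 0x00000 and one 4K erase at 0x10000, matching the help text; erasesize_minor is what lets mtdpart accept partitions aligned only to the smaller type.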
diff --git a/6.10/target/linux/generic/pending-6.10/451-block-partitions-populate-fwnode.patch b/6.10/target/linux/generic/pending-6.10/451-block-partitions-populate-fwnode.patch
deleted file mode 100644
index 2aef2287..00000000
--- a/6.10/target/linux/generic/pending-6.10/451-block-partitions-populate-fwnode.patch
+++ /dev/null
@@ -1,74 +0,0 @@
-From 7f4c9c534aabe1315669e076d3fe0af0fd374cda Mon Sep 17 00:00:00 2001
-From: Daniel Golle
-Date: Thu, 30 May 2024 03:13:19 +0100
-Subject: [PATCH 2/9] block: partitions: populate fwnode
-
-Let block partitions be represented by a firmware node and hence
-allow them to be referenced, e.g. for use with blk-nvmem.
-
-Signed-off-by: Daniel Golle
----
- block/partitions/core.c | 41 +++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 41 insertions(+)
-
---- a/block/partitions/core.c
-+++ b/block/partitions/core.c
-@@ -10,6 +10,8 @@
- #include
- #include
- #include
-+#include
-+
- #include "check.h"
-
- static int (*const check_part[])(struct parsed_partitions *) = {
-@@ -292,6 +294,40 @@ static ssize_t whole_disk_show(struct de
- }
- static const DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL);
-
-+static struct fwnode_handle *find_partition_fwnode(struct block_device *bdev)
-+{
-+ struct fwnode_handle *fw_parts, *fw_part;
-+ struct device *ddev = disk_to_dev(bdev->bd_disk);
-+ const char *partname, *uuid;
-+ u32 partno;
-+
-+ fw_parts = device_get_named_child_node(ddev, "partitions");
-+ if (!fw_parts)
-+ return NULL;
-+
-+ fwnode_for_each_child_node(fw_parts, fw_part) {
-+ if (!fwnode_property_read_string(fw_part, "uuid", &uuid) &&
-+ (!bdev->bd_meta_info || strncmp(uuid,
-+ bdev->bd_meta_info->uuid,
-+ PARTITION_META_INFO_UUIDLTH)))
-+ continue;
-+
-+ if (!fwnode_property_read_string(fw_part, "partname", &partname) &&
-+ (!bdev->bd_meta_info || strncmp(partname,
-+ bdev->bd_meta_info->volname,
-+ PARTITION_META_INFO_VOLNAMELTH)))
-+ continue;
-+
-+ if (!fwnode_property_read_u32(fw_part, "partno", &partno) &&
-+ bdev->bd_partno != partno)
-+ continue;
-+
-+ return fw_part;
-+ }
-+
-+ return NULL;
-+}
-+
- /*
- * Must be called either with open_mutex held, before a disk can be opened or
- * after all disk users are gone.
- */
-@@ -374,6 +410,8 @@ static struct block_device *add_partitio
- goto out_put;
- }
-
-+ device_set_node(pdev, find_partition_fwnode(bdev));
-+
- /* delay uevent until 'holders' subdir is created */
- dev_set_uevent_suppress(pdev, 1);
- err = device_add(pdev);
diff --git a/6.10/target/linux/generic/pending-6.10/476-mtd-spi-nor-add-eon-en25q128.patch b/6.10/target/linux/generic/pending-6.10/476-mtd-spi-nor-add-eon-en25q128.patch
deleted file mode 100644
index 303e4884..00000000
--- a/6.10/target/linux/generic/pending-6.10/476-mtd-spi-nor-add-eon-en25q128.patch
+++ /dev/null
@@ -1,19 +0,0 @@
-From: Piotr Dymacz
-Subject: kernel/mtd: add support for EON EN25Q128
-
-Signed-off-by: Piotr Dymacz
----
- drivers/mtd/spi-nor/spi-nor.c | 1 +
- 1 file changed, 1 insertion(+)
-
---- a/drivers/mtd/spi-nor/eon.c
-+++ b/drivers/mtd/spi-nor/eon.c
-@@ -17,6 +17,8 @@ static const struct flash_info eon_nor_p
- { "en25p64", INFO(0x1c2017, 0, 64 * 1024, 128) },
- { "en25q64", INFO(0x1c3017, 0, 64 * 1024, 128)
- NO_SFDP_FLAGS(SECT_4K) },
-+ { "en25q128", INFO(0x1c3018, 0, 64 * 1024, 256)
-+ NO_SFDP_FLAGS(SECT_4K) },
- { "en25q80a", INFO(0x1c3014, 0, 64 * 1024, 16)
- NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ) },
- { "en25qh16", INFO(0x1c7015, 0, 64 * 1024, 32)
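For orientation, the INFO(jedec_id, ext_id, sector_size, n_sectors) entries in these flash tables fully determine the chip geometry; the en25q128 line above, for instance, describes 256 sectors of 64 KiB. A throwaway check (illustrative userspace C, not a kernel helper):

#include <stdio.h>

int main(void)
{
	/* values from the en25q128 flash_info entry above (JEDEC 0x1c3018) */
	unsigned int sector_size = 64 * 1024;
	unsigned int n_sectors = 256;

	/* 64 KiB * 256 sectors = 16 MiB of flash */
	printf("capacity: %u MiB\n", (sector_size * n_sectors) >> 20);
	return 0;
}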
diff --git a/6.10/target/linux/generic/pending-6.10/477-mtd-spi-nor-add-eon-en25qx128a.patch b/6.10/target/linux/generic/pending-6.10/477-mtd-spi-nor-add-eon-en25qx128a.patch
deleted file mode 100644
index 6740d1d7..00000000
--- a/6.10/target/linux/generic/pending-6.10/477-mtd-spi-nor-add-eon-en25qx128a.patch
+++ /dev/null
@@ -1,21 +0,0 @@
-From: Christian Marangi
-Subject: kernel/mtd: add support for EON EN25QX128A
-
-Add support for EON EN25QX128A with no flags as it does
-support SFDP parsing.
-
-Signed-off-by: Christian Marangi
----
- drivers/mtd/spi-nor/spi-nor.c | 1 +
- 1 file changed, 1 insertion(+)
-
---- a/drivers/mtd/spi-nor/eon.c
-+++ b/drivers/mtd/spi-nor/eon.c
-@@ -19,6 +19,7 @@ static const struct flash_info eon_nor_p
- NO_SFDP_FLAGS(SECT_4K) },
- { "en25q128", INFO(0x1c3018, 0, 64 * 1024, 256)
- NO_SFDP_FLAGS(SECT_4K) },
-+ { "en25qx128a", INFO(0x1c7118, 0, 64 * 1024, 256) },
- { "en25q80a", INFO(0x1c3014, 0, 64 * 1024, 16)
- NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ) },
- { "en25qh16", INFO(0x1c7015, 0, 64 * 1024, 32)
diff --git a/6.10/target/linux/generic/pending-6.10/481-mtd-spi-nor-add-support-for-Gigadevice-GD25D05.patch b/6.10/target/linux/generic/pending-6.10/481-mtd-spi-nor-add-support-for-Gigadevice-GD25D05.patch
deleted file mode 100644
index 3fdd354e..00000000
--- a/6.10/target/linux/generic/pending-6.10/481-mtd-spi-nor-add-support-for-Gigadevice-GD25D05.patch
+++ /dev/null
@@ -1,23 +0,0 @@
-From d68b4aa22e8c625685bfad642dd7337948dc0ad1 Mon Sep 17 00:00:00 2001
-From: Koen Vandeputte
-Date: Mon, 6 Jan 2020 13:07:56 +0100
-Subject: [PATCH] mtd: spi-nor: add support for Gigadevice GD25D05
-
-Signed-off-by: Koen Vandeputte
----
- drivers/mtd/spi-nor/spi-nor.c | 5 +++++
- 1 file changed, 5 insertions(+)
-
---- a/drivers/mtd/spi-nor/gigadevice.c
-+++ b/drivers/mtd/spi-nor/gigadevice.c
-@@ -34,6 +34,10 @@ static const struct spi_nor_fixups gd25q
- };
-
- static const struct flash_info gigadevice_nor_parts[] = {
-+ { "gd25q05", INFO(0xc84010, 0, 64 * 1024, 1)
-+ FLAGS(SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB)
-+ NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ |
-+ SPI_NOR_QUAD_READ) },
- { "gd25q16", INFO(0xc84015, 0, 64 * 1024, 32)
- FLAGS(SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB)
- NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ |
diff --git a/6.10/target/linux/generic/pending-6.10/482-mtd-spi-nor-add-gd25q512.patch b/6.10/target/linux/generic/pending-6.10/482-mtd-spi-nor-add-gd25q512.patch
deleted file mode 100644
index ddd3405a..00000000
--- a/6.10/target/linux/generic/pending-6.10/482-mtd-spi-nor-add-gd25q512.patch
+++ /dev/null
@@ -1,23 +0,0 @@
-From f8943df3beb0d3f9754bb35320c3a378727175a8 Mon Sep 17 00:00:00 2001
-From: OpenWrt community
-Date: Thu, 14 Jul 2022 08:38:07 +0200
-Subject: [PATCH] spi-nor/gigadevice: add gd25q512
-
----
- drivers/mtd/spi-nor/gigadevice.c | 3 +++
- 1 file changed, 3 insertions(+)
-
---- a/drivers/mtd/spi-nor/gigadevice.c
-+++ b/drivers/mtd/spi-nor/gigadevice.c
-@@ -71,6 +71,11 @@ static const struct flash_info gigadevic
- FLAGS(SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB | SPI_NOR_TB_SR_BIT6)
- FIXUP_FLAGS(SPI_NOR_4B_OPCODES)
- .fixups = &gd25q256_fixups },
-+ { "gd25q512", INFO(0xc84020, 0, 64 * 1024, 1024)
-+ FLAGS(SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB)
-+ FIXUP_FLAGS(SPI_NOR_4B_OPCODES)
-+ NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ |
-+ SPI_NOR_QUAD_READ) },
- };
-
- const struct spi_nor_manufacturer spi_nor_gigadevice = {
diff --git a/6.10/target/linux/generic/pending-6.10/484-mtd-spi-nor-add-esmt-f25l16pa.patch b/6.10/target/linux/generic/pending-6.10/484-mtd-spi-nor-add-esmt-f25l16pa.patch
deleted file mode 100644
index d5ebe203..00000000
--- a/6.10/target/linux/generic/pending-6.10/484-mtd-spi-nor-add-esmt-f25l16pa.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-From 87363cc0e522de3294ea6ae10fb468d2a8d6fb2f Mon Sep 17 00:00:00 2001
-From: OpenWrt community
-Date: Wed, 13 Jul 2022 12:17:21 +0200
-Subject: [PATCH] spi-nor/esmt.c: add esmt f25l16pa
-
-This fixes support for the Dongwon T&I DW02-412H, which uses
-F25L16PA(2S) flash.
-
----
- drivers/mtd/spi-nor/esmt.c | 2 ++
- 1 file changed, 2 insertions(+)
-
---- a/drivers/mtd/spi-nor/esmt.c
-+++ b/drivers/mtd/spi-nor/esmt.c
-@@ -10,6 +10,9 @@
-
- static const struct flash_info esmt_nor_parts[] = {
- /* ESMT */
-+ { "f25l16pa-2s", INFO(0x8c2115, 0, 64 * 1024, 32)
-+ FLAGS(SPI_NOR_HAS_LOCK)
-+ NO_SFDP_FLAGS(SECT_4K) },
- { "f25l32pa", INFO(0x8c2016, 0, 64 * 1024, 64)
- FLAGS(SPI_NOR_HAS_LOCK | SPI_NOR_SWP_IS_VOLATILE)
- NO_SFDP_FLAGS(SECT_4K) },
diff --git a/6.10/target/linux/generic/pending-6.10/485-mtd-spi-nor-add-xmc-xm25qh128c.patch b/6.10/target/linux/generic/pending-6.10/485-mtd-spi-nor-add-xmc-xm25qh128c.patch
deleted file mode 100644
index e8583cc2..00000000
--- a/6.10/target/linux/generic/pending-6.10/485-mtd-spi-nor-add-xmc-xm25qh128c.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-From f6b33d850f7f12555df2fa0e3349b33427bf5890 Mon Sep 17 00:00:00 2001
-From: OpenWrt community
-Date: Wed, 13 Jul 2022 12:19:01 +0200
-Subject: [PATCH] spi-nor/xmc.c: add xm25qh128c
-
-The XMC XM25QH128C is a 16MB SPI NOR chip. The patch is verified on
-the Ruijie RG-EW3200GX PRO.
-Datasheet available at https://www.xmcwh.com/uploads/435/XM25QH128C.pdf
-
----
- drivers/mtd/spi-nor/xmc.c | 2 ++
- 1 file changed, 2 insertions(+)
-
---- a/drivers/mtd/spi-nor/xmc.c
-+++ b/drivers/mtd/spi-nor/xmc.c
-@@ -16,6 +16,9 @@ static const struct flash_info xmc_nor_p
- { "XM25QH128A", INFO(0x207018, 0, 64 * 1024, 256)
- NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ |
- SPI_NOR_QUAD_READ) },
-+ { "XM25QH128C", INFO(0x204018, 0, 64 * 1024, 256)
-+ NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ |
-+ SPI_NOR_QUAD_READ) },
- };
-
- const struct spi_nor_manufacturer spi_nor_xmc = {
diff --git a/6.10/target/linux/generic/pending-6.10/487-mtd-spinand-Add-support-for-Etron-EM73D044VCx.patch b/6.10/target/linux/generic/pending-6.10/487-mtd-spinand-Add-support-for-Etron-EM73D044VCx.patch
deleted file mode 100644
index 8fd13659..00000000
--- a/6.10/target/linux/generic/pending-6.10/487-mtd-spinand-Add-support-for-Etron-EM73D044VCx.patch
+++ /dev/null
@@ -1,170 +0,0 @@
-From f32085fc0b87049491b07e198d924d738a1a2834 Mon Sep 17 00:00:00 2001
-From: Daniel Danzberger
-Date: Wed, 3 Aug 2022 17:31:03 +0200
-Subject: [PATCH] mtd: spinand: Add support for Etron EM73D044VCx
-
-Airoha is a new ARM platform based on the Cortex-A53 which has recently
-been merged into linux-next.
-
-Due to BootROM limitations on this platform, the Cortex-A53 can't run in
-Aarch64 mode and code must be compiled for 32-Bit ARM.
-
-This support is based mostly on those linux-next commits backported
-for kernel 5.15.
-
-Patches:
-1 - platform support = linux-next
-2 - clock driver = linux-next
-3 - gpio driver = linux-next
-4 - linux,usable-memory-range dts support = linux-next
-5 - mtd spinand driver
-6 - spi driver
-7 - pci driver (kconfig only, uses mediatek PCI) = linux-next
-
-Still missing:
-- Ethernet driver
-- Sysupgrade support
-
-At the moment there exists one subtarget EN7523 with only one evaluation
-board.
-
-The initramfs can be run with the following commands from u-boot:
--
-u-boot> setenv bootfile \
- openwrt-airoha-airoha_en7523-evb-initramfs-kernel.bin
-u-boot> tftpboot
-u-boot> bootm 0x81800000
--
-
-Submitted-by: Daniel Danzberger
-
--- a/drivers/mtd/nand/spi/Makefile
-+++ b/drivers/mtd/nand/spi/Makefile
-@@ -1,4 +1,4 @@
- # SPDX-License-Identifier: GPL-2.0
--spinand-objs := core.o alliancememory.o ato.o esmt.o gigadevice.o macronix.o
--spinand-objs += micron.o paragon.o toshiba.o winbond.o xtx.o
-+spinand-objs := core.o alliancememory.o ato.o esmt.o etron.o gigadevice.o
-+spinand-objs += macronix.o micron.o paragon.o toshiba.o winbond.o xtx.o
- obj-$(CONFIG_MTD_SPI_NAND) += spinand.o
---- a/drivers/mtd/nand/spi/core.c
-+++ b/drivers/mtd/nand/spi/core.c
-@@ -940,6 +940,7 @@ static const struct spinand_manufacturer
- &alliancememory_spinand_manufacturer,
- &ato_spinand_manufacturer,
- &esmt_c8_spinand_manufacturer,
-+ &etron_spinand_manufacturer,
- &gigadevice_spinand_manufacturer,
- &macronix_spinand_manufacturer,
- &micron_spinand_manufacturer,
---- /dev/null
-+++ b/drivers/mtd/nand/spi/etron.c
-@@ -0,0 +1,98 @@
-+// SPDX-License-Identifier: GPL-2.0
-+
-+#include
-+#include
-+#include
-+
-+#define SPINAND_MFR_ETRON 0xd5
-+
-+
-+static SPINAND_OP_VARIANTS(read_cache_variants,
-+ SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 1, NULL, 0),
-+ SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
-+ SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 1, NULL, 0),
-+ SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0),
-+ SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0),
-+ SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
-+
-+static SPINAND_OP_VARIANTS(write_cache_variants,
-+ SPINAND_PROG_LOAD_X4(true, 0, NULL, 0),
-+ SPINAND_PROG_LOAD(true, 0, NULL, 0));
-+
-+static SPINAND_OP_VARIANTS(update_cache_variants,
-+ SPINAND_PROG_LOAD_X4(false, 0, NULL, 0),
-+ SPINAND_PROG_LOAD(false, 0, NULL, 0));
-+
-+static int etron_ooblayout_ecc(struct mtd_info *mtd, int section,
-+ struct mtd_oob_region *oobregion)
-+{
-+ if (section)
-+ return -ERANGE;
-+
-+ oobregion->offset = 72;
-+ oobregion->length = 56;
-+
-+ return 0;
-+}
-+
-+static int etron_ooblayout_free(struct mtd_info *mtd, int section,
-+ struct mtd_oob_region *oobregion)
-+{
-+ if (section)
-+ return -ERANGE;
-+
-+ oobregion->offset = 1;
-+ oobregion->length = 71;
-+
-+ return 0;
-+}
-+
-+static int etron_ecc_get_status(struct spinand_device *spinand, u8 status)
-+{
-+ switch (status & STATUS_ECC_MASK) {
-+ case STATUS_ECC_NO_BITFLIPS:
-+ return 0;
-+
-+ case STATUS_ECC_HAS_BITFLIPS:
-+ /* Between 1-7 bitflips were corrected */
-+ return 7;
-+
-+ case STATUS_ECC_MASK:
-+ /* Maximum bitflips were corrected */
-+ return 8;
-+
-+ case STATUS_ECC_UNCOR_ERROR:
-+ return -EBADMSG;
-+ }
-+
-+ return -EINVAL;
-+}
-+
-+static const struct mtd_ooblayout_ops etron_ooblayout = {
-+ .ecc = etron_ooblayout_ecc,
-+ .free = etron_ooblayout_free,
-+};
-+
-+static const struct spinand_info etron_spinand_table[] = {
-+ SPINAND_INFO("EM73D044VCx",
-+ SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0x1f),
-+ // bpc, pagesize, oobsize, pagesperblock, bperlun, maxbadplun, ppl, lpt, #t
-+ NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 1, 1),
-+ NAND_ECCREQ(8, 512),
-+ SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
-+ &write_cache_variants,
-+ &update_cache_variants),
-+ SPINAND_HAS_QE_BIT,
-+ SPINAND_ECCINFO(&etron_ooblayout, etron_ecc_get_status)),
-+};
-+
-+static const struct spinand_manufacturer_ops etron_spinand_manuf_ops = {
-+};
-+
-+const struct spinand_manufacturer etron_spinand_manufacturer = {
-+ .id = SPINAND_MFR_ETRON,
-+ .name = "Etron",
-+ .chips = etron_spinand_table,
-+ .nchips = ARRAY_SIZE(etron_spinand_table),
-+ .ops = &etron_spinand_manuf_ops,
-+};
---- a/include/linux/mtd/spinand.h
-+++ b/include/linux/mtd/spinand.h
-@@ -263,6 +263,7 @@ struct spinand_manufacturer {
- extern const struct spinand_manufacturer alliancememory_spinand_manufacturer;
- extern const struct spinand_manufacturer ato_spinand_manufacturer;
- extern const struct spinand_manufacturer esmt_c8_spinand_manufacturer;
-+extern const struct spinand_manufacturer etron_spinand_manufacturer;
- extern const struct spinand_manufacturer gigadevice_spinand_manufacturer;
- extern const struct spinand_manufacturer macronix_spinand_manufacturer;
- extern const struct spinand_manufacturer micron_spinand_manufacturer;
diff --git a/6.10/target/linux/generic/pending-6.10/488-mtd-spi-nor-add-xmc-xm25qh64c.patch b/6.10/target/linux/generic/pending-6.10/488-mtd-spi-nor-add-xmc-xm25qh64c.patch
deleted file mode 100644
index e1e4f25e..00000000
--- a/6.10/target/linux/generic/pending-6.10/488-mtd-spi-nor-add-xmc-xm25qh64c.patch
+++ /dev/null
@@ -1,23 +0,0 @@
-From: Joe Mullally
-Subject: mtd/spi-nor/xmc: add support for XMC XM25QH64C
-
-The XMC XM25QH64C is an 8MB SPI NOR chip. The patch is verified on the TL-WPA8631P v3.
-Datasheet available at https://www.xmcwh.com/uploads/442/XM25QH64C.pdf
-
-Signed-off-by: Joe Mullally
----
- drivers/mtd/spi-nor/xmc.c | 2 ++
- 1 file changed, 2 insertions(+)
-
---- a/drivers/mtd/spi-nor/xmc.c
-+++ b/drivers/mtd/spi-nor/xmc.c
-@@ -13,6 +13,9 @@ static const struct flash_info xmc_nor_p
- { "XM25QH64A", INFO(0x207017, 0, 64 * 1024, 128)
- NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ |
- SPI_NOR_QUAD_READ) },
-+ { "XM25QH64C", INFO(0x204017, 0, 64 * 1024, 128)
-+ NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ |
-+ SPI_NOR_QUAD_READ) },
- { "XM25QH128A", INFO(0x207018, 0, 64 * 1024, 256)
- NO_SFDP_FLAGS(SECT_4K | SPI_NOR_DUAL_READ |
- SPI_NOR_QUAD_READ) },
diff --git a/6.10/target/linux/generic/pending-6.10/498-mtd-spi-nor-locking-support-for-MX25L6405D.patch b/6.10/target/linux/generic/pending-6.10/498-mtd-spi-nor-locking-support-for-MX25L6405D.patch
deleted file mode 100644
index 1a4d5a76..00000000
--- a/6.10/target/linux/generic/pending-6.10/498-mtd-spi-nor-locking-support-for-MX25L6405D.patch
+++ /dev/null
@@ -1,32 +0,0 @@
-From 8bf2ce6ea4ee840b70f55a27f80e1cd308051b13 Mon Sep 17 00:00:00 2001
-From: Nick Hainke
-Date: Mon, 27 Dec 2021 00:38:13 +0100
-Subject: [PATCH 1/2] mtd: spi-nor: locking support for MX25L6405D
-
-Macronix MX25L6405D supports locking with four block-protection bits.
-Currently, the driver only sets three bits. If the bootloader does not
-sustain the flash chip in an unlocked state, the flash might be
-non-writeable. Add the corresponding flag to enable locking support with
-four bits in the status register.
-
-Tested on Nanostation M2 XM.
-
-Similar to commit 7ea40b54e83b ("mtd: spi-nor: enable locking support for
-MX25L12805D")
-
-Signed-off-by: David Bauer
-Signed-off-by: Nick Hainke
----
- drivers/mtd/spi-nor/macronix.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
---- a/drivers/mtd/spi-nor/macronix.c
-+++ b/drivers/mtd/spi-nor/macronix.c
-@@ -48,6 +48,7 @@ static const struct flash_info macronix_
- { "mx25l3255e", INFO(0xc29e16, 0, 64 * 1024, 64)
- NO_SFDP_FLAGS(SECT_4K) },
- { "mx25l6405d", INFO(0xc22017, 0, 64 * 1024, 128)
-+ FLAGS(SPI_NOR_HAS_LOCK | SPI_NOR_4BIT_BP)
- NO_SFDP_FLAGS(SECT_4K) },
- { "mx25u2033e", INFO(0xc22532, 0, 64 * 1024, 4)
- NO_SFDP_FLAGS(SECT_4K) },
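To see why the fourth block-protection bit matters on an 8 MiB part: under the common Macronix convention (assumed here; check the MX25L6405D datasheet for the exact table), a BP value n > 0 protects the top 2^(n-1) 64 KiB blocks, saturating at the whole array, so a three-bit field tops out at 64 of 128 blocks and the chip can never be fully locked or cleanly unlocked. A standalone sketch of that arithmetic (illustrative userspace C, not the kernel's locking code):

#include <stdio.h>

int main(void)
{
	const unsigned int n_blocks = 128;	/* MX25L6405D: 128 x 64 KiB */
	unsigned int bp;

	for (bp = 0; bp <= 15; bp++) {
		/* assumed convention: BP=n protects the top 2^(n-1) blocks */
		unsigned int prot = bp ? 1u << (bp - 1) : 0;

		if (prot > n_blocks)
			prot = n_blocks;
		printf("BP=%2u -> top %3u of %u blocks protected\n",
		       bp, prot, n_blocks);
	}
	return 0;
}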
diff --git a/6.10/target/linux/generic/pending-6.10/680-net-add-TCP-fraglist-GRO-support.patch b/6.10/target/linux/generic/pending-6.10/680-net-add-TCP-fraglist-GRO-support.patch
deleted file mode 100644
index 7672f46d..00000000
--- a/6.10/target/linux/generic/pending-6.10/680-net-add-TCP-fraglist-GRO-support.patch
+++ /dev/null
@@ -1,578 +0,0 @@
-From: Felix Fietkau
-Date: Tue, 23 Apr 2024 11:23:03 +0200
-Subject: [PATCH] net: add TCP fraglist GRO support
-
-When forwarding TCP after GRO, software segmentation is very expensive,
-especially when the checksum needs to be recalculated.
-One case where that's currently unavoidable is when routing packets over
-PPPoE. Performance improves significantly when using fraglist GRO
-implemented in the same way as for UDP.
-
-Here's a measurement of running 2 TCP streams through a MediaTek MT7622
-device (2-core Cortex-A53), which runs NAT with flow offload enabled from
-one ethernet port to PPPoE on another ethernet port + cake qdisc set to
-1Gbps.
-
-rx-gro-list off: 630 Mbit/s, CPU 35% idle
-rx-gro-list on: 770 Mbit/s, CPU 40% idle
-
-Signed-off-by: Felix Fietkau
----
-
---- a/include/net/gro.h
-+++ b/include/net/gro.h
-@@ -439,6 +439,7 @@ static inline __wsum ip6_gro_compute_pse
- }
-
- int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
-+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb);
-
- /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
- static inline void gro_normal_list(struct napi_struct *napi)
---- a/include/net/tcp.h
-+++ b/include/net/tcp.h
-@@ -2083,7 +2083,10 @@ void tcp_v4_destroy_sock(struct sock *sk
-
- struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
- netdev_features_t features);
--struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
-+struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb);
-+struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th);
-+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
-+ struct tcphdr *th);
- INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
- INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
- INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
---- a/net/core/gro.c
-+++ b/net/core/gro.c
-@@ -233,6 +233,33 @@ done:
- return 0;
- }
-
-+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
-+{
-+ if (unlikely(p->len + skb->len >= 65536))
-+ return -E2BIG;
-+
-+ if (NAPI_GRO_CB(p)->last == p)
-+ skb_shinfo(p)->frag_list = skb;
-+ else
-+ NAPI_GRO_CB(p)->last->next = skb;
-+
-+ skb_pull(skb, skb_gro_offset(skb));
-+
-+ NAPI_GRO_CB(p)->last = skb;
-+ NAPI_GRO_CB(p)->count++;
-+ p->data_len += skb->len;
-+
-+ /* sk ownership - if any - completely transferred to the aggregated packet */
-+ skb->destructor = NULL;
-+ skb->sk = NULL;
-+ p->truesize += skb->truesize;
-+ p->len += skb->len;
-+
-+ NAPI_GRO_CB(skb)->same_flow = 1;
-+
-+ return 0;
-+}
-+
-
- static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
- {
---- a/net/ipv4/tcp_offload.c
-+++ b/net/ipv4/tcp_offload.c
-@@ -28,6 +28,70 @@ static void tcp_gso_tstamp(struct sk_buf
- }
- }
-
-+static void __tcpv4_gso_segment_csum(struct sk_buff *seg,
-+ __be32 *oldip, __be32 newip,
-+ __be16 *oldport, __be16 newport)
-+{
-+ struct tcphdr *th;
-+ struct iphdr *iph;
-+
-+ if (*oldip == newip && *oldport == newport)
-+ return;
-+
-+ th = tcp_hdr(seg);
-+ iph = ip_hdr(seg);
-+
-+ inet_proto_csum_replace4(&th->check, seg, *oldip, newip, true);
-+ inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false);
-+ *oldport = newport;
-+
-+ csum_replace4(&iph->check, *oldip, newip);
-+ *oldip = newip;
-+}
-+
-+static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs)
-+{
-+ const struct tcphdr *th;
-+ const struct iphdr *iph;
-+ struct sk_buff *seg;
-+ struct tcphdr *th2;
-+ struct iphdr *iph2;
-+
-+ seg = segs;
-+ th = tcp_hdr(seg);
-+ iph = ip_hdr(seg);
-+ th2 = tcp_hdr(seg->next);
-+ iph2 = ip_hdr(seg->next);
-+
-+ if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) &&
-+ iph->daddr == iph2->daddr && iph->saddr == iph2->saddr)
-+ return segs;
-+
-+ while ((seg = seg->next)) {
-+ th2 = tcp_hdr(seg);
-+ iph2 = ip_hdr(seg);
-+
-+ __tcpv4_gso_segment_csum(seg,
-+ &iph2->saddr, iph->saddr,
-+ &th2->source, th->source);
-+ __tcpv4_gso_segment_csum(seg,
-+ &iph2->daddr, iph->daddr,
-+ &th2->dest, th->dest);
-+ }
-+
-+ return segs;
-+}
-+
-+static struct sk_buff *__tcp4_gso_segment_list(struct sk_buff *skb,
-+ netdev_features_t features)
-+{
-+ skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
-+ if (IS_ERR(skb))
-+ return skb;
-+
-+ return __tcpv4_gso_segment_list_csum(skb);
-+}
-+
- static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
- netdev_features_t features)
- {
-@@ -37,6 +101,9 @@ static struct sk_buff *tcp4_gso_segment(
- if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
- return ERR_PTR(-EINVAL);
-
-+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
-+ return __tcp4_gso_segment_list(skb, features);
-+
- if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
- const struct iphdr *iph = ip_hdr(skb);
- struct tcphdr *th = tcp_hdr(skb);
-@@ -178,61 +245,76 @@ out:
- return segs;
- }
-
--struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
-+struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th)
- {
-- struct sk_buff *pp = NULL;
-+ struct tcphdr *th2;
- struct sk_buff *p;
-+
-+ list_for_each_entry(p, head, list) {
-+ if (!NAPI_GRO_CB(p)->same_flow)
-+ continue;
-+
-+ th2 = tcp_hdr(p);
-+ if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
-+ NAPI_GRO_CB(p)->same_flow = 0;
-+ continue;
-+ }
-+
-+ return p;
-+ }
-+
-+ return NULL;
-+}
-+
-+struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb)
-+{
-+ unsigned int thlen, hlen, off;
- struct tcphdr *th;
-- struct tcphdr *th2;
-- unsigned int len;
-- unsigned int thlen;
-- __be32 flags;
-- unsigned int mss = 1;
-- unsigned int hlen;
-- unsigned int off;
-- int flush = 1;
-- int i;
-
- off = skb_gro_offset(skb);
- hlen = off + sizeof(*th);
- th = skb_gro_header(skb, hlen, off);
- if (unlikely(!th))
-- goto out;
-+ return NULL;
-
- thlen = th->doff * 4;
- if (thlen < sizeof(*th))
-- goto out;
-+ return NULL;
-
- hlen = off + thlen;
- if (skb_gro_header_hard(skb, hlen)) {
- th = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!th))
-- goto out;
-+ return NULL;
- }
-
- skb_gro_pull(skb, thlen);
-
-- len = skb_gro_len(skb);
-- flags = tcp_flag_word(th);
--
-- list_for_each_entry(p, head, list) {
-- if (!NAPI_GRO_CB(p)->same_flow)
-- continue;
-+ return th;
-+}
-
-- th2 = tcp_hdr(p);
-+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
-+ struct tcphdr *th)
-+{
-+ unsigned int thlen = th->doff * 4;
-+ struct sk_buff *pp = NULL;
-+ struct sk_buff *p;
-+ struct tcphdr *th2;
-+ unsigned int len;
-+ __be32 flags;
-+ unsigned int mss = 1;
-+ int flush = 1;
-+ int i;
-
-- if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
-- NAPI_GRO_CB(p)->same_flow = 0;
-- continue;
-- }
-+ len = skb_gro_len(skb);
-+ flags = tcp_flag_word(th);
-
-- goto found;
-- }
-- p = NULL;
-- goto out_check_final;
-+ p = tcp_gro_lookup(head, th);
-+ if (!p)
-+ goto out_check_final;
-
--found:
- /* Include the IP ID check below from the inner most IP hdr */
-+ th2 = tcp_hdr(p);
- flush = NAPI_GRO_CB(p)->flush;
- flush |= (__force int)(flags & TCP_FLAG_CWR);
- flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
-@@ -269,6 +351,19 @@ found:
- flush |= p->decrypted ^ skb->decrypted;
- #endif
-
-+ if (unlikely(NAPI_GRO_CB(p)->is_flist)) {
-+ flush |= (__force int)(flags ^ tcp_flag_word(th2));
-+ flush |= skb->ip_summed != p->ip_summed;
-+ flush |= skb->csum_level != p->csum_level;
-+ flush |= !pskb_may_pull(skb, skb_gro_offset(skb));
-+ flush |= NAPI_GRO_CB(p)->count >= 64;
-+
-+ if (flush || skb_gro_receive_list(p, skb))
-+ mss = 1;
-+
-+ goto out_check_final;
-+ }
-+
- if (flush || skb_gro_receive(p, skb)) {
- mss = 1;
- goto out_check_final;
-@@ -290,7 +385,6 @@ out_check_final:
- if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
- pp = p;
-
--out:
- NAPI_GRO_CB(skb)->flush |= (flush != 0);
-
- return pp;
-@@ -314,18 +408,58 @@ void tcp_gro_complete(struct sk_buff *sk
- }
- EXPORT_SYMBOL(tcp_gro_complete);
-
-+static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
-+ struct tcphdr *th)
-+{
-+ const struct iphdr *iph;
-+ struct sk_buff *p;
-+ struct sock *sk;
-+ struct net *net;
-+ int iif, sdif;
-+
-+ if (!(skb->dev->features & NETIF_F_GRO_FRAGLIST))
-+ return;
-+
-+ p = tcp_gro_lookup(head, th);
-+ if (p) {
-+ NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
-+ return;
-+ }
-+
-+ inet_get_iif_sdif(skb, &iif, &sdif);
-+ iph = skb_gro_network_header(skb);
-+ net = dev_net(skb->dev);
-+ sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
-+ iph->saddr, th->source,
-+ iph->daddr, ntohs(th->dest),
-+ iif, sdif);
-+ NAPI_GRO_CB(skb)->is_flist = !sk;
-+ if (sk)
-+ sock_put(sk);
-+}
-+
- INDIRECT_CALLABLE_SCOPE
- struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
- {
-+ struct tcphdr *th;
-+
- /* Don't bother verifying checksum if we're going to flush anyway. */
- if (!NAPI_GRO_CB(skb)->flush &&
- skb_gro_checksum_validate(skb, IPPROTO_TCP,
-- inet_gro_compute_pseudo)) {
-- NAPI_GRO_CB(skb)->flush = 1;
-- return NULL;
-- }
-+ inet_gro_compute_pseudo))
-+ goto flush;
-+
-+ th = tcp_gro_pull_header(skb);
-+ if (!th)
-+ goto flush;
-
-- return tcp_gro_receive(head, skb);
-+ tcp4_check_fraglist_gro(head, skb, th);
-+
-+ return tcp_gro_receive(head, skb, th);
-+
-+flush:
-+ NAPI_GRO_CB(skb)->flush = 1;
-+ return NULL;
- }
-
- INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
-@@ -333,6 +467,15 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_com
- const struct iphdr *iph = ip_hdr(skb);
- struct tcphdr *th = tcp_hdr(skb);
-
-+ if (unlikely(NAPI_GRO_CB(skb)->is_flist)) {
-+ skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV4;
-+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
-+
-+ __skb_incr_checksum_unnecessary(skb);
-+
-+ return 0;
-+ }
-+
- th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
- iph->daddr, 0);
- skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
---- a/net/ipv4/udp_offload.c
-+++ b/net/ipv4/udp_offload.c
-@@ -433,33 +433,6 @@ out:
- return segs;
- }
-
--static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
--{
-- if (unlikely(p->len + skb->len >= 65536))
-- return -E2BIG;
--
-- if (NAPI_GRO_CB(p)->last == p)
-- skb_shinfo(p)->frag_list = skb;
-- else
-- NAPI_GRO_CB(p)->last->next = skb;
--
-- skb_pull(skb, skb_gro_offset(skb));
--
-- NAPI_GRO_CB(p)->last = skb;
-- NAPI_GRO_CB(p)->count++;
-- p->data_len += skb->len;
--
-- /* sk ownership - if any - completely transferred to the aggregated packet */
-- skb->destructor = NULL;
-- skb->sk = NULL;
-- p->truesize += skb->truesize;
-- p->len += skb->len;
--
-- NAPI_GRO_CB(skb)->same_flow = 1;
--
-- return 0;
--}
--
-
- #define UDP_GRO_CNT_MAX 64
- static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
---- a/net/ipv6/tcpv6_offload.c
-+++ b/net/ipv6/tcpv6_offload.c
-@@ -7,24 +7,67 @@
- */
- #include
- #include
-+#include
- #include
- #include
- #include
- #include
- #include "ip6_offload.h"
-
-+static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
-+ struct tcphdr *th)
-+{
-+#if IS_ENABLED(CONFIG_IPV6)
-+ const struct ipv6hdr *hdr;
-+ struct sk_buff *p;
-+ struct sock *sk;
-+ struct net *net;
-+ int iif, sdif;
-+
-+ if (!(skb->dev->features & NETIF_F_GRO_FRAGLIST))
-+ return;
-+
-+ p = tcp_gro_lookup(head, th);
-+ if (p) {
-+ NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
-+ return;
-+ }
-+
-+ inet6_get_iif_sdif(skb, &iif, &sdif);
-+ hdr = skb_gro_network_header(skb);
-+ net = dev_net(skb->dev);
-+ sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
-+ &hdr->saddr, th->source,
-+ &hdr->daddr, ntohs(th->dest),
-+ iif, sdif);
-+ NAPI_GRO_CB(skb)->is_flist = !sk;
-+ if (sk)
-+ sock_put(sk);
-+#endif /* IS_ENABLED(CONFIG_IPV6) */
-+}
-+
- INDIRECT_CALLABLE_SCOPE
- struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
- {
-+ struct tcphdr *th;
-+
- /* Don't bother verifying checksum if we're going to flush anyway. */
- if (!NAPI_GRO_CB(skb)->flush &&
- skb_gro_checksum_validate(skb, IPPROTO_TCP,
-- ip6_gro_compute_pseudo)) {
-- NAPI_GRO_CB(skb)->flush = 1;
-- return NULL;
-- }
-+ ip6_gro_compute_pseudo))
-+ goto flush;
-
-- return tcp_gro_receive(head, skb);
-+ th = tcp_gro_pull_header(skb);
-+ if (!th)
-+ goto flush;
-+
-+ tcp6_check_fraglist_gro(head, skb, th);
-+
-+ return tcp_gro_receive(head, skb, th);
-+
-+flush:
-+ NAPI_GRO_CB(skb)->flush = 1;
-+ return NULL;
- }
-
- INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
-@@ -32,6 +75,15 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_com
- const struct ipv6hdr *iph = ipv6_hdr(skb);
- struct tcphdr *th = tcp_hdr(skb);
-
-+ if (unlikely(NAPI_GRO_CB(skb)->is_flist)) {
-+ skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV6;
-+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
-+
-+ __skb_incr_checksum_unnecessary(skb);
-+
-+ return 0;
-+ }
-+
- th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
- &iph->daddr, 0);
- skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
-@@ -40,6 +92,61 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_com
- return 0;
- }
-
-+static void __tcpv6_gso_segment_csum(struct sk_buff *seg,
-+ __be16 *oldport, __be16 newport)
-+{
-+ struct tcphdr *th;
-+
-+ if (*oldport == newport)
-+ return;
-+
-+ th = tcp_hdr(seg);
-+ inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false);
-+ *oldport = newport;
-+}
-+
-+static struct sk_buff *__tcpv6_gso_segment_list_csum(struct sk_buff *segs)
-+{
-+ const struct tcphdr *th;
-+ const struct ipv6hdr *iph;
-+ struct sk_buff *seg;
-+ struct tcphdr *th2;
-+ struct ipv6hdr *iph2;
-+
-+ seg = segs;
-+ th = tcp_hdr(seg);
-+ iph = ipv6_hdr(seg);
-+ th2 = tcp_hdr(seg->next);
-+ iph2 = ipv6_hdr(seg->next);
-+
-+ if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) &&
-+ ipv6_addr_equal(&iph->saddr, &iph2->saddr) &&
-+ ipv6_addr_equal(&iph->daddr, &iph2->daddr))
-+ return segs;
-+
-+ while ((seg = seg->next)) {
-+ th2 = tcp_hdr(seg);
-+ iph2 = ipv6_hdr(seg);
-+
-+ iph2->saddr = iph->saddr;
-+ iph2->daddr = iph->daddr;
-+ __tcpv6_gso_segment_csum(seg, &th2->source, th->source);
-+ __tcpv6_gso_segment_csum(seg, &th2->dest, th->dest);
-+ }
-+
-+ return segs;
-+}
-+
-+static struct sk_buff *__tcp6_gso_segment_list(struct sk_buff *skb,
-+ netdev_features_t features)
-+{
-+ skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
-+ if (IS_ERR(skb))
-+ return skb;
-+
-+ return __tcpv6_gso_segment_list_csum(skb);
-+}
-+
- static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
- netdev_features_t features)
- {
-@@ -51,6 +158,9 @@ static struct sk_buff *tcp6_gso_segment(
- if (!pskb_may_pull(skb, sizeof(*th)))
- return ERR_PTR(-EINVAL);
-
-+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
-+ return __tcp6_gso_segment_list(skb, features);
-+
- if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
- const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- struct tcphdr *th = tcp_hdr(skb);
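The __tcpv4_gso_segment_csum() helper in the patch above avoids recomputing TCP checksums for every segment by patching them incrementally, which is the same arithmetic csum_replace4()/inet_proto_csum_replace4() implement. A self-contained restatement of that update (RFC 1624, HC' = ~(~HC + ~m + m'); the sample values below are made up for illustration):

#include <stdio.h>
#include <stdint.h>

/* Update an Internet checksum in place when one 32-bit field changes,
 * without touching the rest of the packet (RFC 1624, equation 3). */
static uint16_t csum_update32(uint16_t check, uint32_t from, uint32_t to)
{
	uint32_t sum = (uint16_t)~check;

	sum += (~from & 0xffff) + (~from >> 16);	/* add ~m */
	sum += (to & 0xffff) + (to >> 16);		/* add m' */
	while (sum >> 16)	/* fold carries back into the low 16 bits */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* e.g. rewriting 192.168.1.2 -> 10.0.0.2 in a checksummed header */
	uint32_t from = 0xc0a80102, to = 0x0a000002;
	uint16_t check = 0xb1e6;	/* arbitrary starting checksum */

	printf("0x%04x -> 0x%04x\n", check, csum_update32(check, from, to));
	return 0;
}

This is why the fraglist path stays cheap: each forwarded segment only needs a handful of additions per rewritten field instead of a full checksum pass over the payload.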
diff --git a/6.10/target/linux/generic/pending-6.10/702-net-ethernet-mtk_eth_soc-enable-threaded-NAPI.patch b/6.10/target/linux/generic/pending-6.10/702-net-ethernet-mtk_eth_soc-enable-threaded-NAPI.patch
deleted file mode 100644
index f528e35d..00000000
--- a/6.10/target/linux/generic/pending-6.10/702-net-ethernet-mtk_eth_soc-enable-threaded-NAPI.patch
+++ /dev/null
@@ -1,21 +0,0 @@
-From: Felix Fietkau
-Date: Mon, 21 Mar 2022 20:39:59 +0100
-Subject: [PATCH] net: ethernet: mtk_eth_soc: enable threaded NAPI
-
-This can improve performance under load by ensuring that NAPI processing is
-not pinned on CPU 0.
-
-Signed-off-by: Felix Fietkau
----
-
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
-+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
-@@ -4994,6 +4994,8 @@ static int mtk_probe(struct platform_dev
- * for NAPI to work
- */
- init_dummy_netdev(&eth->dummy_dev);
-+ eth->dummy_dev.threaded = 1;
-+ strcpy(eth->dummy_dev.name, "mtk_eth");
- netif_napi_add(&eth->dummy_dev, &eth->tx_napi, mtk_napi_tx);
- netif_napi_add(&eth->dummy_dev, &eth->rx_napi, mtk_napi_rx);
-
diff --git a/6.10/target/linux/generic/pending-6.10/704-netfilter-nf_tables-fix-bidirectional-offload-regres.patch b/6.10/target/linux/generic/pending-6.10/704-netfilter-nf_tables-fix-bidirectional-offload-regres.patch
deleted file mode 100644
index d1d6fa3f..00000000
--- a/6.10/target/linux/generic/pending-6.10/704-netfilter-nf_tables-fix-bidirectional-offload-regres.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-From: Felix Fietkau
-Date: Wed, 14 Feb 2024 15:24:41 +0100
-Subject: [PATCH] netfilter: nf_tables: fix bidirectional offload regression
-
-Commit 8f84780b84d6 ("netfilter: flowtable: allow unidirectional rules")
-made unidirectional flow offload possible, while completely ignoring (and
-breaking) bidirectional flow offload for nftables.
-Add the missing flag that was left out as an exercise for the reader :)
-
-Cc: Vlad Buslov
-Fixes: 8f84780b84d6 ("netfilter: flowtable: allow unidirectional rules")
-Signed-off-by: Felix Fietkau
----
-
--- a/net/netfilter/nft_flow_offload.c
-+++ b/net/netfilter/nft_flow_offload.c
-@@ -361,6 +361,7 @@ static void nft_flow_offload_eval(const
- ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
- }
-
-+ __set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags);
- ret = flow_offload_add(flowtable, flow);
- if (ret < 0)
- goto err_flow_add;
diff --git a/6.10/target/linux/generic/pending-6.10/711-03-net-dsa-qca8k-add-support-for-port_change_master.patch b/6.10/target/linux/generic/pending-6.10/711-03-net-dsa-qca8k-add-support-for-port_change_master.patch
deleted file mode 100644
index 9b553e77..00000000
--- a/6.10/target/linux/generic/pending-6.10/711-03-net-dsa-qca8k-add-support-for-port_change_master.patch
+++ /dev/null
@@ -1,158 +0,0 @@
-From b2d6ebf2f92f8695c83fa6979f4ab579c588df76 Mon Sep 17 00:00:00 2001
-From: Christian Marangi
-Date: Tue, 20 Jun 2023 07:57:38 +0200
-Subject: [PATCH 4/4] net: dsa: qca8k: add support for port_change_master
-
-Add support for port_change_master to permit assigning an alternative
-CPU port if the switch has both CPU ports connected, or to create a LAG
-on both CPU ports and assign the LAG as DSA master.
-
-On a port change master request, we check if the master is a LAG.
-With a LAG we compose the cpu_port_mask from the CPU ports in the LAG;
-if the master is a simple dsa_port, we derive the index.
-
-Finally we apply the new cpu_port_mask to the LOOKUP MEMBER to permit
-the port to receive packets via the new CPU port set up for it, and we
-refresh the CPU ports' LOOKUP MEMBER configuration to reflect the new
-user port state.
-
-port_lag_join/leave is updated to refresh the user ports if we detect
-that the LAG is a DSA master and we have user ports using it as a master.
-
-Signed-off-by: Christian Marangi
----
- drivers/net/dsa/qca/qca8k-8xxx.c | 116 ++++++++++++++++++++++++++++++-
- 1 file changed, 114 insertions(+), 2 deletions(-)
-
--- a/drivers/net/dsa/qca/qca8k-8xxx.c
-+++ b/drivers/net/dsa/qca/qca8k-8xxx.c
-@@ -1738,6 +1738,117 @@ qca8k_get_tag_protocol(struct dsa_switch
- return DSA_TAG_PROTO_QCA;
- }
-
-+static int qca8k_port_change_master(struct dsa_switch *ds, int port,
-+ struct net_device *master,
-+ struct netlink_ext_ack *extack)
-+{
-+ struct dsa_switch_tree *dst = ds->dst;
-+ struct qca8k_priv *priv = ds->priv;
-+ u8 cpu_port_mask = 0;
-+ struct dsa_port *dp;
-+ u32 val;
-+ int ret;
-+
-+ /* With LAG of CPU port, compose the mask for port LOOKUP MEMBER */
-+ if (netif_is_lag_master(master)) {
-+ struct dsa_lag *lag;
-+ int id;
-+
-+ id = dsa_lag_id(dst, master);
-+ lag = dsa_lag_by_id(dst, id);
-+
-+ dsa_lag_foreach_port(dp, dst, lag)
-+ if (dsa_port_is_cpu(dp))
-+ cpu_port_mask |= BIT(dp->index);
-+ } else {
-+ dp = master->dsa_ptr;
-+ cpu_port_mask |= BIT(dp->index);
-+ }
-+
-+ /* Connect port to new cpu port */
-+ ret = regmap_read(priv->regmap, QCA8K_PORT_LOOKUP_CTRL(port), &val);
-+ if (ret)
-+ return ret;
-+
-+ /* Reset connected CPU port in port LOOKUP MEMBER */
-+ val &= ~dsa_cpu_ports(ds);
-+ /* Assign the new CPU port in port LOOKUP MEMBER */
-+ val |= cpu_port_mask;
-+
-+ ret = regmap_update_bits(priv->regmap, QCA8K_PORT_LOOKUP_CTRL(port),
-+ QCA8K_PORT_LOOKUP_MEMBER,
-+ val);
-+ if (ret)
-+ return ret;
-+
-+ /* Refresh CPU port LOOKUP MEMBER with new port */
-+ dsa_tree_for_each_cpu_port(dp, ds->dst) {
-+ u32 reg = QCA8K_PORT_LOOKUP_CTRL(dp->index);
-+
-+ /* If CPU port in mask assign port, else remove port */
-+ if (BIT(dp->index) & cpu_port_mask)
-+ ret = regmap_set_bits(priv->regmap, reg, BIT(port));
-+ else
-+ ret = regmap_clear_bits(priv->regmap, reg, BIT(port));
-+
-+ if (ret)
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+
-+static int qca8k_port_lag_refresh_user_ports(struct dsa_switch *ds,
-+ struct dsa_lag lag)
-+{
-+ struct net_device *lag_dev = lag.dev;
-+ struct dsa_port *dp;
-+ int ret;
-+
-+ /* Ignore if LAG is not a DSA master */
-+ if (!netif_is_lag_master(lag_dev))
-+ return 0;
-+
-+ dsa_switch_for_each_user_port(dp, ds) {
-+ /* Skip if assigned master is not the LAG */
-+ if (dsa_port_to_master(dp) != lag_dev)
-+ continue;
-+
-+ ret = qca8k_port_change_master(ds, dp->index,
-+ lag_dev, NULL);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+
-+static int qca8xxx_port_lag_join(struct dsa_switch *ds, int port,
-+ struct dsa_lag lag,
-+ struct netdev_lag_upper_info *info,
-+ struct netlink_ext_ack *extack)
-+{
-+ int ret;
-+
-+ ret = qca8k_port_lag_join(ds, port, lag, info, extack);
-+ if (ret)
-+ return ret;
-+
-+ return qca8k_port_lag_refresh_user_ports(ds, lag);
-+}
-+
-+static int qca8xxx_port_lag_leave(struct dsa_switch *ds, int port,
-+ struct dsa_lag lag)
-+{
-+ int ret;
-+
-+ ret = qca8k_port_lag_leave(ds, port, lag);
-+ if (ret)
-+ return ret;
-+
-+ return qca8k_port_lag_refresh_user_ports(ds, lag);
-+}
-+
- static void
- qca8k_master_change(struct dsa_switch *ds, const struct net_device *master,
- bool operational)
-@@ -2024,8 +2135,9 @@ static const struct dsa_switch_ops qca8k
- .phylink_mac_link_down = qca8k_phylink_mac_link_down,
- .phylink_mac_link_up = qca8k_phylink_mac_link_up,
- .get_phy_flags = qca8k_get_phy_flags,
-- .port_lag_join = qca8k_port_lag_join,
-- .port_lag_leave = qca8k_port_lag_leave,
-+ .port_lag_join = qca8xxx_port_lag_join,
-+ .port_lag_leave = qca8xxx_port_lag_leave,
-+ .port_change_master = qca8k_port_change_master,
- .master_state_change = qca8k_master_change,
- .connect_tag_protocol = qca8k_connect_tag_protocol,
- };
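The LOOKUP MEMBER manipulation in the patch above is plain bitmask surgery: each port's register holds one bit per port it may forward to, so changing masters means clearing every CPU-port bit and setting only the new one(s). A standalone illustration (the port numbers are invented; on qca8k the CPU ports are typically 0 and 6):

#include <stdio.h>

int main(void)
{
	unsigned int cpu_ports = (1u << 0) | (1u << 6);	/* all CPU ports */
	unsigned int lookup = (1u << 0) | (1u << 3);	/* port 3 via CPU 0 */
	unsigned int new_cpu = 1u << 6;			/* move to CPU 6 */

	lookup &= ~cpu_ports;	/* drop the old CPU port(s) */
	lookup |= new_cpu;	/* attach the new one */

	/* prints 0x48: port 3 now forwards to CPU port 6 only */
	printf("LOOKUP MEMBER: 0x%02x\n", lookup);
	return 0;
}

The second half of qca8k_port_change_master() then does the mirror-image update: each CPU port's own LOOKUP MEMBER gains or loses the user port's bit so traffic can flow back.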
diff --git a/6.10/target/linux/generic/pending-6.10/722-net-phy-realtek-support-switching-between-SGMII-and-.patch b/6.10/target/linux/generic/pending-6.10/722-net-phy-realtek-support-switching-between-SGMII-and-.patch
deleted file mode 100644
index 58bd2591..00000000
--- a/6.10/target/linux/generic/pending-6.10/722-net-phy-realtek-support-switching-between-SGMII-and-.patch
+++ /dev/null
@@ -1,61 +0,0 @@
-From 312753d0aadba0f58841ae513b80fdbabc887523 Mon Sep 17 00:00:00 2001
-From: Chukun Pan
-Date: Wed, 8 Feb 2023 16:32:18 +0800
-Subject: [PATCH] net: phy: realtek: support switching between SGMII and
- 2500BASE-X for RTL822x series
-
-After commit ace6aba ("net: phy: realtek: rtl8221: allow to configure
-SERDES mode"), the rtl8221 phy can work in either SGMII or 2500base-x
-mode. So add automatic interface switching for the rtl8221 phy to
-match various wire speeds.
-
-Signed-off-by: Chukun Pan
----
- drivers/net/phy/realtek.c | 26 ++++++++++++++++++++++++--
- 1 file changed, 24 insertions(+), 2 deletions(-)
-
--- a/drivers/net/phy/realtek.c
-+++ b/drivers/net/phy/realtek.c
-@@ -714,6 +714,25 @@ static int rtl822x_config_aneg(struct ph
- return __genphy_config_aneg(phydev, ret);
- }
-
-+static void rtl822x_update_interface(struct phy_device *phydev)
-+{
-+ /* Automatically switch SERDES interface between
-+ * SGMII and 2500-BaseX according to speed.
-+ */
-+ switch (phydev->speed) {
-+ case SPEED_2500:
-+ phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
-+ break;
-+ case SPEED_1000:
-+ case SPEED_100:
-+ case SPEED_10:
-+ phydev->interface = PHY_INTERFACE_MODE_SGMII;
-+ break;
-+ default:
-+ break;
-+ }
-+}
-+
- static int rtl822x_read_status(struct phy_device *phydev)
- {
- int ret;
-@@ -732,11 +751,14 @@ static int rtl822x_read_status(struct ph
- phydev->lp_advertising, lpadv & RTL_LPADV_2500FULL);
- }
-
-- ret = genphy_read_status(phydev);
-+ ret = rtlgen_read_status(phydev);
- if (ret < 0)
- return ret;
-
-- return rtlgen_get_speed(phydev);
-+ if (phydev->is_c45 && phydev->link)
-+ rtl822x_update_interface(phydev);
-+
-+ return 0;
- }
-
- static bool rtlgen_supports_2_5gbps(struct phy_device *phydev)
diff --git a/6.10/target/linux/generic/pending-6.10/727-net-phy-realtek-use-inline-functions-for-10GbE-adver.patch b/6.10/target/linux/generic/pending-6.10/727-net-phy-realtek-use-inline-functions-for-10GbE-adver.patch
deleted file mode 100644
index e6cbfbe6..00000000
--- a/6.10/target/linux/generic/pending-6.10/727-net-phy-realtek-use-inline-functions-for-10GbE-adver.patch
+++ /dev/null
@@ -1,60 +0,0 @@
-From 92c8b9d558160d94b981dd8a2b9c47657627ffdc Mon Sep 17 00:00:00 2001
-From: Daniel Golle
-Date: Sat, 22 Apr 2023 01:23:08 +0100
-Subject: [PATCH 2/3] net: phy: realtek: use inline functions for 10GbE
- advertisement
-
-Use existing generic inline functions to encode local advertisement
-of 10GbE link modes as well as to decode link-partner advertisement.
-
-Signed-off-by: Daniel Golle
----
- drivers/net/phy/realtek.c | 22 +++++-----------------
- 1 file changed, 5 insertions(+), 17 deletions(-)
-
--- a/drivers/net/phy/realtek.c
-+++ b/drivers/net/phy/realtek.c
-@@ -69,10 +69,6 @@
- #define RTL_SUPPORTS_5000FULL BIT(14)
- #define RTL_SUPPORTS_2500FULL BIT(13)
- #define RTL_SUPPORTS_10000FULL BIT(0)
--#define RTL_ADV_2500FULL BIT(7)
--#define RTL_LPADV_10000FULL BIT(11)
--#define RTL_LPADV_5000FULL BIT(6)
--#define RTL_LPADV_2500FULL BIT(5)
-
- #define RTL9000A_GINMR 0x14
- #define RTL9000A_GINMR_LINK_STATUS BIT(4)
-@@ -699,14 +695,11 @@ static int rtl822x_config_aneg(struct ph
- int ret = 0;
-
- if (phydev->autoneg == AUTONEG_ENABLE) {
-- u16 adv2500 = 0;
--
-- if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
-- phydev->advertising))
-- adv2500 = RTL_ADV_2500FULL;
--
- ret = phy_modify_paged_changed(phydev, 0xa5d, 0x12,
-- RTL_ADV_2500FULL, adv2500);
-+ MDIO_AN_10GBT_CTRL_ADV10G |
-+ MDIO_AN_10GBT_CTRL_ADV5G |
-+ MDIO_AN_10GBT_CTRL_ADV2_5G,
-+ linkmode_adv_to_mii_10gbt_adv_t(phydev->advertising));
- if (ret < 0)
- return ret;
- }
-@@ -743,12 +736,7 @@ static int rtl822x_read_status(struct ph
- if (lpadv < 0)
- return lpadv;
-
-- linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
-- phydev->lp_advertising, lpadv & RTL_LPADV_10000FULL);
-- linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
-- phydev->lp_advertising, lpadv & RTL_LPADV_5000FULL);
-- linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
-- phydev->lp_advertising, lpadv & RTL_LPADV_2500FULL);
-+ mii_10gbt_stat_mod_linkmode_lpa_t(phydev->lp_advertising, lpadv);
- }
-
- ret = rtlgen_read_status(phydev);
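The generic helpers adopted above work because the Realtek register follows the standard Clause-45 AN 10GBASE-T layout: per uapi/linux/mdio.h the 2.5G/5G/10G advertisement bits in the control register are 0x0080, 0x0100 and 0x1000, which is also why the dropped RTL_ADV_2500FULL BIT(7) lined up with the standard field. A standalone restatement of the encoding (illustrative only, not the kernel's implementation):

#include <stdio.h>

#define ADV2_5G	(1u << 7)	/* MDIO_AN_10GBT_CTRL_ADV2_5G */
#define ADV5G	(1u << 8)	/* MDIO_AN_10GBT_CTRL_ADV5G */
#define ADV10G	(1u << 12)	/* MDIO_AN_10GBT_CTRL_ADV10G */

int main(void)
{
	unsigned int reg = 0;
	int want_2g5 = 1, want_5g = 0, want_10g = 0;

	/* mirror what linkmode_adv_to_mii_10gbt_adv_t() derives from
	 * the ethtool linkmode bitmap */
	if (want_2g5)
		reg |= ADV2_5G;
	if (want_5g)
		reg |= ADV5G;
	if (want_10g)
		reg |= ADV10G;

	printf("AN_10GBT_CTRL advertisement: 0x%04x\n", reg); /* 0x0080 */
	return 0;
}

Decoding the link partner's status register with mii_10gbt_stat_mod_linkmode_lpa_t() is the inverse mapping over the corresponding status bits.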
diff --git a/6.10/target/linux/generic/pending-6.10/728-net-phy-realtek-check-validity-of-10GbE-link-partner.patch b/6.10/target/linux/generic/pending-6.10/728-net-phy-realtek-check-validity-of-10GbE-link-partner.patch
deleted file mode 100644
index 329415ba..00000000
--- a/6.10/target/linux/generic/pending-6.10/728-net-phy-realtek-check-validity-of-10GbE-link-partner.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-From 929bb4d3cfbc7878326c0771a01a636d49c54b40 Mon Sep 17 00:00:00 2001
-From: Daniel Golle
-Date: Sat, 22 Apr 2023 01:25:39 +0100
-Subject: [PATCH 3/3] net: phy: realtek: check validity of 10GbE link-partner
- advertisement
-
-Only use link-partner advertisement bits for 10GbE modes if they are
-actually valid. Check LOCALOK and REMOTEOK bits and clear 10GbE modes
-unless both of them are set.
-
-Signed-off-by: Daniel Golle
----
- drivers/net/phy/realtek.c | 4 ++++
- 1 file changed, 4 insertions(+)
-
--- a/drivers/net/phy/realtek.c
-+++ b/drivers/net/phy/realtek.c
-@@ -736,6 +736,10 @@ static int rtl822x_read_status(struct ph
- if (lpadv < 0)
- return lpadv;
-
-+ if (!(lpadv & MDIO_AN_10GBT_STAT_REMOK) ||
-+ !(lpadv & MDIO_AN_10GBT_STAT_LOCOK))
-+ lpadv = 0;
-+
- mii_10gbt_stat_mod_linkmode_lpa_t(phydev->lp_advertising, lpadv);
- }
-
diff --git a/6.10/target/linux/generic/pending-6.10/729-net-phy-realtek-introduce-rtl822x_probe.patch b/6.10/target/linux/generic/pending-6.10/729-net-phy-realtek-introduce-rtl822x_probe.patch
deleted file mode 100644
index 7098fa6b..00000000
--- a/6.10/target/linux/generic/pending-6.10/729-net-phy-realtek-introduce-rtl822x_probe.patch
+++ /dev/null
@@ -1,84 +0,0 @@
-From 9155098547fb1172d4fa536f3f6bc9d42f59d08c Mon Sep 17 00:00:00 2001
-From: Daniel Golle
-Date: Sat, 22 Apr 2023 03:26:01 +0100
-Subject: [PATCH] net: phy: realtek: setup ALDPS on RTL822x
-
-Setup Link Down Power Saving Mode according to the DTS property
-just like for the RTL821x 1GE PHYs.
-
-Signed-off-by: Daniel Golle
----
- drivers/net/phy/realtek.c | 11 +++++++++++
- 1 file changed, 11 insertions(+)
-
--- a/drivers/net/phy/realtek.c
-+++ b/drivers/net/phy/realtek.c
-@@ -63,6 +63,10 @@
- #define RTL8221B_SERDES_OPTION_MODE_2500BASEX 2
- #define RTL8221B_SERDES_OPTION_MODE_HISGMII 3
-
-+#define RTL8221B_PHYCR1 0xa430
-+#define RTL8221B_PHYCR1_ALDPS_EN BIT(2)
-+#define RTL8221B_PHYCR1_ALDPS_XTAL_OFF_EN BIT(12)
-+
- #define RTL8366RB_POWER_SAVE 0x15
- #define RTL8366RB_POWER_SAVE_ON BIT(12)
-
-@@ -778,6 +782,25 @@ static int rtl8226_match_phy_device(stru
- rtlgen_supports_2_5gbps(phydev);
- }
-
-+static int rtl822x_probe(struct phy_device *phydev)
-+{
-+ struct device *dev = &phydev->mdio.dev;
-+ int val;
-+
-+ val = phy_read_mmd(phydev, RTL8221B_MMD_SERDES_CTRL, RTL8221B_PHYCR1);
-+ if (val < 0)
-+ return val;
-+
-+ if (of_property_read_bool(dev->of_node, "realtek,aldps-enable"))
-+ val |= RTL8221B_PHYCR1_ALDPS_EN | RTL8221B_PHYCR1_ALDPS_XTAL_OFF_EN;
-+ else
-+ val &= ~(RTL8221B_PHYCR1_ALDPS_EN | RTL8221B_PHYCR1_ALDPS_XTAL_OFF_EN);
-+
-+ phy_write_mmd(phydev, RTL8221B_MMD_SERDES_CTRL, RTL8221B_PHYCR1, val);
-+
-+ return 0;
-+}
-+
- static int rtlgen_resume(struct phy_device *phydev)
- {
- int ret = genphy_resume(phydev);
-@@ -1091,6 +1114,7 @@ static struct phy_driver realtek_drvs[]
- .name = "RTL8226-CG 2.5Gbps PHY",
- .get_features = rtl822x_get_features,
- .config_aneg = rtl822x_config_aneg,
-+ .probe = rtl822x_probe,
- .read_status = rtl822x_read_status,
- .suspend = genphy_suspend,
- .resume = rtlgen_resume,
-@@ -1102,6 +1126,7 @@ static struct phy_driver realtek_drvs[]
- .name = "RTL8226B-CG_RTL8221B-CG 2.5Gbps PHY",
- .get_features = rtl822x_get_features,
- .config_aneg = rtl822x_config_aneg,
-+ .probe = rtl822x_probe,
- .read_status = rtl822x_read_status,
- .suspend = genphy_suspend,
- .resume = rtlgen_resume,
-@@ -1114,6 +1139,7 @@ static struct phy_driver realtek_drvs[]
- .get_features = rtl822x_get_features,
- .config_init = rtl8221b_config_init,
- .config_aneg = rtl822x_config_aneg,
-+ .probe = rtl822x_probe,
- .read_status = rtl822x_read_status,
- .suspend = genphy_suspend,
- .resume = rtlgen_resume,
-@@ -1126,6 +1152,7 @@ static struct phy_driver realtek_drvs[]
- .get_features = rtl822x_get_features,
- .config_aneg = rtl822x_config_aneg,
- .config_init = rtl8221b_config_init,
-+ .probe = rtl822x_probe,
- .read_status = rtl822x_read_status,
- .suspend = genphy_suspend,
- .resume = rtlgen_resume,
diff --git a/6.10/target/linux/generic/pending-6.10/730-net-phy-realtek-detect-early-version-of-RTL8221B.patch b/6.10/target/linux/generic/pending-6.10/730-net-phy-realtek-detect-early-version-of-RTL8221B.patch
deleted file mode 100644
index 0e9affd1..00000000
--- a/6.10/target/linux/generic/pending-6.10/730-net-phy-realtek-detect-early-version-of-RTL8221B.patch
+++ /dev/null
@@ -1,71 +0,0 @@
-From 0de82310d2b32e78ff79d42c08b1122a6ede3778 Mon Sep 17 00:00:00 2001
-From: Daniel Golle
-Date: Sun, 30 Apr 2023 00:15:41 +0100
-Subject: [PATCH] net: phy: realtek: detect early version of RTL8221B
-
-Early versions (?) of the RTL8221B PHY cannot be identified in a regular
-Clause-45 bus scan as the PHY doesn't report the implemented MMDs
-correctly but returns 0 instead.
-Implement a custom identify function using the PKGID instead of iterating
-over the implemented MMDs.
-
-Signed-off-by: Daniel Golle
-
--- a/drivers/net/phy/realtek.c
-+++ b/drivers/net/phy/realtek.c
-@@ -81,6 +81,7 @@
-
- #define RTL_GENERIC_PHYID 0x001cc800
- #define RTL_8211FVD_PHYID 0x001cc878
-+#define RTL_8221B_VB_CG_PHYID 0x001cc849
-
- MODULE_DESCRIPTION("Realtek PHY driver");
- MODULE_AUTHOR("Johnson Leung");
-@@ -782,6 +783,38 @@ static int rtl8226_match_phy_device(stru
- rtlgen_supports_2_5gbps(phydev);
- }
-
-+static int rtl8221b_vb_cg_match_phy_device(struct phy_device *phydev)
-+{
-+ int val;
-+ u32 id;
-+
-+ if (phydev->mdio.bus->read_c45) {
-+ val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PKGID1);
-+ if (val < 0)
-+ return 0;
-+
-+ id = val << 16;
-+ val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PKGID2);
-+ if (val < 0)
-+ return 0;
-+
-+ id |= val;
-+ } else {
-+ val = phy_read(phydev, MII_PHYSID1);
-+ if (val < 0)
-+ return 0;
-+
-+ id = val << 16;
-+ val = phy_read(phydev, MII_PHYSID2);
-+ if (val < 0)
-+ return 0;
-+
-+ id |= val;
-+ }
-+
-+ return (id == RTL_8221B_VB_CG_PHYID);
-+}
-+
- static int rtl822x_probe(struct phy_device *phydev)
- {
- struct device *dev = &phydev->mdio.dev;
-@@ -1134,7 +1167,7 @@ static struct phy_driver realtek_drvs[]
- .write_page = rtl821x_write_page,
- .soft_reset = genphy_soft_reset,
- }, {
-- PHY_ID_MATCH_EXACT(0x001cc849),
-+ .match_phy_device = rtl8221b_vb_cg_match_phy_device,
- .name = "RTL8221B-VB-CG 2.5Gbps PHY",
- .get_features = rtl822x_get_features,
- .config_init = rtl8221b_config_init,
diff --git a/6.10/target/linux/generic/pending-6.10/739-02-phy-add-driver-for-MediaTek-XFI-T-PHY.patch b/6.10/target/linux/generic/pending-6.10/739-02-phy-add-driver-for-MediaTek-XFI-T-PHY.patch
deleted file mode 100644
index 1aa36fcd..00000000
--- a/6.10/target/linux/generic/pending-6.10/739-02-phy-add-driver-for-MediaTek-XFI-T-PHY.patch
+++ /dev/null
@@ -1,498 +0,0 @@
-From patchwork Thu Feb 1 21:53:06 2024
-Content-Type: text/plain; charset="utf-8"
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-X-Patchwork-Submitter: Daniel Golle
-X-Patchwork-Id: 13541843
-Date: Thu, 1 Feb 2024 21:53:06 +0000
-From: Daniel Golle
-To: Bc-bocun Chen,
- Chunfeng Yun,
- Vinod Koul,
- Kishon Vijay Abraham I,
- Rob Herring,
- Krzysztof Kozlowski,
- Conor Dooley,
- Daniel Golle,
- Qingfang Deng,
- SkyLake Huang,
- Matthias Brugger,
- AngeloGioacchino Del Regno,
- Philipp Zabel,
- linux-arm-kernel@lists.infradead.org,
- linux-mediatek@lists.infradead.org, linux-phy@lists.infradead.org,
- devicetree@vger.kernel.org, linux-kernel@vger.kernel.org,
- netdev@vger.kernel.org
-Subject: [PATCH 2/2] phy: add driver for MediaTek XFI T-PHY
-Message-ID:
-
-References:
- <702afb0c1246d95c90b22e57105304028bdd3083.1706823233.git.daniel@makrotopia.org>
-MIME-Version: 1.0
-Content-Disposition: inline
-In-Reply-To:
- <702afb0c1246d95c90b22e57105304028bdd3083.1706823233.git.daniel@makrotopia.org>
-List-Id: Linux Phy Mailing list
-
-Add driver for MediaTek's XFI T-PHY, a 10 Gigabit/s Ethernet SerDes PHY
-which can be found in the MT7988 SoC.
-
-The PHY can operate only in PHY_MODE_ETHERNET; the submode is one of
-PHY_INTERFACE_MODE_* corresponding to the supported modes:
-
- * USXGMII \
- * 10GBase-R }- USXGMII PCS - XGDM \
- * 5GBase-R / \
- }- Ethernet MAC
- * 2500Base-X \ /
- * 1000Base-X }- LynxI PCS - GDM /
- * Cisco SGMII (MAC side) /
-
-In order to work around a performance issue present on the first of the
-two XFI T-PHYs present in MT7988, special tuning is applied which can be
-selected by adding the 'mediatek,usxgmii-performance-errata' property to
-the device tree node.
-
-There is no documentation for most registers used for the
-analog/tuning part; however, most of the registers have been partially
-reverse-engineered from MediaTek's SDK implementation (an opaque
-sequence of 32-bit register writes) and descriptions for all relevant
-digital registers and bits such as resets and muxes have been supplied
-by MediaTek.
-
-Signed-off-by: Daniel Golle
----
- MAINTAINERS | 1 +
- drivers/phy/mediatek/Kconfig | 12 +
- drivers/phy/mediatek/Makefile | 1 +
- drivers/phy/mediatek/phy-mtk-xfi-tphy.c | 392 ++++++++++++++++++++++++
- 4 files changed, 406 insertions(+)
- create mode 100644 drivers/phy/mediatek/phy-mtk-xfi-tphy.c
-
--- a/drivers/phy/mediatek/Kconfig
-+++ b/drivers/phy/mediatek/Kconfig
-@@ -13,6 +13,18 @@ config PHY_MTK_PCIE
- callback for PCIe GEN3 port, it supports software efuse
- initialization.
-
-+config PHY_MTK_XFI_TPHY
-+ tristate "MediaTek XFI T-PHY Driver"
-+ depends on ARCH_MEDIATEK || COMPILE_TEST
-+ depends on OF && OF_ADDRESS
-+ depends on HAS_IOMEM
-+ select GENERIC_PHY
-+ help
-+ Say 'Y' here to add support for MediaTek XFI T-PHY driver.
-+ The driver provides access to the Ethernet SerDes T-PHY supporting
-+ 1GE and 2.5GE modes via the LynxI PCS, and 5GE and 10GE modes
-+ via the USXGMII PCS found in MediaTek SoCs with 10G Ethernet.
-+
- config PHY_MTK_TPHY
- tristate "MediaTek T-PHY Driver"
- depends on ARCH_MEDIATEK || COMPILE_TEST
---- a/drivers/phy/mediatek/Makefile
-+++ b/drivers/phy/mediatek/Makefile
-@@ -8,6 +8,7 @@ obj-$(CONFIG_PHY_MTK_PCIE) += phy-mtk-p
- obj-$(CONFIG_PHY_MTK_TPHY) += phy-mtk-tphy.o
- obj-$(CONFIG_PHY_MTK_UFS) += phy-mtk-ufs.o
- obj-$(CONFIG_PHY_MTK_XSPHY) += phy-mtk-xsphy.o
-+obj-$(CONFIG_PHY_MTK_XFI_TPHY) += phy-mtk-xfi-tphy.o
-
- phy-mtk-hdmi-drv-y := phy-mtk-hdmi.o
- phy-mtk-hdmi-drv-y += phy-mtk-hdmi-mt2701.o
---- /dev/null
-+++ b/drivers/phy/mediatek/phy-mtk-xfi-tphy.c
-@@ -0,0 +1,393 @@
-+// SPDX-License-Identifier: GPL-2.0-or-later
-+/* MediaTek 10GE SerDes PHY driver
-+ *
-+ * Copyright (c) 2024 Daniel Golle
-+ * Bc-bocun Chen
-+ * based on mtk_usxgmii.c found in MediaTek's SDK released under GPL-2.0
-+ * Copyright (c) 2022 MediaTek Inc.
-+ * Author: Henry Yen
-+ */
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+#include
-+
-+#define MTK_XFI_TPHY_NUM_CLOCKS 2
-+
-+#define REG_DIG_GLB_70 0x0070
-+#define XTP_PCS_RX_EQ_IN_PROGRESS(x) FIELD_PREP(GENMASK(25, 24), (x))
-+#define XTP_PCS_MODE_MASK GENMASK(17, 16)
-+#define XTP_PCS_MODE(x) FIELD_PREP(GENMASK(17, 16), (x))
-+#define XTP_PCS_RST_B BIT(15)
-+#define XTP_FRC_PCS_RST_B BIT(14)
-+#define XTP_PCS_PWD_SYNC_MASK GENMASK(13, 12)
-+#define XTP_PCS_PWD_SYNC(x) FIELD_PREP(XTP_PCS_PWD_SYNC_MASK, (x))
-+#define XTP_PCS_PWD_ASYNC_MASK GENMASK(11, 10)
-+#define XTP_PCS_PWD_ASYNC(x) FIELD_PREP(XTP_PCS_PWD_ASYNC_MASK, (x))
-+#define XTP_FRC_PCS_PWD_ASYNC BIT(8)
-+#define XTP_PCS_UPDT BIT(4)
-+#define XTP_PCS_IN_FR_RG BIT(0)
-+
-+#define REG_DIG_GLB_F4 0x00f4
-+#define XFI_DPHY_PCS_SEL BIT(0)
-+#define XFI_DPHY_PCS_SEL_SGMII FIELD_PREP(XFI_DPHY_PCS_SEL, 1)
-+#define XFI_DPHY_PCS_SEL_USXGMII FIELD_PREP(XFI_DPHY_PCS_SEL, 0)
-+#define XFI_DPHY_AD_SGDT_FRC_EN BIT(5)
-+
-+#define REG_DIG_LN_TRX_40 0x3040
-+#define XTP_LN_FRC_TX_DATA_EN BIT(29)
-+#define XTP_LN_TX_DATA_EN BIT(28)
-+
-+#define REG_DIG_LN_TRX_B0 0x30b0
-+#define XTP_LN_FRC_TX_MACCK_EN BIT(5)
-+#define XTP_LN_TX_MACCK_EN BIT(4)
-+
-+#define REG_ANA_GLB_D0 0x90d0
-+#define XTP_GLB_USXGMII_SEL_MASK GENMASK(3, 1)
-+#define XTP_GLB_USXGMII_SEL(x) FIELD_PREP(GENMASK(3, 1), (x))
-+#define XTP_GLB_USXGMII_EN BIT(0)
-+
-+struct mtk_xfi_tphy {
-+ void __iomem *base;
-+ struct device *dev;
-+ struct reset_control *reset;
-+ struct clk_bulk_data clocks[MTK_XFI_TPHY_NUM_CLOCKS];
-+ bool da_war;
-+};
-+
-+static void mtk_xfi_tphy_write(struct mtk_xfi_tphy *xfi_tphy, u16 reg,
-+ u32 value)
-+{
-+ iowrite32(value, xfi_tphy->base + reg);
-+}
-+
-+static void mtk_xfi_tphy_rmw(struct mtk_xfi_tphy *xfi_tphy, u16 reg,
-+ u32 clr, u32 set)
-+{
-+ u32 val;
-+
-+ val = ioread32(xfi_tphy->base + reg);
-+ val &= ~clr;
-+ val |= set;
-+ iowrite32(val, xfi_tphy->base + reg);
-+}
-+
-+static void mtk_xfi_tphy_set(struct mtk_xfi_tphy *xfi_tphy, u16 reg,
-+ u32 set)
-+{
-+ mtk_xfi_tphy_rmw(xfi_tphy, reg, 0, set);
-+}
-+
-+static void mtk_xfi_tphy_clear(struct mtk_xfi_tphy *xfi_tphy, u16 reg,
-+ u32 clr)
-+{
-+ mtk_xfi_tphy_rmw(xfi_tphy, reg, clr, 0);
-+}
-+
-+static void mtk_xfi_tphy_setup(struct mtk_xfi_tphy *xfi_tphy,
-+ phy_interface_t interface)
-+{
-+ bool is_2p5g = (interface == PHY_INTERFACE_MODE_2500BASEX);
-+ bool is_1g = (interface == PHY_INTERFACE_MODE_1000BASEX ||
-+ interface == PHY_INTERFACE_MODE_SGMII);
-+ bool is_10g = (interface == PHY_INTERFACE_MODE_10GBASER ||
-+ interface == PHY_INTERFACE_MODE_USXGMII);
-+ bool is_5g = (interface == PHY_INTERFACE_MODE_5GBASER);
-+ bool is_xgmii = (is_10g || is_5g);
-+
-+ dev_dbg(xfi_tphy->dev, "setting up for mode %s\n", phy_modes(interface));
-+
-+ /* Setup PLL setting */
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x9024, 0x100000, is_10g ? 0x0 : 0x100000);
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x2020, 0x202000, is_5g ? 0x202000 : 0x0);
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x2030, 0x500, is_1g ? 0x0 : 0x500);
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x2034, 0xa00, is_1g ? 0x0 : 0xa00);
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x2040, 0x340000, is_1g ? 0x200000 :
-+ 0x140000);
-+
-+ /* Setup RXFE BW setting */
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x50f0, 0xc10, is_1g ? 0x410 :
-+ is_5g ? 0x800 : 0x400);
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x50e0, 0x4000, is_5g ? 0x0 : 0x4000);
-+
-+ /* Setup RX CDR setting */
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x506c, 0x30000, is_5g ? 0x0 : 0x30000);
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x5070, 0x670000, is_5g ? 0x620000 : 0x50000);
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x5074, 0x180000, is_5g ? 0x180000 : 0x0);
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x5078, 0xf000400, is_5g ? 0x8000000 :
-+ 0x7000400);
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x507c, 0x5000500, is_5g ? 0x4000400 :
-+ 0x1000100);
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x5080, 0x1410, is_1g ? 0x400 :
-+ is_5g ? 0x1010 : 0x0);
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x5084, 0x30300, is_1g ? 0x30300 :
-+ is_5g ? 0x30100 :
-+ 0x100);
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x5088, 0x60200, is_1g ? 0x20200 :
-+ is_5g ? 0x40000 :
-+ 0x20000);
-+
-+ /* Setting RXFE adaptation range setting */
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x50e4, 0xc0000, is_5g ? 0x0 : 0xc0000);
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x50e8, 0x40000, is_5g ? 0x0 : 0x40000);
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x50ec, 0xa00, is_1g ? 0x200 : 0x800);
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x50a8, 0xee0000, is_5g ? 0x800000 :
-+ 0x6e0000);
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x6004, 0x190000, is_5g ? 0x0 : 0x190000);
-+ if (is_10g)
-+ mtk_xfi_tphy_write(xfi_tphy, 0x00f8, 0x01423342);
-+ else if (is_5g)
-+ mtk_xfi_tphy_write(xfi_tphy, 0x00f8, 0x00a132a1);
-+ else if (is_2p5g)
-+ mtk_xfi_tphy_write(xfi_tphy, 0x00f8, 0x009c329c);
-+ else
-+ mtk_xfi_tphy_write(xfi_tphy, 0x00f8, 0x00fa32fa);
-+
-+ /* Force SGDT_OUT off and select PCS */
-+ mtk_xfi_tphy_rmw(xfi_tphy, REG_DIG_GLB_F4,
-+ XFI_DPHY_AD_SGDT_FRC_EN | XFI_DPHY_PCS_SEL,
-+ XFI_DPHY_AD_SGDT_FRC_EN |
-+ (is_xgmii ? XFI_DPHY_PCS_SEL_USXGMII :
-+ XFI_DPHY_PCS_SEL_SGMII));
-+
-+
-+ /* Force GLB_CKDET_OUT */
-+ mtk_xfi_tphy_set(xfi_tphy, 0x0030, 0xc00);
-+
-+ /* Force AEQ on */
-+ mtk_xfi_tphy_write(xfi_tphy, REG_DIG_GLB_70,
-+ XTP_PCS_RX_EQ_IN_PROGRESS(2) |
-+ XTP_PCS_PWD_SYNC(2) |
-+ XTP_PCS_PWD_ASYNC(2));
-+
-+ usleep_range(1, 5);
-+ writel(XTP_LN_FRC_TX_DATA_EN, xfi_tphy->base + REG_DIG_LN_TRX_40);
-+
-+ /* Setup TX DA default value */
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x30b0, 0x30, 0x20);
-+ mtk_xfi_tphy_write(xfi_tphy, 0x3028, 0x00008a01);
-+ mtk_xfi_tphy_write(xfi_tphy, 0x302c, 0x0000a884);
-+ mtk_xfi_tphy_write(xfi_tphy, 0x3024, 0x00083002);
-+
-+ /* Setup RG default value */
-+ if (is_xgmii) {
-+ mtk_xfi_tphy_write(xfi_tphy, 0x3010, 0x00022220);
-+ mtk_xfi_tphy_write(xfi_tphy, 0x5064, 0x0f020a01);
-+ mtk_xfi_tphy_write(xfi_tphy, 0x50b4, 0x06100600);
-+ if (interface == PHY_INTERFACE_MODE_USXGMII)
-+ mtk_xfi_tphy_write(xfi_tphy, 0x3048, 0x40704000);
-+ else
-+ mtk_xfi_tphy_write(xfi_tphy, 0x3048, 0x47684100);
-+ } else {
-+ mtk_xfi_tphy_write(xfi_tphy, 0x3010, 0x00011110);
-+ mtk_xfi_tphy_write(xfi_tphy, 0x3048, 0x40704000);
-+ }
-+
-+ if (is_1g)
-+ mtk_xfi_tphy_write(xfi_tphy, 0x3064, 0x0000c000);
-+
-+ /* Setup RX EQ initial value */
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x3050, 0xa8000000,
-+ (interface != PHY_INTERFACE_MODE_10GBASER) ?
-+ 0xa8000000 : 0x0);
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0x3054, 0xaa,
-+ (interface != PHY_INTERFACE_MODE_10GBASER) ?
-+ 0xaa : 0x0);
-+
-+ if (is_xgmii)
-+ mtk_xfi_tphy_write(xfi_tphy, 0x306c, 0x00000f00);
-+ else if (is_2p5g)
-+ mtk_xfi_tphy_write(xfi_tphy, 0x306c, 0x22000f00);
-+ else
-+ mtk_xfi_tphy_write(xfi_tphy, 0x306c, 0x20200f00);
-+
-+ if (interface == PHY_INTERFACE_MODE_10GBASER && xfi_tphy->da_war)
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0xa008, 0x10000, 0x10000);
-+
-+ mtk_xfi_tphy_rmw(xfi_tphy, 0xa060, 0x50000, is_xgmii ? 0x40000 :
-+ 0x50000);
-+
-+ /* Setup PHYA speed */
-+ mtk_xfi_tphy_rmw(xfi_tphy, REG_ANA_GLB_D0,
-+ XTP_GLB_USXGMII_SEL_MASK | XTP_GLB_USXGMII_EN,
-+ is_10g ?
XTP_GLB_USXGMII_SEL(0) : -+ is_5g ? XTP_GLB_USXGMII_SEL(1) : -+ is_2p5g ? XTP_GLB_USXGMII_SEL(2) : -+ XTP_GLB_USXGMII_SEL(3)); -+ mtk_xfi_tphy_set(xfi_tphy, REG_ANA_GLB_D0, XTP_GLB_USXGMII_EN); -+ -+ /* Release reset */ -+ mtk_xfi_tphy_set(xfi_tphy, REG_DIG_GLB_70, -+ XTP_PCS_RST_B | XTP_FRC_PCS_RST_B); -+ usleep_range(150, 500); -+ -+ /* Switch to P0 */ -+ mtk_xfi_tphy_rmw(xfi_tphy, REG_DIG_GLB_70, -+ XTP_PCS_PWD_SYNC_MASK | -+ XTP_PCS_PWD_ASYNC_MASK, -+ XTP_FRC_PCS_PWD_ASYNC | -+ XTP_PCS_UPDT | XTP_PCS_IN_FR_RG); -+ usleep_range(1, 5); -+ -+ mtk_xfi_tphy_clear(xfi_tphy, REG_DIG_GLB_70, XTP_PCS_UPDT); -+ usleep_range(15, 50); -+ -+ if (is_xgmii) { -+ /* Switch to Gen3 */ -+ mtk_xfi_tphy_rmw(xfi_tphy, REG_DIG_GLB_70, -+ XTP_PCS_MODE_MASK | XTP_PCS_UPDT, -+ XTP_PCS_MODE(2) | XTP_PCS_UPDT); -+ } else { -+ /* Switch to Gen2 */ -+ mtk_xfi_tphy_rmw(xfi_tphy, REG_DIG_GLB_70, -+ XTP_PCS_MODE_MASK | XTP_PCS_UPDT, -+ XTP_PCS_MODE(1) | XTP_PCS_UPDT); -+ } -+ usleep_range(1, 5); -+ -+ mtk_xfi_tphy_clear(xfi_tphy, REG_DIG_GLB_70, XTP_PCS_UPDT); -+ -+ usleep_range(100, 500); -+ -+ /* Enable MAC CK */ -+ mtk_xfi_tphy_set(xfi_tphy, REG_DIG_LN_TRX_B0, XTP_LN_TX_MACCK_EN); -+ mtk_xfi_tphy_clear(xfi_tphy, REG_DIG_GLB_F4, XFI_DPHY_AD_SGDT_FRC_EN); -+ -+ /* Enable TX data */ -+ mtk_xfi_tphy_set(xfi_tphy, REG_DIG_LN_TRX_40, -+ XTP_LN_FRC_TX_DATA_EN | XTP_LN_TX_DATA_EN); -+ usleep_range(400, 1000); -+} -+ -+static int mtk_xfi_tphy_set_mode(struct phy *phy, enum phy_mode mode, int -+ submode) -+{ -+ struct mtk_xfi_tphy *xfi_tphy = phy_get_drvdata(phy); -+ -+ if (mode != PHY_MODE_ETHERNET) -+ return -EINVAL; -+ -+ switch (submode) { -+ case PHY_INTERFACE_MODE_1000BASEX: -+ case PHY_INTERFACE_MODE_2500BASEX: -+ case PHY_INTERFACE_MODE_SGMII: -+ case PHY_INTERFACE_MODE_5GBASER: -+ case PHY_INTERFACE_MODE_10GBASER: -+ case PHY_INTERFACE_MODE_USXGMII: -+ mtk_xfi_tphy_setup(xfi_tphy, submode); -+ return 0; -+ default: -+ return -EINVAL; -+ } -+} -+ -+static int mtk_xfi_tphy_reset(struct phy *phy) -+{ -+ struct mtk_xfi_tphy *xfi_tphy = phy_get_drvdata(phy); -+ -+ reset_control_assert(xfi_tphy->reset); -+ usleep_range(100, 500); -+ reset_control_deassert(xfi_tphy->reset); -+ usleep_range(1, 10); -+ -+ return 0; -+} -+ -+static int mtk_xfi_tphy_power_on(struct phy *phy) -+{ -+ struct mtk_xfi_tphy *xfi_tphy = phy_get_drvdata(phy); -+ -+ return clk_bulk_prepare_enable(MTK_XFI_TPHY_NUM_CLOCKS, xfi_tphy->clocks); -+} -+ -+static int mtk_xfi_tphy_power_off(struct phy *phy) -+{ -+ struct mtk_xfi_tphy *xfi_tphy = phy_get_drvdata(phy); -+ -+ clk_bulk_disable_unprepare(MTK_XFI_TPHY_NUM_CLOCKS, xfi_tphy->clocks); -+ -+ return 0; -+} -+ -+static const struct phy_ops mtk_xfi_tphy_ops = { -+ .power_on = mtk_xfi_tphy_power_on, -+ .power_off = mtk_xfi_tphy_power_off, -+ .set_mode = mtk_xfi_tphy_set_mode, -+ .reset = mtk_xfi_tphy_reset, -+ .owner = THIS_MODULE, -+}; -+ -+static int mtk_xfi_tphy_probe(struct platform_device *pdev) -+{ -+ struct device_node *np = pdev->dev.of_node; -+ struct phy_provider *phy_provider; -+ struct mtk_xfi_tphy *xfi_tphy; -+ struct phy *phy; -+ -+ if (!np) -+ return -ENODEV; -+ -+ xfi_tphy = devm_kzalloc(&pdev->dev, sizeof(*xfi_tphy), GFP_KERNEL); -+ if (!xfi_tphy) -+ return -ENOMEM; -+ -+ xfi_tphy->base = devm_of_iomap(&pdev->dev, np, 0, NULL); -+ if (!xfi_tphy->base) -+ return -EIO; -+ -+ xfi_tphy->dev = &pdev->dev; -+ -+ xfi_tphy->clocks[0].id = "topxtal"; -+ xfi_tphy->clocks[0].clk = devm_clk_get(&pdev->dev, xfi_tphy->clocks[0].id); -+ if (IS_ERR(xfi_tphy->clocks[0].clk)) -+ return 
PTR_ERR(xfi_tphy->clocks[0].clk); -+ -+ xfi_tphy->clocks[1].id = "xfipll"; -+ xfi_tphy->clocks[1].clk = devm_clk_get(&pdev->dev, xfi_tphy->clocks[1].id); -+ if (IS_ERR(xfi_tphy->clocks[1].clk)) -+ return PTR_ERR(xfi_tphy->clocks[1].clk); -+ -+ xfi_tphy->reset = devm_reset_control_get_exclusive(&pdev->dev, NULL); -+ if (IS_ERR(xfi_tphy->reset)) -+ return PTR_ERR(xfi_tphy->reset); -+ -+ xfi_tphy->da_war = of_property_read_bool(np, -+ "mediatek,usxgmii-performance-errata"); -+ -+ phy = devm_phy_create(&pdev->dev, NULL, &mtk_xfi_tphy_ops); -+ if (IS_ERR(phy)) -+ return PTR_ERR(phy); -+ -+ phy_set_drvdata(phy, xfi_tphy); -+ -+ phy_provider = devm_of_phy_provider_register(&pdev->dev, -+ of_phy_simple_xlate); -+ -+ return PTR_ERR_OR_ZERO(phy_provider); -+} -+ -+static const struct of_device_id mtk_xfi_tphy_match[] = { -+ { .compatible = "mediatek,mt7988-xfi-tphy", }, -+ { } -+}; -+MODULE_DEVICE_TABLE(of, mtk_xfi_tphy_match); -+ -+static struct platform_driver mtk_xfi_tphy_driver = { -+ .probe = mtk_xfi_tphy_probe, -+ .driver = { -+ .name = "mtk-xfi-tphy", -+ .of_match_table = mtk_xfi_tphy_match, -+ }, -+}; -+module_platform_driver(mtk_xfi_tphy_driver); -+ -+MODULE_DESCRIPTION("MediaTek XFI T-PHY driver"); -+MODULE_AUTHOR("Daniel Golle "); -+MODULE_AUTHOR("Bc-bocun Chen "); -+MODULE_LICENSE("GPL"); diff --git a/6.10/target/linux/generic/pending-6.10/739-03-net-pcs-pcs-mtk-lynxi-add-platform-driver-for-MT7988.patch b/6.10/target/linux/generic/pending-6.10/739-03-net-pcs-pcs-mtk-lynxi-add-platform-driver-for-MT7988.patch deleted file mode 100644 index b67c8a0e..00000000 --- a/6.10/target/linux/generic/pending-6.10/739-03-net-pcs-pcs-mtk-lynxi-add-platform-driver-for-MT7988.patch +++ /dev/null @@ -1,371 +0,0 @@ -From 4b1a2716299c0e96a698044aebf3f80513509ae7 Mon Sep 17 00:00:00 2001 -From: Daniel Golle -Date: Tue, 12 Dec 2023 03:47:18 +0000 -Subject: [PATCH 3/5] net: pcs: pcs-mtk-lynxi: add platform driver for MT7988 - -Introduce a proper platform MFD driver for the LynxI (H)SGMII PCS which -is going to initially be used for the MT7988 SoC. - -Signed-off-by: Daniel Golle ---- - drivers/net/pcs/pcs-mtk-lynxi.c | 227 ++++++++++++++++++++++++++++-- - include/linux/pcs/pcs-mtk-lynxi.h | 11 ++ - 2 files changed, 227 insertions(+), 11 deletions(-) - ---- a/drivers/net/pcs/pcs-mtk-lynxi.c -+++ b/drivers/net/pcs/pcs-mtk-lynxi.c -@@ -1,6 +1,6 @@ - // SPDX-License-Identifier: GPL-2.0 - // Copyright (c) 2018-2019 MediaTek Inc. 
--/* A library for MediaTek SGMII circuit
-+/* A library and platform driver for the MediaTek LynxI SGMII circuit
- *
- * Author: Sean Wang
- * Author: Alexander Couzens
-@@ -8,11 +8,17 @@
- *
- */
-
-+#include
- #include
-+#include
-+#include
- #include
-+#include
- #include
- #include
-+#include
- #include
-+#include
-
- /* SGMII subsystem config registers */
- /* BMCR (low 16) BMSR (high 16) */
-@@ -65,6 +71,8 @@
- #define SGMII_PN_SWAP_MASK GENMASK(1, 0)
- #define SGMII_PN_SWAP_TX_RX (BIT(0) | BIT(1))
-
-+#define MTK_NETSYS_V3_AMA_RGC3 0x128
-+
- /* struct mtk_pcs_lynxi - This structure holds each sgmii regmap and associated
- * data
- * @regmap: The register map pointing at the range used to setup
-@@ -74,15 +82,29 @@
- * @interface: Currently configured interface mode
- * @pcs: Phylink PCS structure
- * @flags: Flags indicating hardware properties
-+ * @rstc: Reset controller
-+ * @sgmii_sel: SGMII Register Clock
-+ * @sgmii_rx: SGMII RX Clock
-+ * @sgmii_tx: SGMII TX Clock
-+ * @node: List node
- */
- struct mtk_pcs_lynxi {
- struct regmap *regmap;
-+ struct device *dev;
- u32 ana_rgc3;
- phy_interface_t interface;
- struct phylink_pcs pcs;
- u32 flags;
-+ struct reset_control *rstc;
-+ struct clk *sgmii_sel;
-+ struct clk *sgmii_rx;
-+ struct clk *sgmii_tx;
-+ struct list_head node;
- };
-
-+static LIST_HEAD(mtk_pcs_lynxi_instances);
-+static DEFINE_MUTEX(instance_mutex);
-+
- static struct mtk_pcs_lynxi *pcs_to_mtk_pcs_lynxi(struct phylink_pcs *pcs)
- {
- return container_of(pcs, struct mtk_pcs_lynxi, pcs);
-@@ -102,6 +124,17 @@ static void mtk_pcs_lynxi_get_state(stru
- FIELD_GET(SGMII_LPA, adv));
- }
-
-+static void mtk_sgmii_reset(struct mtk_pcs_lynxi *mpcs)
-+{
-+ if (!mpcs->rstc)
-+ return;
-+
-+ reset_control_assert(mpcs->rstc);
-+ udelay(100);
-+ reset_control_deassert(mpcs->rstc);
-+ mdelay(1);
-+}
-+
- static int mtk_pcs_lynxi_config(struct phylink_pcs *pcs, unsigned int neg_mode,
- phy_interface_t interface,
- const unsigned long *advertising,
-@@ -147,6 +180,7 @@ static int mtk_pcs_lynxi_config(struct p
- SGMII_PHYA_PWD);
-
- /* Reset SGMII PCS state */
-+ mtk_sgmii_reset(mpcs);
- regmap_set_bits(mpcs->regmap, SGMSYS_RESERVED_0,
- SGMII_SW_RESET);
-
-@@ -233,10 +267,29 @@ static void mtk_pcs_lynxi_link_up(struct
- }
- }
-
-+static int mtk_pcs_lynxi_enable(struct phylink_pcs *pcs)
-+{
-+ struct mtk_pcs_lynxi *mpcs = pcs_to_mtk_pcs_lynxi(pcs);
-+
-+ if (mpcs->sgmii_tx && mpcs->sgmii_rx) {
-+ clk_prepare_enable(mpcs->sgmii_rx);
-+ clk_prepare_enable(mpcs->sgmii_tx);
-+ }
-+
-+ return 0;
-+}
-+
- static void mtk_pcs_lynxi_disable(struct phylink_pcs *pcs)
- {
- struct mtk_pcs_lynxi *mpcs = pcs_to_mtk_pcs_lynxi(pcs);
-
-+ regmap_set_bits(mpcs->regmap, SGMSYS_QPHY_PWR_STATE_CTRL, SGMII_PHYA_PWD);
-+
-+ if (mpcs->sgmii_tx && mpcs->sgmii_rx) {
-+ clk_disable_unprepare(mpcs->sgmii_tx);
-+ clk_disable_unprepare(mpcs->sgmii_rx);
-+ }
-+
- mpcs->interface = PHY_INTERFACE_MODE_NA;
- }
-
-@@ -246,11 +299,12 @@ static const struct phylink_pcs_ops mtk_
- .pcs_an_restart = mtk_pcs_lynxi_restart_an,
- .pcs_link_up = mtk_pcs_lynxi_link_up,
- .pcs_disable = mtk_pcs_lynxi_disable,
-+ .pcs_enable = mtk_pcs_lynxi_enable,
- };
-
---struct phylink_pcs *mtk_pcs_lynxi_create(struct device *dev,
-- struct regmap *regmap, u32 ana_rgc3,
-- u32 flags)
-+static struct phylink_pcs *mtk_pcs_lynxi_init(struct device *dev, struct regmap *regmap,
-+ u32 ana_rgc3, u32 flags,
-+ struct mtk_pcs_lynxi *prealloc)
- {
- struct mtk_pcs_lynxi *mpcs;
- u32 id, ver;
-@@ -258,29 +312,33 @@ struct phylink_pcs
*mtk_pcs_lynxi_create - - ret = regmap_read(regmap, SGMSYS_PCS_DEVICE_ID, &id); - if (ret < 0) -- return NULL; -+ return ERR_PTR(ret); - - if (id != SGMII_LYNXI_DEV_ID) { - dev_err(dev, "unknown PCS device id %08x\n", id); -- return NULL; -+ return ERR_PTR(-ENODEV); - } - - ret = regmap_read(regmap, SGMSYS_PCS_SCRATCH, &ver); - if (ret < 0) -- return NULL; -+ return ERR_PTR(ret); - - ver = FIELD_GET(SGMII_DEV_VERSION, ver); - if (ver != 0x1) { - dev_err(dev, "unknown PCS device version %04x\n", ver); -- return NULL; -+ return ERR_PTR(-ENODEV); - } - - dev_dbg(dev, "MediaTek LynxI SGMII PCS (id 0x%08x, ver 0x%04x)\n", id, - ver); - -- mpcs = kzalloc(sizeof(*mpcs), GFP_KERNEL); -- if (!mpcs) -- return NULL; -+ if (prealloc) { -+ mpcs = prealloc; -+ } else { -+ mpcs = kzalloc(sizeof(*mpcs), GFP_KERNEL); -+ if (!mpcs) -+ return ERR_PTR(-ENOMEM); -+ }; - - mpcs->ana_rgc3 = ana_rgc3; - mpcs->regmap = regmap; -@@ -291,6 +349,13 @@ struct phylink_pcs *mtk_pcs_lynxi_create - mpcs->interface = PHY_INTERFACE_MODE_NA; - - return &mpcs->pcs; -+}; -+ -+struct phylink_pcs *mtk_pcs_lynxi_create(struct device *dev, -+ struct regmap *regmap, u32 ana_rgc3, -+ u32 flags) -+{ -+ return mtk_pcs_lynxi_init(dev, regmap, ana_rgc3, flags, NULL); - } - EXPORT_SYMBOL(mtk_pcs_lynxi_create); - -@@ -303,4 +368,144 @@ void mtk_pcs_lynxi_destroy(struct phylin - } - EXPORT_SYMBOL(mtk_pcs_lynxi_destroy); - -+static int mtk_pcs_lynxi_probe(struct platform_device *pdev) -+{ -+ struct device *dev = &pdev->dev; -+ struct device_node *np = dev->of_node; -+ struct mtk_pcs_lynxi *mpcs; -+ struct phylink_pcs *pcs; -+ struct regmap *regmap; -+ u32 flags = 0; -+ -+ mpcs = devm_kzalloc(dev, sizeof(*mpcs), GFP_KERNEL); -+ if (!mpcs) -+ return -ENOMEM; -+ -+ mpcs->dev = dev; -+ regmap = syscon_node_to_regmap(np->parent); -+ if (IS_ERR(regmap)) -+ return PTR_ERR(regmap); -+ -+ if (of_property_read_bool(np->parent, "mediatek,pnswap")) -+ flags |= MTK_SGMII_FLAG_PN_SWAP; -+ -+ mpcs->rstc = of_reset_control_get_shared(np->parent, NULL); -+ if (IS_ERR(mpcs->rstc)) -+ return PTR_ERR(mpcs->rstc); -+ -+ reset_control_deassert(mpcs->rstc); -+ mpcs->sgmii_sel = devm_clk_get_enabled(dev, "sgmii_sel"); -+ if (IS_ERR(mpcs->sgmii_sel)) -+ return PTR_ERR(mpcs->sgmii_sel); -+ -+ mpcs->sgmii_rx = devm_clk_get(dev, "sgmii_rx"); -+ if (IS_ERR(mpcs->sgmii_rx)) -+ return PTR_ERR(mpcs->sgmii_rx); -+ -+ mpcs->sgmii_tx = devm_clk_get(dev, "sgmii_tx"); -+ if (IS_ERR(mpcs->sgmii_tx)) -+ return PTR_ERR(mpcs->sgmii_tx); -+ -+ pcs = mtk_pcs_lynxi_init(dev, regmap, (uintptr_t)of_device_get_match_data(dev), -+ flags, mpcs); -+ if (IS_ERR(pcs)) -+ return PTR_ERR(pcs); -+ -+ regmap_set_bits(mpcs->regmap, SGMSYS_QPHY_PWR_STATE_CTRL, SGMII_PHYA_PWD); -+ -+ platform_set_drvdata(pdev, mpcs); -+ -+ mutex_lock(&instance_mutex); -+ list_add_tail(&mpcs->node, &mtk_pcs_lynxi_instances); -+ mutex_unlock(&instance_mutex); -+ -+ return 0; -+} -+ -+static int mtk_pcs_lynxi_remove(struct platform_device *pdev) -+{ -+ struct device *dev = &pdev->dev; -+ struct mtk_pcs_lynxi *cur, *tmp; -+ -+ mutex_lock(&instance_mutex); -+ list_for_each_entry_safe(cur, tmp, &mtk_pcs_lynxi_instances, node) -+ if (cur->dev == dev) { -+ list_del(&cur->node); -+ kfree(cur); -+ break; -+ } -+ mutex_unlock(&instance_mutex); -+ -+ return 0; -+} -+ -+static const struct of_device_id mtk_pcs_lynxi_of_match[] = { -+ { .compatible = "mediatek,mt7988-sgmii", .data = (void *)MTK_NETSYS_V3_AMA_RGC3 }, -+ { /* sentinel */ }, -+}; -+MODULE_DEVICE_TABLE(of, mtk_pcs_lynxi_of_match); -+ -+struct phylink_pcs 
*mtk_pcs_lynxi_get(struct device *dev, struct device_node *np) -+{ -+ struct platform_device *pdev; -+ struct mtk_pcs_lynxi *mpcs; -+ -+ if (!np) -+ return NULL; -+ -+ if (!of_device_is_available(np)) -+ return ERR_PTR(-ENODEV); -+ -+ if (!of_match_node(mtk_pcs_lynxi_of_match, np)) -+ return ERR_PTR(-EINVAL); -+ -+ pdev = of_find_device_by_node(np); -+ if (!pdev || !platform_get_drvdata(pdev)) { -+ if (pdev) -+ put_device(&pdev->dev); -+ return ERR_PTR(-EPROBE_DEFER); -+ } -+ -+ mpcs = platform_get_drvdata(pdev); -+ device_link_add(dev, mpcs->dev, DL_FLAG_AUTOREMOVE_CONSUMER); -+ -+ return &mpcs->pcs; -+} -+EXPORT_SYMBOL(mtk_pcs_lynxi_get); -+ -+void mtk_pcs_lynxi_put(struct phylink_pcs *pcs) -+{ -+ struct mtk_pcs_lynxi *cur, *mpcs = NULL; -+ -+ if (!pcs) -+ return; -+ -+ mutex_lock(&instance_mutex); -+ list_for_each_entry(cur, &mtk_pcs_lynxi_instances, node) -+ if (pcs == &cur->pcs) { -+ mpcs = cur; -+ break; -+ } -+ mutex_unlock(&instance_mutex); -+ -+ if (WARN_ON(!mpcs)) -+ return; -+ -+ put_device(mpcs->dev); -+} -+EXPORT_SYMBOL(mtk_pcs_lynxi_put); -+ -+static struct platform_driver mtk_pcs_lynxi_driver = { -+ .driver = { -+ .name = "mtk-pcs-lynxi", -+ .suppress_bind_attrs = true, -+ .of_match_table = mtk_pcs_lynxi_of_match, -+ }, -+ .probe = mtk_pcs_lynxi_probe, -+ .remove = mtk_pcs_lynxi_remove, -+}; -+module_platform_driver(mtk_pcs_lynxi_driver); -+ -+MODULE_AUTHOR("Daniel Golle "); -+MODULE_DESCRIPTION("MediaTek LynxI HSGMII PCS"); - MODULE_LICENSE("GPL"); ---- a/include/linux/pcs/pcs-mtk-lynxi.h -+++ b/include/linux/pcs/pcs-mtk-lynxi.h -@@ -10,4 +10,15 @@ struct phylink_pcs *mtk_pcs_lynxi_create - struct regmap *regmap, - u32 ana_rgc3, u32 flags); - void mtk_pcs_lynxi_destroy(struct phylink_pcs *pcs); -+ -+#if IS_ENABLED(CONFIG_PCS_MTK_LYNXI) -+struct phylink_pcs *mtk_pcs_lynxi_get(struct device *dev, struct device_node *np); -+void mtk_pcs_lynxi_put(struct phylink_pcs *pcs); -+#else -+static inline struct phylink_pcs *mtk_pcs_lynxi_get(struct device *dev, struct device_node *np) -+{ -+ return NULL; -+} -+static inline void mtk_pcs_lynxi_put(struct phylink_pcs *pcs) { } -+#endif /* IS_ENABLED(CONFIG_PCS_MTK_LYNXI) */ - #endif diff --git a/6.10/target/linux/generic/pending-6.10/741-net-phy-realtek-support-interrupt-of-RTL8221B.patch b/6.10/target/linux/generic/pending-6.10/741-net-phy-realtek-support-interrupt-of-RTL8221B.patch deleted file mode 100644 index 726f66cf..00000000 --- a/6.10/target/linux/generic/pending-6.10/741-net-phy-realtek-support-interrupt-of-RTL8221B.patch +++ /dev/null @@ -1,75 +0,0 @@ -From d7943c31d57c11e1a517aa3ce2006fca44866870 Mon Sep 17 00:00:00 2001 -From: Jianhui Zhao -Date: Sun, 24 Sep 2023 22:15:00 +0800 -Subject: [PATCH] net: phy: realtek: add interrupt support for RTL8221B - -This commit introduces interrupt support for RTL8221B. - -Signed-off-by: Jianhui Zhao ---- - drivers/net/phy/realtek.c | 47 +++++++++++++++++++++++++++++++++++++++ - 1 file changed, 47 insertions(+) - ---- a/drivers/net/phy/realtek.c -+++ b/drivers/net/phy/realtek.c -@@ -1010,6 +1010,51 @@ static int rtl8221b_config_init(struct p - return 0; - } - -+static int rtl8221b_ack_interrupt(struct phy_device *phydev) -+{ -+ int err; -+ -+ err = phy_read_mmd(phydev, RTL8221B_MMD_PHY_CTRL, 0xa4d4); -+ -+ return (err < 0) ? 
err : 0; -+} -+ -+static int rtl8221b_config_intr(struct phy_device *phydev) -+{ -+ int err; -+ -+ if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { -+ err = rtl8221b_ack_interrupt(phydev); -+ if (err) -+ return err; -+ -+ err = phy_write_mmd(phydev, RTL8221B_MMD_PHY_CTRL, 0xa4d2, 0x7ff); -+ } else { -+ err = phy_write_mmd(phydev, RTL8221B_MMD_PHY_CTRL, 0xa4d2, 0x0); -+ if (err) -+ return err; -+ -+ err = rtl8221b_ack_interrupt(phydev); -+ } -+ -+ return err; -+} -+ -+static irqreturn_t rtl8221b_handle_interrupt(struct phy_device *phydev) -+{ -+ int err; -+ -+ err = rtl8221b_ack_interrupt(phydev); -+ if (err) { -+ phy_error(phydev); -+ return IRQ_NONE; -+ } -+ -+ phy_trigger_machine(phydev); -+ -+ return IRQ_HANDLED; -+} -+ - static struct phy_driver realtek_drvs[] = { - { - PHY_ID_MATCH_EXACT(0x00008201), -@@ -1172,6 +1217,8 @@ static struct phy_driver realtek_drvs[] - .get_features = rtl822x_get_features, - .config_init = rtl8221b_config_init, - .config_aneg = rtl822x_config_aneg, -+ .config_intr = rtl8221b_config_intr, -+ .handle_interrupt = rtl8221b_handle_interrupt, - .probe = rtl822x_probe, - .read_status = rtl822x_read_status, - .suspend = genphy_suspend, diff --git a/6.10/target/linux/generic/pending-6.10/743-net-phy-aquantia-add-support-for-PHY-LEDs.patch b/6.10/target/linux/generic/pending-6.10/743-net-phy-aquantia-add-support-for-PHY-LEDs.patch deleted file mode 100644 index ca3a2b5c..00000000 --- a/6.10/target/linux/generic/pending-6.10/743-net-phy-aquantia-add-support-for-PHY-LEDs.patch +++ /dev/null @@ -1,368 +0,0 @@ -From c6a1759365fc35463138a7d9e335ee53f384b8df Mon Sep 17 00:00:00 2001 -From: Daniel Golle -Date: Fri, 10 May 2024 02:53:52 +0100 -Subject: [PATCH] net: phy: aquantia: add support for PHY LEDs - -Aquantia Ethernet PHYs got 3 LED output pins which are typically used -to indicate link status and activity. -Add a minimal LED controller driver supporting the most common uses -with the 'netdev' trigger as well as software-driven forced control of -the LEDs. 
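As a rough sketch of the provisioning interface described above (illustrative only: the helper name is invented here, and the constants mirror the VEND1_GLOBAL_LED_PROV definitions the patch adds below), forcing one of the three LEDs on from driver code could look like this:

/* Hypothetical helper, not part of the patch: force LED "index" on by
 * clearing the link-state and activity provisioning bits and setting
 * FORCE_ON. Assumes <linux/phy.h>, <linux/mdio.h> and <linux/bits.h>.
 */
static int aqr_example_led_force_on(struct phy_device *phydev, u8 index)
{
	/* AQR_LED_PROV(x) is register 0xc430 + x in MDIO_MMD_VEND1 */
	return phy_modify_mmd(phydev, MDIO_MMD_VEND1, 0xc430 + index,
			      GENMASK(15, 14) | GENMASK(8, 5) | /* link bits */
			      BIT(3) | BIT(2),                  /* RX/TX activity */
			      BIT(8));                          /* FORCE_ON */
}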
- -Signed-off-by: Daniel Golle ---- - drivers/net/phy/aquantia/Makefile | 3 + - drivers/net/phy/aquantia/aquantia.h | 84 +++++++++++++ - drivers/net/phy/aquantia/aquantia_leds.c | 152 +++++++++++++++++++++++ - drivers/net/phy/aquantia/aquantia_main.c | 127 +++++++++++++------ - 4 files changed, 329 insertions(+), 37 deletions(-) - create mode 100644 drivers/net/phy/aquantia/aquantia_leds.c - ---- a/drivers/net/phy/aquantia/Makefile -+++ b/drivers/net/phy/aquantia/Makefile -@@ -3,4 +3,7 @@ aquantia-objs += aquantia_main.o aquan - ifdef CONFIG_HWMON - aquantia-objs += aquantia_hwmon.o - endif -+ifdef CONFIG_PHYLIB_LEDS -+aquantia-objs += aquantia_leds.o -+endif - obj-$(CONFIG_AQUANTIA_PHY) += aquantia.o ---- a/drivers/net/phy/aquantia/aquantia.h -+++ b/drivers/net/phy/aquantia/aquantia.h -@@ -62,6 +62,26 @@ - #define VEND1_THERMAL_PROV_LOW_TEMP_FAIL 0xc422 - #define VEND1_THERMAL_PROV_HIGH_TEMP_WARN 0xc423 - #define VEND1_THERMAL_PROV_LOW_TEMP_WARN 0xc424 -+ -+#define AQR_NUM_LEDS 3 -+ -+#define VEND1_GLOBAL_LED_PROV 0xc430 -+#define AQR_LED_PROV(x) (VEND1_GLOBAL_LED_PROV + x) -+#define VEND1_GLOBAL_LED_PROV_ACT_STRETCH GENMASK(0, 1) -+#define VEND1_GLOBAL_LED_PROV_TX_ACT BIT(2) -+#define VEND1_GLOBAL_LED_PROV_RX_ACT BIT(3) -+#define VEND1_GLOBAL_LED_PROV_LINK_MASK (GENMASK(15, 14) | GENMASK(8, 5)) -+#define VEND1_GLOBAL_LED_PROV_LINK100 BIT(5) -+#define VEND1_GLOBAL_LED_PROV_LINK1000 BIT(6) -+#define VEND1_GLOBAL_LED_PROV_LINK10000 BIT(7) -+#define VEND1_GLOBAL_LED_PROV_FORCE_ON BIT(8) -+#define VEND1_GLOBAL_LED_PROV_LINK2500 BIT(14) -+#define VEND1_GLOBAL_LED_PROV_LINK5000 BIT(15) -+ -+#define VEND1_GLOBAL_LED_DRIVE 0xc438 -+#define VEND1_GLOBAL_LED_DRIVE_VDD BIT(1) -+#define AQR_LED_DRIVE(x) (VEND1_GLOBAL_LED_DRIVE + x) -+ - #define VEND1_THERMAL_STAT1 0xc820 - #define VEND1_THERMAL_STAT2 0xc821 - #define VEND1_THERMAL_STAT2_VALID BIT(0) -@@ -115,3 +135,23 @@ static inline int aqr_hwmon_probe(struct - #endif - - int aqr_firmware_load(struct phy_device *phydev); -+ -+#if IS_ENABLED(CONFIG_PHYLIB_LEDS) -+int aqr_phy_led_blink_set(struct phy_device *phydev, u8 index, -+ unsigned long *delay_on, -+ unsigned long *delay_off); -+ -+int aqr_phy_led_brightness_set(struct phy_device *phydev, -+ u8 index, enum led_brightness value); -+ -+int aqr_phy_led_hw_is_supported(struct phy_device *phydev, u8 index, -+ unsigned long rules); -+ -+int aqr_phy_led_hw_control_get(struct phy_device *phydev, u8 index, -+ unsigned long *rules); -+ -+int aqr_phy_led_hw_control_set(struct phy_device *phydev, u8 index, -+ unsigned long rules); -+ -+int aqr_phy_led_polarity_set(struct phy_device *phydev, int index, unsigned long modes); -+#endif ---- /dev/null -+++ b/drivers/net/phy/aquantia/aquantia_leds.c -@@ -0,0 +1,140 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* LED driver for Aquantia PHY -+ * -+ * Author: Daniel Golle -+ */ -+ -+#include -+ -+#include "aquantia.h" -+ -+int aqr_phy_led_brightness_set(struct phy_device *phydev, -+ u8 index, enum led_brightness value) -+{ -+ if (index > 2) -+ return -EINVAL; -+ -+ return phy_modify_mmd(phydev, MDIO_MMD_VEND1, AQR_LED_PROV(index), VEND1_GLOBAL_LED_PROV_LINK_MASK | -+ VEND1_GLOBAL_LED_PROV_FORCE_ON | -+ VEND1_GLOBAL_LED_PROV_RX_ACT | -+ VEND1_GLOBAL_LED_PROV_TX_ACT, -+ value ? 
VEND1_GLOBAL_LED_PROV_FORCE_ON : 0); -+} -+ -+static const unsigned long supported_triggers = (BIT(TRIGGER_NETDEV_LINK) | -+ BIT(TRIGGER_NETDEV_LINK_100) | -+ BIT(TRIGGER_NETDEV_LINK_1000) | -+ BIT(TRIGGER_NETDEV_LINK_2500) | -+ BIT(TRIGGER_NETDEV_LINK_5000) | -+ BIT(TRIGGER_NETDEV_LINK_10000) | -+ BIT(TRIGGER_NETDEV_RX) | -+ BIT(TRIGGER_NETDEV_TX)); -+ -+int aqr_phy_led_hw_is_supported(struct phy_device *phydev, u8 index, -+ unsigned long rules) -+{ -+ if (index >= AQR_NUM_LEDS) -+ return -EINVAL; -+ -+ /* All combinations of the supported triggers are allowed */ -+ if (rules & ~supported_triggers) -+ return -EOPNOTSUPP; -+ -+ return 0; -+} -+ -+int aqr_phy_led_hw_control_get(struct phy_device *phydev, u8 index, -+ unsigned long *rules) -+{ -+ int val; -+ -+ if (index >= AQR_NUM_LEDS) -+ return -EINVAL; -+ -+ val = phy_read_mmd(phydev, MDIO_MMD_VEND1, AQR_LED_PROV(index)); -+ if (val < 0) -+ return val; -+ -+ *rules = 0; -+ if (val & VEND1_GLOBAL_LED_PROV_LINK100) -+ *rules |= BIT(TRIGGER_NETDEV_LINK_100); -+ -+ if (val & VEND1_GLOBAL_LED_PROV_LINK1000) -+ *rules |= BIT(TRIGGER_NETDEV_LINK_1000); -+ -+ if (val & VEND1_GLOBAL_LED_PROV_LINK2500) -+ *rules |= BIT(TRIGGER_NETDEV_LINK_2500); -+ -+ if (val & VEND1_GLOBAL_LED_PROV_LINK5000) -+ *rules |= BIT(TRIGGER_NETDEV_LINK_5000); -+ -+ if (val & VEND1_GLOBAL_LED_PROV_LINK10000) -+ *rules |= BIT(TRIGGER_NETDEV_LINK_10000); -+ -+ if (val & VEND1_GLOBAL_LED_PROV_RX_ACT) -+ *rules |= BIT(TRIGGER_NETDEV_RX); -+ -+ if (val & VEND1_GLOBAL_LED_PROV_TX_ACT) -+ *rules |= BIT(TRIGGER_NETDEV_TX); -+ -+ return 0; -+} -+ -+int aqr_phy_led_hw_control_set(struct phy_device *phydev, u8 index, -+ unsigned long rules) -+{ -+ u16 val = 0; -+ -+ if (index >= AQR_NUM_LEDS) -+ return -EINVAL; -+ -+ if (rules & (BIT(TRIGGER_NETDEV_LINK_100) | BIT(TRIGGER_NETDEV_LINK))) -+ val |= VEND1_GLOBAL_LED_PROV_LINK100; -+ -+ if (rules & (BIT(TRIGGER_NETDEV_LINK_1000) | BIT(TRIGGER_NETDEV_LINK))) -+ val |= VEND1_GLOBAL_LED_PROV_LINK1000; -+ -+ if (rules & (BIT(TRIGGER_NETDEV_LINK_2500) | BIT(TRIGGER_NETDEV_LINK))) -+ val |= VEND1_GLOBAL_LED_PROV_LINK2500; -+ -+ if (rules & (BIT(TRIGGER_NETDEV_LINK_5000) | BIT(TRIGGER_NETDEV_LINK))) -+ val |= VEND1_GLOBAL_LED_PROV_LINK5000; -+ -+ if (rules & (BIT(TRIGGER_NETDEV_LINK_10000) | BIT(TRIGGER_NETDEV_LINK))) -+ val |= VEND1_GLOBAL_LED_PROV_LINK10000; -+ -+ if (rules & BIT(TRIGGER_NETDEV_RX)) -+ val |= VEND1_GLOBAL_LED_PROV_RX_ACT; -+ -+ if (rules & BIT(TRIGGER_NETDEV_TX)) -+ val |= VEND1_GLOBAL_LED_PROV_TX_ACT; -+ -+ return phy_modify_mmd(phydev, MDIO_MMD_VEND1, AQR_LED_PROV(index), -+ VEND1_GLOBAL_LED_PROV_LINK_MASK | -+ VEND1_GLOBAL_LED_PROV_FORCE_ON | -+ VEND1_GLOBAL_LED_PROV_RX_ACT | -+ VEND1_GLOBAL_LED_PROV_TX_ACT, val); -+} -+ -+int aqr_phy_led_polarity_set(struct phy_device *phydev, int index, unsigned long modes) -+{ -+ bool active_low = false; -+ u32 mode; -+ -+ if (index >= AQR_NUM_LEDS) -+ return -EINVAL; -+ -+ for_each_set_bit(mode, &modes, __PHY_LED_MODES_NUM) { -+ switch (mode) { -+ case PHY_LED_ACTIVE_LOW: -+ active_low = true; -+ break; -+ default: -+ return -EINVAL; -+ } -+ } -+ -+ return phy_modify_mmd(phydev, MDIO_MMD_VEND1, AQR_LED_DRIVE(index), -+ VEND1_GLOBAL_LED_DRIVE_VDD, -+ active_low ? 
VEND1_GLOBAL_LED_DRIVE_VDD : 0); -+} ---- a/drivers/net/phy/aquantia/aquantia_main.c -+++ b/drivers/net/phy/aquantia/aquantia_main.c -@@ -740,6 +740,13 @@ static struct phy_driver aqr_driver[] = - .get_strings = aqr107_get_strings, - .get_stats = aqr107_get_stats, - .link_change_notify = aqr107_link_change_notify, -+#if IS_ENABLED(CONFIG_PHYLIB_LEDS) -+ .led_brightness_set = aqr_phy_led_brightness_set, -+ .led_hw_is_supported = aqr_phy_led_hw_is_supported, -+ .led_hw_control_set = aqr_phy_led_hw_control_set, -+ .led_hw_control_get = aqr_phy_led_hw_control_get, -+ .led_polarity_set = aqr_phy_led_polarity_set, -+#endif - }, - { - PHY_ID_MATCH_MODEL(PHY_ID_AQCS109), -@@ -759,6 +766,13 @@ static struct phy_driver aqr_driver[] = - .get_strings = aqr107_get_strings, - .get_stats = aqr107_get_stats, - .link_change_notify = aqr107_link_change_notify, -+#if IS_ENABLED(CONFIG_PHYLIB_LEDS) -+ .led_brightness_set = aqr_phy_led_brightness_set, -+ .led_hw_is_supported = aqr_phy_led_hw_is_supported, -+ .led_hw_control_set = aqr_phy_led_hw_control_set, -+ .led_hw_control_get = aqr_phy_led_hw_control_get, -+ .led_polarity_set = aqr_phy_led_polarity_set, -+#endif - }, - { - PHY_ID_MATCH_MODEL(PHY_ID_AQR111), -@@ -778,6 +792,13 @@ static struct phy_driver aqr_driver[] = - .get_strings = aqr107_get_strings, - .get_stats = aqr107_get_stats, - .link_change_notify = aqr107_link_change_notify, -+#if IS_ENABLED(CONFIG_PHYLIB_LEDS) -+ .led_brightness_set = aqr_phy_led_brightness_set, -+ .led_hw_is_supported = aqr_phy_led_hw_is_supported, -+ .led_hw_control_set = aqr_phy_led_hw_control_set, -+ .led_hw_control_get = aqr_phy_led_hw_control_get, -+ .led_polarity_set = aqr_phy_led_polarity_set, -+#endif - }, - { - PHY_ID_MATCH_MODEL(PHY_ID_AQR111B0), -@@ -797,6 +818,13 @@ static struct phy_driver aqr_driver[] = - .get_strings = aqr107_get_strings, - .get_stats = aqr107_get_stats, - .link_change_notify = aqr107_link_change_notify, -+#if IS_ENABLED(CONFIG_PHYLIB_LEDS) -+ .led_brightness_set = aqr_phy_led_brightness_set, -+ .led_hw_is_supported = aqr_phy_led_hw_is_supported, -+ .led_hw_control_set = aqr_phy_led_hw_control_set, -+ .led_hw_control_get = aqr_phy_led_hw_control_get, -+ .led_polarity_set = aqr_phy_led_polarity_set, -+#endif - }, - { - PHY_ID_MATCH_MODEL(PHY_ID_AQR405), -@@ -823,6 +851,13 @@ static struct phy_driver aqr_driver[] = - .get_strings = aqr107_get_strings, - .get_stats = aqr107_get_stats, - .link_change_notify = aqr107_link_change_notify, -+#if IS_ENABLED(CONFIG_PHYLIB_LEDS) -+ .led_brightness_set = aqr_phy_led_brightness_set, -+ .led_hw_is_supported = aqr_phy_led_hw_is_supported, -+ .led_hw_control_set = aqr_phy_led_hw_control_set, -+ .led_hw_control_get = aqr_phy_led_hw_control_get, -+ .led_polarity_set = aqr_phy_led_polarity_set, -+#endif - }, - { - PHY_ID_MATCH_MODEL(PHY_ID_AQR412), -@@ -841,6 +876,13 @@ static struct phy_driver aqr_driver[] = - .get_strings = aqr107_get_strings, - .get_stats = aqr107_get_stats, - .link_change_notify = aqr107_link_change_notify, -+#if IS_ENABLED(CONFIG_PHYLIB_LEDS) -+ .led_brightness_set = aqr_phy_led_brightness_set, -+ .led_hw_is_supported = aqr_phy_led_hw_is_supported, -+ .led_hw_control_set = aqr_phy_led_hw_control_set, -+ .led_hw_control_get = aqr_phy_led_hw_control_get, -+ .led_polarity_set = aqr_phy_led_polarity_set, -+#endif - }, - { - PHY_ID_MATCH_MODEL(PHY_ID_AQR113), -@@ -860,6 +902,13 @@ static struct phy_driver aqr_driver[] = - .get_strings = aqr107_get_strings, - .get_stats = aqr107_get_stats, - .link_change_notify = aqr107_link_change_notify, -+#if 
IS_ENABLED(CONFIG_PHYLIB_LEDS) -+ .led_brightness_set = aqr_phy_led_brightness_set, -+ .led_hw_is_supported = aqr_phy_led_hw_is_supported, -+ .led_hw_control_set = aqr_phy_led_hw_control_set, -+ .led_hw_control_get = aqr_phy_led_hw_control_get, -+ .led_polarity_set = aqr_phy_led_polarity_set, -+#endif - }, - { - PHY_ID_MATCH_MODEL(PHY_ID_AQR113C), -@@ -879,6 +928,13 @@ static struct phy_driver aqr_driver[] = - .get_strings = aqr107_get_strings, - .get_stats = aqr107_get_stats, - .link_change_notify = aqr107_link_change_notify, -+#if IS_ENABLED(CONFIG_PHYLIB_LEDS) -+ .led_brightness_set = aqr_phy_led_brightness_set, -+ .led_hw_is_supported = aqr_phy_led_hw_is_supported, -+ .led_hw_control_set = aqr_phy_led_hw_control_set, -+ .led_hw_control_get = aqr_phy_led_hw_control_get, -+ .led_polarity_set = aqr_phy_led_polarity_set, -+#endif - }, - { - PHY_ID_MATCH_MODEL(PHY_ID_AQR114C), -@@ -898,6 +954,13 @@ static struct phy_driver aqr_driver[] = - .get_strings = aqr107_get_strings, - .get_stats = aqr107_get_stats, - .link_change_notify = aqr107_link_change_notify, -+#if IS_ENABLED(CONFIG_PHYLIB_LEDS) -+ .led_brightness_set = aqr_phy_led_brightness_set, -+ .led_hw_is_supported = aqr_phy_led_hw_is_supported, -+ .led_hw_control_set = aqr_phy_led_hw_control_set, -+ .led_hw_control_get = aqr_phy_led_hw_control_get, -+ .led_polarity_set = aqr_phy_led_polarity_set, -+#endif - }, - { - PHY_ID_MATCH_MODEL(PHY_ID_AQR813), -@@ -917,6 +980,13 @@ static struct phy_driver aqr_driver[] = - .get_strings = aqr107_get_strings, - .get_stats = aqr107_get_stats, - .link_change_notify = aqr107_link_change_notify, -+#if IS_ENABLED(CONFIG_PHYLIB_LEDS) -+ .led_brightness_set = aqr_phy_led_brightness_set, -+ .led_hw_is_supported = aqr_phy_led_hw_is_supported, -+ .led_hw_control_set = aqr_phy_led_hw_control_set, -+ .led_hw_control_get = aqr_phy_led_hw_control_get, -+ .led_polarity_set = aqr_phy_led_polarity_set, -+#endif - }, - }; - diff --git a/6.10/target/linux/generic/pending-6.10/811-pci_disable_usb_common_quirks.patch b/6.10/target/linux/generic/pending-6.10/811-pci_disable_usb_common_quirks.patch deleted file mode 100644 index e91d1ef6..00000000 --- a/6.10/target/linux/generic/pending-6.10/811-pci_disable_usb_common_quirks.patch +++ /dev/null @@ -1,115 +0,0 @@ -From: Felix Fietkau -Subject: debloat: disable common USB quirks - -Signed-off-by: Felix Fietkau ---- - drivers/usb/host/pci-quirks.c | 16 ++++++++++++++++ - drivers/usb/host/pci-quirks.h | 18 +++++++++++++++++- - include/linux/usb/hcd.h | 7 +++++++ - 3 files changed, 40 insertions(+), 1 deletion(-) - ---- a/drivers/usb/host/pci-quirks.c -+++ b/drivers/usb/host/pci-quirks.c -@@ -128,6 +128,8 @@ struct amd_chipset_type { - u8 rev; - }; - -+#ifndef CONFIG_PCI_DISABLE_COMMON_QUIRKS -+ - static struct amd_chipset_info { - struct pci_dev *nb_dev; - struct pci_dev *smbus_dev; -@@ -631,6 +633,10 @@ bool usb_amd_pt_check_port(struct device - } - EXPORT_SYMBOL_GPL(usb_amd_pt_check_port); - -+#endif /* CONFIG_PCI_DISABLE_COMMON_QUIRKS */ -+ -+#if IS_ENABLED(CONFIG_USB_UHCI_HCD) -+ - /* - * Make sure the controller is completely inactive, unable to - * generate interrupts or do DMA. 
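The debloat pattern this patch applies throughout, hiding the quirk handlers behind CONFIG_PCI_DISABLE_COMMON_QUIRKS while keeping every call site unchanged, reduces to the following sketch (usb_example_quirk() is a made-up name for illustration; assumes <linux/pci.h>):

/* Sketch only, not from the patch: a real prototype when quirk handling
 * is built in, a zero-cost static inline stub when it is compiled out,
 * so callers need no #ifdefs of their own.
 */
#ifndef CONFIG_PCI_DISABLE_COMMON_QUIRKS
int usb_example_quirk(struct pci_dev *pdev);	/* implemented in pci-quirks.c */
#else
static inline int usb_example_quirk(struct pci_dev *pdev)
{
	return 0;
}
#endif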
-@@ -710,8 +716,17 @@ reset_needed:
- uhci_reset_hc(pdev, base);
- return 1;
- }
-+#else
-+int uhci_check_and_reset_hc(struct pci_dev *pdev, unsigned long base)
-+{
-+ return 0;
-+}
-+
-+#endif
- EXPORT_SYMBOL_GPL(uhci_check_and_reset_hc);
-
-+#ifndef CONFIG_PCI_DISABLE_COMMON_QUIRKS
-+
- static inline int io_type_enabled(struct pci_dev *pdev, unsigned int mask)
- {
- u16 cmd;
-@@ -1283,3 +1298,4 @@ static void quirk_usb_early_handoff(stru
- }
- DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
- PCI_CLASS_SERIAL_USB, 8, quirk_usb_early_handoff);
-+#endif
---- a/drivers/usb/host/pci-quirks.h
-+++ b/drivers/usb/host/pci-quirks.h
-@@ -5,6 +5,9 @@
- #ifdef CONFIG_USB_PCI
- void uhci_reset_hc(struct pci_dev *pdev, unsigned long base);
- int uhci_check_and_reset_hc(struct pci_dev *pdev, unsigned long base);
-+#endif /* CONFIG_USB_PCI */
-+
-+#if defined(CONFIG_USB_PCI) && !defined(CONFIG_PCI_DISABLE_COMMON_QUIRKS)
- int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *pdev);
- bool usb_amd_hang_symptom_quirk(void);
- bool usb_amd_prefetch_quirk(void);
-@@ -19,6 +22,18 @@ void sb800_prefetch(struct device *dev,
- bool usb_amd_pt_check_port(struct device *device, int port);
- #else
- struct pci_dev;
-+static inline int usb_amd_quirk_pll_check(void)
-+{
-+ return 0;
-+}
-+static inline bool usb_amd_hang_symptom_quirk(void)
-+{
-+ return false;
-+}
-+static inline bool usb_amd_prefetch_quirk(void)
-+{
-+ return false;
-+}
- static inline void usb_amd_quirk_pll_disable(void) {}
- static inline void usb_amd_quirk_pll_enable(void) {}
- static inline void usb_asmedia_modifyflowcontrol(struct pci_dev *pdev) {}
-@@ -29,6 +44,11 @@ static inline bool usb_amd_pt_check_port
- {
- return false;
- }
-+static inline void usb_enable_intel_xhci_ports(struct pci_dev *xhci_pdev) {}
-+static inline bool usb_xhci_needs_pci_reset(struct pci_dev *pdev)
-+{
-+ return false;
-+}
- #endif /* CONFIG_USB_PCI */
-
- #endif /* __LINUX_USB_PCI_QUIRKS_H */
---- a/include/linux/usb/hcd.h
-+++ b/include/linux/usb/hcd.h
-@@ -485,7 +485,14 @@ extern int usb_hcd_pci_probe(struct pci_
- extern void usb_hcd_pci_remove(struct pci_dev *dev);
- extern void usb_hcd_pci_shutdown(struct pci_dev *dev);
-
-+#ifndef CONFIG_PCI_DISABLE_COMMON_QUIRKS
- extern int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *dev);
-+#else
-+static inline int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *dev)
-+{
-+ return 0;
-+}
-+#endif
-
- extern const struct dev_pm_ops usb_hcd_pci_pm_ops;
- #endif /* CONFIG_USB_PCI */
diff --git a/6.10/target/linux/generic/pending-6.10/820-w1-gpio-fix-problem-with-platfom-data-in-w1-gpio.patch b/6.10/target/linux/generic/pending-6.10/820-w1-gpio-fix-problem-with-platfom-data-in-w1-gpio.patch
deleted file mode 100644
index 33eb34c9..00000000
--- a/6.10/target/linux/generic/pending-6.10/820-w1-gpio-fix-problem-with-platfom-data-in-w1-gpio.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From d9c8bc8c1408f3e8529db6e4e04017b4c579c342 Mon Sep 17 00:00:00 2001
-From: Pawel Dembicki
-Date: Sun, 18 Feb 2018 17:08:04 +0100
-Subject: [PATCH] w1: gpio: fix problem with platform data in w1-gpio
-
-In devices where fdt is used, it is impossible to apply platform data
-without a proper fdt node.
-
-This patch allows platform data to be used in devices with fdt.
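For illustration (hypothetical code, not part of the original patch; the layout of struct w1_gpio_platform_data depends on the kernel version), board code on a DT-booted system could then register the bus master with its own platform data:

/* Hypothetical board file: with the fix above, this platform data is
 * no longer ignored just because the system booted with a device tree.
 */
static struct w1_gpio_platform_data example_w1_pdata;

static int __init example_board_w1_init(void)
{
	struct platform_device *pdev;

	pdev = platform_device_register_data(NULL, "w1-gpio", -1,
					     &example_w1_pdata,
					     sizeof(example_w1_pdata));
	return PTR_ERR_OR_ZERO(pdev);
}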
-
-Signed-off-by: Pawel Dembicki
---
- drivers/w1/masters/w1-gpio.c | 7 +++----
- 1 file changed, 3 insertions(+), 4 deletions(-)
-
---- a/drivers/w1/masters/w1-gpio.c
-+++ b/drivers/w1/masters/w1-gpio.c
-@@ -76,7 +76,7 @@ static int w1_gpio_probe(struct platform
- enum gpiod_flags gflags = GPIOD_OUT_LOW_OPEN_DRAIN;
- int err;
-
-- if (of_have_populated_dt()) {
-+ if (of_have_populated_dt() && !dev_get_platdata(&pdev->dev)) {
- pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
- if (!pdata)
- return -ENOMEM;
diff --git a/6.10/target/linux/generic/pending-6.10/880-01-dt-bindings-leds-add-LED_FUNCTION_MOBILE-for-mobile-.patch b/6.10/target/linux/generic/pending-6.10/880-01-dt-bindings-leds-add-LED_FUNCTION_MOBILE-for-mobile-.patch
deleted file mode 100644
index 3321b03f..00000000
--- a/6.10/target/linux/generic/pending-6.10/880-01-dt-bindings-leds-add-LED_FUNCTION_MOBILE-for-mobile-.patch
+++ /dev/null
@@ -1,37 +0,0 @@
-From 38eb5b3370c29515d2ce92adac2d6eba96f276f5 Mon Sep 17 00:00:00 2001
-From: INAGAKI Hiroshi
-Date: Wed, 20 Mar 2024 15:32:18 +0900
-Subject: [PATCH v2 1/2] dt-bindings: leds: add LED_FUNCTION_MOBILE for mobile
- network
-
-Add LED_FUNCTION_MOBILE for LEDs that indicate status of mobile network
-connection. This is useful to distinguish those LEDs from LEDs that
-indicate status of wired "wan" connection.
-
-example (on stock fw):
-
-IIJ SA-W2 has "Mobile" LEDs that indicate status (no signal, too low,
-low, good) of mobile network connection via a dongle connected to the USB
-port.
-
-- no signal: (none, turned off)
-- too low: green:mobile & red:mobile (amber, blink)
-- low: green:mobile & red:mobile (amber, turned on)
-- good: green:mobile (turned on)
-
-Suggested-by: Hauke Mehrtens
-Signed-off-by: INAGAKI Hiroshi
---
- include/dt-bindings/leds/common.h | 1 +
- 1 file changed, 1 insertion(+)
-
---- a/include/dt-bindings/leds/common.h
-+++ b/include/dt-bindings/leds/common.h
-@@ -90,6 +90,7 @@
- #define LED_FUNCTION_INDICATOR "indicator"
- #define LED_FUNCTION_LAN "lan"
- #define LED_FUNCTION_MAIL "mail"
-+#define LED_FUNCTION_MOBILE "mobile"
- #define LED_FUNCTION_MTD "mtd"
- #define LED_FUNCTION_PANIC "panic"
- #define LED_FUNCTION_PROGRAMMING "programming"
diff --git a/6.10/target/linux/generic/pending-6.10/880-02-dt-bindings-leds-add-LED_FUNCTION_SPEED_-for-link-sp.patch b/6.10/target/linux/generic/pending-6.10/880-02-dt-bindings-leds-add-LED_FUNCTION_SPEED_-for-link-sp.patch
deleted file mode 100644
index ab27cd33..00000000
--- a/6.10/target/linux/generic/pending-6.10/880-02-dt-bindings-leds-add-LED_FUNCTION_SPEED_-for-link-sp.patch
+++ /dev/null
@@ -1,37 +0,0 @@
-From e22afe910afcfb51b6ba6a0ae776939959727f54 Mon Sep 17 00:00:00 2001
-From: INAGAKI Hiroshi
-Date: Wed, 20 Mar 2024 15:59:06 +0900
-Subject: [PATCH v2 2/2] dt-bindings: leds: add LED_FUNCTION_SPEED_* for link
- speed on LAN/WAN
-
-Add LED_FUNCTION_SPEED_LAN and LED_FUNCTION_SPEED_WAN for LEDs that
-indicate link speed of ethernet ports on LAN/WAN. This is useful to
-distinguish those LEDs from LEDs that indicate link status (up/down).
-
-example:
-
-Fortinet FortiGate 30E/50E have LEDs that indicate link speed on each
-of the ethernet ports in addition to LEDs that indicate link status
-(up/down).
-
-- 1000 Mbps: green:speed-(lan|wan)-N
-- 100 Mbps: amber:speed-(lan|wan)-N
-- 10 Mbps: (none, turned off)
-
-Reviewed-by: Rob Herring
-Signed-off-by: INAGAKI Hiroshi
---
- include/dt-bindings/leds/common.h | 2 ++
- 1 file changed, 2 insertions(+)
-
---- a/include/dt-bindings/leds/common.h
-+++ b/include/dt-bindings/leds/common.h
-@@ -96,6 +96,8 @@
- #define LED_FUNCTION_PROGRAMMING "programming"
- #define LED_FUNCTION_RX "rx"
- #define LED_FUNCTION_SD "sd"
-+#define LED_FUNCTION_SPEED_LAN "speed-lan"
-+#define LED_FUNCTION_SPEED_WAN "speed-wan"
- #define LED_FUNCTION_STANDBY "standby"
- #define LED_FUNCTION_TORCH "torch"
- #define LED_FUNCTION_TX "tx"
diff --git a/6.10/target/linux/generic/pending-6.10/980-tools-thermal-tmon-Fix-compilation-warning-for-wrong.patch b/6.10/target/linux/generic/pending-6.10/980-tools-thermal-tmon-Fix-compilation-warning-for-wrong.patch
deleted file mode 100644
index 6a0a1998..00000000
--- a/6.10/target/linux/generic/pending-6.10/980-tools-thermal-tmon-Fix-compilation-warning-for-wrong.patch
+++ /dev/null
@@ -1,51 +0,0 @@
-From a7a94ca21ac0f347f683d33c72b4aab57ce5eec3 Mon Sep 17 00:00:00 2001
-From: Florian Eckert
-Date: Mon, 20 Nov 2023 11:13:20 +0100
-Subject: [PATCH] tools/thermal/tmon: Fix compilation warning for wrong format
-
-The following warnings are shown during compilation:
-
-tui.c: In function 'show_cooling_device':
- tui.c:216:40: warning: format '%d' expects argument of type 'int', but
-argument 7 has type 'long unsigned int' [-Wformat=]
- 216 | "%02d %12.12s%6d %6d",
- | ~~^
- | |
- | int
- | %6ld
- ......
- 219 | ptdata.cdi[j].cur_state,
- | ~~~~~~~~~~~~~~~~~~~~~~~
- | |
- | long unsigned int
- tui.c:216:44: warning: format '%d' expects argument of type 'int', but
-argument 8 has type 'long unsigned int' [-Wformat=]
- 216 | "%02d %12.12s%6d %6d",
- | ~~^
- | |
- | int
- | %6ld
- ......
- 220 | ptdata.cdi[j].max_state);
- | ~~~~~~~~~~~~~~~~~~~~~~~
- | |
- | long unsigned int
-
-To fix this, the correct string format must be used for printing.
-
-Signed-off-by: Florian Eckert
---
- tools/thermal/tmon/tui.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
---- a/tools/thermal/tmon/tui.c
-+++ b/tools/thermal/tmon/tui.c
-@@ -213,7 +213,7 @@ void show_cooling_device(void)
- * cooling device instances. skip unused idr.
- */
- mvwprintw(cooling_device_window, j + 2, 1,
-- "%02d %12.12s%6d %6d",
-+ "%02d %12.12s%6lu %6lu",
- ptdata.cdi[j].instance,
- ptdata.cdi[j].type,
- ptdata.cdi[j].cur_state,
diff --git a/6.10/target/linux/generic/pending-6.10/999-net-phy-move-LED-polarity-to-phy_init_hw.patch b/6.10/target/linux/generic/pending-6.10/999-net-phy-move-LED-polarity-to-phy_init_hw.patch
deleted file mode 100644
index 22c47768..00000000
--- a/6.10/target/linux/generic/pending-6.10/999-net-phy-move-LED-polarity-to-phy_init_hw.patch
+++ /dev/null
@@ -1,100 +0,0 @@
-From 6e6fff51ae5e54092611d174fa45fa78c237a415 Mon Sep 17 00:00:00 2001
-From: Christian Marangi
-Date: Tue, 21 May 2024 20:01:46 +0200
-Subject: [PATCH] net: phy: move LED polarity to phy_init_hw
-
-Some PHYs reset the polarity on reset, and this causes the LED to
-malfunction, as LED polarity is configured only when the LED is
-registered.
-
-To better handle this, move the LED polarity configuration into
-phy_init_hw so that it is reapplied after a PHY reset.
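As a sketch of the driver-side contract this change relies on (the body is illustrative and not taken from the patch), a led_polarity_set callback receives the modes bitmap parsed from the LED's DT node and, after this change, is re-invoked on every phy_init_hw():

/* Illustrative callback: program the polarity requested in the DT.
 * PHY_LED_ACTIVE_LOW and __PHY_LED_MODES_NUM come from <linux/phy.h>.
 */
static int example_led_polarity_set(struct phy_device *phydev, int index,
				    unsigned long modes)
{
	u32 mode;

	for_each_set_bit(mode, &modes, __PHY_LED_MODES_NUM) {
		switch (mode) {
		case PHY_LED_ACTIVE_LOW:
			/* set the active-low bit for LED "index" here */
			break;
		default:
			return -EINVAL;
		}
	}

	return 0;
}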
- -Signed-off-by: Christian Marangi ---- - drivers/net/phy/phy_device.c | 53 +++++++++++++++++++++++++----------- - 1 file changed, 37 insertions(+), 16 deletions(-) - ---- a/drivers/net/phy/phy_device.c -+++ b/drivers/net/phy/phy_device.c -@@ -1223,6 +1223,37 @@ static int phy_poll_reset(struct phy_dev - return 0; - } - -+static int of_phy_led_init(struct phy_device *phydev) -+{ -+ struct phy_led *phyled; -+ -+ list_for_each_entry(phyled, &phydev->leds, list) { -+ struct led_classdev *cdev = &phyled->led_cdev; -+ struct device_node *np = cdev->dev->of_node; -+ unsigned long modes = 0; -+ int err; -+ -+ if (of_property_read_bool(np, "active-low")) -+ set_bit(PHY_LED_ACTIVE_LOW, &modes); -+ if (of_property_read_bool(np, "inactive-high-impedance")) -+ set_bit(PHY_LED_INACTIVE_HIGH_IMPEDANCE, &modes); -+ -+ if (!modes) -+ continue; -+ -+ /* Return error if asked to set polarity modes but not supported */ -+ if (!phydev->drv->led_polarity_set) -+ return -EINVAL; -+ -+ err = phydev->drv->led_polarity_set(phydev, phyled->index, -+ modes); -+ if (err) -+ return err; -+ } -+ -+ return 0; -+} -+ - int phy_init_hw(struct phy_device *phydev) - { - int ret = 0; -@@ -1259,6 +1290,12 @@ int phy_init_hw(struct phy_device *phyde - return ret; - } - -+ if (IS_ENABLED(CONFIG_PHYLIB_LEDS)) { -+ ret = of_phy_led_init(phydev); -+ if (ret < 0) -+ return ret; -+ } -+ - return 0; - } - EXPORT_SYMBOL(phy_init_hw); -@@ -3204,7 +3241,6 @@ static int of_phy_led(struct phy_device - struct device *dev = &phydev->mdio.dev; - struct led_init_data init_data = {}; - struct led_classdev *cdev; -- unsigned long modes = 0; - struct phy_led *phyled; - u32 index; - int err; -@@ -3222,21 +3258,6 @@ static int of_phy_led(struct phy_device - if (index > U8_MAX) - return -EINVAL; - -- if (of_property_read_bool(led, "active-low")) -- set_bit(PHY_LED_ACTIVE_LOW, &modes); -- if (of_property_read_bool(led, "inactive-high-impedance")) -- set_bit(PHY_LED_INACTIVE_HIGH_IMPEDANCE, &modes); -- -- if (modes) { -- /* Return error if asked to set polarity modes but not supported */ -- if (!phydev->drv->led_polarity_set) -- return -EINVAL; -- -- err = phydev->drv->led_polarity_set(phydev, index, modes); -- if (err) -- return err; -- } -- - phyled->index = index; - if (phydev->drv->led_brightness_set) - cdev->brightness_set_blocking = phy_led_set_brightness;