From ced4c72dd53b6069ed5b1a011c866a38d228fa5e Mon Sep 17 00:00:00 2001
From: suyuan168 <175338101@qq.com>
Date: Mon, 3 Jan 2022 19:24:42 +0800
Subject: [PATCH] fixfix

---
 .../arch/arm/boot/dts/bcm2711-rpi-cm4.dts | 0
 .../011-kbuild-export-SUBARCH.patch | 2 +-
 ...ow_offload-handle-netdevice-events-f.patch | 2 +-
 ...w_table-fix-offloaded-connection-tim.patch | 3 +-
 root/target/linux/generic/config-5.15 | 253 +-
 .../generic/hack-5.15/204-module_strip.patch | 34 +-
 .../generic/hack-5.15/205-kconfig-exit.patch | 11 +
 .../210-darwin_scripts_include.patch | 2 +-
 .../hack-5.15/212-tools_portability.patch | 10 +-
 .../hack-5.15/220-arm-gc_sections.patch | 122 +
 .../hack-5.15/221-module_exports.patch | 8 +-
 .../hack-5.15/230-openwrt_lzma_options.patch | 2 +-
 .../linux/generic/hack-5.15/251-kconfig.patch | 2 +-
 .../generic/hack-5.15/252-SATA_PMP.patch | 23 +
 .../hack-5.15/259-regmap_dynamic.patch | 144 +
 .../301-mips_image_cmdline_hack.patch | 2 +-
 ...don-t-reply-on-mtdblock-device-minor.patch | 84 +
 ...rans-call-add-disks-after-mtd-device.patch | 98 +
 .../410-block-fit-partition-parser.patch | 220 +
 .../420-mtd-set-rootfs-to-be-root-dev.patch | 2 +-
 .../640-bridge-only-accept-EAP-locally.patch | 46 +-
 ...-netfilter-add-xt_FLOWOFFLOAD-target.patch | 8 +-
 .../hack-5.15/651-wireless_mesh_header.patch | 2 +-
 .../661-use_fq_codel_by_default.patch | 2 +-
 .../710-net-dsa-mv88e6xxx-default-VID-1.patch | 4 +-
 ...-dsa-mv88e6xxx-disable-ATU-violation.patch | 2 +-
 .../hack-5.15/720-net-phy-add-aqr-phys.patch | 142 +
 .../721-net-add-packet-mangeling.patch | 178 +
 ...-r8152-add-LED-configuration-from-OF.patch | 74 +
 ...et-add-RTL8152-binding-documentation.patch | 54 +
 .../hack-5.15/773-bgmac-add-srab-switch.patch | 4 +-
 .../800-GPIO-add-named-gpio-exports.patch | 12 +-
 .../hack-5.15/901-debloat_sock_diag.patch | 8 +-
 .../generic/hack-5.15/902-debloat_proc.patch | 12 +-
 .../hack-5.15/904-debloat_dma_buf.patch | 92 +
 .../generic/hack-5.4/204-module_strip.patch | 220 +
 .../generic/hack-5.4/205-kconfig-exit.patch | 11 +
 .../hack-5.4/210-darwin_scripts_include.patch | 3053 ++
 .../211-darwin-uuid-typedef-clash.patch | 22 +
 .../hack-5.4/214-spidev_h_portability.patch | 24 +
 .../hack-5.4/220-arm-gc_sections.patch | 138 +
 .../generic/hack-5.4/221-module_exports.patch | 109 +
 .../hack-5.4/230-openwrt_lzma_options.patch | 71 +
 .../hack-5.4/249-udp-tunnel-selection.patch | 11 +
 .../hack-5.4/250-netfilter_depends.patch | 27 +
 .../generic/hack-5.4/251-sound_kconfig.patch | 199 +
 .../generic/hack-5.4/259-regmap_dynamic.patch | 125 +
 .../260-crypto_test_dependencies.patch | 52 +
 .../hack-5.4/260-lib-arc4-unhide.patch | 15 +
 .../generic/hack-5.4/280-rfkill-stubs.patch | 84 +
 ...cache-use-more-efficient-cache-blast.patch | 64 +
 .../301-mips_image_cmdline_hack.patch | 38 +
 .../321-powerpc_crtsavres_prereq.patch | 39 +
 .../400-block-fit-partition-parser.patch | 176 +
 ...k_mx25l6406e_with_4bit_block_protect.patch | 69 +
 .../generic/hack-5.4/531-debloat_lzma.patch | 1040 +
 .../550-loop-Report-EOPNOTSUPP-properly.patch | 41 +
 .../640-bridge-only-accept-EAP-locally.patch | 82 +
 ...lter-connmark-introduce-set-dscpmark.patch | 212 +
 .../hack-5.4/647-netfilter-flow-acct.patch | 70 +
 .../650-netfilter-add-xt_OFFLOAD-target.patch | 589 +
 .../hack-5.4/651-wireless_mesh_header.patch | 24 +
 .../hack-5.4/660-fq_codel_defaults.patch | 27 +
 .../661-use_fq_codel_by_default.patch | 100 +
 .../hack-5.4/662-remove_pfifo_fast.patch | 243 +
 .../generic/hack-5.4/690-mptcp_v0.96.patch | 24166 +---------------
 .../700-swconfig_switch_drivers.patch | 135 +
 .../hack-5.4/703-add_vsc8504_support.patch | 57 +
 .../710-net-dsa-mv88e6xxx-default-VID-1.patch | 18 +
 ...-dsa-mv88e6xxx-disable-ATU-violation.patch | 12 +
 .../generic/hack-5.4/721-phy_packets.patch | 176 +
 ...-r8152-add-LED-configuration-from-OF.patch | 74 +
 ...et-add-RTL8152-binding-documentation.patch | 54 +
 .../hack-5.4/773-bgmac-add-srab-switch.patch | 98 +
 .../hack-5.4/901-debloat_sock_diag.patch | 145 +
 .../generic/hack-5.4/902-debloat_proc.patch | 408 +
 .../hack-5.4/904-debloat_dma_buf.patch | 74 +
 .../generic/hack-5.4/910-kobject_uevent.patch | 32 +
 .../911-kobject_add_broadcast_uevent.patch | 76 +
 ...ays-create-console-node-in-initramfs.patch | 40 +
 .../hack-5.4/999-stop-promiscuous-info.patch | 47 -
 ...terrupt-provider-address-cells-check.patch | 28 +
 ...e_mem_map-with-ARCH_PFN_OFFSET-calcu.patch | 2 +-
 ...0-add-linux-spidev-compatible-si3210.patch | 8 +-
 ...ame2-and-add-RENAME_WHITEOUT-support.patch | 81 +
 ...41-jffs2-add-RENAME_EXCHANGE-support.patch | 73 +
 .../142-jffs2-add-splice-ops.patch | 20 +
 ...ge_allow_receiption_on_disabled_port.patch | 4 +-
 .../pending-5.15/201-extra_optimization.patch | 2 +-
 .../203-kallsyms_uncompressed.patch | 4 +-
 .../270-platform-mikrotik-build-bits.patch | 14 +-
 .../300-mips_expose_boot_raw.patch | 4 +-
 .../pending-5.15/305-mips_module_reloc.patch | 2 +-
 ...CPU-option-reporting-to-proc-cpuinfo.patch | 6 +-
 ...t-command-line-parameters-from-users.patch | 7 +-
 ...ernel-XZ-compression-option-on-PPC_8.patch | 2 +-
 .../pending-5.15/420-mtd-redboot_space.patch | 41 +
 ...mtd-add-routerbootpart-parser-config.patch | 18 +-
 ...or-rework-broken-flash-reset-support.patch | 182 +
 ...spinand-add-support-for-xtx-xt26g0xa.patch | 178 +
 .../484-mtd-spi-nor-add-esmt-f25l16pa.patch | 11 +
 ...ting-ubi0-rootfs-in-init-do_mounts.c.patch | 51 +
 ...cat-add-dt-driver-for-concat-devices.patch | 2 +-
 .../530-jffs2_make_lzma_available.patch | 2 +-
 .../600-netfilter_conntrack_flush.patch | 4 +-
 ...-netfilter_optional_tcp_window_check.patch | 64 +-
 .../pending-5.15/630-packet_socket_type.patch | 16 +-
 .../pending-5.15/655-increase_skb_pad.patch | 2 +-
 ...Add-support-for-MAP-E-FMRs-mesh-mode.patch | 14 +-
 ...ng-with-source-address-failed-policy.patch | 2 +-
 ...T-skip-GRO-for-foreign-MAC-addresses.patch | 26 +-
 ...et-add-mac-address-increment-support.patch | 41 +-
 ...83-of_net-add-mac-address-to-of-tree.patch | 13 +-
 ...detach-callback-to-struct-phy_driver.patch | 2 +-
 ...d-knob-for-filtering-rx-tx-BPDU-pack.patch | 174 +
 ...760-net-dsa-mv88e6xxx-fix-vlan-setup.patch | 6 +-
 ...equest-assisted-learning-on-CPU-port.patch | 2 +-
 ...-missing-linux-if_ether.h-for-ETH_AL.patch | 61 +
 ...ice-struct-copy-its-DMA-params-to-th.patch | 6 +-
 .../810-pci_disable_common_quirks.patch | 6 +-
 .../811-pci_disable_usb_common_quirks.patch | 2 +-
 .../pending-5.15/834-ledtrig-libata.patch | 12 +-
 .../pending-5.15/920-mangle_bootargs.patch | 6 +-
 .../pending-5.15/930-qcom-qmi-helpers.patch | 11 +
 124 files changed, 10938 insertions(+), 24525 deletions(-)
 mode change 100644 => 100755 root/target/linux/bcm27xx/files/arch/arm/boot/dts/bcm2711-rpi-cm4.dts
 create mode 100755 root/target/linux/generic/hack-5.15/205-kconfig-exit.patch
 create mode 100755 root/target/linux/generic/hack-5.15/220-arm-gc_sections.patch
 create mode 100755 root/target/linux/generic/hack-5.15/252-SATA_PMP.patch
 create mode 100755 root/target/linux/generic/hack-5.15/259-regmap_dynamic.patch
 create mode 100755 root/target/linux/generic/hack-5.15/401-mtd-super-don-t-reply-on-mtdblock-device-minor.patch
 create mode 100755 root/target/linux/generic/hack-5.15/402-mtd-blktrans-call-add-disks-after-mtd-device.patch
 create mode 100755 root/target/linux/generic/hack-5.15/410-block-fit-partition-parser.patch
 create mode 100755 root/target/linux/generic/hack-5.15/720-net-phy-add-aqr-phys.patch
 create mode 100755 root/target/linux/generic/hack-5.15/721-net-add-packet-mangeling.patch
 create mode 100755 root/target/linux/generic/hack-5.15/760-net-usb-r8152-add-LED-configuration-from-OF.patch
 create mode 100755 root/target/linux/generic/hack-5.15/761-dt-bindings-net-add-RTL8152-binding-documentation.patch
 create mode 100755 root/target/linux/generic/hack-5.15/904-debloat_dma_buf.patch
 create mode 100755 root/target/linux/generic/hack-5.4/204-module_strip.patch
 create mode 100755 root/target/linux/generic/hack-5.4/205-kconfig-exit.patch
 create mode 100755 root/target/linux/generic/hack-5.4/210-darwin_scripts_include.patch
 create mode 100755 root/target/linux/generic/hack-5.4/211-darwin-uuid-typedef-clash.patch
 create mode 100755 root/target/linux/generic/hack-5.4/214-spidev_h_portability.patch
 create mode 100755 root/target/linux/generic/hack-5.4/220-arm-gc_sections.patch
 create mode 100755 root/target/linux/generic/hack-5.4/221-module_exports.patch
 create mode 100755 root/target/linux/generic/hack-5.4/230-openwrt_lzma_options.patch
 create mode 100755 root/target/linux/generic/hack-5.4/249-udp-tunnel-selection.patch
 create mode 100755 root/target/linux/generic/hack-5.4/250-netfilter_depends.patch
 create mode 100755 root/target/linux/generic/hack-5.4/251-sound_kconfig.patch
 create mode 100755 root/target/linux/generic/hack-5.4/259-regmap_dynamic.patch
 create mode 100755 root/target/linux/generic/hack-5.4/260-crypto_test_dependencies.patch
 create mode 100755 root/target/linux/generic/hack-5.4/260-lib-arc4-unhide.patch
 create mode 100755 root/target/linux/generic/hack-5.4/280-rfkill-stubs.patch
 create mode 100755 root/target/linux/generic/hack-5.4/300-MIPS-r4k_cache-use-more-efficient-cache-blast.patch
 create mode 100755 root/target/linux/generic/hack-5.4/301-mips_image_cmdline_hack.patch
 create mode 100755 root/target/linux/generic/hack-5.4/321-powerpc_crtsavres_prereq.patch
 create mode 100755 root/target/linux/generic/hack-5.4/400-block-fit-partition-parser.patch
 create mode 100755 root/target/linux/generic/hack-5.4/400-unlock_mx25l6406e_with_4bit_block_protect.patch
 create mode 100755 root/target/linux/generic/hack-5.4/531-debloat_lzma.patch
 create mode 100755 root/target/linux/generic/hack-5.4/550-loop-Report-EOPNOTSUPP-properly.patch
 create mode 100755 root/target/linux/generic/hack-5.4/640-bridge-only-accept-EAP-locally.patch
 create mode 100755 root/target/linux/generic/hack-5.4/645-netfilter-connmark-introduce-set-dscpmark.patch
 create mode 100755 root/target/linux/generic/hack-5.4/647-netfilter-flow-acct.patch
 create mode 100755 root/target/linux/generic/hack-5.4/650-netfilter-add-xt_OFFLOAD-target.patch
 create mode 100755 root/target/linux/generic/hack-5.4/651-wireless_mesh_header.patch
 create mode 100755 root/target/linux/generic/hack-5.4/660-fq_codel_defaults.patch
 create mode 100755 root/target/linux/generic/hack-5.4/661-use_fq_codel_by_default.patch
 create mode 100755 root/target/linux/generic/hack-5.4/662-remove_pfifo_fast.patch
 create mode 100755 root/target/linux/generic/hack-5.4/700-swconfig_switch_drivers.patch
 create mode 100755 root/target/linux/generic/hack-5.4/703-add_vsc8504_support.patch
 create mode 100755 root/target/linux/generic/hack-5.4/710-net-dsa-mv88e6xxx-default-VID-1.patch
 create mode 100755 root/target/linux/generic/hack-5.4/711-net-dsa-mv88e6xxx-disable-ATU-violation.patch
 create mode 100755 root/target/linux/generic/hack-5.4/721-phy_packets.patch
 create mode 100755 root/target/linux/generic/hack-5.4/760-net-usb-r8152-add-LED-configuration-from-OF.patch
 create mode 100755 root/target/linux/generic/hack-5.4/761-dt-bindings-net-add-RTL8152-binding-documentation.patch
 create mode 100755 root/target/linux/generic/hack-5.4/773-bgmac-add-srab-switch.patch
 create mode 100755 root/target/linux/generic/hack-5.4/901-debloat_sock_diag.patch
 create mode 100755 root/target/linux/generic/hack-5.4/902-debloat_proc.patch
 create mode 100755 root/target/linux/generic/hack-5.4/904-debloat_dma_buf.patch
 create mode 100755 root/target/linux/generic/hack-5.4/910-kobject_uevent.patch
 create mode 100755 root/target/linux/generic/hack-5.4/911-kobject_add_broadcast_uevent.patch
 create mode 100755 root/target/linux/generic/hack-5.4/921-always-create-console-node-in-initramfs.patch
 delete mode 100755 root/target/linux/generic/hack-5.4/999-stop-promiscuous-info.patch
 create mode 100755 root/target/linux/generic/pending-5.15/050-dtc-checks-Drop-interrupt-provider-address-cells-check.patch
 create mode 100755 root/target/linux/generic/pending-5.15/140-jffs2-use-.rename2-and-add-RENAME_WHITEOUT-support.patch
 create mode 100755 root/target/linux/generic/pending-5.15/141-jffs2-add-RENAME_EXCHANGE-support.patch
 create mode 100755 root/target/linux/generic/pending-5.15/142-jffs2-add-splice-ops.patch
 create mode 100755 root/target/linux/generic/pending-5.15/420-mtd-redboot_space.patch
 create mode 100755 root/target/linux/generic/pending-5.15/481-mtd-spi-nor-rework-broken-flash-reset-support.patch
 create mode 100755 root/target/linux/generic/pending-5.15/483-mtd-spinand-add-support-for-xtx-xt26g0xa.patch
 create mode 100755 root/target/linux/generic/pending-5.15/484-mtd-spi-nor-add-esmt-f25l16pa.patch
 create mode 100755 root/target/linux/generic/pending-5.15/492-try-auto-mounting-ubi0-rootfs-in-init-do_mounts.c.patch
 create mode 100755 root/target/linux/generic/pending-5.15/710-bridge-add-knob-for-filtering-rx-tx-BPDU-pack.patch
 create mode 100755 root/target/linux/generic/pending-5.15/780-ARM-kirkwood-add-missing-linux-if_ether.h-for-ETH_AL.patch
 create mode 100755 root/target/linux/generic/pending-5.15/930-qcom-qmi-helpers.patch

diff --git a/root/target/linux/bcm27xx/files/arch/arm/boot/dts/bcm2711-rpi-cm4.dts b/root/target/linux/bcm27xx/files/arch/arm/boot/dts/bcm2711-rpi-cm4.dts
old mode 100644
new mode 100755
diff --git a/root/target/linux/generic/backport-5.15/011-kbuild-export-SUBARCH.patch b/root/target/linux/generic/backport-5.15/011-kbuild-export-SUBARCH.patch
index d99dcc9f..0aedad4b 100755
--- a/root/target/linux/generic/backport-5.15/011-kbuild-export-SUBARCH.patch
+++ b/root/target/linux/generic/backport-5.15/011-kbuild-export-SUBARCH.patch
@@ -10,7 +10,7 @@ Signed-off-by: Felix Fietkau
 --- a/Makefile
 +++ b/Makefile
-@@ -524,7 +524,7 @@ KBUILD_LDFLAGS_MODULE :=
+@@ -523,7 +523,7 @@ KBUILD_LDFLAGS_MODULE :=
  KBUILD_LDFLAGS :=
  CLANG_FLAGS :=
diff --git a/root/target/linux/generic/backport-5.15/343-netfilter-nft_flow_offload-handle-netdevice-events-f.patch b/root/target/linux/generic/backport-5.15/343-netfilter-nft_flow_offload-handle-netdevice-events-f.patch
index aa4ecf1b..9a6b802f 100755
--- a/root/target/linux/generic/backport-5.15/343-netfilter-nft_flow_offload-handle-netdevice-events-f.patch
+++ b/root/target/linux/generic/backport-5.15/343-netfilter-nft_flow_offload-handle-netdevice-events-f.patch
@@ -10,7 +10,7 @@ Signed-off-by: Pablo Neira Ayuso
 --- a/net/netfilter/nf_flow_table_core.c
 +++ b/net/netfilter/nf_flow_table_core.c
-@@ -656,13 +656,41 @@ void nf_flow_table_free(struct nf_flowta
+@@ -646,13 +646,41 @@ void nf_flow_table_free(struct nf_flowta
  }
  EXPORT_SYMBOL_GPL(nf_flow_table_free);
diff --git a/root/target/linux/generic/backport-5.4/370-netfilter-nf_flow_table-fix-offloaded-connection-tim.patch b/root/target/linux/generic/backport-5.4/370-netfilter-nf_flow_table-fix-offloaded-connection-tim.patch
index 5dac7f5e..373a1564 100755
--- a/root/target/linux/generic/backport-5.4/370-netfilter-nf_flow_table-fix-offloaded-connection-tim.patch
+++ b/root/target/linux/generic/backport-5.4/370-netfilter-nf_flow_table-fix-offloaded-connection-tim.patch
@@ -40,11 +40,10 @@ Signed-off-by: Felix Fietkau
  static void gc_worker(struct work_struct *work)
  {
  	unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION;
-@@ -1250,11 +1238,9 @@ static void gc_worker(struct work_struct
+@@ -1250,10 +1238,8 @@ static void gc_worker(struct work_struct
  		tmp = nf_ct_tuplehash_to_ctrack(h);

- 		scanned++;
 -		if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
 -			nf_ct_offload_timeout(tmp);
 +		if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
diff --git a/root/target/linux/generic/config-5.15 b/root/target/linux/generic/config-5.15
index 38fb2aca..45ffe0f0 100755
--- a/root/target/linux/generic/config-5.15
+++ b/root/target/linux/generic/config-5.15
@@ -22,10 +22,8 @@ CONFIG_64BIT_TIME=y
 # CONFIG_ACPI_CONFIGFS is not set
 # CONFIG_ACPI_CUSTOM_METHOD is not set
 # CONFIG_ACPI_EXTLOG is not set
-# CONFIG_ACPI_FPDT is not set
 # CONFIG_ACPI_HED is not set
 # CONFIG_ACPI_NFIT is not set
-# CONFIG_ACPI_PRMT is not set
 # CONFIG_ACPI_REDUCED_HARDWARE_ONLY is not set
 # CONFIG_ACPI_TABLE_UPGRADE is not set
 # CONFIG_ACPI_VIDEO is not set
@@ -51,7 +49,6 @@ CONFIG_64BIT_TIME=y
 # CONFIG_AD5758 is not set
 # CONFIG_AD5761 is not set
 # CONFIG_AD5764 is not set
-# CONFIG_AD5766 is not set
 # CONFIG_AD5770R is not set
 # CONFIG_AD5791 is not set
 # CONFIG_AD5933 is not set
@@ -111,7 +108,6 @@ CONFIG_64BIT_TIME=y
 # CONFIG_ADM8211 is not set
 # CONFIG_ADT7316 is not set
 # CONFIG_ADUX1020 is not set
-# CONFIG_ADV_SWBUTTON is not set
 CONFIG_ADVISE_SYSCALLS=y
 # CONFIG_ADXL345_I2C is not set
 # CONFIG_ADXL345_SPI is not set
@@ -158,8 +154,6 @@ CONFIG_ALLOW_DEV_COREDUMP=y
 # CONFIG_AMD8111_ETH is not set
 # CONFIG_AMD_MEM_ENCRYPT is not set
 # CONFIG_AMD_PHY is not set
-# CONFIG_AMD_PMC is not set
-# CONFIG_AMD_SFH_HID is not set
 # CONFIG_AMD_XGBE is not set
 # CONFIG_AMD_XGBE_HAVE_ECC is not set
 # CONFIG_AMIGA_PARTITION is not set
@@ -184,14 +178,12 @@ CONFIG_ANON_INODES=y
 # CONFIG_ARCH_ACTIONS is not set
 # CONFIG_ARCH_AGILEX is not set
 # CONFIG_ARCH_ALPINE is not set
-# CONFIG_ARCH_APPLE is not set
 # CONFIG_ARCH_ARTPEC is not set
 # CONFIG_ARCH_ASPEED is not set
 # CONFIG_ARCH_AT91 is not set
 # CONFIG_ARCH_AXXIA is not set
 # CONFIG_ARCH_BCM is not set
 # CONFIG_ARCH_BCM2835 is not set
-# CONFIG_ARCH_BCM4908 is not set
 # CONFIG_ARCH_BCM_21664 is not set
 # CONFIG_ARCH_BCM_23550 is not set
 # CONFIG_ARCH_BCM_281XX is not set
@@ -321,7 +313,6 @@ CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8
 # CONFIG_ARM64_BTI is not set
 # CONFIG_ARM64_CRYPTO is not set
 # CONFIG_ARM64_E0PD is not set
-# CONFIG_ARM64_EPAN is not set
 # CONFIG_ARM64_ERRATUM_1024718 is not set
 # CONFIG_ARM64_ERRATUM_1319367 is not set
 # CONFIG_ARM64_ERRATUM_1463225 is not set
@@ -339,13 +330,12 @@ CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8
 # CONFIG_ARM64_ERRATUM_858921 is not set
 # CONFIG_ARM64_HW_AFDBM is not set
 # CONFIG_ARM64_LSE_ATOMICS is not set
-CONFIG_ARM64_MODULE_PLTS=y
+# CONFIG_ARM64_MODULE_PLTS is not set
 # CONFIG_ARM64_MTE is not set
 # CONFIG_ARM64_PAN is not set
 # CONFIG_ARM64_PMEM is not set
 # CONFIG_ARM64_PSEUDO_NMI is not set
 # CONFIG_ARM64_PTDUMP_DEBUGFS is not set
-# CONFIG_ARM64_PTR_AUTH_KERNEL is not set
 # CONFIG_ARM64_RANDOMIZE_TEXT_OFFSET is not set
 # CONFIG_ARM64_RAS_EXTN is not set
 # CONFIG_ARM64_RELOC_TEST is not set
@@ -396,20 +386,19 @@ CONFIG_ARM_DMA_MEM_BUFFERABLE=y
 # CONFIG_ARM_ERRATA_852423 is not set
 # CONFIG_ARM_ERRATA_857271 is not set
 # CONFIG_ARM_ERRATA_857272 is not set
-# CONFIG_ARM_FFA_TRANSPORT is not set
 CONFIG_ARM_GIC_MAX_NR=1
 # CONFIG_ARM_KIRKWOOD_CPUFREQ is not set
 # CONFIG_ARM_KPROBES_TEST is not set
 # CONFIG_ARM_LPAE is not set
 # CONFIG_ARM_MHU is not set
-# CONFIG_ARM_MHU_V2 is not set
-CONFIG_ARM_MODULE_PLTS=y
+# CONFIG_ARM_MODULE_PLTS is not set
 # CONFIG_ARM_PATCH_PHYS_VIRT is not set
 # CONFIG_ARM_PSCI is not set
 # CONFIG_ARM_PSCI_CHECKER is not set
 # CONFIG_ARM_PSCI_CPUIDLE is not set
 # CONFIG_ARM_PTDUMP_DEBUGFS is not set
 # CONFIG_ARM_SBSA_WATCHDOG is not set
+# CONFIG_ARM_SCMI_PROTOCOL is not set
 # CONFIG_ARM_SCPI_PROTOCOL is not set
 # CONFIG_ARM_SDE_INTERFACE is not set
 # CONFIG_ARM_SMCCC_SOC_ID is not set
@@ -568,8 +557,8 @@ CONFIG_BCMA_POSSIBLE=y
 # CONFIG_BCM_IPROC_ADC is not set
 # CONFIG_BCM_KONA_USB2_PHY is not set
 # CONFIG_BCM_SBA_RAID is not set
-# CONFIG_BDI_SWITCH is not set
 # CONFIG_BCM_VK is not set
+# CONFIG_BDI_SWITCH is not set
 # CONFIG_BE2ISCSI is not set
 # CONFIG_BE2NET is not set
 # CONFIG_BEFS_FS is not set
@@ -684,7 +673,6 @@ CONFIG_BLOCK=y
 # CONFIG_BMC150_MAGN_SPI is not set
 # CONFIG_BME680 is not set
 # CONFIG_BMG160 is not set
-# CONFIG_BMI088_ACCEL is not set
 # CONFIG_BMI160_I2C is not set
 # CONFIG_BMI160_SPI is not set
 # CONFIG_BMIPS_GENERIC is not set
@@ -756,7 +744,6 @@ CONFIG_BROKEN_ON_SMP=y
 # CONFIG_BTRFS_FS_POSIX_ACL is not set
 # CONFIG_BTRFS_FS_REF_VERIFY is not set
 # CONFIG_BTRFS_FS_RUN_SANITY_TESTS is not set
-# CONFIG_BT_AOSPEXT is not set
 # CONFIG_BT_ATH3K is not set
 # CONFIG_BT_BNEP is not set
 CONFIG_BT_BNEP_MC_FILTER=y
@@ -797,7 +784,6 @@ CONFIG_BT_HCIUART_H4=y
 # CONFIG_BT_RFCOMM is not set
 CONFIG_BT_RFCOMM_TTY=y
 # CONFIG_BT_SELFTEST is not set
-# CONFIG_BT_VIRTIO is not set
 CONFIG_BUG=y
 # CONFIG_BUG_ON_DATA_CORRUPTION is not set
 CONFIG_BUILDTIME_EXTABLE_SORT=y
@@ -812,7 +798,6 @@ CONFIG_CACHE_L2X0_PMU=y
 # CONFIG_CAN_BCM is not set
 # CONFIG_CAN_DEBUG_DEVICES is not set
 # CONFIG_CAN_DEV is not set
-# CONFIG_CAN_ETAS_ES58X is not set
 # CONFIG_CAN_GS_USB is not set
 # CONFIG_CAN_GW is not set
 # CONFIG_CAN_HI311X is not set
@@ -909,13 +894,13 @@ CONFIG_CIFS_POSIX=y
 # CONFIG_CIFS_SMB2 is not set
 # CONFIG_CIFS_STATS is not set
 # CONFIG_CIFS_STATS2 is not set
-# CONFIG_CIFS_SWN_UPCALL is not set
 # CONFIG_CIFS_WEAK_PW_HASH is not set
 CONFIG_CIFS_XATTR=y
 # CONFIG_CIO_DAC is not set
 CONFIG_CLANG_VERSION=0
 # CONFIG_CLEANCACHE is not set
 # CONFIG_CLKSRC_VERSATILE is not set
+# CONFIG_CLK_GFM_LPASS_SM8250 is not set
 # CONFIG_CLK_HSDK is not set
 # CONFIG_CLK_QORIQ is not set
 # CONFIG_CLK_SP810 is not set
@@ -928,7 +913,6 @@ CONFIG_CLS_U32_MARK=y
 # CONFIG_CM3605 is not set
 # CONFIG_CM36651 is not set
 # CONFIG_CMA is not set
-# CONFIG_CMA_SYSFS is not set
 CONFIG_CMDLINE=""
 # CONFIG_CMDLINE_BOOL is not set
 # CONFIG_CMDLINE_EXTEND is not set
@@ -940,7 +924,6 @@ CONFIG_CMDLINE=""
 # CONFIG_CODE_PATCHING_SELFTEST is not set
 # CONFIG_COMEDI is not set
 # CONFIG_COMMON_CLK_AXI_CLKGEN is not set
-# CONFIG_CLK_BCM2711_DVP is not set
 # CONFIG_COMMON_CLK_CDCE706 is not set
 # CONFIG_COMMON_CLK_CDCE925 is not set
 # CONFIG_COMMON_CLK_CS2000_CP is not set
@@ -970,7 +953,6 @@ CONFIG_CMDLINE=""
 CONFIG_COMPACTION=y
 # CONFIG_COMPAL_LAPTOP is not set
 # CONFIG_COMPAT is not set
-# CONFIG_COMPAT_32BIT_TIME is not set
 # CONFIG_COMPAT_BRK is not set
 # CONFIG_COMPILE_TEST is not set
 # CONFIG_CONFIGFS_FS is not set
@@ -1035,8 +1017,8 @@ CONFIG_CROSS_COMPILE=""
 # CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_CRYPTO=y
 # CONFIG_CRYPTO_842 is not set
-# CONFIG_CRYPTO_ADIANTUM is not set
 CONFIG_CRYPTO_ACOMP2=y
+# CONFIG_CRYPTO_ADIANTUM is not set
 CONFIG_CRYPTO_AEAD=y
 CONFIG_CRYPTO_AEAD2=y
 # CONFIG_CRYPTO_AEGIS128 is not set
@@ -1118,7 +1100,6 @@ CONFIG_CRYPTO_CTR=y
 # CONFIG_CRYPTO_DEV_MXC_SCC is not set
 # CONFIG_CRYPTO_DEV_MXS_DCP is not set
 # CONFIG_CRYPTO_DEV_NITROX_CNN55XX is not set
-# CONFIG_CRYPTO_DEV_QAT_4XXX is not set
 # CONFIG_CRYPTO_DEV_QAT_C3XXX is not set
 # CONFIG_CRYPTO_DEV_QAT_C3XXXVF is not set
 # CONFIG_CRYPTO_DEV_QAT_C62X is not set
@@ -1138,6 +1119,7 @@ CONFIG_CRYPTO_CTR=y
 # CONFIG_CRYPTO_DRBG_MENU is not set
 # CONFIG_CRYPTO_ECB is not set
 # CONFIG_CRYPTO_ECDH is not set
+# CONFIG_CRYPTO_ECDSA is not set
 # CONFIG_CRYPTO_ECHAINIV is not set
 # CONFIG_CRYPTO_ECRDSA is not set
 # CONFIG_CRYPTO_ESSIV is not set
@@ -1155,9 +1137,9 @@ CONFIG_CRYPTO_HASH2=y
 # CONFIG_CRYPTO_HW is not set
 # CONFIG_CRYPTO_JITTERENTROPY is not set
 # CONFIG_CRYPTO_KEYWRAP is not set
+# CONFIG_CRYPTO_KHAZAD is not set
 CONFIG_CRYPTO_KPP=y
 CONFIG_CRYPTO_KPP2=y
-# CONFIG_CRYPTO_KHAZAD is not set
 CONFIG_CRYPTO_LIB_AES=y
 CONFIG_CRYPTO_LIB_ARC4=y
 # CONFIG_CRYPTO_LIB_BLAKE2S is not set
@@ -1254,6 +1236,7 @@ CONFIG_CRYPTO_SKCIPHER2=y
 # CONFIG_CRYPTO_ZSTD is not set
 # CONFIG_CS5535_MFGPT is not set
 # CONFIG_CS89x0 is not set
+# CONFIG_CS89x0_PLATFORM is not set
 # CONFIG_CSD_LOCK_WAIT_DEBUG is not set
 # CONFIG_CUSE is not set
 # CONFIG_CW1200 is not set
@@ -1267,6 +1250,7 @@ CONFIG_CRYPTO_SKCIPHER2=y
 # CONFIG_CYPRESS_FIRMWARE is not set
 # CONFIG_DA280 is not set
 # CONFIG_DA311 is not set
+# CONFIG_DAMON is not set
 # CONFIG_DAVICOM_PHY is not set
 # CONFIG_DAX is not set
 # CONFIG_DCB is not set
@@ -1280,6 +1264,7 @@ CONFIG_CRYPTO_SKCIPHER2=y
 # CONFIG_DEBUG_DRIVER is not set
 # CONFIG_DEBUG_EFI is not set
 # CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_32B is not set
+# CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B is not set
 # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
 CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_FS_ALLOW_ALL=y
@@ -1292,9 +1277,13 @@ CONFIG_DEBUG_FS_ALLOW_ALL=y
 # CONFIG_DEBUG_INFO_BTF is not set
 # CONFIG_DEBUG_INFO_COMPRESSED is not set
 # CONFIG_DEBUG_INFO_DWARF4 is not set
+# CONFIG_DEBUG_INFO_DWARF5 is not set
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_DEBUG_INFO_REDUCED=y
 # CONFIG_DEBUG_INFO_SPLIT is not set
+# CONFIG_DEBUG_IRQFLAGS is not set
 CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_KMAP_LOCAL is not set
 # CONFIG_DEBUG_KMEMLEAK is not set
 # CONFIG_DEBUG_KOBJECT is not set
 # CONFIG_DEBUG_KOBJECT_RELEASE is not set
@@ -1390,7 +1379,6 @@ CONFIG_DEVPORT=y
 # CONFIG_DLHL60D is not set
 # CONFIG_DLM is not set
 # CONFIG_DM9000 is not set
-# CONFIG_DMABUF_DEBUG is not set
 # CONFIG_DMABUF_HEAPS is not set
 # CONFIG_DMABUF_MOVE_NOTIFY is not set
 # CONFIG_DMABUF_SELFTESTS is not set
@@ -1407,6 +1395,7 @@ CONFIG_DMA_DECLARE_COHERENT=y
 # CONFIG_DMA_ENGINE is not set
 # CONFIG_DMA_FENCE_TRACE is not set
 # CONFIG_DMA_JZ4780 is not set
+# CONFIG_DMA_MAP_BENCHMARK is not set
 CONFIG_DMA_NONCOHERENT_MMAP=y
 # CONFIG_DMA_NOOP_OPS is not set
 # CONFIG_DMA_PERNUMA_CMA is not set
@@ -1457,9 +1446,7 @@ CONFIG_DQL=y
 # CONFIG_DRM_AMD_DC_DCN3_0 is not set
 # CONFIG_DRM_AMD_DC_HDCP is not set
 # CONFIG_DRM_AMD_DC_SI is not set
-# CONFIG_DRM_AMD_SECURE_DISPLAY is not set
 # CONFIG_DRM_ANALOGIX_ANX6345 is not set
-# CONFIG_DRM_ANALOGIX_ANX7625 is not set
 # CONFIG_DRM_ANALOGIX_ANX78XX is not set
 # CONFIG_DRM_ARCPGU is not set
 # CONFIG_DRM_ARMADA is not set
@@ -1467,7 +1454,6 @@ CONFIG_DQL=y
 # CONFIG_DRM_BOCHS is not set
 # CONFIG_DRM_CDNS_DSI is not set
 # CONFIG_DRM_CDNS_MHDP8546 is not set
-# CONFIG_DRM_CHIPONE_ICN6211 is not set
 # CONFIG_DRM_CHRONTEL_CH7033 is not set
 # CONFIG_DRM_CIRRUS_QEMU is not set
 # CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS is not set
@@ -1485,7 +1471,6 @@ CONFIG_DQL=y
 # CONFIG_DRM_FSL_DCU is not set
 # CONFIG_DRM_GM12U320 is not set
 # CONFIG_DRM_GMA500 is not set
-# CONFIG_DRM_GUD is not set
 # CONFIG_DRM_HDLCD is not set
 # CONFIG_DRM_HISI_HIBMC is not set
 # CONFIG_DRM_HISI_KIRIN is not set
@@ -1495,16 +1480,12 @@ CONFIG_DQL=y
 # CONFIG_DRM_I2C_NXP_TDA998X is not set
 # CONFIG_DRM_I2C_SIL164 is not set
 # CONFIG_DRM_I915 is not set
-# CONFIG_DRM_ITE_IT66121 is not set
-DRM_I915_REQUEST_TIMEOUT=20000
 # CONFIG_DRM_KOMEDA is not set
 # CONFIG_DRM_LEGACY is not set
 # CONFIG_DRM_LIB_RANDOM is not set
 # CONFIG_DRM_LIMA is not set
 # CONFIG_DRM_LOAD_EDID_FIRMWARE is not set
-# CONFIG_DRM_LONTIUM_LT8912B is not set
 # CONFIG_DRM_LONTIUM_LT9611 is not set
-# CONFIG_DRM_LONTIUM_LT9611UXC is not set
 # CONFIG_DRM_LVDS_CODEC is not set
 # CONFIG_DRM_LVDS_ENCODER is not set
 # CONFIG_DRM_MALI_DISPLAY is not set
@@ -1516,16 +1497,13 @@ DRM_I915_REQUEST_TIMEOUT=20000
 # CONFIG_DRM_NWL_MIPI_DSI is not set
 # CONFIG_DRM_NXP_PTN3460 is not set
 # CONFIG_DRM_OMAP is not set
-# CONFIG_DRM_PANEL_ABT_Y030XX067A is not set
-# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set
 # CONFIG_DRM_PANEL_ARM_VERSATILE is not set
+# CONFIG_DRM_PANEL_ASUS_Z00T_TM5P5_NT35596 is not set
 # CONFIG_DRM_PANEL_BOE_HIMAX8279D is not set
 # CONFIG_DRM_PANEL_BOE_TV101WUM_NL6 is not set
-# CONFIG_DRM_PANEL_DSI_CM is not set
 # CONFIG_DRM_PANEL_ELIDA_KD35T133 is not set
-# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set
 # CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02 is not set
-# CONFIG_DRM_PANEL_KHADAS_TS050 is not set
+# CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D is not set
 # CONFIG_DRM_PANEL_ILITEK_IL9322 is not set
 # CONFIG_DRM_PANEL_ILITEK_ILI9881C is not set
 # CONFIG_DRM_PANEL_INNOLUX_P079ZCA is not set
@@ -1539,7 +1517,6 @@ DRM_I915_REQUEST_TIMEOUT=20000
 # CONFIG_DRM_PANEL_MANTIX_MLAF057WE51 is not set
 # CONFIG_DRM_PANEL_NEC_NL8048HL11 is not set
 # CONFIG_DRM_PANEL_NOVATEK_NT35510 is not set
-# CONFIG_DRM_PANEL_NOVATEK_NT36672A is not set
 # CONFIG_DRM_PANEL_NOVATEK_NT39016 is not set
 # CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO is not set
 # CONFIG_DRM_PANEL_ORISETECH_OTM8009A is not set
@@ -1551,7 +1528,6 @@ DRM_I915_REQUEST_TIMEOUT=20000
 # CONFIG_DRM_PANEL_ROCKTECH_JH057N00900 is not set
 # CONFIG_DRM_PANEL_RONBO_RB070D30 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_LD9040 is not set
-# CONFIG_DRM_PANEL_SAMSUNG_SOFEF00 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_S6D16D0 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2 is not set
 # CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03 is not set
@@ -1568,7 +1544,6 @@ DRM_I915_REQUEST_TIMEOUT=20000
 # CONFIG_DRM_PANEL_SITRONIX_ST7789V is not set
 # CONFIG_DRM_PANEL_SONY_ACX424AKP is not set
 # CONFIG_DRM_PANEL_SONY_ACX565AKM is not set
-# CONFIG_DRM_PANEL_TDO_TL070WSH30 is not set
 # CONFIG_DRM_PANEL_TPO_TD028TTEC1 is not set
 # CONFIG_DRM_PANEL_TPO_TD043MTEA1 is not set
 # CONFIG_DRM_PANEL_TPO_TPG110 is not set
@@ -1588,7 +1563,6 @@ DRM_I915_REQUEST_TIMEOUT=20000
 # CONFIG_DRM_SII9234 is not set
 # CONFIG_DRM_SIL_SII8620 is not set
 # CONFIG_DRM_SIMPLE_BRIDGE is not set
-# CONFIG_DRM_SIMPLEDRM is not set
 # CONFIG_DRM_STI is not set
 # CONFIG_DRM_STM is not set
 # CONFIG_DRM_SUN4I is not set
@@ -1599,7 +1573,6 @@ DRM_I915_REQUEST_TIMEOUT=20000
 # CONFIG_DRM_TI_SN65DSI86 is not set
 # CONFIG_DRM_TI_TFP410 is not set
 # CONFIG_DRM_TI_TPD12S015 is not set
-# CONFIG_DRM_TI_SN65DSI83 is not set
 # CONFIG_DRM_TOSHIBA_TC358762 is not set
 # CONFIG_DRM_TOSHIBA_TC358764 is not set
 # CONFIG_DRM_TOSHIBA_TC358767 is not set
@@ -1614,7 +1587,6 @@ DRM_I915_REQUEST_TIMEOUT=20000
 # CONFIG_DRM_VKMS is not set
 # CONFIG_DRM_VMWGFX is not set
 # CONFIG_DRM_XEN is not set
-# CONFIG_DRM_XEN_FRONTEND is not set
 # CONFIG_DS1682 is not set
 # CONFIG_DS1803 is not set
 # CONFIG_DS4424 is not set
@@ -1690,7 +1662,6 @@ CONFIG_DVB_MAX_ADAPTERS=16
 # CONFIG_DVB_MT312 is not set
 # CONFIG_DVB_MT352 is not set
 # CONFIG_DVB_MXL5XX is not set
-# CONFIG_DVB_MXL692 is not set
 # CONFIG_DVB_NET is not set
 # CONFIG_DVB_NXT200X is not set
 # CONFIG_DVB_NXT6000 is not set
@@ -1758,6 +1729,7 @@ CONFIG_DVB_MAX_ADAPTERS=16
 # CONFIG_DWMAC_DWC_QOS_ETH is not set
 # CONFIG_DWMAC_INTEL_PLAT is not set
 # CONFIG_DWMAC_IPQ806X is not set
+# CONFIG_DWMAC_LOONGSON is not set
 # CONFIG_DWMAC_LPC18XX is not set
 # CONFIG_DWMAC_MESON is not set
 # CONFIG_DWMAC_ROCKCHIP is not set
@@ -1818,7 +1790,7 @@ CONFIG_EPOLL=y
 # CONFIG_ET131X is not set
 CONFIG_ETHERNET=y
 # CONFIG_ETHOC is not set
-# CONFIG_ETHTOOL_NETLINK is not set
+CONFIG_ETHTOOL_NETLINK=y
 CONFIG_EVENTFD=y
 # CONFIG_EVM is not set
 # CONFIG_EXFAT_FS is not set
@@ -1847,7 +1819,6 @@ CONFIG_EXT4_USE_FOR_EXT2=y
 # CONFIG_EXTCON_RT8973A is not set
 # CONFIG_EXTCON_SM5502 is not set
 # CONFIG_EXTCON_USB_GPIO is not set
-# CONFIG_EXTCON_USBC_TUSB320 is not set
 CONFIG_EXTRA_FIRMWARE=""
 CONFIG_EXTRA_TARGETS=""
 # CONFIG_EXYNOS_ADC is not set
@@ -2055,15 +2026,12 @@ CONFIG_FSNOTIFY=y
 # CONFIG_FUSION_SPI is not set
 CONFIG_FUTEX=y
 CONFIG_FUTEX_PI=y
-# CONFIG_FW_CACHE is not set
 # CONFIG_FW_CFG_SYSFS is not set
 CONFIG_FW_LOADER=y
 # CONFIG_FW_LOADER_COMPRESS is not set
 CONFIG_FW_LOADER_USER_HELPER=y
 CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y
 # CONFIG_FXAS21002C is not set
-# CONFIG_FXLS8962AF_I2C is not set
-# CONFIG_FXLS8962AF_SPI is not set
 # CONFIG_FXOS8700_I2C is not set
 # CONFIG_FXOS8700_SPI is not set
 CONFIG_GACT_PROB=y
@@ -2190,6 +2158,7 @@ CONFIG_HARDENED_USERCOPY=y
 # CONFIG_HARDENED_USERCOPY_PAGESPAN is not set
 CONFIG_HARDEN_EL2_VECTORS=y
 # CONFIG_HARDLOCKUP_DETECTOR is not set
+# CONFIG_HAVE_ARM_ARCH_TIMER is not set
 # CONFIG_HCALL_STATS is not set
 # CONFIG_HDC100X is not set
 # CONFIG_HDC2010 is not set
@@ -2209,6 +2178,7 @@ CONFIG_HARDEN_EL2_VECTORS=y
 # CONFIG_HFSPLUS_FS_POSIX_ACL is not set
 # CONFIG_HFS_FS is not set
 # CONFIG_HFS_FS_POSIX_ACL is not set
+# CONFIG_HI6421V600_IRQ is not set
 # CONFIG_HI8435 is not set
 # CONFIG_HIBERNATION is not set
 # CONFIG_HID is not set
@@ -2240,7 +2210,6 @@ CONFIG_HARDEN_EL2_VECTORS=y
 # CONFIG_HID_ELO is not set
 # CONFIG_HID_EMS_FF is not set
 # CONFIG_HID_EZKEY is not set
-# CONFIG_HID_FT260 is not set
 # CONFIG_HID_GEMBIRD is not set
 # CONFIG_HID_GENERIC is not set
 # CONFIG_HID_GFRM is not set
@@ -2279,7 +2248,6 @@ CONFIG_HARDEN_EL2_VECTORS=y
 # CONFIG_HID_PICOLCD is not set
 # CONFIG_HID_PID is not set
 # CONFIG_HID_PLANTRONICS is not set
-# CONFIG_HID_PLAYSTATION is not set
 # CONFIG_HID_PRIMAX is not set
 # CONFIG_HID_PRODIKEYS is not set
 # CONFIG_HID_REDRAGON is not set
@@ -2289,7 +2257,6 @@ CONFIG_HARDEN_EL2_VECTORS=y
 # CONFIG_HID_SAITEK is not set
 # CONFIG_HID_SAMSUNG is not set
 # CONFIG_HID_SENSOR_HUB is not set
-# CONFIG_HID_SEMITEK is not set
 # CONFIG_HID_SMARTJOYPLUS is not set
 # CONFIG_HID_SONY is not set
 # CONFIG_HID_SPEEDLINK is not set
@@ -2430,9 +2397,6 @@ CONFIG_HZ_100=y
 # CONFIG_I2C_GPIO_FAULT_INJECTOR is not set
 # CONFIG_I2C_HELPER_AUTO is not set
 # CONFIG_I2C_HID is not set
-# CONFIG_I2C_HID_ACPI is not set
-# CONFIG_I2C_HID_OF is not set
-# CONFIG_I2C_HID_OF_GOODIX is not set
 # CONFIG_I2C_I801 is not set
 # CONFIG_I2C_IBM_IIC is not set
 # CONFIG_I2C_IMG is not set
@@ -2484,6 +2448,7 @@ CONFIG_HZ_100=y
 # CONFIG_I2C_VERSATILE is not set
 # CONFIG_I2C_VIA is not set
 # CONFIG_I2C_VIAPRO is not set
+# CONFIG_I2C_VIRTIO is not set
 # CONFIG_I2C_XILINX is not set
 # CONFIG_I3C is not set
 # CONFIG_I40E is not set
@@ -2505,7 +2470,7 @@ CONFIG_HZ_100=y
 # CONFIG_ICP10100 is not set
 # CONFIG_ICPLUS_PHY is not set
 # CONFIG_ICS932S401 is not set
-♯ CONFIG_ICST is not set
+# CONFIG_ICST is not set
 # CONFIG_IDE is not set
 # CONFIG_IDEAPAD_LAPTOP is not set
 # CONFIG_IDE_GD is not set
@@ -2541,7 +2506,6 @@ CONFIG_IIO_CONSUMERS_PER_TRIGGER=2
 # CONFIG_IIO_ST_ACCEL_3AXIS is not set
 # CONFIG_IIO_ST_GYRO_3AXIS is not set
 # CONFIG_IIO_ST_LSM6DSX is not set
-# CONFIG_IIO_ST_LSM9DS0 is not set
 # CONFIG_IIO_ST_MAGN_3AXIS is not set
 # CONFIG_IIO_ST_PRESS is not set
 # CONFIG_IIO_SW_DEVICE is not set
@@ -2615,7 +2579,6 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_INPUT_BMA150 is not set
 # CONFIG_INPUT_CM109 is not set
 # CONFIG_INPUT_CMA3000 is not set
-# CONFIG_INPUT_DA7280_HAPTICS is not set
 # CONFIG_INPUT_DRV260X_HAPTICS is not set
 # CONFIG_INPUT_DRV2665_HAPTICS is not set
 # CONFIG_INPUT_DRV2667_HAPTICS is not set
@@ -2632,7 +2595,6 @@ CONFIG_INOTIFY_USER=y
 # CONFIG_INPUT_IDEAPAD_SLIDEBAR is not set
 # CONFIG_INPUT_IMS_PCU is not set
 # CONFIG_INPUT_IQS269A is not set
-# CONFIG_INPUT_IQS626A is not set
 # CONFIG_INPUT_JOYDEV is not set
 # CONFIG_INPUT_JOYSTICK is not set
 # CONFIG_INPUT_KEYBOARD is not set
@@ -2738,6 +2700,7 @@ CONFIG_IO_WQ=y
 # CONFIG_IPV6_FOU is not set
 # CONFIG_IPV6_FOU_TUNNEL is not set
 # CONFIG_IPV6_ILA is not set
+# CONFIG_IPV6_IOAM6_LWTUNNEL is not set
 # CONFIG_IPV6_MIP6 is not set
 # CONFIG_IPV6_MROUTE is not set
 # CONFIG_IPV6_MROUTE_MULTIPLE_TABLES is not set
@@ -2804,7 +2767,6 @@ CONFIG_IP_ROUTE_VERBOSE=y
 # CONFIG_IP_VS is not set
 # CONFIG_IP_VS_MH is not set
 CONFIG_IP_VS_MH_TAB_INDEX=10
-# CONFIG_IP_VS_TWOS is not set
 # CONFIG_IRDA is not set
 # CONFIG_IRQSOFF_TRACER is not set
 # CONFIG_IRQ_ALL_CPUS is not set
@@ -2952,6 +2914,7 @@ CONFIG_KERNFS=y
 # CONFIG_KGDB is not set
 # CONFIG_KMEMCHECK is not set
 # CONFIG_KMX61 is not set
+# CONFIG_KPC2000 is not set
 # CONFIG_KPROBES is not set
 # CONFIG_KPROBES_SANITY_TEST is not set
 # CONFIG_KPROBE_EVENTS_ON_NOTRACE is not set
@@ -2969,7 +2932,6 @@ CONFIG_KUSER_HELPERS=y
 # CONFIG_KVM_GUEST is not set
 # CONFIG_KVM_INTEL is not set
 # CONFIG_KVM_WERROR is not set
-# CONFIG_KVM_XEN is not set
 # CONFIG_KXCJK1013 is not set
 # CONFIG_KXSD9 is not set
 # CONFIG_L2TP is not set
@@ -3090,6 +3052,7 @@ CONFIG_LINEAR_RANGES=y
 # CONFIG_LIQUIDIO is not set
 # CONFIG_LIQUIDIO_VF is not set
 # CONFIG_LIS3L02DQ is not set
+# CONFIG_LITEX_LITEETH is not set
 # CONFIG_LITEX_SOC_CONTROLLER is not set
 # CONFIG_LKDTM is not set
 CONFIG_LLC=y
@@ -3177,8 +3140,8 @@ CONFIG_MAGIC_SYSRQ_SERIAL_SEQUENCE=""
 # CONFIG_MANAGER_SBS is not set
 # CONFIG_MANDATORY_FILE_LOCKING is not set
 # CONFIG_MANGLE_BOOTARGS is not set
-# CONFIG_MARVELL_88X2222_PHY is not set
 # CONFIG_MARVELL_10G_PHY is not set
+# CONFIG_MARVELL_88X2222_PHY is not set
 # CONFIG_MARVELL_PHY is not set
 # CONFIG_MAX1027 is not set
 # CONFIG_MAX11100 is not set
@@ -3198,6 +3161,7 @@ CONFIG_MAGIC_SYSRQ_SERIAL_SEQUENCE=""
 # CONFIG_MAX63XX_WATCHDOG is not set
 # CONFIG_MAX9611 is not set
 # CONFIG_MAXIM_THERMOCOUPLE is not set
+# CONFIG_MAXLINEAR_GPHY is not set
 CONFIG_MAY_USE_DEVLINK=y
 # CONFIG_MB1232 is not set
 # CONFIG_MC3230 is not set
@@ -3212,6 +3176,7 @@ CONFIG_MAY_USE_DEVLINK=y
 # CONFIG_MCP4725 is not set
 # CONFIG_MCP4922 is not set
 # CONFIG_MCPM is not set
+# CONFIG_MCTP is not set
 # CONFIG_MD is not set
 # CONFIG_MDIO_BCM_UNIMAC is not set
 # CONFIG_MDIO_BITBANG is not set
@@ -3228,6 +3193,7 @@ CONFIG_MAY_USE_DEVLINK=y
 # CONFIG_MDIO_OCTEON is not set
 # CONFIG_MDIO_THUNDER is not set
 # CONFIG_MDIO_XPCS is not set
+# CONFIG_MDM_GCC_9607 is not set
 # CONFIG_MD_FAULTY is not set
 # CONFIG_MEDIATEK_GE_PHY is not set
 # CONFIG_MEDIA_ANALOG_TV_SUPPORT is not set
@@ -3328,8 +3294,8 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
 # CONFIG_MFD_EXYNOS_LPASS is not set
 # CONFIG_MFD_GATEWORKS_GSC is not set
 # CONFIG_MFD_HI6421_PMIC is not set
-# CONFIG_MFD_INTEL_PMT is not set
 # CONFIG_MFD_INTEL_M10_BMC is not set
+# CONFIG_MFD_INTEL_PMT is not set
 # CONFIG_MFD_INTEL_QUARK_I2C_GPIO is not set
 # CONFIG_MFD_IQS62X is not set
 # CONFIG_MFD_JANZ_CMODIO is not set
@@ -3372,6 +3338,8 @@ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
 # CONFIG_MFD_ROHM_BD71828 is not set
 # CONFIG_MFD_ROHM_BD718XX is not set
 # CONFIG_MFD_ROHM_BD957XMUF is not set
+# CONFIG_MFD_RSMU_I2C is not set
+# CONFIG_MFD_RSMU_SPI is not set
 # CONFIG_MFD_RT4831 is not set
 # CONFIG_MFD_RT5033 is not set
 # CONFIG_MFD_RTSX_PCI is not set
@@ -3461,7 +3429,6 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_MLX4_CORE is not set
 # CONFIG_MLX4_EN is not set
 # CONFIG_MLX5_CORE is not set
-# CONFIG_MLX5_SF is not set
 # CONFIG_MLX90614 is not set
 # CONFIG_MLX90632 is not set
 # CONFIG_MLXFW is not set
@@ -3525,20 +3492,20 @@ CONFIG_MMC_BLOCK_MINORS=8
 CONFIG_MMU=y
 CONFIG_MMU_GATHER_RCU_TABLE_FREE=y
 CONFIG_MMU_GATHER_TABLE_FREE=y
+CONFIG_MODPROBE_PATH="/sbin/modprobe"
 CONFIG_MODULES=y
 # CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS is not set
 # CONFIG_MODULE_COMPRESS is not set
 # CONFIG_MODULE_COMPRESS_GZIP is not set
+CONFIG_MODULE_COMPRESS_NONE=y
 # CONFIG_MODULE_COMPRESS_XZ is not set
 # CONFIG_MODULE_COMPRESS_ZSTD is not set
-CONFIG_MODULE_COMPRESS_NONE=y
 # CONFIG_MODULE_FORCE_LOAD is not set
 # CONFIG_MODULE_FORCE_UNLOAD is not set
 # CONFIG_MODULE_SIG is not set
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_MODULE_STRIPPED=y
 CONFIG_MODULE_UNLOAD=y
-CONFIG_MODPROBE_PATH="/sbin/modprobe"
 # CONFIG_MODVERSIONS is not set
 # CONFIG_MOST is not set
 # CONFIG_MOTORCOMM_PHY is not set
@@ -3570,6 +3537,8 @@ CONFIG_MPTCP_IPV6=y
 CONFIG_MSDOS_PARTITION=y
 # CONFIG_MSI_BITMAP_SELFTEST is not set
 # CONFIG_MSI_LAPTOP is not set
+# CONFIG_MSM_GCC_8953 is not set
+# CONFIG_MSM_MMCC_8994 is not set
 # CONFIG_MST_IRQ is not set
 CONFIG_MTD=y
 # CONFIG_MTD_ABSENT is not set
@@ -3612,6 +3581,7 @@ CONFIG_MTD_MAP_BANK_WIDTH_2=y
 CONFIG_MTD_MAP_BANK_WIDTH_4=y
 # CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
 # CONFIG_MTD_MCHP23K256 is not set
+# CONFIG_MTD_MCHP48L640 is not set
 # CONFIG_MTD_MT81xx_NOR is not set
 # CONFIG_MTD_MTDRAM is not set
 # CONFIG_MTD_MYLOADER_PARTS is not set
@@ -3649,6 +3619,7 @@ CONFIG_MTD_NAND_DENALI_SCRATCH_REG_ADDR=0xff108018
 # CONFIG_MTD_NAND_GPMI_NAND is not set
 # CONFIG_MTD_NAND_HISI504 is not set
 CONFIG_MTD_NAND_IDS=y
+# CONFIG_MTD_NAND_INTEL_LGM is not set
 # CONFIG_MTD_NAND_JZ4740 is not set
 # CONFIG_MTD_NAND_MPC5121_NFC is not set
 # CONFIG_MTD_NAND_MTK is not set
@@ -3665,6 +3636,7 @@ CONFIG_MTD_NAND_IDS=y
 # CONFIG_MTD_NAND_PXA3xx is not set
 # CONFIG_MTD_NAND_RB4XX is not set
 # CONFIG_MTD_NAND_RB750 is not set
+# CONFIG_MTD_NAND_RB91X is not set
 # CONFIG_MTD_NAND_RICOH is not set
 # CONFIG_MTD_NAND_S3C2410 is not set
 # CONFIG_MTD_NAND_SHARPSL is not set
@@ -3705,6 +3677,9 @@ CONFIG_MTD_ROOTFS_ROOT_DEV=y
 # CONFIG_MTD_SPINAND_MT29F is not set
 # CONFIG_MTD_SPI_NAND is not set
 # CONFIG_MTD_SPI_NOR is not set
+# CONFIG_MTD_SPI_NOR_SWP_DISABLE is not set
+CONFIG_MTD_SPI_NOR_SWP_DISABLE_ON_VOLATILE=y
+# CONFIG_MTD_SPI_NOR_SWP_KEEP is not set
 # CONFIG_MTD_SPI_NOR_USE_4K_SECTORS is not set
 CONFIG_MTD_SPI_NOR_USE_4K_SECTORS_LIMIT=4096
 CONFIG_MTD_SPLIT=y
@@ -3769,7 +3744,6 @@ CONFIG_NETDEVICES=y
 # CONFIG_NETFILTER_NETLINK is not set
 # CONFIG_NETFILTER_NETLINK_ACCT is not set
 # CONFIG_NETFILTER_NETLINK_GLUE_CT is not set
-# CONFIG_NETFILTER_NETLINK_HOOK is not set
 # CONFIG_NETFILTER_NETLINK_LOG is not set
 # CONFIG_NETFILTER_NETLINK_OSF is not set
 # CONFIG_NETFILTER_NETLINK_QUEUE is not set
@@ -3845,7 +3819,6 @@ CONFIG_NETDEVICES=y
 # CONFIG_NETFILTER_XT_TARGET_TEE is not set
 # CONFIG_NETFILTER_XT_TARGET_TPROXY is not set
 # CONFIG_NETFILTER_XT_TARGET_TRACE is not set
-# CONFIG_NETFS_STATS is not set
 # CONFIG_NETLABEL is not set
 # CONFIG_NETLINK_DIAG is not set
 # CONFIG_NETLINK_MMAP is not set
@@ -3919,21 +3892,27 @@ CONFIG_NET_CORE=y
 # CONFIG_NET_DSA_TAG_8021Q is not set
 # CONFIG_NET_DSA_TAG_AR9331 is not set
 # CONFIG_NET_DSA_TAG_BRCM is not set
+# CONFIG_NET_DSA_TAG_BRCM_LEGACY is not set
 # CONFIG_NET_DSA_TAG_BRCM_PREPEND is not set
 # CONFIG_NET_DSA_TAG_DSA is not set
 # CONFIG_NET_DSA_TAG_EDSA is not set
 # CONFIG_NET_DSA_TAG_GSWIP is not set
+# CONFIG_NET_DSA_TAG_HELLCREEK is not set
 # CONFIG_NET_DSA_TAG_KSZ is not set
 # CONFIG_NET_DSA_TAG_LAN9303 is not set
 # CONFIG_NET_DSA_TAG_MTK is not set
 # CONFIG_NET_DSA_TAG_OCELOT is not set
+# CONFIG_NET_DSA_TAG_OCELOT_8021Q is not set
 # CONFIG_NET_DSA_TAG_QCA is not set
 # CONFIG_NET_DSA_TAG_RTL4_A is not set
 # CONFIG_NET_DSA_TAG_SJA1105 is not set
 # CONFIG_NET_DSA_TAG_TRAILER is not set
+# CONFIG_NET_DSA_TAG_XRS700X is not set
 # CONFIG_NET_DSA_VITESSE_VSC73XX is not set
 # CONFIG_NET_DSA_VITESSE_VSC73XX_PLATFORM is not set
 # CONFIG_NET_DSA_VITESSE_VSC73XX_SPI is not set
+# CONFIG_NET_DSA_XRS700X_I2C is not set
+# CONFIG_NET_DSA_XRS700X_MDIO is not set
 # CONFIG_NET_EMATCH is not set
 # CONFIG_NET_EMATCH_CANID is not set
 # CONFIG_NET_EMATCH_CMP is not set
@@ -4044,12 +4023,13 @@ CONFIG_NET_VENDOR_HUAWEI=y
 CONFIG_NET_VENDOR_I825XX=y
 CONFIG_NET_VENDOR_IBM=y
 CONFIG_NET_VENDOR_INTEL=y
+# CONFIG_NET_VENDOR_LITEX is not set
 CONFIG_NET_VENDOR_MARVELL=y
 CONFIG_NET_VENDOR_MELLANOX=y
 CONFIG_NET_VENDOR_MICREL=y
 CONFIG_NET_VENDOR_MICROCHIP=y
 CONFIG_NET_VENDOR_MICROSEMI=y
-CONFIG_NET_VENDOR_MICROSOFT=y
+# CONFIG_NET_VENDOR_MICROSOFT is not set
 CONFIG_NET_VENDOR_MYRI=y
 CONFIG_NET_VENDOR_NATSEMI=y
 CONFIG_NET_VENDOR_NETERION=y
@@ -4115,7 +4095,6 @@ CONFIG_NFS_V3=y
 # CONFIG_NFT_FLOW_OFFLOAD is not set
 # CONFIG_NFT_OBJREF is not set
 # CONFIG_NFT_OSF is not set
-# CONFIG_NFT_REJECT_NETDEV is not set
 # CONFIG_NFT_RT is not set
 # CONFIG_NFT_SET_BITMAP is not set
 # CONFIG_NFT_SOCKET is not set
@@ -4158,7 +4137,6 @@ CONFIG_NF_CONNTRACK_PROCFS=y
 # CONFIG_NF_LOG_ARP is not set
 # CONFIG_NF_LOG_BRIDGE is not set
 # CONFIG_NF_LOG_IPV4 is not set
-# CONFIG_NF_LOG_IPV6 is not set
 # CONFIG_NF_LOG_NETDEV is not set
 # CONFIG_NF_LOG_SYSLOG is not set
 # CONFIG_NF_NAT is not set
@@ -4263,13 +4241,13 @@ CONFIG_NMI_LOG_BUF_SHIFT=13
 # CONFIG_NO_HZ_IDLE is not set
 # CONFIG_NS83820 is not set
 # CONFIG_NTB is not set
+# CONFIG_NTFS3_FS is not set
 # CONFIG_NTFS_DEBUG is not set
 # CONFIG_NTFS_FS is not set
 # CONFIG_NTFS_RW is not set
 # CONFIG_NTP_PPS is not set
 # CONFIG_NULL_TTY is not set
 # CONFIG_NUMA is not set
-# CONFIG_NVIDIA_CARMEL_CNP_ERRATUM is not set
 # CONFIG_NVM is not set
 # CONFIG_NVMEM is not set
 # CONFIG_NVMEM_BCM_OCOTP is not set
@@ -4428,6 +4406,7 @@ CONFIG_PCIE_BUS_DEFAULT=y
 # CONFIG_PCIE_IPROC is not set
 # CONFIG_PCIE_KIRIN is not set
 # CONFIG_PCIE_LAYERSCAPE_GEN4 is not set
+# CONFIG_PCIE_MICROCHIP_HOST is not set
 # CONFIG_PCIE_PTM is not set
 # CONFIG_PCIE_XILINX is not set
 # CONFIG_PCIPCWATCHDOG is not set
@@ -4447,7 +4426,6 @@ CONFIG_PCIE_BUS_DEFAULT=y
 # CONFIG_PCI_J721E_HOST is not set
 # CONFIG_PCI_LAYERSCAPE is not set
 # CONFIG_PCI_MESON is not set
-# CONFIG_PCIE_MICROCHIP_HOST is not set
 # CONFIG_PCI_MSI is not set
 # CONFIG_PCI_PASID is not set
 # CONFIG_PCI_PF_STUB is not set
@@ -4502,6 +4480,7 @@ CONFIG_PCI_SYSCALL=y
 # CONFIG_PHY_CADENCE_SALVO is not set
 # CONFIG_PHY_CADENCE_SIERRA is not set
 # CONFIG_PHY_CADENCE_TORRENT is not set
+# CONFIG_PHY_CAN_TRANSCEIVER is not set
 # CONFIG_PHY_CPCAP_USB is not set
 # CONFIG_PHY_EXYNOS_DP_VIDEO is not set
 # CONFIG_PHY_EXYNOS_MIPI_VIDEO is not set
@@ -4524,25 +4503,31 @@ CONFIG_PCI_SYSCALL=y
 # CONFIG_PID_NS is not set
 CONFIG_PINCONF=y
 # CONFIG_PINCTRL is not set
-# CONFIG_PINCTRL_ALDERLAKE is not set
 # CONFIG_PINCTRL_AMD is not set
 # CONFIG_PINCTRL_AXP209 is not set
 # CONFIG_PINCTRL_CEDARFORK is not set
-# CONFIG_PINCTRL_ELKHARTLAKE is not set
 # CONFIG_PINCTRL_EXYNOS is not set
 # CONFIG_PINCTRL_EXYNOS5440 is not set
 # CONFIG_PINCTRL_ICELAKE is not set
 # CONFIG_PINCTRL_INGENIC is not set
-# CONFIG_PINCTRL_LAKEFIELD is not set
+# CONFIG_PINCTRL_LPASS_LPI is not set
 # CONFIG_PINCTRL_MCP23S08 is not set
+# CONFIG_PINCTRL_MDM9607 is not set
 # CONFIG_PINCTRL_MICROCHIP_SGPIO is not set
+# CONFIG_PINCTRL_MSM8953 is not set
 # CONFIG_PINCTRL_MSM8X74 is not set
 # CONFIG_PINCTRL_MT6779 is not set
 # CONFIG_PINCTRL_MT8167 is not set
 # CONFIG_PINCTRL_MT8192 is not set
 # CONFIG_PINCTRL_MTK_V2 is not set
 # CONFIG_PINCTRL_OCELOT is not set
+# CONFIG_PINCTRL_SC7280 is not set
+# CONFIG_PINCTRL_SC8180X is not set
+# CONFIG_PINCTRL_SDX55 is not set
 CONFIG_PINCTRL_SINGLE=y
+# CONFIG_PINCTRL_SM6115 is not set
+# CONFIG_PINCTRL_SM6125 is not set
+# CONFIG_PINCTRL_SM8350 is not set
 # CONFIG_PINCTRL_STMFX is not set
 # CONFIG_PINCTRL_SX150X is not set
 # CONFIG_PING is not set
@@ -4627,6 +4612,7 @@ CONFIG_PREVENT_FIRMWARE_BUILD=y
 # CONFIG_PRIME_NUMBERS is not set
 CONFIG_PRINTK=y
 # CONFIG_PRINTK_CALLER is not set
+# CONFIG_PRINTK_INDEX is not set
 CONFIG_PRINTK_NMI=y
 CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13
 # CONFIG_PRINTK_TIME is not set
@@ -4664,25 +4650,26 @@ CONFIG_PROC_SYSCTL=y
 # CONFIG_PUBLIC_KEY_ALGO_RSA is not set
 # CONFIG_PVPANIC is not set
 # CONFIG_PWM is not set
-# CONFIG_PWM_ATMEL_TCB is not set
 # CONFIG_PWM_DEBUG is not set
-# CONFIG_PWM_DWC is not set
 # CONFIG_PWM_FSL_FTM is not set
 # CONFIG_PWM_PCA9685 is not set
-# CONFIG_PWM_RASPBERRYPI_POE is not set
 CONFIG_PWRSEQ_EMMC=y
 # CONFIG_PWRSEQ_SD8787 is not set
 CONFIG_PWRSEQ_SIMPLE=y
 # CONFIG_QCA7000 is not set
 # CONFIG_QCA7000_SPI is not set
 # CONFIG_QCA7000_UART is not set
+# CONFIG_QCOM_A7PLL is not set
 # CONFIG_QCOM_EMAC is not set
 # CONFIG_QCOM_FALKOR_ERRATUM_1003 is not set
 # CONFIG_QCOM_FALKOR_ERRATUM_1009 is not set
 # CONFIG_QCOM_FALKOR_ERRATUM_E1041 is not set
+# CONFIG_QCOM_GPI_DMA is not set
 # CONFIG_QCOM_HIDMA is not set
 # CONFIG_QCOM_HIDMA_MGMT is not set
+# CONFIG_QCOM_LMH is not set
 # CONFIG_QCOM_QDF2400_ERRATUM_0065 is not set
+# CONFIG_QCOM_QMI_HELPERS is not set
 # CONFIG_QCOM_SPMI_ADC5 is not set
 # CONFIG_QCOM_SPMI_IADC is not set
 # CONFIG_QCOM_SPMI_TEMP_ALARM is not set
@@ -4726,7 +4713,6 @@ CONFIG_PWRSEQ_SIMPLE=y
 # CONFIG_RALINK is not set
 # CONFIG_RANDOM32_SELFTEST is not set
 # CONFIG_RANDOMIZE_BASE is not set
-# CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT is not set
 # CONFIG_RANDOM_TRUST_BOOTLOADER is not set
 # CONFIG_RANDOM_TRUST_CPU is not set
 # CONFIG_RAPIDIO is not set
@@ -4737,15 +4723,13 @@ CONFIG_PWRSEQ_SIMPLE=y
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
 # CONFIG_RCU_EQS_DEBUG is not set
 # CONFIG_RCU_EXPEDITE_BOOT is not set
-CONFIG_RCU_EXPERT=y
-CONFIG_RCU_FANOUT=32
-CONFIG_RCU_FANOUT_LEAF=16
-# CONFIG_RCU_FAST_NO_HZ is not set
+# CONFIG_RCU_EXPERT is not set
 CONFIG_RCU_KTHREAD_PRIO=0
-# CONFIG_RCU_NOCB_CPU is not set
+CONFIG_RCU_NEED_SEGCBLIST=y
 # CONFIG_RCU_PERF_TEST is not set
 # CONFIG_RCU_REF_SCALE_TEST is not set
 # CONFIG_RCU_SCALE_TEST is not set
+CONFIG_RCU_STALL_COMMON=y
 # CONFIG_RCU_STRICT_GRACE_PERIOD is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY=3
@@ -4807,6 +4791,7 @@ CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY=3
 # CONFIG_REGULATOR_MP886X is not set
 # CONFIG_REGULATOR_MPQ7920 is not set
 # CONFIG_REGULATOR_MT6311 is not set
+# CONFIG_REGULATOR_MT6315 is not set
 # CONFIG_REGULATOR_PCA9450 is not set
 # CONFIG_REGULATOR_PF8X00 is not set
 # CONFIG_REGULATOR_PFUZE100 is not set
@@ -4819,6 +4804,8 @@ CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY=3
 # CONFIG_REGULATOR_RT6160 is not set
 # CONFIG_REGULATOR_RT6245 is not set
 # CONFIG_REGULATOR_RTMV20 is not set
+# CONFIG_REGULATOR_RTQ2134 is not set
+# CONFIG_REGULATOR_RTQ6752 is not set
 # CONFIG_REGULATOR_SLG51000 is not set
 # CONFIG_REGULATOR_SY8106A is not set
 # CONFIG_REGULATOR_SY8824X is not set
@@ -4853,7 +4840,6 @@ CONFIG_REISERFS_FS_XATTR=y
 # CONFIG_RESET_LPC18XX is not set
 # CONFIG_RESET_MESON is not set
 # CONFIG_RESET_PISTACHIO is not set
-# CONFIG_RESET_RASPBERRYPI is not set
 # CONFIG_RESET_SOCFPGA is not set
 # CONFIG_RESET_STM32 is not set
 # CONFIG_RESET_SUNXI is not set
@@ -4877,7 +4863,6 @@ CONFIG_RFKILL=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_ROSE is not set
 # CONFIG_RPCSEC_GSS_KRB5 is not set
-# CONFIG_RPI_POE_POWER is not set
 # CONFIG_RPMSG_QCOM_GLINK_RPM is not set
 # CONFIG_RPMSG_VIRTIO is not set
 # CONFIG_RPR0521 is not set
@@ -4917,6 +4902,7 @@ CONFIG_RTC_DRV_CMOS=y
 # CONFIG_RTC_DRV_FM3130 is not set
 # CONFIG_RTC_DRV_FTRTC010 is not set
 # CONFIG_RTC_DRV_GENERIC is not set
+# CONFIG_RTC_DRV_GOLDFISH is not set
 # CONFIG_RTC_DRV_HID_SENSOR_TIME is not set
 # CONFIG_RTC_DRV_HYM8563 is not set
 # CONFIG_RTC_DRV_ISL12022 is not set
@@ -5034,14 +5020,12 @@ CONFIG_SATA_MOBILE_LPM_POLICY=0
 CONFIG_SBITMAP=y
 # CONFIG_SC92031 is not set
 # CONFIG_SCA3000 is not set
-# CONFIG_SCA3300 is not set
 # CONFIG_SCACHE_DEBUGFS is not set
 # CONFIG_SCC is not set
 # CONFIG_SCD30_CORE is not set
 # CONFIG_SCF_TORTURE_TEST is not set
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_SCHED_AUTOGROUP is not set
-# CONFIG_SCHED_CORE is not set
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_SCHED_HRTICK=y
 # CONFIG_SCHED_MC is not set
@@ -5139,10 +5123,19 @@ CONFIG_SCSI_PROC_FS=y
 # CONFIG_SCSI_ULTRASTOR is not set
 # CONFIG_SCSI_VIRTIO is not set
 # CONFIG_SCSI_WD719X is not set
+# CONFIG_SC_CAMCC_7180 is not set
+# CONFIG_SC_DISPCC_7280 is not set
+# CONFIG_SC_GCC_7280 is not set
+# CONFIG_SC_GCC_8180X is not set
+# CONFIG_SC_GPUCC_7280 is not set
+# CONFIG_SC_VIDEOCC_7280 is not set
 # CONFIG_SCx200_ACB is not set
 # CONFIG_SDIO_UART is not set
+# CONFIG_SDM_GPUCC_660 is not set
+# CONFIG_SDM_MMCC_660 is not set
 # CONFIG_SDR_MAX2175 is not set
 # CONFIG_SDR_PLATFORM_DRIVERS is not set
+# CONFIG_SDX_GCC_55 is not set
 # CONFIG_SD_ADC_MODULATOR is not set
 # CONFIG_SECCOMP is not set
 # CONFIG_SECCOMP_CACHE_DEBUG is not set
@@ -5153,6 +5146,7 @@ CONFIG_SECTION_MISMATCH_WARN_ONLY=y
 CONFIG_SECURITY_DMESG_RESTRICT=y
 # CONFIG_SECURITY_LOADPIN is not set
 # CONFIG_SECURITY_LOCKDOWN_LSM is not set
+# CONFIG_SECURITY_NETWORK_XFRM is not set
 # CONFIG_SECURITY_PATH is not set
 # CONFIG_SECURITY_SAFESETID is not set
 # CONFIG_SECURITY_SELINUX_AVC_STATS is not set
@@ -5205,13 +5199,10 @@ CONFIG_SELECT_MEMORY_MODEL=y
 # CONFIG_SENSORS_BEL_PFE is not set
 # CONFIG_SENSORS_BH1770 is not set
 # CONFIG_SENSORS_BH1780 is not set
-# CONFIG_SENSORS_BPA_RS600 is not set
 # CONFIG_SENSORS_CORETEMP is not set
 # CONFIG_SENSORS_CORSAIR_CPRO is not set
-# CONFIG_SENSORS_CORSAIR_PSU is not set
 # CONFIG_SENSORS_DELL_SMM is not set
 # CONFIG_SENSORS_DME1737 is not set
-# CONFIG_SENSORS_DPS920AB is not set
 # CONFIG_SENSORS_DRIVETEMP is not set
 # CONFIG_SENSORS_DS1621 is not set
 # CONFIG_SENSORS_DS620 is not set
@@ -5223,7 +5214,6 @@ CONFIG_SELECT_MEMORY_MODEL=y
 # CONFIG_SENSORS_F75375S is not set
 # CONFIG_SENSORS_FAM15H_POWER is not set
 # CONFIG_SENSORS_FSCHMD is not set
-# CONFIG_SENSORS_FSP_3Y is not set
 # CONFIG_SENSORS_FTSTEUTATES is not set
 # CONFIG_SENSORS_G760A is not set
 # CONFIG_SENSORS_G762 is not set
@@ -5246,7 +5236,6 @@ CONFIG_SELECT_MEMORY_MODEL=y
 # CONFIG_SENSORS_INA3221 is not set
 # CONFIG_SENSORS_INSPUR_IPSPS is not set
 # CONFIG_SENSORS_IR35221 is not set
-# CONFIG_SENSORS_IR36021 is not set
 # CONFIG_SENSORS_IR38064 is not set
 # CONFIG_SENSORS_IRPS5401 is not set
 # CONFIG_SENSORS_ISL29018 is not set
@@ -5293,7 +5282,6 @@ CONFIG_SELECT_MEMORY_MODEL=y
 # CONFIG_SENSORS_LTQ_CPUTEMP is not set
 # CONFIG_SENSORS_MAX1111 is not set
 # CONFIG_SENSORS_MAX127 is not set
-# CONFIG_SENSORS_MAX15301 is not set
 # CONFIG_SENSORS_MAX16064 is not set
 # CONFIG_SENSORS_MAX16065 is not set
 # CONFIG_SENSORS_MAX1619 is not set
@@ -5314,7 +5302,6 @@ CONFIG_SELECT_MEMORY_MODEL=y
 # CONFIG_SENSORS_MAX6697 is not set
 # CONFIG_SENSORS_MAX8688 is not set
 # CONFIG_SENSORS_MCP3021 is not set
-# CONFIG_SENSORS_MP2888 is not set
 # CONFIG_SENSORS_MP2975 is not set
 # CONFIG_SENSORS_MR75203 is not set
 # CONFIG_SENSORS_NCT6683 is not set
@@ -5323,21 +5310,18 @@ CONFIG_SELECT_MEMORY_MODEL=y
 # CONFIG_SENSORS_NCT7904 is not set
 # CONFIG_SENSORS_NPCM7XX is not set
 # CONFIG_SENSORS_NSA320 is not set
-# CONFIG_SENSORS_NZXT_KRAKEN2 is not set
 # CONFIG_SENSORS_NTC_THERMISTOR is not set
 # CONFIG_SENSORS_OCC_P8_I2C is not set
 # CONFIG_SENSORS_PC87360 is not set
 # CONFIG_SENSORS_PC87427 is not set
 # CONFIG_SENSORS_PCF8591 is not set
-# CONFIG_SENSORS_PIM4328 is not set
-# CONFIG_SENSORS_PM6764TR is not set
 # CONFIG_SENSORS_PMBUS is not set
 # CONFIG_SENSORS_POWR1220 is not set
 # CONFIG_SENSORS_PWM_FAN is not set
 # CONFIG_SENSORS_PXE1610 is not set
-# CONFIG_SENSORS_Q54SJ108A2 is not set
 # CONFIG_SENSORS_RM3100_I2C is not set
 # CONFIG_SENSORS_RM3100_SPI is not set
+# CONFIG_SENSORS_SBRMI is not set
 # CONFIG_SENSORS_SBTSI is not set
 # CONFIG_SENSORS_SCH5627 is not set
 # CONFIG_SENSORS_SCH5636 is not set
@@ -5352,7 +5336,6 @@ CONFIG_SELECT_MEMORY_MODEL=y
 # CONFIG_SENSORS_SMSC47B397 is not set
 # CONFIG_SENSORS_SMSC47M1 is not set
 # CONFIG_SENSORS_SMSC47M192 is not set
-# CONFIG_SENSORS_STPDDC60 is not set
 # CONFIG_SENSORS_STTS751 is not set
 # CONFIG_SENSORS_TC654 is not set
 # CONFIG_SENSORS_TC74 is not set
@@ -5526,6 +5509,7 @@ CONFIG_SLUB_CPU_PARTIAL=y
 # CONFIG_SLUB_MEMCG_SYSFS_ON is not set
 # CONFIG_SLUB_STATS is not set
 # CONFIG_SMARTJOYPLUS_FF is not set
+# CONFIG_SMB_SERVER is not set
 # CONFIG_SMC911X is not set
 # CONFIG_SMC9194 is not set
 # CONFIG_SMC91X is not set
@@ -5535,7 +5519,12 @@ CONFIG_SLUB_CPU_PARTIAL=y
 # CONFIG_SMSC_PHY is not set
 # CONFIG_SMS_SDIO_DRV is not set
 # CONFIG_SMS_USB_DRV is not set
+# CONFIG_SM_CAMCC_8250 is not set
 # CONFIG_SM_FTL is not set
+# CONFIG_SM_GCC_6115 is not set
+# CONFIG_SM_GCC_6125 is not set
+# CONFIG_SM_GCC_6350 is not set
+# CONFIG_SM_GCC_8350 is not set
 # CONFIG_SND is not set
 # CONFIG_SND_AC97_POWER_SAVE is not set
 # CONFIG_SND_AD1816A is not set
@@ -5682,8 +5671,6 @@ CONFIG_SND_PROC_FS=y
 # CONFIG_SND_SIS7019 is not set
 # CONFIG_SND_SOC is not set
 # CONFIG_SND_SOC_AC97_CODEC is not set
-# CONFIG_SND_SOC_ADAU1372_I2C is not set
-# CONFIG_SND_SOC_ADAU1372_SPI is not set
 # CONFIG_SND_SOC_ADAU1701 is not set
 # CONFIG_SND_SOC_ADAU1761_I2C is not set
 # CONFIG_SND_SOC_ADAU1761_SPI is not set
@@ -5746,7 +5733,6 @@ CONFIG_SND_PROC_FS=y
 # CONFIG_SND_SOC_FSL_SAI is not set
 # CONFIG_SND_SOC_FSL_SPDIF is not set
 # CONFIG_SND_SOC_FSL_SSI is not set
-# CONFIG_SND_SOC_FSL_XCVR is not set
 # CONFIG_SND_SOC_GTM601 is not set
 # CONFIG_SND_SOC_ICS43432 is not set
 # CONFIG_SND_SOC_IMG is not set
@@ -5794,10 +5780,6 @@ CONFIG_SND_SOC_INTEL_SST_TOPLEVEL=y
 # CONFIG_SND_SOC_JZ4725B_CODEC is not set
 # CONFIG_SND_SOC_JZ4740_CODEC is not set
 # CONFIG_SND_SOC_JZ4770_CODEC is not set
-# CONFIG_SND_SOC_LPASS_WSA_MACRO is not set
-# CONFIG_SND_SOC_LPASS_VA_MACRO is not set
-# CONFIG_SND_SOC_LPASS_RX_MACRO is not set
-# CONFIG_SND_SOC_LPASS_TX_MACRO is not set
 # CONFIG_SND_SOC_MA120X0P is not set
 # CONFIG_SND_SOC_MAX9759 is not set
 # CONFIG_SND_SOC_MAX98088 is not set
@@ -5822,7 +5804,6 @@ CONFIG_SND_SOC_INTEL_SST_TOPLEVEL=y
 # CONFIG_SND_SOC_MT8173 is not set
 # CONFIG_SND_SOC_MT8183 is not set
 # CONFIG_SND_SOC_MTK_BTCVSD is not set
-# CONFIG_SND_SOC_NAU8315 is not set
 # CONFIG_SND_SOC_NAU8540 is not set
 # CONFIG_SND_SOC_NAU8810 is not set
 # CONFIG_SND_SOC_NAU8822 is not set
@@ -5844,17 +5825,13 @@ CONFIG_SND_SOC_INTEL_SST_TOPLEVEL=y
 # CONFIG_SND_SOC_RK3328 is not set
 # CONFIG_SND_SOC_RT5616 is not set
 # CONFIG_SND_SOC_RT5631 is not set
-# CONFIG_SND_SOC_RT5640 is not set
-# CONFIG_SND_SOC_RT5659 is not set
 # CONFIG_SND_SOC_RT5677_SPI is not set
 # CONFIG_SND_SOC_SGTL5000 is not set
 # CONFIG_SND_SOC_SIMPLE_AMPLIFIER is not set
-# CONFIG_SND_SOC_SIMPLE_MUX is not set
 # CONFIG_SND_SOC_SIRF_AUDIO_CODEC is not set
 # CONFIG_SND_SOC_SOF_TOPLEVEL is not set
 # CONFIG_SND_SOC_SPDIF is not set
 # CONFIG_SND_SOC_SSM2305 is not set
-# CONFIG_SND_SOC_SSM2518 is not set
 # CONFIG_SND_SOC_SSM2602_I2C is not set
 # CONFIG_SND_SOC_SSM2602_SPI is not set
 # CONFIG_SND_SOC_SSM4567 is not set
@@ -5871,7 +5848,6 @@ CONFIG_SND_SOC_INTEL_SST_TOPLEVEL=y
 # CONFIG_SND_SOC_TAS6424 is not set
 # CONFIG_SND_SOC_TDA7419 is not set
 # CONFIG_SND_SOC_TFA9879 is not set
-# CONFIG_SND_SOC_TFA989X is not set
 # CONFIG_SND_SOC_TLV320ADCX140 is not set
 # CONFIG_SND_SOC_TLV320AIC23_I2C is not set
 # CONFIG_SND_SOC_TLV320AIC23_SPI is not set
@@ -5879,8 +5855,6 @@ CONFIG_SND_SOC_INTEL_SST_TOPLEVEL=y
 # CONFIG_SND_SOC_TLV320AIC32X4_I2C is not set
 # CONFIG_SND_SOC_TLV320AIC32X4_SPI is not set
 # CONFIG_SND_SOC_TLV320AIC3X is not set
-# CONFIG_SND_SOC_TLV320AIC3X_I2C is not set
-# CONFIG_SND_SOC_TLV320AIC3X_SPI is not set
 # CONFIG_SND_SOC_TPA6130A2 is not set
 # CONFIG_SND_SOC_TS3A227E is not set
 # CONFIG_SND_SOC_TSCS42XX is not set
@@ -5940,7 +5914,6 @@ CONFIG_SND_USB=y
 CONFIG_SND_VERBOSE_PROCFS=y
 # CONFIG_SND_VIA82XX is not set
 # CONFIG_SND_VIA82XX_MODEM is not set
-# CONFIG_SND_VIRTIO is not set
 # CONFIG_SND_VIRTUOSO is not set
 # CONFIG_SND_VX222 is not set
 # CONFIG_SND_VXPOCKET is not set
@@ -6032,8 +6005,6 @@ CONFIG_SND_X86=y
 CONFIG_SPLIT_PTLOCK_CPUS=4
 # CONFIG_SPMI is not set
 # CONFIG_SPS30 is not set
-# CONFIG_SPS30_I2C is not set
-# CONFIG_SPS30_SERIAL is not set
 CONFIG_SQUASHFS=y
 # CONFIG_SQUASHFS_4K_DEVBLK_SIZE is not set
 # CONFIG_SQUASHFS_DECOMP_MULTI is not set
@@ -6064,6 +6035,7 @@ CONFIG_SSB_POSSIBLE=y
 # CONFIG_STACKPROTECTOR is not set
 # CONFIG_STACKPROTECTOR_STRONG is not set
 # CONFIG_STACKTRACE is not set
+# CONFIG_STACKTRACE_BUILD_ID is not set
 CONFIG_STACKTRACE_SUPPORT=y
 # CONFIG_STACK_TRACER is not set
 # CONFIG_STACK_VALIDATION is not set
@@ -6107,7 +6079,6 @@ CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES=y
 # CONFIG_SUNXI_SRAM is not set
 # CONFIG_SUN_PARTITION is not set
 # CONFIG_SURFACE_3_BUTTON is not set
-# CONFIG_SURFACE_PLATFORMS is not set
 # CONFIG_SUSPEND is not set
 # CONFIG_SUSPEND_SKIP_SYNC is not set
 CONFIG_SWAP=y
@@ -6149,7 +6120,6 @@ CONFIG_SYSVIPC_SYSCTL=y
 # CONFIG_TARGET_CORE is not set
 # CONFIG_TASKSTATS is not set
 # CONFIG_TASKS_RCU is not set
-CONFIG_TASKS_TRACE_RCU_READ_MB=y
 # CONFIG_TASK_XACCT is not set
 # CONFIG_TC35815 is not set
 # CONFIG_TCG_ATMEL is not set
@@ -6160,10 +6130,10 @@ CONFIG_TASKS_TRACE_RCU_READ_MB=y
 # CONFIG_TCG_ST33_I2C is not set
 # CONFIG_TCG_TIS is not set
 # CONFIG_TCG_TIS_I2C_ATMEL is not set
-# CONFIG_TCG_TIS_I2C_CR50 is not set
 # CONFIG_TCG_TIS_I2C_INFINEON is not set
 # CONFIG_TCG_TIS_I2C_NUVOTON is not set
 # CONFIG_TCG_TIS_SPI is not set
+# CONFIG_TCG_TIS_I2C_CR50 is not set
 # CONFIG_TCG_TIS_ST33ZP24_I2C is not set
 # CONFIG_TCG_TIS_ST33ZP24_SPI is not set
 # CONFIG_TCG_TPM is not set
@@ -6201,6 +6171,7 @@ CONFIG_TCP_CONG_CUBIC=y
 # CONFIG_TEST_BITOPS is not set
 # CONFIG_TEST_BLACKHOLE_DEV is not set
 # CONFIG_TEST_BPF is not set
+# CONFIG_TEST_DIV64 is not set
 # CONFIG_TEST_FIRMWARE is not set
 # CONFIG_TEST_FREE_PAGES is not set
 # CONFIG_TEST_HASH is not set
@@ -6218,6 +6189,7 @@ CONFIG_TCP_CONG_CUBIC=y
 # CONFIG_TEST_POWER is not set
 # CONFIG_TEST_PRINTF is not set
 # CONFIG_TEST_RHASHTABLE is not set
+# CONFIG_TEST_SCANF is not set
 # CONFIG_TEST_SORT is not set
 # CONFIG_TEST_STACKINIT is not set
 # CONFIG_TEST_STATIC_KEYS is not set
@@ -6283,7 +6255,6 @@ CONFIG_TINY_RCU=y
 # CONFIG_TI_ADC161S626 is not set
 # CONFIG_TI_ADS1015 is not set
 # CONFIG_TI_ADS124S08 is not set
-# CONFIG_TI_ADS131E08 is not set
 # CONFIG_TI_ADS7950 is not set
 # CONFIG_TI_ADS8344 is not set
 # CONFIG_TI_ADS8688 is not set
@@ -6302,13 +6273,11 @@ CONFIG_TINY_RCU=y
 # CONFIG_TI_ST is not set
 # CONFIG_TI_SYSCON_RESET is not set
 # CONFIG_TI_TLC4541 is not set
-# CONFIG_TI_TSC2046 is not set
 # CONFIG_TLAN is not set
 # CONFIG_TLS is not set
 # CONFIG_TMD_HERMES is not set
 # CONFIG_TMP006 is not set
 # CONFIG_TMP007 is not set
-# CONFIG_TMP117 is not set
 CONFIG_TMPFS=y
 # CONFIG_TMPFS_INODE64 is not set
 # CONFIG_TMPFS_POSIX_ACL is not set
@@ -6359,9 +6328,7 @@ CONFIG_TMPFS_XATTR=y
 # CONFIG_TOUCHSCREEN_HP600 is not set
 # CONFIG_TOUCHSCREEN_HP7XX is not set
 # CONFIG_TOUCHSCREEN_HTCPEN is not set
-# CONFIG_TOUCHSCREEN_HYCON_HY46XX is not set
 # CONFIG_TOUCHSCREEN_ILI210X is not set
-# CONFIG_TOUCHSCREEN_ILITEK is not set
 # CONFIG_TOUCHSCREEN_IMX6UL_TSC is not set
 # CONFIG_TOUCHSCREEN_INEXIO is not set
 # CONFIG_TOUCHSCREEN_IPAQ_MICRO is not set
@@ -6375,7 +6342,6 @@ CONFIG_TMPFS_XATTR=y
 # CONFIG_TOUCHSCREEN_MIGOR is not set
 # CONFIG_TOUCHSCREEN_MK712 is not set
 # CONFIG_TOUCHSCREEN_MMS114 is not set
-# CONFIG_TOUCHSCREEN_MSG2638 is not set
 # CONFIG_TOUCHSCREEN_MTOUCH is not set
 # CONFIG_TOUCHSCREEN_MX25 is not set
 # CONFIG_TOUCHSCREEN_MXS_LRADC is not set
@@ -6466,7 +6432,6 @@ CONFIG_TRAD_SIGNALS=y
 # CONFIG_TRUSTED_FOUNDATIONS is not set
 # CONFIG_TRUSTED_KEYS is not set
 # CONFIG_TSL2583 is not set
-# CONFIG_TSL2591 is not set
 # CONFIG_TSL2772 is not set
 # CONFIG_TSL2x7x is not set
 # CONFIG_TSL4531 is not set
@@ -6776,7 +6741,6 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y
 # CONFIG_USB_ROLE_SWITCH is not set
 # CONFIG_USB_RTL8150 is not set
 # CONFIG_USB_RTL8152 is not set
-# CONFIG_USB_RTL8153_ECM is not set
 # CONFIG_USB_S2255 is not set
 # CONFIG_USB_SERIAL is not set
 # CONFIG_USB_SERIAL_AIRCABLE is not set
@@ -6844,7 +6808,6 @@ CONFIG_USB_SERIAL_SAFE_PADDED=y
 # CONFIG_USB_SERIAL_WHITEHEAT is not set
 # CONFIG_USB_SERIAL_WISHBONE is not set
 # CONFIG_USB_SERIAL_XIRCOM is not set
-# CONFIG_USB_SERIAL_XR is not set
 # CONFIG_USB_SERIAL_XSENS_MT is not set
 # CONFIG_USB_SEVSEG is not set
 # CONFIG_USB_SIERRA_NET is not set
@@ -6954,7 +6917,6 @@ CONFIG_VHOST_MENU=y
 # CONFIG_VIDEO_BT866 is not set
 # CONFIG_VIDEO_CADENCE is not set
 # CONFIG_VIDEO_CAFE_CCIC is not set
-# CONFIG_VIDEO_CCS is not set
 # CONFIG_VIDEO_CS3308 is not set
 # CONFIG_VIDEO_CS5345 is not set
 # CONFIG_VIDEO_CS53L32A is not set
@@ -6984,8 +6946,9 @@ CONFIG_VHOST_MENU=y
 # CONFIG_VIDEO_IMX274 is not set
 # CONFIG_VIDEO_IMX290 is not set
 # CONFIG_VIDEO_IMX319 is not set
-# CONFIG_VIDEO_IMX334 is not set
 # CONFIG_VIDEO_IMX355 is not set
+# CONFIG_VIDEO_IMX477 is not set
+# CONFIG_VIDEO_IRS1125 is not set
 # CONFIG_VIDEO_IR_I2C is not set
 # CONFIG_VIDEO_IVTV is not set
 # CONFIG_VIDEO_KS0127 is not set
@@ -7009,7 +6972,6 @@ CONFIG_VHOST_MENU=y
 # CONFIG_VIDEO_MXB is not set
 # CONFIG_VIDEO_NOON010PC30 is not set
 # CONFIG_VIDEO_OMAP2_VOUT is not set
-# CONFIG_VIDEO_OV02A10 is not set
 # CONFIG_VIDEO_OV13858 is not set
 # CONFIG_VIDEO_OV2640 is not set
 # CONFIG_VIDEO_OV2659 is not set
@@ -7019,7 +6981,6 @@ CONFIG_VHOST_MENU=y
 # CONFIG_VIDEO_OV5640 is not set
 # CONFIG_VIDEO_OV5645 is not set
 # CONFIG_VIDEO_OV5647 is not set
-# CONFIG_VIDEO_OV5648 is not set
 # CONFIG_VIDEO_OV5670 is not set
 # CONFIG_VIDEO_OV5675 is not set
 # CONFIG_VIDEO_OV5695 is not set
@@ -7030,14 +6991,11 @@ CONFIG_VHOST_MENU=y
 # CONFIG_VIDEO_OV772X is not set
 # CONFIG_VIDEO_OV7740 is not set
 # CONFIG_VIDEO_OV8856 is not set
-# CONFIG_VIDEO_OV8865 is not set
 # CONFIG_VIDEO_OV9281 is not set
 # CONFIG_VIDEO_OV9640 is not set
 # CONFIG_VIDEO_OV9650 is not set
-# CONFIG_VIDEO_OV9734 is not set
 # CONFIG_VIDEO_PVRUSB2 is not set
 # CONFIG_VIDEO_RDACM20 is not set
-# CONFIG_VIDEO_RDACM21 is not set
 # CONFIG_VIDEO_RJ54N1 is not set
 # CONFIG_VIDEO_S5C73M3 is not set
 # CONFIG_VIDEO_S5K4ECGX is not set
@@ -7110,6 +7068,7 @@ CONFIG_VLAN_8021Q=y
 # CONFIG_VLAN_8021Q_GVRP is not set
 # CONFIG_VLAN_8021Q_MVRP is not set
 # CONFIG_VME_BUS is not set
+# CONFIG_VMLINUX_MAP is not set
 # CONFIG_VMSPLIT_1G is not set
 # CONFIG_VMSPLIT_2G is not set
 # CONFIG_VMSPLIT_2G_OPT is not set
@@ -7172,6 +7131,7 @@ CONFIG_WATCHDOG_OPEN_TIMEOUT=0
 # CONFIG_WD80x3 is not set
 # CONFIG_WDAT_WDT is not set
 # CONFIG_WDTPCI is not set
+# CONFIG_WERROR is not set
 CONFIG_WEXT_CORE=y
 CONFIG_WEXT_PRIV=y
 CONFIG_WEXT_PROC=y
@@ -7181,7 +7141,6 @@ CONFIG_WILINK_PLATFORM_DATA=y
 # CONFIG_WIREGUARD is not set
 CONFIG_WIRELESS=y
 CONFIG_WIRELESS_EXT=y
-# CONFIG_WIRELESS_HOTKEY is not set
 # CONFIG_WIRELESS_WDS is not set
 # CONFIG_WIZNET_W5100 is not set
 # CONFIG_WIZNET_W5300 is not set
@@ -7219,7 +7178,6 @@ CONFIG_X86_SYSFB=y
 # CONFIG_XEN is not set
 # CONFIG_XEN_GRANT_DMA_ALLOC is not set
 # CONFIG_XEN_PVCALLS_FRONTEND is not set
-CONFIG_XEN_PVHVM_GUEST=y
 CONFIG_XEN_SCRUB_PAGES_DEFAULT=y
 CONFIG_XFRM=y
 # CONFIG_XFRM_INTERFACE is not set
@@ -7262,7 +7220,6 @@ CONFIG_XZ_DEC=y
 # CONFIG_XZ_DEC_TEST is not set
 # CONFIG_XZ_DEC_X86 is not set
 # CONFIG_YAM is not set
-# CONFIG_YAMAHA_YAS530 is not set
 # CONFIG_YELLOWFIN is not set
 # CONFIG_YENTA is not set
 # CONFIG_YENTA_O2 is not set
@@ -7273,6 +7230,7 @@ CONFIG_XZ_DEC=y
 # CONFIG_ZD1211RW is not set
 # CONFIG_ZD1211RW_DEBUG is not set
 # CONFIG_ZEROPLUS_FF is not set
+# CONFIG_ZERO_CALL_USED_REGS is not set
 # CONFIG_ZIIRAVE_WATCHDOG is not set
 # CONFIG_ZISOFS is not set
 # CONFIG_ZLIB_DEFLATE is not set
@@ -7282,10 +7240,9 @@ CONFIG_ZONE_DMA=y
 # CONFIG_ZPA2326 is not set
 # CONFIG_ZPOOL is not set
 # CONFIG_ZRAM is not set
-ZRAM_DEF_COMP_LZORLE=y
-# CONFIG_ZRAM_DEF_COMP_ZSTD is not set
-# CONFIG_ZRAM_DEF_COMP_LZ4 is not set
-# CONFIG_ZRAM_DEF_COMP_LZO is not set
 # CONFIG_ZRAM_MEMORY_TRACKING is not set
+# CONFIG_ZRAM_DEF_COMP_LZORLE is not set
+# CONFIG_ZRAM_DEF_COMP_ZSTD is not set
+# CONFIG_ZRAM_DEF_COMP_LZO is not set
 # CONFIG_ZSMALLOC is not set
 # CONFIG_ZX_TDM is not set
diff --git a/root/target/linux/generic/hack-5.15/204-module_strip.patch b/root/target/linux/generic/hack-5.15/204-module_strip.patch
index 9b25707f..0968d6a1 100755
--- a/root/target/linux/generic/hack-5.15/204-module_strip.patch
+++ b/root/target/linux/generic/hack-5.15/204-module_strip.patch
@@ -88,7 +88,7 @@ Signed-off-by: Felix Fietkau
 --- a/init/Kconfig
 +++ b/init/Kconfig
-@@ -2324,6 +2324,13 @@ config UNUSED_KSYMS_WHITELIST
+@@ -2347,6 +2347,13 @@ config UNUSED_KSYMS_WHITELIST
 	  one per line. The path can be absolute, or relative to the kernel
 	  source tree.
@@ -104,7 +104,23 @@ Signed-off-by: Felix Fietkau config MODULES_TREE_LOOKUP --- a/kernel/module.c +++ b/kernel/module.c -@@ -3227,9 +3227,11 @@ static int setup_load_info(struct load_i +@@ -1218,6 +1218,7 @@ static struct module_attribute *modinfo_ + + static const char vermagic[] = VERMAGIC_STRING; + ++#if defined(CONFIG_MODVERSIONS) || !defined(CONFIG_MODULE_STRIPPED) + static int try_to_force_load(struct module *mod, const char *reason) + { + #ifdef CONFIG_MODULE_FORCE_LOAD +@@ -1229,6 +1230,7 @@ static int try_to_force_load(struct modu + return -ENOEXEC; + #endif + } ++#endif + + #ifdef CONFIG_MODVERSIONS + +@@ -3227,9 +3229,11 @@ static int setup_load_info(struct load_i static int check_modinfo(struct module *mod, struct load_info *info, int flags) { @@ -117,7 +133,7 @@ Signed-off-by: Felix Fietkau if (flags & MODULE_INIT_IGNORE_VERMAGIC) modmagic = NULL; -@@ -3250,6 +3252,7 @@ static int check_modinfo(struct module * +@@ -3250,6 +3254,7 @@ static int check_modinfo(struct module * mod->name); add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); } @@ -127,7 +143,7 @@ Signed-off-by: Felix Fietkau --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c -@@ -2024,7 +2024,9 @@ static void read_symbols(const char *mod +@@ -2033,7 +2033,9 @@ static void read_symbols(const char *mod symname = remove_dot(info.strtab + sym->st_name); handle_symbol(mod, &info, sym, symname); @@ -137,7 +153,7 @@ Signed-off-by: Felix Fietkau } for (sym = info.symtab_start; sym < info.symtab_stop; sym++) { -@@ -2203,8 +2205,10 @@ static void add_header(struct buffer *b, +@@ -2212,8 +2214,10 @@ static void add_header(struct buffer *b, buf_printf(b, "BUILD_SALT;\n"); buf_printf(b, "BUILD_LTO_INFO;\n"); buf_printf(b, "\n"); @@ -148,7 +164,7 @@ Signed-off-by: Felix Fietkau buf_printf(b, "\n"); buf_printf(b, "__visible struct module __this_module\n"); buf_printf(b, "__section(\".gnu.linkonce.this_module\") = {\n"); -@@ -2221,8 +2225,10 @@ static void add_header(struct buffer *b, +@@ -2230,8 +2234,10 @@ static void add_header(struct buffer *b, static void add_intree_flag(struct buffer *b, int is_intree) { @@ -159,7 +175,7 @@ Signed-off-by: Felix Fietkau } /* Cannot check for assembler */ -@@ -2235,8 +2241,10 @@ static void add_retpoline(struct buffer +@@ -2244,8 +2250,10 @@ static void add_retpoline(struct buffer static void add_staging_flag(struct buffer *b, const char *name) { @@ -170,7 +186,7 @@ Signed-off-by: Felix Fietkau } /** -@@ -2316,11 +2324,13 @@ static void add_depends(struct buffer *b +@@ -2325,11 +2333,13 @@ static void add_depends(struct buffer *b static void add_srcversion(struct buffer *b, struct module *mod) { @@ -184,7 +200,7 @@ Signed-off-by: Felix Fietkau } static void write_buf(struct buffer *b, const char *fname) -@@ -2569,7 +2579,9 @@ int main(int argc, char **argv) +@@ -2578,7 +2588,9 @@ int main(int argc, char **argv) add_staging_flag(&buf, mod->name); add_versions(&buf, mod); add_depends(&buf, mod); diff --git a/root/target/linux/generic/hack-5.15/205-kconfig-exit.patch b/root/target/linux/generic/hack-5.15/205-kconfig-exit.patch new file mode 100755 index 00000000..e61c3ffd --- /dev/null +++ b/root/target/linux/generic/hack-5.15/205-kconfig-exit.patch @@ -0,0 +1,11 @@ +--- a/scripts/kconfig/conf.c ++++ b/scripts/kconfig/conf.c +@@ -435,6 +435,8 @@ static int conf_sym(struct menu *menu) + break; + continue; + case 0: ++ if (!sym_has_value(sym) && !tty_stdio && getenv("FAIL_ON_UNCONFIGURED")) ++ exit(1); + newval = oldval; + break; + case '?': diff --git 
a/root/target/linux/generic/hack-5.15/210-darwin_scripts_include.patch b/root/target/linux/generic/hack-5.15/210-darwin_scripts_include.patch index d68e2f88..be6adc0d 100755 --- a/root/target/linux/generic/hack-5.15/210-darwin_scripts_include.patch +++ b/root/target/linux/generic/hack-5.15/210-darwin_scripts_include.patch @@ -3039,7 +3039,7 @@ Signed-off-by: Florian Fainelli main(int argc, char **argv) --- a/scripts/mod/modpost.h +++ b/scripts/mod/modpost.h -@@ -9,7 +9,11 @@ +@@ -8,7 +8,11 @@ #include #include #include diff --git a/root/target/linux/generic/hack-5.15/212-tools_portability.patch b/root/target/linux/generic/hack-5.15/212-tools_portability.patch index ffbb7d14..b488155f 100755 --- a/root/target/linux/generic/hack-5.15/212-tools_portability.patch +++ b/root/target/linux/generic/hack-5.15/212-tools_portability.patch @@ -68,14 +68,10 @@ Signed-off-by: Felix Fietkau +#endif --- a/tools/include/linux/types.h +++ b/tools/include/linux/types.h -@@ -6,12 +6,13 @@ - #include - #include - --#ifndef __SANE_USERSPACE_TYPES__ +@@ -10,8 +10,12 @@ #define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */ --#endif -- + #endif + +#ifndef __linux__ +#include +#else diff --git a/root/target/linux/generic/hack-5.15/220-arm-gc_sections.patch b/root/target/linux/generic/hack-5.15/220-arm-gc_sections.patch new file mode 100755 index 00000000..305556be --- /dev/null +++ b/root/target/linux/generic/hack-5.15/220-arm-gc_sections.patch @@ -0,0 +1,122 @@ +From e3d8676f5722b7622685581e06e8f53e6138e3ab Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Sat, 15 Jul 2017 23:42:36 +0200 +Subject: use -ffunction-sections, -fdata-sections and --gc-sections + +In combination with kernel symbol export stripping this significantly reduces +the kernel image size. Used on both ARM and MIPS architectures. + +Signed-off-by: Felix Fietkau +Signed-off-by: Jonas Gorski +Signed-off-by: Gabor Juhos +--- +--- a/arch/arm/Kconfig ++++ b/arch/arm/Kconfig +@@ -117,6 +117,7 @@ config ARM + select HAVE_UID16 + select HAVE_VIRT_CPU_ACCOUNTING_GEN + select IRQ_FORCED_THREADING ++ select HAVE_LD_DEAD_CODE_DATA_ELIMINATION + select MODULES_USE_ELF_REL + select NEED_DMA_MAP_STATE + select OF_EARLY_FLATTREE if OF +--- a/arch/arm/boot/compressed/Makefile ++++ b/arch/arm/boot/compressed/Makefile +@@ -92,6 +92,7 @@ endif + ifeq ($(CONFIG_USE_OF),y) + OBJS += $(libfdt_objs) fdt_check_mem_start.o + endif ++KBUILD_CFLAGS_KERNEL := $(patsubst -f%-sections,,$(KBUILD_CFLAGS_KERNEL)) + + # -fstack-protector-strong triggers protection checks in this code, + # but it is being used too early to link to meaningful stack_chk logic. +--- a/arch/arm/kernel/vmlinux.lds.S ++++ b/arch/arm/kernel/vmlinux.lds.S +@@ -75,7 +75,7 @@ SECTIONS + . 
= ALIGN(4); + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { + __start___ex_table = .; +- ARM_MMU_KEEP(*(__ex_table)) ++ KEEP(*(__ex_table)) + __stop___ex_table = .; + } + +@@ -100,24 +100,24 @@ SECTIONS + } + .init.arch.info : { + __arch_info_begin = .; +- *(.arch.info.init) ++ KEEP(*(.arch.info.init)) + __arch_info_end = .; + } + .init.tagtable : { + __tagtable_begin = .; +- *(.taglist.init) ++ KEEP(*(.taglist.init)) + __tagtable_end = .; + } + #ifdef CONFIG_SMP_ON_UP + .init.smpalt : { + __smpalt_begin = .; +- *(.alt.smp.init) ++ KEEP(*(.alt.smp.init)) + __smpalt_end = .; + } + #endif + .init.pv_table : { + __pv_table_begin = .; +- *(.pv_table) ++ KEEP(*(.pv_table)) + __pv_table_end = .; + } + +--- a/arch/arm/include/asm/vmlinux.lds.h ++++ b/arch/arm/include/asm/vmlinux.lds.h +@@ -29,13 +29,13 @@ + #define PROC_INFO \ + . = ALIGN(4); \ + __proc_info_begin = .; \ +- *(.proc.info.init) \ ++ KEEP(*(.proc.info.init)) \ + __proc_info_end = .; + + #define IDMAP_TEXT \ + ALIGN_FUNCTION(); \ + __idmap_text_start = .; \ +- *(.idmap.text) \ ++ KEEP(*(.idmap.text)) \ + __idmap_text_end = .; \ + + #define ARM_DISCARD \ +@@ -96,12 +96,12 @@ + . = ALIGN(8); \ + .ARM.unwind_idx : { \ + __start_unwind_idx = .; \ +- *(.ARM.exidx*) \ ++ KEEP(*(.ARM.exidx*)) \ + __stop_unwind_idx = .; \ + } \ + .ARM.unwind_tab : { \ + __start_unwind_tab = .; \ +- *(.ARM.extab*) \ ++ KEEP(*(.ARM.extab*)) \ + __stop_unwind_tab = .; \ + } + +@@ -112,14 +112,14 @@ + #define ARM_VECTORS \ + __vectors_start = .; \ + .vectors 0xffff0000 : AT(__vectors_start) { \ +- *(.vectors) \ ++ KEEP(*(.vectors)) \ + } \ + . = __vectors_start + SIZEOF(.vectors); \ + __vectors_end = .; \ + \ + __stubs_start = .; \ + .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) { \ +- *(.stubs) \ ++ KEEP(*(.stubs)) \ + } \ + . = __stubs_start + SIZEOF(.stubs); \ + __stubs_end = .; \ diff --git a/root/target/linux/generic/hack-5.15/221-module_exports.patch b/root/target/linux/generic/hack-5.15/221-module_exports.patch index 0153d3a5..65cee1a5 100755 --- a/root/target/linux/generic/hack-5.15/221-module_exports.patch +++ b/root/target/linux/generic/hack-5.15/221-module_exports.patch @@ -30,7 +30,7 @@ Signed-off-by: Felix Fietkau /* Align . to a 8 byte boundary equals to maximum function alignment. */ #define ALIGN_FUNCTION() . 
= ALIGN(8) -@@ -486,14 +496,14 @@ +@@ -484,14 +494,14 @@ /* Kernel symbol table: Normal symbols */ \ __ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \ __start___ksymtab = .; \ @@ -47,7 +47,7 @@ Signed-off-by: Felix Fietkau __stop___ksymtab_gpl = .; \ } \ \ -@@ -513,7 +523,7 @@ +@@ -511,7 +521,7 @@ \ /* Kernel symbol table: strings */ \ __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) { \ @@ -56,7 +56,7 @@ Signed-off-by: Felix Fietkau } \ \ /* __*init sections */ \ -@@ -1009,6 +1019,8 @@ +@@ -1018,6 +1028,8 @@ #define COMMON_DISCARDS \ SANITIZER_DISCARDS \ @@ -91,7 +91,7 @@ Signed-off-by: Felix Fietkau "__kstrtabns_" #sym ": \n" \ --- a/scripts/Makefile.build +++ b/scripts/Makefile.build -@@ -358,7 +358,7 @@ targets += $(real-dtb-y) $(lib-y) $(alwa +@@ -385,7 +385,7 @@ targets += $(real-dtb-y) $(lib-y) $(alwa # Linker scripts preprocessor (.lds.S -> .lds) # --------------------------------------------------------------------------- quiet_cmd_cpp_lds_S = LDS $@ diff --git a/root/target/linux/generic/hack-5.15/230-openwrt_lzma_options.patch b/root/target/linux/generic/hack-5.15/230-openwrt_lzma_options.patch index 6bc5d1de..8aa5b7c5 100755 --- a/root/target/linux/generic/hack-5.15/230-openwrt_lzma_options.patch +++ b/root/target/linux/generic/hack-5.15/230-openwrt_lzma_options.patch @@ -23,7 +23,7 @@ Signed-off-by: Imre Kaloz { {0x02, 0x21}, "lz4", unlz4 }, --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib -@@ -408,7 +408,7 @@ quiet_cmd_bzip2 = BZIP2 $@ +@@ -413,7 +413,7 @@ quiet_cmd_bzip2 = BZIP2 $@ # --------------------------------------------------------------------------- quiet_cmd_lzma = LZMA $@ diff --git a/root/target/linux/generic/hack-5.15/251-kconfig.patch b/root/target/linux/generic/hack-5.15/251-kconfig.patch index 004f18c0..ea830272 100755 --- a/root/target/linux/generic/hack-5.15/251-kconfig.patch +++ b/root/target/linux/generic/hack-5.15/251-kconfig.patch @@ -92,7 +92,7 @@ Signed-off-by: John Crispin bool --- a/lib/Kconfig +++ b/lib/Kconfig -@@ -433,16 +433,16 @@ config BCH_CONST_T +@@ -439,16 +439,16 @@ config BCH_CONST_T # Textsearch support is select'ed if needed # config TEXTSEARCH diff --git a/root/target/linux/generic/hack-5.15/252-SATA_PMP.patch b/root/target/linux/generic/hack-5.15/252-SATA_PMP.patch new file mode 100755 index 00000000..6502d1d6 --- /dev/null +++ b/root/target/linux/generic/hack-5.15/252-SATA_PMP.patch @@ -0,0 +1,23 @@ +From 8c817e33be829c7249c2cfd59ff48ad5fac6a31d Mon Sep 17 00:00:00 2001 +From: Sungbo Eo +Date: Fri, 7 Jul 2017 17:09:21 +0200 +Subject: [PATCH] kconfig: solidify SATA_PMP config + +SATA_PMP option in kernel config file disappears for every kernel_oldconfig refresh. +To prevent this, SATA_HOST is now selected automatically when SATA_PMP is enabled. +This patch can be dropped if SATA_MV is ever re-added into the config. 
+--- + drivers/ata/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/ata/Kconfig ++++ b/drivers/ata/Kconfig +@@ -112,7 +112,7 @@ config SATA_ZPODD + + config SATA_PMP + bool "SATA Port Multiplier support" +- depends on SATA_HOST ++ select SATA_HOST + default y + help + This option adds support for SATA Port Multipliers diff --git a/root/target/linux/generic/hack-5.15/259-regmap_dynamic.patch b/root/target/linux/generic/hack-5.15/259-regmap_dynamic.patch new file mode 100755 index 00000000..d1d56a11 --- /dev/null +++ b/root/target/linux/generic/hack-5.15/259-regmap_dynamic.patch @@ -0,0 +1,144 @@ +From 811d9e2268a62b830cfe93cd8bc929afcb8b198b Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Sat, 15 Jul 2017 21:12:38 +0200 +Subject: kernel: move regmap bloat out of the kernel image if it is only being used in modules + +lede-commit: 96f39119815028073583e4fca3a9c5fe9141e998 +Signed-off-by: Felix Fietkau +--- + drivers/base/regmap/Kconfig | 15 ++++++++++----- + drivers/base/regmap/Makefile | 12 ++++++++---- + drivers/base/regmap/regmap.c | 3 +++ + include/linux/regmap.h | 2 +- + 4 files changed, 22 insertions(+), 10 deletions(-) + +--- a/drivers/base/regmap/Kconfig ++++ b/drivers/base/regmap/Kconfig +@@ -4,10 +4,9 @@ + # subsystems should select the appropriate symbols. + + config REGMAP +- default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_W1 || REGMAP_AC97 || REGMAP_MMIO || REGMAP_IRQ || REGMAP_SOUNDWIRE || REGMAP_SOUNDWIRE_MBQ || REGMAP_SCCB || REGMAP_I3C || REGMAP_SPI_AVMM || REGMAP_MDIO) + select IRQ_DOMAIN if REGMAP_IRQ + select MDIO_BUS if REGMAP_MDIO +- bool ++ tristate + + config REGCACHE_COMPRESSED + select LZO_COMPRESS +@@ -15,53 +14,67 @@ config REGCACHE_COMPRESSED + bool + + config REGMAP_AC97 ++ select REGMAP + tristate + + config REGMAP_I2C ++ select REGMAP + tristate + depends on I2C + + config REGMAP_SLIMBUS ++ select REGMAP + tristate + depends on SLIMBUS + + config REGMAP_SPI ++ select REGMAP + tristate + depends on SPI + + config REGMAP_SPMI ++ select REGMAP + tristate + depends on SPMI + + config REGMAP_W1 ++ select REGMAP + tristate + depends on W1 + + config REGMAP_MDIO ++ select REGMAP + tristate + + config REGMAP_MMIO ++ select REGMAP + tristate + + config REGMAP_IRQ ++ select REGMAP + bool + + config REGMAP_SOUNDWIRE ++ select REGMAP + tristate + depends on SOUNDWIRE + + config REGMAP_SOUNDWIRE_MBQ ++ select REGMAP + tristate + depends on SOUNDWIRE + + config REGMAP_SCCB ++ select REGMAP + tristate + depends on I2C + + config REGMAP_I3C ++ select REGMAP + tristate + depends on I3C + + config REGMAP_SPI_AVMM ++ select REGMAP + tristate + depends on SPI +--- a/drivers/base/regmap/Makefile ++++ b/drivers/base/regmap/Makefile +@@ -2,10 +2,14 @@ + # For include/trace/define_trace.h to include trace.h + CFLAGS_regmap.o := -I$(src) + +-obj-$(CONFIG_REGMAP) += regmap.o regcache.o +-obj-$(CONFIG_REGMAP) += regcache-rbtree.o regcache-flat.o +-obj-$(CONFIG_REGCACHE_COMPRESSED) += regcache-lzo.o +-obj-$(CONFIG_DEBUG_FS) += regmap-debugfs.o ++regmap-core-objs = regmap.o regcache.o regcache-rbtree.o regcache-flat.o ++ifdef CONFIG_DEBUG_FS ++regmap-core-objs += regmap-debugfs.o ++endif ++ifdef CONFIG_REGCACHE_COMPRESSED ++regmap-core-objs += regcache-lzo.o ++endif ++obj-$(CONFIG_REGMAP) += regmap-core.o + obj-$(CONFIG_REGMAP_AC97) += regmap-ac97.o + obj-$(CONFIG_REGMAP_I2C) += regmap-i2c.o + obj-$(CONFIG_REGMAP_SLIMBUS) += regmap-slimbus.o +--- a/drivers/base/regmap/regmap.c ++++ b/drivers/base/regmap/regmap.c +@@ -9,6 +9,7 @@ + 
#include + #include + #include ++#include + #include + #include + #include +@@ -3339,3 +3340,5 @@ static int __init regmap_initcall(void) + return 0; + } + postcore_initcall(regmap_initcall); ++ ++MODULE_LICENSE("GPL"); +--- a/include/linux/regmap.h ++++ b/include/linux/regmap.h +@@ -180,7 +180,7 @@ struct reg_sequence { + __ret ?: __tmp; \ + }) + +-#ifdef CONFIG_REGMAP ++#if IS_REACHABLE(CONFIG_REGMAP) + + enum regmap_endian { + /* Unspecified -> 0 -> Backwards compatible default */ diff --git a/root/target/linux/generic/hack-5.15/301-mips_image_cmdline_hack.patch b/root/target/linux/generic/hack-5.15/301-mips_image_cmdline_hack.patch index 993b1e6f..15e233ac 100755 --- a/root/target/linux/generic/hack-5.15/301-mips_image_cmdline_hack.patch +++ b/root/target/linux/generic/hack-5.15/301-mips_image_cmdline_hack.patch @@ -10,7 +10,7 @@ Signed-off-by: Gabor Juhos --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig -@@ -1202,6 +1202,10 @@ config MIPS_MSC +@@ -1180,6 +1180,10 @@ config MIPS_MSC config SYNC_R4K bool diff --git a/root/target/linux/generic/hack-5.15/401-mtd-super-don-t-reply-on-mtdblock-device-minor.patch b/root/target/linux/generic/hack-5.15/401-mtd-super-don-t-reply-on-mtdblock-device-minor.patch new file mode 100755 index 00000000..8f985c0b --- /dev/null +++ b/root/target/linux/generic/hack-5.15/401-mtd-super-don-t-reply-on-mtdblock-device-minor.patch @@ -0,0 +1,84 @@ +From f9760b158f610b1792a222cc924073724c061bfb Mon Sep 17 00:00:00 2001 +From: Daniel Golle +Date: Wed, 7 Apr 2021 22:37:57 +0100 +Subject: [PATCH 1/2] mtd: super: don't reply on mtdblock device minor +To: linux-mtd@lists.infradead.org +Cc: Vignesh Raghavendra , + Richard Weinberger , + Miquel Raynal , + David Woodhouse + +For blktrans devices with partitions (ie. part_bits != 0) the +assumption that the minor number of the mtdblock device matches +the mtdnum doesn't hold true. +Properly resolve mtd device from blktrans layer instead. 
+
+Signed-off-by: Daniel Golle
+---
+ drivers/mtd/mtdsuper.c | 33 ++++++++++++++++++++++++++-------
+ 1 file changed, 26 insertions(+), 7 deletions(-)
+
+--- a/drivers/mtd/mtdsuper.c
++++ b/drivers/mtd/mtdsuper.c
+@@ -9,6 +9,7 @@
+ */
+
+ #include
++#include
+ #include
+ #include
+ #include
+@@ -120,8 +121,9 @@ int get_tree_mtd(struct fs_context *fc,
+ struct fs_context *fc))
+ {
+ #ifdef CONFIG_BLOCK
+- dev_t dev;
+- int ret;
++ struct mtd_blktrans_dev *blktrans_dev;
++ struct block_device *bdev;
++ int ret, part_bits;
+ #endif
+ int mtdnr;
+
+@@ -169,16 +171,36 @@ int get_tree_mtd(struct fs_context *fc,
+ /* try the old way - the hack where we allowed users to mount
+ * /dev/mtdblock$(n) but didn't actually _use_ the blockdev
+ */
+- ret = lookup_bdev(fc->source, &dev);
+- if (ret) {
++ bdev = blkdev_get_by_path(fc->source, FMODE_READ, NULL);
++ if (IS_ERR(bdev)) {
++ ret = PTR_ERR(bdev);
+ errorf(fc, "MTD: Couldn't look up '%s': %d", fc->source, ret);
+ return ret;
+ }
+- pr_debug("MTDSB: lookup_bdev() returned 0\n");
++ pr_debug("MTDSB: blkdev_get_by_path() returned 0\n");
+
+- if (MAJOR(dev) == MTD_BLOCK_MAJOR)
+- return mtd_get_sb_by_nr(fc, MINOR(dev), fill_super);
++ if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
++ if (!bdev->bd_disk) {
++ blkdev_put(bdev, FMODE_READ);
++ BUG();
++ return -EINVAL;
++ }
+
++ blktrans_dev = (struct mtd_blktrans_dev *)(bdev->bd_disk->private_data);
++ if (!blktrans_dev || !blktrans_dev->tr) {
++ blkdev_put(bdev, FMODE_READ);
++ BUG();
++ return -EINVAL;
++ }
++ mtdnr = blktrans_dev->devnum;
++ part_bits = blktrans_dev->tr->part_bits;
++ blkdev_put(bdev, FMODE_READ);
++ if (MINOR(bdev->bd_dev) != (mtdnr << part_bits))
++ return -EINVAL;
++
++ return mtd_get_sb_by_nr(fc, mtdnr, fill_super);
++ }
++ blkdev_put(bdev, FMODE_READ);
+ #endif /* CONFIG_BLOCK */
+
+ if (!(fc->sb_flags & SB_SILENT))
diff --git a/root/target/linux/generic/hack-5.15/402-mtd-blktrans-call-add-disks-after-mtd-device.patch b/root/target/linux/generic/hack-5.15/402-mtd-blktrans-call-add-disks-after-mtd-device.patch
new file mode 100755
index 00000000..c9821b57
--- /dev/null
+++ b/root/target/linux/generic/hack-5.15/402-mtd-blktrans-call-add-disks-after-mtd-device.patch
@@ -0,0 +1,98 @@
+From 0bccc3722bdd88e8ae995e77ef9f7b77ee4cbdee Mon Sep 17 00:00:00 2001
+From: Daniel Golle
+Date: Wed, 7 Apr 2021 22:45:54 +0100
+Subject: [PATCH 2/2] mtd: blktrans: call add disks after mtd device
+To: linux-mtd@lists.infradead.org
+Cc: Vignesh Raghavendra ,
+ Richard Weinberger ,
+ Miquel Raynal ,
+ David Woodhouse
+
+Calling device_add_disk while holding mtd_table_mutex leads
+to a deadlock in case part_bits != 0, as block partition parsers
+will try to open the newly created disks and thus acquire the
+mutex once again.
+Move device_add_disk to an additional function called after the
+partitions of an MTD device have been added and the locks
+have been released.
+
+Signed-off-by: Daniel Golle
+---
+ drivers/mtd/mtd_blkdevs.c | 33 ++++++++++++++++++++++++++-------
+ drivers/mtd/mtdcore.c | 3 +++
+ include/linux/mtd/blktrans.h | 1 +
+ 3 files changed, 30 insertions(+), 7 deletions(-)
+
+--- a/drivers/mtd/mtd_blkdevs.c
++++ b/drivers/mtd/mtd_blkdevs.c
+@@ -384,13 +384,6 @@ int add_mtd_blktrans_dev(struct mtd_blkt
+ if (new->readonly)
+ set_disk_ro(gd, 1);
+
+- device_add_disk(&new->mtd->dev, gd, NULL);
+-
+- if (new->disk_attributes) {
+- ret = sysfs_create_group(&disk_to_dev(gd)->kobj,
+- new->disk_attributes);
+- WARN_ON(ret);
+- }
+ return 0;
+
+ out_free_tag_set:
+@@ -402,6 +395,27 @@ out_list_del:
+ return ret;
+ }
+
++void register_mtd_blktrans_devs(void)
++{
++ struct mtd_blktrans_ops *tr;
++ struct mtd_blktrans_dev *dev, *next;
++ int ret;
++
++ list_for_each_entry(tr, &blktrans_majors, list) {
++ list_for_each_entry_safe(dev, next, &tr->devs, list) {
++ if (disk_live(dev->disk))
++ continue;
++
++ device_add_disk(&dev->mtd->dev, dev->disk, NULL);
++ if (dev->disk_attributes) {
++ ret = sysfs_create_group(&disk_to_dev(dev->disk)->kobj,
++ dev->disk_attributes);
++ WARN_ON(ret);
++ }
++ }
++ }
++}
++
+ int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old)
+ {
+ unsigned long flags;
+--- a/drivers/mtd/mtdcore.c
++++ b/drivers/mtd/mtdcore.c
+@@ -31,6 +31,7 @@
+
+ #include
+ #include
++#include
+
+ #include "mtdcore.h"
+
+@@ -1000,6 +1001,8 @@ int mtd_device_parse_register(struct mtd
+
+ ret = mtd_otp_nvmem_add(mtd);
+
++ register_mtd_blktrans_devs();
++
+ out:
+ if (ret && device_is_registered(&mtd->dev))
+ del_mtd_device(mtd);
+--- a/include/linux/mtd/blktrans.h
++++ b/include/linux/mtd/blktrans.h
+@@ -76,6 +76,7 @@ extern int deregister_mtd_blktrans(struc
+ extern int add_mtd_blktrans_dev(struct mtd_blktrans_dev *dev);
+ extern int del_mtd_blktrans_dev(struct mtd_blktrans_dev *dev);
+ extern int mtd_blktrans_cease_background(struct mtd_blktrans_dev *dev);
++extern void register_mtd_blktrans_devs(void);
+
+ /**
+ * module_mtd_blktrans() - Helper macro for registering a mtd blktrans driver
diff --git a/root/target/linux/generic/hack-5.15/410-block-fit-partition-parser.patch b/root/target/linux/generic/hack-5.15/410-block-fit-partition-parser.patch
new file mode 100755
index 00000000..2ac6cb03
--- /dev/null
+++ b/root/target/linux/generic/hack-5.15/410-block-fit-partition-parser.patch
@@ -0,0 +1,220 @@
+--- a/block/blk.h
++++ b/block/blk.h
+@@ -354,6 +354,7 @@ void blk_free_ext_minor(unsigned int min
+ #define ADDPART_FLAG_NONE 0
+ #define ADDPART_FLAG_RAID 1
+ #define ADDPART_FLAG_WHOLEDISK 2
++#define ADDPART_FLAG_ROOTDEV 4
+ int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
+ sector_t length);
+ int bdev_del_partition(struct gendisk *disk, int partno);
+--- a/block/partitions/Kconfig
++++ b/block/partitions/Kconfig
+@@ -101,6 +101,13 @@ config ATARI_PARTITION
+ Say Y here if you would like to use hard disks under Linux which
+ were partitioned under the Atari OS.
+
++config FIT_PARTITION
++ bool "Flattened-Image-Tree (FIT) partition support" if PARTITION_ADVANCED
++ default n
++ help
++ Say Y here if your system needs to mount the filesystem part of
++ a Flattened-Image-Tree (FIT) image commonly used with Das U-Boot.
++ + config IBM_PARTITION + bool "IBM disk label and partition support" + depends on PARTITION_ADVANCED && S390 +--- a/block/partitions/Makefile ++++ b/block/partitions/Makefile +@@ -8,6 +8,7 @@ obj-$(CONFIG_ACORN_PARTITION) += acorn.o + obj-$(CONFIG_AMIGA_PARTITION) += amiga.o + obj-$(CONFIG_ATARI_PARTITION) += atari.o + obj-$(CONFIG_AIX_PARTITION) += aix.o ++obj-$(CONFIG_FIT_PARTITION) += fit.o + obj-$(CONFIG_CMDLINE_PARTITION) += cmdline.o + obj-$(CONFIG_MAC_PARTITION) += mac.o + obj-$(CONFIG_LDM_PARTITION) += ldm.o +--- a/block/partitions/check.h ++++ b/block/partitions/check.h +@@ -58,6 +58,7 @@ int amiga_partition(struct parsed_partit + int atari_partition(struct parsed_partitions *state); + int cmdline_partition(struct parsed_partitions *state); + int efi_partition(struct parsed_partitions *state); ++int fit_partition(struct parsed_partitions *state); + int ibm_partition(struct parsed_partitions *); + int karma_partition(struct parsed_partitions *state); + int ldm_partition(struct parsed_partitions *state); +@@ -68,3 +69,5 @@ int sgi_partition(struct parsed_partitio + int sun_partition(struct parsed_partitions *state); + int sysv68_partition(struct parsed_partitions *state); + int ultrix_partition(struct parsed_partitions *state); ++ ++int parse_fit_partitions(struct parsed_partitions *state, u64 start_sector, u64 nr_sectors, int *slot, int add_remain); +--- a/block/partitions/core.c ++++ b/block/partitions/core.c +@@ -11,6 +11,10 @@ + #include + #include + #include ++#ifdef CONFIG_FIT_PARTITION ++#include ++#endif ++ + #include "check.h" + + static int (*check_part[])(struct parsed_partitions *) = { +@@ -47,6 +51,9 @@ static int (*check_part[])(struct parsed + #ifdef CONFIG_EFI_PARTITION + efi_partition, /* this must come before msdos */ + #endif ++#ifdef CONFIG_FIT_PARTITION ++ fit_partition, ++#endif + #ifdef CONFIG_SGI_PARTITION + sgi_partition, + #endif +@@ -597,6 +604,11 @@ static bool blk_add_partition(struct gen + (state->parts[p].flags & ADDPART_FLAG_RAID)) + md_autodetect_dev(part->bd_dev); + ++#ifdef CONFIG_FIT_PARTITION ++ if ((state->parts[p].flags & ADDPART_FLAG_ROOTDEV) && ROOT_DEV == 0) ++ ROOT_DEV = part_to_dev(part)->devt; ++#endif ++ + return true; + } + +--- a/drivers/mtd/ubi/block.c ++++ b/drivers/mtd/ubi/block.c +@@ -419,7 +419,11 @@ int ubiblock_create(struct ubi_volume_in + + gd->fops = &ubiblock_ops; + gd->major = ubiblock_major; ++#ifdef CONFIG_FIT_PARTITION ++ gd->minors = 0; ++#else + gd->minors = 1; ++#endif + gd->first_minor = idr_alloc(&ubiblock_minor_idr, dev, 0, 0, GFP_KERNEL); + if (gd->first_minor < 0) { + dev_err(disk_to_dev(gd), +@@ -428,6 +432,9 @@ int ubiblock_create(struct ubi_volume_in + goto out_cleanup_disk; + } + gd->private_data = dev; ++#ifdef CONFIG_FIT_PARTITION ++ gd->flags |= GENHD_FL_EXT_DEVT; ++#endif + sprintf(gd->disk_name, "ubiblock%d_%d", dev->ubi_num, dev->vol_id); + set_capacity(gd, disk_capacity); + dev->gd = gd; +--- a/block/partitions/efi.c ++++ b/block/partitions/efi.c +@@ -716,6 +716,9 @@ int efi_partition(struct parsed_partitio + gpt_entry *ptes = NULL; + u32 i; + unsigned ssz = queue_logical_block_size(state->disk->queue) / 512; ++#ifdef CONFIG_FIT_PARTITION ++ u32 extra_slot = 64; ++#endif + + if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) { + kfree(gpt); +@@ -749,6 +752,11 @@ int efi_partition(struct parsed_partitio + ARRAY_SIZE(ptes[i].partition_name)); + utf16_le_to_7bit(ptes[i].partition_name, label_max, info->volname); + state->parts[i + 1].has_info = true; ++#ifdef CONFIG_FIT_PARTITION ++ /* If this 
is a U-Boot FIT volume it may have subpartitions */ ++ if (!efi_guidcmp(ptes[i].partition_type_guid, PARTITION_LINUX_FIT_GUID)) ++ (void) parse_fit_partitions(state, start * ssz, size * ssz, &extra_slot, 1); ++#endif + } + kfree(ptes); + kfree(gpt); +--- a/block/partitions/efi.h ++++ b/block/partitions/efi.h +@@ -52,6 +52,9 @@ + #define PARTITION_LINUX_LVM_GUID \ + EFI_GUID( 0xe6d6d379, 0xf507, 0x44c2, \ + 0xa2, 0x3c, 0x23, 0x8f, 0x2a, 0x3d, 0xf9, 0x28) ++#define PARTITION_LINUX_FIT_GUID \ ++ EFI_GUID( 0xcae9be83, 0xb15f, 0x49cc, \ ++ 0x86, 0x3f, 0x08, 0x1b, 0x74, 0x4a, 0x2d, 0x93) + + typedef struct _gpt_header { + __le64 signature; +--- a/drivers/mtd/mtdblock.c ++++ b/drivers/mtd/mtdblock.c +@@ -338,7 +338,11 @@ static void mtdblock_remove_dev(struct m + static struct mtd_blktrans_ops mtdblock_tr = { + .name = "mtdblock", + .major = MTD_BLOCK_MAJOR, ++#ifdef CONFIG_FIT_PARTITION ++ .part_bits = 1, ++#else + .part_bits = 0, ++#endif + .blksize = 512, + .open = mtdblock_open, + .flush = mtdblock_flush, +--- a/drivers/mtd/mtd_blkdevs.c ++++ b/drivers/mtd/mtd_blkdevs.c +@@ -346,18 +346,8 @@ int add_mtd_blktrans_dev(struct mtd_blkt + gd->minors = 1 << tr->part_bits; + gd->fops = &mtd_block_ops; + +- if (tr->part_bits) +- if (new->devnum < 26) +- snprintf(gd->disk_name, sizeof(gd->disk_name), +- "%s%c", tr->name, 'a' + new->devnum); +- else +- snprintf(gd->disk_name, sizeof(gd->disk_name), +- "%s%c%c", tr->name, +- 'a' - 1 + new->devnum / 26, +- 'a' + new->devnum % 26); +- else +- snprintf(gd->disk_name, sizeof(gd->disk_name), +- "%s%d", tr->name, new->devnum); ++ snprintf(gd->disk_name, sizeof(gd->disk_name), ++ "%s%d", tr->name, new->devnum); + + set_capacity(gd, ((u64)new->size * tr->blksize) >> 9); + +--- a/block/partitions/msdos.c ++++ b/block/partitions/msdos.c +@@ -564,6 +564,15 @@ static void parse_minix(struct parsed_pa + #endif /* CONFIG_MINIX_SUBPARTITION */ + } + ++static void parse_fit_mbr(struct parsed_partitions *state, ++ sector_t offset, sector_t size, int origin) ++{ ++#ifdef CONFIG_FIT_PARTITION ++ u32 extra_slot = 64; ++ (void) parse_fit_partitions(state, offset, size, &extra_slot, 1); ++#endif /* CONFIG_FIT_PARTITION */ ++} ++ + static struct { + unsigned char id; + void (*parse)(struct parsed_partitions *, sector_t, sector_t, int); +@@ -575,6 +584,7 @@ static struct { + {UNIXWARE_PARTITION, parse_unixware}, + {SOLARIS_X86_PARTITION, parse_solaris_x86}, + {NEW_SOLARIS_X86_PARTITION, parse_solaris_x86}, ++ {FIT_PARTITION, parse_fit_mbr}, + {0, NULL}, + }; + +--- a/include/linux/msdos_partition.h ++++ b/include/linux/msdos_partition.h +@@ -31,6 +31,7 @@ enum msdos_sys_ind { + LINUX_LVM_PARTITION = 0x8e, + LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */ + ++ FIT_PARTITION = 0x2e, /* U-Boot uImage.FIT */ + SOLARIS_X86_PARTITION = 0x82, /* also Linux swap partitions */ + NEW_SOLARIS_X86_PARTITION = 0xbf, + diff --git a/root/target/linux/generic/hack-5.15/420-mtd-set-rootfs-to-be-root-dev.patch b/root/target/linux/generic/hack-5.15/420-mtd-set-rootfs-to-be-root-dev.patch index 91a91b36..aa1d4df0 100755 --- a/root/target/linux/generic/hack-5.15/420-mtd-set-rootfs-to-be-root-dev.patch +++ b/root/target/linux/generic/hack-5.15/420-mtd-set-rootfs-to-be-root-dev.patch @@ -20,7 +20,7 @@ Signed-off-by: Gabor Juhos #include #include -@@ -694,6 +695,19 @@ int add_mtd_device(struct mtd_info *mtd) +@@ -696,6 +697,19 @@ int add_mtd_device(struct mtd_info *mtd) of this try_ nonsense, and no bitching about it either. 
:) */ __module_get(THIS_MODULE); diff --git a/root/target/linux/generic/hack-5.15/640-bridge-only-accept-EAP-locally.patch b/root/target/linux/generic/hack-5.15/640-bridge-only-accept-EAP-locally.patch index 29a4f7f3..15c1e342 100755 --- a/root/target/linux/generic/hack-5.15/640-bridge-only-accept-EAP-locally.patch +++ b/root/target/linux/generic/hack-5.15/640-bridge-only-accept-EAP-locally.patch @@ -12,7 +12,7 @@ Signed-off-by: Etienne Champetier --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c -@@ -103,10 +103,14 @@ int br_handle_frame_finish(struct net *n +@@ -108,10 +108,14 @@ int br_handle_frame_finish(struct net *n } } @@ -30,7 +30,7 @@ Signed-off-by: Etienne Champetier if (IS_ENABLED(CONFIG_INET) && --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h -@@ -402,6 +402,8 @@ struct net_bridge { +@@ -468,6 +468,8 @@ struct net_bridge { u16 group_fwd_mask; u16 group_fwd_mask_required; @@ -39,3 +39,45 @@ Signed-off-by: Etienne Champetier /* STP */ bridge_id designated_root; bridge_id bridge_id; +--- a/net/bridge/br_sysfs_br.c ++++ b/net/bridge/br_sysfs_br.c +@@ -197,6 +197,31 @@ static ssize_t group_fwd_mask_store(stru + } + static DEVICE_ATTR_RW(group_fwd_mask); + ++static ssize_t disable_eap_hack_show(struct device *d, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct net_bridge *br = to_bridge(d); ++ return sprintf(buf, "%u\n", br->disable_eap_hack); ++} ++ ++static int set_disable_eap_hack(struct net_bridge *br, unsigned long val, ++ struct netlink_ext_ack *extack) ++{ ++ br->disable_eap_hack = !!val; ++ ++ return 0; ++} ++ ++static ssize_t disable_eap_hack_store(struct device *d, ++ struct device_attribute *attr, ++ const char *buf, ++ size_t len) ++{ ++ return store_bridge_parm(d, buf, len, set_disable_eap_hack); ++} ++static DEVICE_ATTR_RW(disable_eap_hack); ++ + static ssize_t priority_show(struct device *d, struct device_attribute *attr, + char *buf) + { +@@ -937,6 +962,7 @@ static struct attribute *bridge_attrs[] + &dev_attr_ageing_time.attr, + &dev_attr_stp_state.attr, + &dev_attr_group_fwd_mask.attr, ++ &dev_attr_disable_eap_hack.attr, + &dev_attr_priority.attr, + &dev_attr_bridge_id.attr, + &dev_attr_root_id.attr, diff --git a/root/target/linux/generic/hack-5.15/650-netfilter-add-xt_FLOWOFFLOAD-target.patch b/root/target/linux/generic/hack-5.15/650-netfilter-add-xt_FLOWOFFLOAD-target.patch index c303114c..b48f981f 100755 --- a/root/target/linux/generic/hack-5.15/650-netfilter-add-xt_FLOWOFFLOAD-target.patch +++ b/root/target/linux/generic/hack-5.15/650-netfilter-add-xt_FLOWOFFLOAD-target.patch @@ -98,7 +98,7 @@ Signed-off-by: Felix Fietkau obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o --- /dev/null +++ b/net/netfilter/xt_FLOWOFFLOAD.c -@@ -0,0 +1,658 @@ +@@ -0,0 +1,656 @@ +/* + * Copyright (C) 2018-2021 Felix Fietkau + * @@ -278,8 +278,6 @@ Signed-off-by: Felix Fietkau + hook->used = true; + } + spin_unlock_bh(&hooks_lock); -+ -+ cond_resched(); +} + +static void @@ -767,7 +765,7 @@ Signed-off-by: Felix Fietkau #include #include #include -@@ -407,8 +406,7 @@ flow_offload_lookup(struct nf_flowtable +@@ -397,8 +396,7 @@ flow_offload_lookup(struct nf_flowtable } EXPORT_SYMBOL_GPL(flow_offload_lookup); @@ -777,7 +775,7 @@ Signed-off-by: Felix Fietkau void (*iter)(struct flow_offload *flow, void *data), void *data) { -@@ -440,6 +438,7 @@ nf_flow_table_iterate(struct nf_flowtabl +@@ -430,6 +428,7 @@ nf_flow_table_iterate(struct nf_flowtabl return err; } diff --git a/root/target/linux/generic/hack-5.15/651-wireless_mesh_header.patch 
b/root/target/linux/generic/hack-5.15/651-wireless_mesh_header.patch index 0639ad4e..12a031ec 100755 --- a/root/target/linux/generic/hack-5.15/651-wireless_mesh_header.patch +++ b/root/target/linux/generic/hack-5.15/651-wireless_mesh_header.patch @@ -11,7 +11,7 @@ Signed-off-by: Imre Kaloz --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h -@@ -144,8 +144,8 @@ static inline bool dev_xmit_complete(int +@@ -145,8 +145,8 @@ static inline bool dev_xmit_complete(int #if defined(CONFIG_HYPERV_NET) # define LL_MAX_HEADER 128 diff --git a/root/target/linux/generic/hack-5.15/661-use_fq_codel_by_default.patch b/root/target/linux/generic/hack-5.15/661-use_fq_codel_by_default.patch index c4168e2a..35dbe426 100755 --- a/root/target/linux/generic/hack-5.15/661-use_fq_codel_by_default.patch +++ b/root/target/linux/generic/hack-5.15/661-use_fq_codel_by_default.patch @@ -14,7 +14,7 @@ Signed-off-by: Felix Fietkau --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h -@@ -624,12 +624,13 @@ extern struct Qdisc_ops noop_qdisc_ops; +@@ -626,12 +626,13 @@ extern struct Qdisc_ops noop_qdisc_ops; extern struct Qdisc_ops pfifo_fast_ops; extern struct Qdisc_ops mq_qdisc_ops; extern struct Qdisc_ops noqueue_qdisc_ops; diff --git a/root/target/linux/generic/hack-5.15/710-net-dsa-mv88e6xxx-default-VID-1.patch b/root/target/linux/generic/hack-5.15/710-net-dsa-mv88e6xxx-default-VID-1.patch index 3c5d1b1d..d0cefbfb 100755 --- a/root/target/linux/generic/hack-5.15/710-net-dsa-mv88e6xxx-default-VID-1.patch +++ b/root/target/linux/generic/hack-5.15/710-net-dsa-mv88e6xxx-default-VID-1.patch @@ -1,6 +1,6 @@ --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c -@@ -2225,6 +2225,7 @@ static int mv88e6xxx_port_fdb_add(struct +@@ -2317,6 +2317,7 @@ static int mv88e6xxx_port_fdb_add(struct struct mv88e6xxx_chip *chip = ds->priv; int err; @@ -8,7 +8,7 @@ mv88e6xxx_reg_lock(chip); err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid, MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC); -@@ -2239,6 +2240,7 @@ static int mv88e6xxx_port_fdb_del(struct +@@ -2331,6 +2332,7 @@ static int mv88e6xxx_port_fdb_del(struct struct mv88e6xxx_chip *chip = ds->priv; int err; diff --git a/root/target/linux/generic/hack-5.15/711-net-dsa-mv88e6xxx-disable-ATU-violation.patch b/root/target/linux/generic/hack-5.15/711-net-dsa-mv88e6xxx-disable-ATU-violation.patch index 95b3894b..b94851cf 100755 --- a/root/target/linux/generic/hack-5.15/711-net-dsa-mv88e6xxx-disable-ATU-violation.patch +++ b/root/target/linux/generic/hack-5.15/711-net-dsa-mv88e6xxx-disable-ATU-violation.patch @@ -1,6 +1,6 @@ --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c -@@ -2817,6 +2817,9 @@ static int mv88e6xxx_setup_port(struct m +@@ -2979,6 +2979,9 @@ static int mv88e6xxx_setup_port(struct m else reg = 1 << port; diff --git a/root/target/linux/generic/hack-5.15/720-net-phy-add-aqr-phys.patch b/root/target/linux/generic/hack-5.15/720-net-phy-add-aqr-phys.patch new file mode 100755 index 00000000..59124990 --- /dev/null +++ b/root/target/linux/generic/hack-5.15/720-net-phy-add-aqr-phys.patch @@ -0,0 +1,142 @@ +From: Birger Koblitz +Date: Sun, 5 Sep 2021 15:13:10 +0200 +Subject: [PATCH] kernel: Add AQR113C and AQR813 support + +This hack adds support for the Aquantia 4th generation, 10GBit +PHYs AQR113C and AQR813. 
+ +Signed-off-by: Birger Koblitz + +--- a/drivers/net/phy/aquantia_main.c ++++ b/drivers/net/phy/aquantia_main.c +@@ -20,8 +20,10 @@ + #define PHY_ID_AQR105 0x03a1b4a2 + #define PHY_ID_AQR106 0x03a1b4d0 + #define PHY_ID_AQR107 0x03a1b4e0 ++#define PHY_ID_AQR113C 0x31c31c12 + #define PHY_ID_AQCS109 0x03a1b5c2 + #define PHY_ID_AQR405 0x03a1b4b0 ++#define PHY_ID_AQR813 0x31c31cb2 + + #define MDIO_PHYXS_VEND_IF_STATUS 0xe812 + #define MDIO_PHYXS_VEND_IF_STATUS_TYPE_MASK GENMASK(7, 3) +@@ -359,6 +361,49 @@ static int aqr107_read_rate(struct phy_d + return 0; + } + ++static int aqr113c_read_status(struct phy_device *phydev) ++{ ++ int val, ret; ++ ++ ret = aqr_read_status(phydev); ++ if (ret) ++ return ret; ++ ++ if (!phydev->link || phydev->autoneg == AUTONEG_DISABLE) ++ return 0; ++ ++ // On AQR113C, the speed returned by aqr_read_status is wrong ++ aqr107_read_rate(phydev); ++ ++ val = phy_read_mmd(phydev, MDIO_MMD_PHYXS, MDIO_PHYXS_VEND_IF_STATUS); ++ if (val < 0) ++ return val; ++ ++ switch (FIELD_GET(MDIO_PHYXS_VEND_IF_STATUS_TYPE_MASK, val)) { ++ case MDIO_PHYXS_VEND_IF_STATUS_TYPE_KR: ++ phydev->interface = PHY_INTERFACE_MODE_10GKR; ++ break; ++ case MDIO_PHYXS_VEND_IF_STATUS_TYPE_XFI: ++ phydev->interface = PHY_INTERFACE_MODE_10GBASER; ++ break; ++ case MDIO_PHYXS_VEND_IF_STATUS_TYPE_USXGMII: ++ phydev->interface = PHY_INTERFACE_MODE_USXGMII; ++ break; ++ case MDIO_PHYXS_VEND_IF_STATUS_TYPE_SGMII: ++ phydev->interface = PHY_INTERFACE_MODE_SGMII; ++ break; ++ case MDIO_PHYXS_VEND_IF_STATUS_TYPE_OCSGMII: ++ phydev->interface = PHY_INTERFACE_MODE_2500BASEX; ++ break; ++ default: ++ phydev->interface = PHY_INTERFACE_MODE_NA; ++ break; ++ } ++ ++ /* Read downshifted rate from vendor register */ ++ return aqr107_read_rate(phydev); ++} ++ + static int aqr107_read_status(struct phy_device *phydev) + { + int val, ret; +@@ -489,7 +534,7 @@ static void aqr107_chip_info(struct phy_ + build_id = FIELD_GET(VEND1_GLOBAL_RSVD_STAT1_FW_BUILD_ID, val); + prov_id = FIELD_GET(VEND1_GLOBAL_RSVD_STAT1_PROV_ID, val); + +- phydev_dbg(phydev, "FW %u.%u, Build %u, Provisioning %u\n", ++ phydev_info(phydev, "FW %u.%u, Build %u, Provisioning %u\n", + fw_major, fw_minor, build_id, prov_id); + } + +@@ -661,6 +706,24 @@ static struct phy_driver aqr_driver[] = + .link_change_notify = aqr107_link_change_notify, + }, + { ++ PHY_ID_MATCH_MODEL(PHY_ID_AQR113C), ++ .name = "Aquantia AQR113C", ++ .probe = aqr107_probe, ++ .config_init = aqr107_config_init, ++ .config_aneg = aqr_config_aneg, ++ .config_intr = aqr_config_intr, ++ .handle_interrupt = aqr_handle_interrupt, ++ .read_status = aqr113c_read_status, ++ .get_tunable = aqr107_get_tunable, ++ .set_tunable = aqr107_set_tunable, ++ .suspend = aqr107_suspend, ++ .resume = aqr107_resume, ++ .get_sset_count = aqr107_get_sset_count, ++ .get_strings = aqr107_get_strings, ++ .get_stats = aqr107_get_stats, ++ .link_change_notify = aqr107_link_change_notify, ++}, ++{ + PHY_ID_MATCH_MODEL(PHY_ID_AQCS109), + .name = "Aquantia AQCS109", + .probe = aqr107_probe, +@@ -686,6 +749,24 @@ static struct phy_driver aqr_driver[] = + .handle_interrupt = aqr_handle_interrupt, + .read_status = aqr_read_status, + }, ++{ ++ PHY_ID_MATCH_MODEL(PHY_ID_AQR813), ++ .name = "Aquantia AQR813", ++ .probe = aqr107_probe, ++ .config_init = aqr107_config_init, ++ .config_aneg = aqr_config_aneg, ++ .config_intr = aqr_config_intr, ++ .handle_interrupt = aqr_handle_interrupt, ++ .read_status = aqr113c_read_status, ++ .get_tunable = aqr107_get_tunable, ++ .set_tunable = aqr107_set_tunable, ++ .suspend = 
aqr107_suspend, ++ .resume = aqr107_resume, ++ .get_sset_count = aqr107_get_sset_count, ++ .get_strings = aqr107_get_strings, ++ .get_stats = aqr107_get_stats, ++ .link_change_notify = aqr107_link_change_notify, ++}, + }; + + module_phy_driver(aqr_driver); +@@ -696,8 +777,10 @@ static struct mdio_device_id __maybe_unu + { PHY_ID_MATCH_MODEL(PHY_ID_AQR105) }, + { PHY_ID_MATCH_MODEL(PHY_ID_AQR106) }, + { PHY_ID_MATCH_MODEL(PHY_ID_AQR107) }, ++ { PHY_ID_MATCH_MODEL(PHY_ID_AQR113C) }, + { PHY_ID_MATCH_MODEL(PHY_ID_AQCS109) }, + { PHY_ID_MATCH_MODEL(PHY_ID_AQR405) }, ++ { PHY_ID_MATCH_MODEL(PHY_ID_AQR813) }, + { } + }; + diff --git a/root/target/linux/generic/hack-5.15/721-net-add-packet-mangeling.patch b/root/target/linux/generic/hack-5.15/721-net-add-packet-mangeling.patch new file mode 100755 index 00000000..cea52fdc --- /dev/null +++ b/root/target/linux/generic/hack-5.15/721-net-add-packet-mangeling.patch @@ -0,0 +1,178 @@ +From ffe387740bbe88dd88bbe04d6375902708003d6e Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Fri, 7 Jul 2017 17:25:00 +0200 +Subject: net: add packet mangeling + +ar8216 switches have a hardware bug, which renders normal 802.1q support +unusable. Packet mangling is required to fix up the vlan for incoming +packets. + +Signed-off-by: Felix Fietkau +--- + include/linux/netdevice.h | 11 +++++++++++ + include/linux/skbuff.h | 14 ++++---------- + net/Kconfig | 6 ++++++ + net/core/dev.c | 20 +++++++++++++++----- + net/core/skbuff.c | 17 +++++++++++++++++ + net/ethernet/eth.c | 6 ++++++ + 6 files changed, 59 insertions(+), 15 deletions(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -1648,6 +1648,10 @@ enum netdev_priv_flags { + IFF_TX_SKB_NO_LINEAR = 1<<31, + }; + ++enum netdev_extra_priv_flags { ++ IFF_NO_IP_ALIGN = 1<<0, ++}; ++ + #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN + #define IFF_EBRIDGE IFF_EBRIDGE + #define IFF_BONDING IFF_BONDING +@@ -1680,6 +1684,7 @@ enum netdev_priv_flags { + #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER + #define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK + #define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR ++#define IFF_NO_IP_ALIGN IFF_NO_IP_ALIGN + + /* Specifies the type of the struct net_device::ml_priv pointer */ + enum netdev_ml_priv_type { +@@ -1981,6 +1986,7 @@ struct net_device { + /* Read-mostly cache-line for fast-path access */ + unsigned int flags; + unsigned int priv_flags; ++ unsigned int extra_priv_flags; + const struct net_device_ops *netdev_ops; + int ifindex; + unsigned short gflags; +@@ -2041,6 +2047,11 @@ struct net_device { + const struct tlsdev_ops *tlsdev_ops; + #endif + ++#ifdef CONFIG_ETHERNET_PACKET_MANGLE ++ void (*eth_mangle_rx)(struct net_device *dev, struct sk_buff *skb); ++ struct sk_buff *(*eth_mangle_tx)(struct net_device *dev, struct sk_buff *skb); ++#endif ++ + const struct header_ops *header_ops; + + unsigned char operstate; +@@ -2115,6 +2126,10 @@ struct net_device { + struct mctp_dev __rcu *mctp_ptr; + #endif + ++#ifdef CONFIG_ETHERNET_PACKET_MANGLE ++ void *phy_ptr; /* PHY device specific data */ ++#endif ++ + /* + * Cache lines mostly used on receive path (including eth_type_trans()) + */ +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -2758,6 +2758,10 @@ static inline int pskb_trim(struct sk_bu + return (len < skb->len) ? 
__pskb_trim(skb, len) : 0; + } + ++extern struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev, ++ unsigned int length, gfp_t gfp); ++ ++ + /** + * pskb_trim_unique - remove end from a paged unique (not cloned) buffer + * @skb: buffer to alter +@@ -2908,16 +2912,6 @@ static inline struct sk_buff *dev_alloc_ + } + + +-static inline struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev, +- unsigned int length, gfp_t gfp) +-{ +- struct sk_buff *skb = __netdev_alloc_skb(dev, length + NET_IP_ALIGN, gfp); +- +- if (NET_IP_ALIGN && skb) +- skb_reserve(skb, NET_IP_ALIGN); +- return skb; +-} +- + static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, + unsigned int length) + { +--- a/net/Kconfig ++++ b/net/Kconfig +@@ -26,6 +26,12 @@ menuconfig NET + + if NET + ++config ETHERNET_PACKET_MANGLE ++ bool ++ help ++ This option can be selected by phy drivers that need to mangle ++ packets going in or out of an ethernet device. ++ + config WANT_COMPAT_NETLINK_MESSAGES + bool + help +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3578,6 +3578,11 @@ static int xmit_one(struct sk_buff *skb, + if (dev_nit_active(dev)) + dev_queue_xmit_nit(skb, dev); + ++#ifdef CONFIG_ETHERNET_PACKET_MANGLE ++ if (dev->eth_mangle_tx && !(skb = dev->eth_mangle_tx(dev, skb))) ++ return NETDEV_TX_OK; ++#endif ++ + len = skb->len; + PRANDOM_ADD_NOISE(skb, dev, txq, len + jiffies); + trace_net_dev_start_xmit(skb, dev); +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -61,6 +61,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -602,6 +603,22 @@ skb_fail: + } + EXPORT_SYMBOL(__napi_alloc_skb); + ++struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev, ++ unsigned int length, gfp_t gfp) ++{ ++ struct sk_buff *skb = __netdev_alloc_skb(dev, length + NET_IP_ALIGN, gfp); ++ ++#ifdef CONFIG_ETHERNET_PACKET_MANGLE ++ if (dev && (dev->extra_priv_flags & IFF_NO_IP_ALIGN)) ++ return skb; ++#endif ++ ++ if (NET_IP_ALIGN && skb) ++ skb_reserve(skb, NET_IP_ALIGN); ++ return skb; ++} ++EXPORT_SYMBOL(__netdev_alloc_skb_ip_align); ++ + void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, + int size, unsigned int truesize) + { +--- a/net/ethernet/eth.c ++++ b/net/ethernet/eth.c +@@ -170,6 +170,12 @@ __be16 eth_type_trans(struct sk_buff *sk + const struct ethhdr *eth; + + skb->dev = dev; ++ ++#ifdef CONFIG_ETHERNET_PACKET_MANGLE ++ if (dev->eth_mangle_rx) ++ dev->eth_mangle_rx(dev, skb); ++#endif ++ + skb_reset_mac_header(skb); + + eth = (struct ethhdr *)skb->data; diff --git a/root/target/linux/generic/hack-5.15/760-net-usb-r8152-add-LED-configuration-from-OF.patch b/root/target/linux/generic/hack-5.15/760-net-usb-r8152-add-LED-configuration-from-OF.patch new file mode 100755 index 00000000..1b854608 --- /dev/null +++ b/root/target/linux/generic/hack-5.15/760-net-usb-r8152-add-LED-configuration-from-OF.patch @@ -0,0 +1,74 @@ +From 82985725e071f2a5735052f18e109a32aeac3a0b Mon Sep 17 00:00:00 2001 +From: David Bauer +Date: Sun, 26 Jul 2020 02:38:31 +0200 +Subject: [PATCH] net: usb: r8152: add LED configuration from OF + +This adds the ability to configure the LED configuration register using +OF. This way, the correct value for board specific LED configuration can +be determined. 
+ +Signed-off-by: David Bauer +--- + drivers/net/usb/r8152.c | 23 +++++++++++++++++++++++ + 1 file changed, 23 insertions(+) + +--- a/drivers/net/usb/r8152.c ++++ b/drivers/net/usb/r8152.c +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -6822,6 +6823,22 @@ static void rtl_tally_reset(struct r8152 + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RSTTALLY, ocp_data); + } + ++static int r8152_led_configuration(struct r8152 *tp) ++{ ++ u32 led_data; ++ int ret; ++ ++ ret = of_property_read_u32(tp->udev->dev.of_node, "realtek,led-data", ++ &led_data); ++ ++ if (ret) ++ return ret; ++ ++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_LEDSEL, led_data); ++ ++ return 0; ++} ++ + static void r8152b_init(struct r8152 *tp) + { + u32 ocp_data; +@@ -6863,6 +6880,8 @@ static void r8152b_init(struct r8152 *tp + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL); + ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN); + ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data); ++ ++ r8152_led_configuration(tp); + } + + static void r8153_init(struct r8152 *tp) +@@ -7003,6 +7022,8 @@ static void r8153_init(struct r8152 *tp) + tp->coalesce = COALESCE_SLOW; + break; + } ++ ++ r8152_led_configuration(tp); + } + + static void r8153b_init(struct r8152 *tp) +@@ -7085,6 +7106,8 @@ static void r8153b_init(struct r8152 *tp + rtl_tally_reset(tp); + + tp->coalesce = 15000; /* 15 us */ ++ ++ r8152_led_configuration(tp); + } + + static void r8153c_init(struct r8152 *tp) diff --git a/root/target/linux/generic/hack-5.15/761-dt-bindings-net-add-RTL8152-binding-documentation.patch b/root/target/linux/generic/hack-5.15/761-dt-bindings-net-add-RTL8152-binding-documentation.patch new file mode 100755 index 00000000..be262b99 --- /dev/null +++ b/root/target/linux/generic/hack-5.15/761-dt-bindings-net-add-RTL8152-binding-documentation.patch @@ -0,0 +1,54 @@ +From 3ee05f4aa64fc86af3be5bc176ba5808de9260a7 Mon Sep 17 00:00:00 2001 +From: David Bauer +Date: Sun, 26 Jul 2020 15:30:33 +0200 +Subject: [PATCH] dt-bindings: net: add RTL8152 binding documentation + +Add binding documentation for the Realtek RTL8152 / RTL8153 USB ethernet +adapters. + +Signed-off-by: David Bauer +--- + .../bindings/net/realtek,rtl8152.yaml | 36 +++++++++++++++++++ + 1 file changed, 36 insertions(+) + create mode 100644 Documentation/devicetree/bindings/net/realtek,rtl8152.yaml + +--- /dev/null ++++ b/Documentation/devicetree/bindings/net/realtek,rtl8152.yaml +@@ -0,0 +1,36 @@ ++# SPDX-License-Identifier: GPL-2.0 ++%YAML 1.2 ++--- ++$id: http://devicetree.org/schemas/net/realtek,rtl8152.yaml# ++$schema: http://devicetree.org/meta-schemas/core.yaml# ++ ++title: Realtek RTL8152/RTL8153 series USB ethernet ++ ++maintainers: ++ - David Bauer ++ ++properties: ++ compatible: ++ oneOf: ++ - items: ++ - enum: ++ - realtek,rtl8152 ++ - realtek,rtl8153 ++ ++ reg: ++ description: The device number on the USB bus ++ ++ realtek,led-data: ++ description: Value to be written to the LED configuration register. 
++ ++required: ++ - compatible ++ - reg ++ ++examples: ++ - | ++ usb-eth@2 { ++ compatible = "realtek,rtl8153"; ++ reg = <2>; ++ realtek,led-data = <0x87>; ++ }; +\ No newline at end of file diff --git a/root/target/linux/generic/hack-5.15/773-bgmac-add-srab-switch.patch b/root/target/linux/generic/hack-5.15/773-bgmac-add-srab-switch.patch index cc6eddbf..1e4fc446 100755 --- a/root/target/linux/generic/hack-5.15/773-bgmac-add-srab-switch.patch +++ b/root/target/linux/generic/hack-5.15/773-bgmac-add-srab-switch.patch @@ -14,7 +14,7 @@ Signed-off-by: Hauke Mehrtens --- a/drivers/net/ethernet/broadcom/bgmac-bcma.c +++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c -@@ -268,6 +268,7 @@ static int bgmac_probe(struct bcma_devic +@@ -280,6 +280,7 @@ static int bgmac_probe(struct bcma_devic bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST; bgmac->feature_flags |= BGMAC_FEAT_NO_RESET; bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500; @@ -55,7 +55,7 @@ Signed-off-by: Hauke Mehrtens net_dev->max_mtu = BGMAC_RX_MAX_FRAME_SIZE - ETH_FCS_LEN; + if ((bgmac->feature_flags & BGMAC_FEAT_SRAB) && !bgmac_b53_pdata.regs) { -+ bgmac_b53_pdata.regs = ioremap_nocache(0x18007000, 0x1000); ++ bgmac_b53_pdata.regs = ioremap(0x18007000, 0x1000); + + err = platform_device_register(&bgmac_b53_dev); + if (!err) diff --git a/root/target/linux/generic/hack-5.15/800-GPIO-add-named-gpio-exports.patch b/root/target/linux/generic/hack-5.15/800-GPIO-add-named-gpio-exports.patch index 76f89acd..40c3309f 100755 --- a/root/target/linux/generic/hack-5.15/800-GPIO-add-named-gpio-exports.patch +++ b/root/target/linux/generic/hack-5.15/800-GPIO-add-named-gpio-exports.patch @@ -15,9 +15,9 @@ Signed-off-by: John Crispin #include "gpiolib.h" #include "gpiolib-of.h" -@@ -1039,3 +1041,72 @@ void of_gpiochip_remove(struct gpio_chip - { - of_node_put(chip->of_node); +@@ -1052,3 +1054,72 @@ void of_gpio_dev_init(struct gpio_chip * + else + gc->of_node = gdev->dev.of_node; } + +#ifdef CONFIG_GPIO_SYSFS @@ -129,7 +129,7 @@ Signed-off-by: John Crispin { --- a/drivers/gpio/gpiolib-sysfs.c +++ b/drivers/gpio/gpiolib-sysfs.c -@@ -572,7 +572,7 @@ static struct class gpio_class = { +@@ -564,7 +564,7 @@ static struct class gpio_class = { * * Returns zero on success, else an error. 
*/ @@ -138,7 +138,7 @@ Signed-off-by: John Crispin { struct gpio_chip *chip; struct gpio_device *gdev; -@@ -634,6 +634,8 @@ int gpiod_export(struct gpio_desc *desc, +@@ -626,6 +626,8 @@ int gpiod_export(struct gpio_desc *desc, offset = gpio_chip_hwgpio(desc); if (chip->names && chip->names[offset]) ioname = chip->names[offset]; @@ -147,7 +147,7 @@ Signed-off-by: John Crispin dev = device_create_with_groups(&gpio_class, &gdev->dev, MKDEV(0, 0), data, gpio_groups, -@@ -655,6 +657,12 @@ err_unlock: +@@ -647,6 +649,12 @@ err_unlock: gpiod_dbg(desc, "%s: status %d\n", __func__, status); return status; } diff --git a/root/target/linux/generic/hack-5.15/901-debloat_sock_diag.patch b/root/target/linux/generic/hack-5.15/901-debloat_sock_diag.patch index 34e73831..44f0e617 100755 --- a/root/target/linux/generic/hack-5.15/901-debloat_sock_diag.patch +++ b/root/target/linux/generic/hack-5.15/901-debloat_sock_diag.patch @@ -16,7 +16,7 @@ Signed-off-by: Felix Fietkau --- a/net/Kconfig +++ b/net/Kconfig -@@ -98,6 +98,9 @@ source "net/mptcp/Kconfig" +@@ -104,6 +104,9 @@ source "net/mptcp/Kconfig" endif # if INET @@ -58,7 +58,7 @@ Signed-off-by: Felix Fietkau static void sock_inuse_add(struct net *net, int val); -@@ -544,6 +546,18 @@ discard_and_relse: +@@ -545,6 +547,18 @@ discard_and_relse: } EXPORT_SYMBOL(__sk_receive_skb); @@ -77,7 +77,7 @@ Signed-off-by: Felix Fietkau INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *, u32)); INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, -@@ -1967,9 +1981,11 @@ static void __sk_free(struct sock *sk) +@@ -1981,9 +1995,11 @@ static void __sk_free(struct sock *sk) if (likely(sk->sk_net_refcnt)) sock_inuse_add(sock_net(sk), -1); @@ -152,7 +152,7 @@ Signed-off-by: Felix Fietkau Support for PF_PACKET sockets monitoring interface used by the ss tool. 
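
The sock_diag debloat above is representative of how these debloat hacks are structured: a facility that net/core calls directly in stock kernels is moved behind a Kconfig symbol and a thin indirection, so the bulk of the code can be built as a module or configured out entirely. A minimal sketch of that indirection pattern, with invented names (this illustrates the general technique only; these are not the symbols the patch itself adds):

    #include <linux/module.h>

    struct sock;

    /* Hook slot compiled into the core; stays NULL until the diag module loads. */
    static void (*diag_destroy_hook)(struct sock *sk);

    /* Registration entry point the optional module calls from its init function. */
    void diag_register_destroy_hook(void (*fn)(struct sock *sk))
    {
    	WRITE_ONCE(diag_destroy_hook, fn);
    }
    EXPORT_SYMBOL_GPL(diag_register_destroy_hook);

    /* Core call site: a cheap NULL check replaces the direct call, so the
     * implementation can be absent without leaving unresolved references. */
    static inline void diag_sock_destroyed(struct sock *sk)
    {
    	void (*fn)(struct sock *sk) = READ_ONCE(diag_destroy_hook);

    	if (fn)
    		fn(sk);
    }

Once the only coupling is a hook of this kind, Kconfig entries like the ones touched in this patch can expose the diag interfaces as optional without breaking the built-in code.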
--- a/net/unix/Kconfig +++ b/net/unix/Kconfig -@@ -28,6 +28,7 @@ config UNIX_SCM +@@ -33,6 +33,7 @@ config AF_UNIX_OOB config UNIX_DIAG tristate "UNIX: socket monitoring interface" depends on UNIX diff --git a/root/target/linux/generic/hack-5.15/902-debloat_proc.patch b/root/target/linux/generic/hack-5.15/902-debloat_proc.patch index 349a2c02..0b2d5e31 100755 --- a/root/target/linux/generic/hack-5.15/902-debloat_proc.patch +++ b/root/target/linux/generic/hack-5.15/902-debloat_proc.patch @@ -29,7 +29,7 @@ Signed-off-by: Felix Fietkau --- a/fs/locks.c +++ b/fs/locks.c -@@ -3044,6 +3044,8 @@ static const struct seq_operations locks +@@ -2929,6 +2929,8 @@ static const struct seq_operations locks static int __init proc_locks_init(void) { @@ -158,7 +158,7 @@ Signed-off-by: Felix Fietkau IPC_SEM_IDS, sysvipc_sem_proc_show); --- a/ipc/shm.c +++ b/ipc/shm.c -@@ -144,6 +144,8 @@ pure_initcall(ipc_ns_init); +@@ -154,6 +154,8 @@ pure_initcall(ipc_ns_init); void __init shm_init(void) { @@ -235,7 +235,7 @@ Signed-off-by: Felix Fietkau if (!pe) --- a/mm/vmalloc.c +++ b/mm/vmalloc.c -@@ -3899,6 +3899,8 @@ static const struct seq_operations vmall +@@ -3961,6 +3961,8 @@ static const struct seq_operations vmall static int __init proc_vmalloc_init(void) { @@ -246,7 +246,7 @@ Signed-off-by: Felix Fietkau &vmalloc_op, --- a/mm/vmstat.c +++ b/mm/vmstat.c -@@ -2044,10 +2044,12 @@ void __init init_mm_internals(void) +@@ -2083,10 +2083,12 @@ void __init init_mm_internals(void) start_shepherd_timer(); #endif #ifdef CONFIG_PROC_FS @@ -330,7 +330,7 @@ Signed-off-by: Felix Fietkau --- a/net/core/sock.c +++ b/net/core/sock.c -@@ -3839,6 +3839,8 @@ static __net_initdata struct pernet_oper +@@ -3853,6 +3853,8 @@ static __net_initdata struct pernet_oper static int __init proto_init(void) { @@ -396,7 +396,7 @@ Signed-off-by: Felix Fietkau } --- a/net/ipv4/route.c +++ b/net/ipv4/route.c -@@ -386,6 +386,9 @@ static struct pernet_operations ip_rt_pr +@@ -387,6 +387,9 @@ static struct pernet_operations ip_rt_pr static int __init ip_rt_proc_init(void) { diff --git a/root/target/linux/generic/hack-5.15/904-debloat_dma_buf.patch b/root/target/linux/generic/hack-5.15/904-debloat_dma_buf.patch new file mode 100755 index 00000000..fc7cd209 --- /dev/null +++ b/root/target/linux/generic/hack-5.15/904-debloat_dma_buf.patch @@ -0,0 +1,92 @@ +From e3692cb2fcd5ba1244512a0f43b8118f65f1c375 Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Sat, 8 Jul 2017 08:20:43 +0200 +Subject: debloat: dmabuf + +Signed-off-by: Felix Fietkau +--- + drivers/base/Kconfig | 2 +- + drivers/dma-buf/Makefile | 10 +++++++--- + drivers/dma-buf/dma-buf.c | 4 +++- + kernel/sched/core.c | 1 + + 4 files changed, 12 insertions(+), 5 deletions(-) + +--- a/drivers/base/Kconfig ++++ b/drivers/base/Kconfig +@@ -187,7 +187,7 @@ config SOC_BUS + source "drivers/base/regmap/Kconfig" + + config DMA_SHARED_BUFFER +- bool ++ tristate + default n + select IRQ_WORK + help +--- a/drivers/dma-buf/heaps/Makefile ++++ b/drivers/dma-buf/heaps/Makefile +@@ -1,3 +1,3 @@ + # SPDX-License-Identifier: GPL-2.0 +-obj-$(CONFIG_DMABUF_HEAPS_SYSTEM) += system_heap.o +-obj-$(CONFIG_DMABUF_HEAPS_CMA) += cma_heap.o ++dma-buf-objs-$(CONFIG_DMABUF_HEAPS_SYSTEM) += system_heap.o ++dma-buf-objs-$(CONFIG_DMABUF_HEAPS_CMA) += cma_heap.o +--- a/drivers/dma-buf/Makefile ++++ b/drivers/dma-buf/Makefile +@@ -1,16 +1,20 @@ + # SPDX-License-Identifier: GPL-2.0-only +-obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \ ++obj-$(CONFIG_DMA_SHARED_BUFFER) := dma-shared-buffer.o ++ 
++dma-buf-objs-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
+	 dma-resv.o seqno-fence.o
+-obj-$(CONFIG_DMABUF_HEAPS)	+= dma-heap.o
+-obj-$(CONFIG_DMABUF_HEAPS)	+= heaps/
+-obj-$(CONFIG_SYNC_FILE)	+= sync_file.o
+-obj-$(CONFIG_SW_SYNC)		+= sw_sync.o sync_debug.o
+-obj-$(CONFIG_UDMABUF)		+= udmabuf.o
+-obj-$(CONFIG_DMABUF_SYSFS_STATS) += dma-buf-sysfs-stats.o
++dma-buf-objs-$(CONFIG_DMABUF_HEAPS) += dma-heap.o
++obj-$(CONFIG_DMABUF_HEAPS) += heaps/
++dma-buf-objs-$(CONFIG_SYNC_FILE) += sync_file.o
++dma-buf-objs-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o
++dma-buf-objs-$(CONFIG_UDMABUF) += udmabuf.o
++dma-buf-objs-$(CONFIG_DMABUF_SYSFS_STATS) += dma-buf-sysfs-stats.o
+
+ dmabuf_selftests-y := \
+ 	selftest.o \
+ 	st-dma-fence.o \
+ 	st-dma-fence-chain.o
+
+-obj-$(CONFIG_DMABUF_SELFTESTS)	+= dmabuf_selftests.o
++dma-buf-objs-$(CONFIG_DMABUF_SELFTESTS) += dmabuf_selftests.o
++
++dma-shared-buffer-objs := $(dma-buf-objs-y)
+--- a/drivers/dma-buf/dma-buf.c
++++ b/drivers/dma-buf/dma-buf.c
+@@ -1498,4 +1498,5 @@ static void __exit dma_buf_deinit(void)
+ 	kern_unmount(dma_buf_mnt);
+ 	dma_buf_uninit_sysfs_statistics();
+ }
+-__exitcall(dma_buf_deinit);
++module_exit(dma_buf_deinit);
++MODULE_LICENSE("GPL");
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4174,6 +4174,7 @@ int wake_up_state(struct task_struct *p,
+ {
+ 	return try_to_wake_up(p, state, 0);
+ }
++EXPORT_SYMBOL_GPL(wake_up_state);
+
+ /*
+  * Perform scheduler related setup for a newly forked process p.
+--- a/fs/d_path.c
++++ b/fs/d_path.c
+@@ -316,6 +316,7 @@ char *dynamic_dname(struct dentry *dentr
+ 	buffer += buflen - sz;
+ 	return memcpy(buffer, temp, sz);
+ }
++EXPORT_SYMBOL_GPL(dynamic_dname);
+
+ char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
+ {
diff --git a/root/target/linux/generic/hack-5.4/204-module_strip.patch b/root/target/linux/generic/hack-5.4/204-module_strip.patch
new file mode 100755
index 00000000..d6e25f31
--- /dev/null
+++ b/root/target/linux/generic/hack-5.4/204-module_strip.patch
@@ -0,0 +1,220 @@
+From a779a482fb9b9f8fcdf8b2519c789b4b9bb5dd05 Mon Sep 17 00:00:00 2001
+From: Felix Fietkau
+Date: Fri, 7 Jul 2017 16:56:48 +0200
+Subject: build: add a hack for removing non-essential module info
+
+Signed-off-by: Felix Fietkau
+---
+ include/linux/module.h      | 13 ++++++++-----
+ include/linux/moduleparam.h | 15 ++++++++++++---
+ init/Kconfig                |  7 +++++++
+ kernel/module.c             |  5 ++++-
+ scripts/mod/modpost.c       | 12 ++++++++++++
+ 5 files changed, 43 insertions(+), 9 deletions(-)
+
+--- a/include/linux/module.h
++++ b/include/linux/module.h
+@@ -157,6 +157,7 @@ extern void cleanup_module(void);
+
+ /* Generic info of form tag = "info" */
+ #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info)
++#define MODULE_INFO_STRIP(tag, info) __MODULE_INFO_STRIP(tag, tag, info)
+
+ /* For userspace: you can also call me... */
+ #define MODULE_ALIAS(_alias) MODULE_INFO(alias, _alias)
+@@ -216,12 +217,12 @@ extern void cleanup_module(void);
+  * Author(s), use "Name <email>" or just "Name", for multiple
+  * authors use multiple MODULE_AUTHOR() statements/lines.
+  */
+-#define MODULE_AUTHOR(_author) MODULE_INFO(author, _author)
++#define MODULE_AUTHOR(_author) MODULE_INFO_STRIP(author, _author)
+
+ /* What your module does.
*/ +-#define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description) ++#define MODULE_DESCRIPTION(_description) MODULE_INFO_STRIP(description, _description) + +-#ifdef MODULE ++#if defined(MODULE) && !defined(CONFIG_MODULE_STRIPPED) + /* Creates an alias so file2alias.c can find device table. */ + #define MODULE_DEVICE_TABLE(type, name) \ + extern typeof(name) __mod_##type##__##name##_device_table \ +@@ -248,7 +249,9 @@ extern typeof(name) __mod_##type##__##na + */ + + #if defined(MODULE) || !defined(CONFIG_SYSFS) +-#define MODULE_VERSION(_version) MODULE_INFO(version, _version) ++#define MODULE_VERSION(_version) MODULE_INFO_STRIP(version, _version) ++#elif defined(CONFIG_MODULE_STRIPPED) ++#define MODULE_VERSION(_version) __MODULE_INFO_DISABLED(version) + #else + #define MODULE_VERSION(_version) \ + MODULE_INFO(version, _version); \ +@@ -271,7 +274,7 @@ extern typeof(name) __mod_##type##__##na + /* Optional firmware file (or files) needed by the module + * format is simply firmware file name. Multiple firmware + * files require multiple MODULE_FIRMWARE() specifiers */ +-#define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware) ++#define MODULE_FIRMWARE(_firmware) MODULE_INFO_STRIP(firmware, _firmware) + + #define MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, #ns) + +--- a/include/linux/moduleparam.h ++++ b/include/linux/moduleparam.h +@@ -20,10 +20,24 @@ + /* Chosen so that structs with an unsigned long line up. */ + #define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long)) + ++/* This struct is here for syntactic coherency, it is not used */ ++#define __MODULE_INFO_DISABLED(name) \ ++ struct __UNIQUE_ID(name) {} ++ ++#ifdef CONFIG_MODULE_STRIPPED ++#define __MODULE_INFO_STRIP(tag, name, info) __MODULE_INFO_DISABLED(name) ++#else ++#define __MODULE_INFO_STRIP(tag, name, info) __MODULE_INFO(tag, name, info) ++#endif ++ ++#ifdef MODULE + #define __MODULE_INFO(tag, name, info) \ + static const char __UNIQUE_ID(name)[] \ + __used __attribute__((section(".modinfo"), unused, aligned(1))) \ + = __MODULE_INFO_PREFIX __stringify(tag) "=" info ++#else ++#define __MODULE_INFO(tag, name, info) __MODULE_INFO_DISABLED(name) ++#endif + + #define __MODULE_PARM_TYPE(name, _type) \ + __MODULE_INFO(parmtype, name##type, #name ":" _type) +@@ -31,7 +45,7 @@ static const char __UNIQUE_ID(name)[] + /* One for each parameter, describing how to use it. Some files do + multiple of these per line, so can't just use MODULE_INFO. */ + #define MODULE_PARM_DESC(_parm, desc) \ +- __MODULE_INFO(parm, _parm, #_parm ":" desc) ++ __MODULE_INFO_STRIP(parm, _parm, #_parm ":" desc) + + struct kernel_param; + +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -2198,6 +2198,13 @@ config TRIM_UNUSED_KSYMS + + If unsure, or if you need to build out-of-tree modules, say N. + ++config MODULE_STRIPPED ++ bool "Reduce module size" ++ depends on MODULES ++ help ++ Remove module parameter descriptions, author info, version, aliases, ++ device tables, etc. 
++ + endif # MODULES + + config MODULES_TREE_LOOKUP +--- a/kernel/module.c ++++ b/kernel/module.c +@@ -1281,6 +1281,7 @@ static struct module_attribute *modinfo_ + + static const char vermagic[] = VERMAGIC_STRING; + ++#if defined(CONFIG_MODVERSIONS) || !defined(CONFIG_MODULE_STRIPPED) + static int try_to_force_load(struct module *mod, const char *reason) + { + #ifdef CONFIG_MODULE_FORCE_LOAD +@@ -1292,6 +1293,7 @@ static int try_to_force_load(struct modu + return -ENOEXEC; + #endif + } ++#endif + + #ifdef CONFIG_MODVERSIONS + +@@ -3256,9 +3258,11 @@ static int setup_load_info(struct load_i + + static int check_modinfo(struct module *mod, struct load_info *info, int flags) + { +- const char *modmagic = get_modinfo(info, "vermagic"); + int err; + ++#ifndef CONFIG_MODULE_STRIPPED ++ const char *modmagic = get_modinfo(info, "vermagic"); ++ + if (flags & MODULE_INIT_IGNORE_VERMAGIC) + modmagic = NULL; + +@@ -3279,6 +3283,7 @@ static int check_modinfo(struct module * + mod->name); + add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); + } ++#endif + + check_modinfo_retpoline(mod, info); + +--- a/scripts/mod/modpost.c ++++ b/scripts/mod/modpost.c +@@ -2056,7 +2056,9 @@ static void read_symbols(const char *mod + symname = remove_dot(info.strtab + sym->st_name); + + handle_modversions(mod, &info, sym, symname); ++#ifndef CONFIG_MODULE_STRIPPED + handle_moddevtable(mod, &info, sym, symname); ++#endif + } + + /* Apply symbol namespaces from __kstrtabns_ entries. */ +@@ -2270,8 +2272,10 @@ static void add_header(struct buffer *b, + buf_printf(b, "\n"); + buf_printf(b, "BUILD_SALT;\n"); + buf_printf(b, "\n"); ++#ifndef CONFIG_MODULE_STRIPPED + buf_printf(b, "MODULE_INFO(vermagic, VERMAGIC_STRING);\n"); + buf_printf(b, "MODULE_INFO(name, KBUILD_MODNAME);\n"); ++#endif + buf_printf(b, "\n"); + buf_printf(b, "__visible struct module __this_module\n"); + buf_printf(b, "__section(.gnu.linkonce.this_module) = {\n"); +@@ -2288,8 +2292,10 @@ static void add_header(struct buffer *b, + + static void add_intree_flag(struct buffer *b, int is_intree) + { ++#ifndef CONFIG_MODULE_STRIPPED + if (is_intree) + buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n"); ++#endif + } + + /* Cannot check for assembler */ +@@ -2302,8 +2308,10 @@ static void add_retpoline(struct buffer + + static void add_staging_flag(struct buffer *b, const char *name) + { ++#ifndef CONFIG_MODULE_STRIPPED + if (strstarts(name, "drivers/staging")) + buf_printf(b, "\nMODULE_INFO(staging, \"Y\");\n"); ++#endif + } + + /** +@@ -2387,11 +2395,13 @@ static void add_depends(struct buffer *b + + static void add_srcversion(struct buffer *b, struct module *mod) + { ++#ifndef CONFIG_MODULE_STRIPPED + if (mod->srcversion[0]) { + buf_printf(b, "\n"); + buf_printf(b, "MODULE_INFO(srcversion, \"%s\");\n", + mod->srcversion); + } ++#endif + } + + static void write_if_changed(struct buffer *b, const char *fname) +@@ -2661,7 +2671,9 @@ int main(int argc, char **argv) + add_staging_flag(&buf, mod->name); + err |= add_versions(&buf, mod); + add_depends(&buf, mod); ++#ifndef CONFIG_MODULE_STRIPPED + add_moddevtable(&buf, mod); ++#endif + add_srcversion(&buf, mod); + + sprintf(fname, "%s.mod.c", mod->name); diff --git a/root/target/linux/generic/hack-5.4/205-kconfig-exit.patch b/root/target/linux/generic/hack-5.4/205-kconfig-exit.patch new file mode 100755 index 00000000..8931ad32 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/205-kconfig-exit.patch @@ -0,0 +1,11 @@ +--- a/scripts/kconfig/conf.c ++++ b/scripts/kconfig/conf.c +@@ -212,6 +212,8 @@ static int 
conf_sym(struct menu *menu)
+ 			break;
+ 		continue;
+ 	case 0:
++		if (!sym_has_value(sym) && !tty_stdio && getenv("FAIL_ON_UNCONFIGURED"))
++			exit(1);
+ 		newval = oldval;
+ 		break;
+ 	case '?':
diff --git a/root/target/linux/generic/hack-5.4/210-darwin_scripts_include.patch b/root/target/linux/generic/hack-5.4/210-darwin_scripts_include.patch
new file mode 100755
index 00000000..be6adc0d
--- /dev/null
+++ b/root/target/linux/generic/hack-5.4/210-darwin_scripts_include.patch
@@ -0,0 +1,3053 @@
+From db7c30dcd9a0391bf13b62c9f91e144d762ef43a Mon Sep 17 00:00:00 2001
+From: Florian Fainelli
+Date: Fri, 7 Jul 2017 17:00:49 +0200
+Subject: Add an OSX specific patch to make the kernel be compiled
+
+lede-commit: 3fc2a24f0422b2f55f9ed43f116db3111f700526
+Signed-off-by: Florian Fainelli
+---
+ scripts/kconfig/Makefile | 3 +
+ scripts/mod/elf.h | 3007 ++++++++++++++++++++++++++++++++++++++++++++
+ scripts/mod/mk_elfconfig.c | 4 +
+ scripts/mod/modpost.h | 4 +
+ 4 files changed, 3018 insertions(+)
+ create mode 100644 scripts/mod/elf.h
+
+--- /dev/null
++++ b/scripts/mod/elf.h
+@@ -0,0 +1,3007 @@
++/* This file defines standard ELF types, structures, and macros.
++   Copyright (C) 1995-2012 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#ifndef _ELF_H
++#define _ELF_H 1
++
++/* Standard ELF types.  */
++
++#include <stdint.h>
++
++/* Type for a 16-bit quantity.  */
++typedef uint16_t Elf32_Half;
++typedef uint16_t Elf64_Half;
++
++/* Types for signed and unsigned 32-bit quantities.  */
++typedef uint32_t Elf32_Word;
++typedef int32_t Elf32_Sword;
++typedef uint32_t Elf64_Word;
++typedef int32_t Elf64_Sword;
++
++/* Types for signed and unsigned 64-bit quantities.  */
++typedef uint64_t Elf32_Xword;
++typedef int64_t Elf32_Sxword;
++typedef uint64_t Elf64_Xword;
++typedef int64_t Elf64_Sxword;
++
++/* Type of addresses.  */
++typedef uint32_t Elf32_Addr;
++typedef uint64_t Elf64_Addr;
++
++/* Type of file offsets.  */
++typedef uint32_t Elf32_Off;
++typedef uint64_t Elf64_Off;
++
++/* Type for section indices, which are 16-bit quantities.  */
++typedef uint16_t Elf32_Section;
++typedef uint16_t Elf64_Section;
++
++/* Type for version symbol information.  */
++typedef Elf32_Half Elf32_Versym;
++typedef Elf64_Half Elf64_Versym;
++
++
++/* The ELF file header.  This appears at the start of every ELF file.
*/ ++ ++#define EI_NIDENT (16) ++ ++typedef struct ++{ ++ unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */ ++ Elf32_Half e_type; /* Object file type */ ++ Elf32_Half e_machine; /* Architecture */ ++ Elf32_Word e_version; /* Object file version */ ++ Elf32_Addr e_entry; /* Entry point virtual address */ ++ Elf32_Off e_phoff; /* Program header table file offset */ ++ Elf32_Off e_shoff; /* Section header table file offset */ ++ Elf32_Word e_flags; /* Processor-specific flags */ ++ Elf32_Half e_ehsize; /* ELF header size in bytes */ ++ Elf32_Half e_phentsize; /* Program header table entry size */ ++ Elf32_Half e_phnum; /* Program header table entry count */ ++ Elf32_Half e_shentsize; /* Section header table entry size */ ++ Elf32_Half e_shnum; /* Section header table entry count */ ++ Elf32_Half e_shstrndx; /* Section header string table index */ ++} Elf32_Ehdr; ++ ++typedef struct ++{ ++ unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */ ++ Elf64_Half e_type; /* Object file type */ ++ Elf64_Half e_machine; /* Architecture */ ++ Elf64_Word e_version; /* Object file version */ ++ Elf64_Addr e_entry; /* Entry point virtual address */ ++ Elf64_Off e_phoff; /* Program header table file offset */ ++ Elf64_Off e_shoff; /* Section header table file offset */ ++ Elf64_Word e_flags; /* Processor-specific flags */ ++ Elf64_Half e_ehsize; /* ELF header size in bytes */ ++ Elf64_Half e_phentsize; /* Program header table entry size */ ++ Elf64_Half e_phnum; /* Program header table entry count */ ++ Elf64_Half e_shentsize; /* Section header table entry size */ ++ Elf64_Half e_shnum; /* Section header table entry count */ ++ Elf64_Half e_shstrndx; /* Section header string table index */ ++} Elf64_Ehdr; ++ ++/* Fields in the e_ident array. The EI_* macros are indices into the ++ array. The macros under each EI_* macro are the values the byte ++ may have. */ ++ ++#define EI_MAG0 0 /* File identification byte 0 index */ ++#define ELFMAG0 0x7f /* Magic number byte 0 */ ++ ++#define EI_MAG1 1 /* File identification byte 1 index */ ++#define ELFMAG1 'E' /* Magic number byte 1 */ ++ ++#define EI_MAG2 2 /* File identification byte 2 index */ ++#define ELFMAG2 'L' /* Magic number byte 2 */ ++ ++#define EI_MAG3 3 /* File identification byte 3 index */ ++#define ELFMAG3 'F' /* Magic number byte 3 */ ++ ++/* Conglomeration of the identification bytes, for easy testing as a word. */ ++#define ELFMAG "\177ELF" ++#define SELFMAG 4 ++ ++#define EI_CLASS 4 /* File class byte index */ ++#define ELFCLASSNONE 0 /* Invalid class */ ++#define ELFCLASS32 1 /* 32-bit objects */ ++#define ELFCLASS64 2 /* 64-bit objects */ ++#define ELFCLASSNUM 3 ++ ++#define EI_DATA 5 /* Data encoding byte index */ ++#define ELFDATANONE 0 /* Invalid data encoding */ ++#define ELFDATA2LSB 1 /* 2's complement, little endian */ ++#define ELFDATA2MSB 2 /* 2's complement, big endian */ ++#define ELFDATANUM 3 ++ ++#define EI_VERSION 6 /* File version byte index */ ++ /* Value must be EV_CURRENT */ ++ ++#define EI_OSABI 7 /* OS ABI identification */ ++#define ELFOSABI_NONE 0 /* UNIX System V ABI */ ++#define ELFOSABI_SYSV 0 /* Alias. */ ++#define ELFOSABI_HPUX 1 /* HP-UX */ ++#define ELFOSABI_NETBSD 2 /* NetBSD. */ ++#define ELFOSABI_GNU 3 /* Object uses GNU ELF extensions. */ ++#define ELFOSABI_LINUX ELFOSABI_GNU /* Compatibility alias. */ ++#define ELFOSABI_SOLARIS 6 /* Sun Solaris. */ ++#define ELFOSABI_AIX 7 /* IBM AIX. */ ++#define ELFOSABI_IRIX 8 /* SGI Irix. */ ++#define ELFOSABI_FREEBSD 9 /* FreeBSD. 
*/ ++#define ELFOSABI_TRU64 10 /* Compaq TRU64 UNIX. */ ++#define ELFOSABI_MODESTO 11 /* Novell Modesto. */ ++#define ELFOSABI_OPENBSD 12 /* OpenBSD. */ ++#define ELFOSABI_ARM_AEABI 64 /* ARM EABI */ ++#define ELFOSABI_ARM 97 /* ARM */ ++#define ELFOSABI_STANDALONE 255 /* Standalone (embedded) application */ ++ ++#define EI_ABIVERSION 8 /* ABI version */ ++ ++#define EI_PAD 9 /* Byte index of padding bytes */ ++ ++/* Legal values for e_type (object file type). */ ++ ++#define ET_NONE 0 /* No file type */ ++#define ET_REL 1 /* Relocatable file */ ++#define ET_EXEC 2 /* Executable file */ ++#define ET_DYN 3 /* Shared object file */ ++#define ET_CORE 4 /* Core file */ ++#define ET_NUM 5 /* Number of defined types */ ++#define ET_LOOS 0xfe00 /* OS-specific range start */ ++#define ET_HIOS 0xfeff /* OS-specific range end */ ++#define ET_LOPROC 0xff00 /* Processor-specific range start */ ++#define ET_HIPROC 0xffff /* Processor-specific range end */ ++ ++/* Legal values for e_machine (architecture). */ ++ ++#define EM_NONE 0 /* No machine */ ++#define EM_M32 1 /* AT&T WE 32100 */ ++#define EM_SPARC 2 /* SUN SPARC */ ++#define EM_386 3 /* Intel 80386 */ ++#define EM_68K 4 /* Motorola m68k family */ ++#define EM_88K 5 /* Motorola m88k family */ ++#define EM_860 7 /* Intel 80860 */ ++#define EM_MIPS 8 /* MIPS R3000 big-endian */ ++#define EM_S370 9 /* IBM System/370 */ ++#define EM_MIPS_RS3_LE 10 /* MIPS R3000 little-endian */ ++ ++#define EM_PARISC 15 /* HPPA */ ++#define EM_VPP500 17 /* Fujitsu VPP500 */ ++#define EM_SPARC32PLUS 18 /* Sun's "v8plus" */ ++#define EM_960 19 /* Intel 80960 */ ++#define EM_PPC 20 /* PowerPC */ ++#define EM_PPC64 21 /* PowerPC 64-bit */ ++#define EM_S390 22 /* IBM S390 */ ++ ++#define EM_V800 36 /* NEC V800 series */ ++#define EM_FR20 37 /* Fujitsu FR20 */ ++#define EM_RH32 38 /* TRW RH-32 */ ++#define EM_RCE 39 /* Motorola RCE */ ++#define EM_ARM 40 /* ARM */ ++#define EM_FAKE_ALPHA 41 /* Digital Alpha */ ++#define EM_SH 42 /* Hitachi SH */ ++#define EM_SPARCV9 43 /* SPARC v9 64-bit */ ++#define EM_TRICORE 44 /* Siemens Tricore */ ++#define EM_ARC 45 /* Argonaut RISC Core */ ++#define EM_H8_300 46 /* Hitachi H8/300 */ ++#define EM_H8_300H 47 /* Hitachi H8/300H */ ++#define EM_H8S 48 /* Hitachi H8S */ ++#define EM_H8_500 49 /* Hitachi H8/500 */ ++#define EM_IA_64 50 /* Intel Merced */ ++#define EM_MIPS_X 51 /* Stanford MIPS-X */ ++#define EM_COLDFIRE 52 /* Motorola Coldfire */ ++#define EM_68HC12 53 /* Motorola M68HC12 */ ++#define EM_MMA 54 /* Fujitsu MMA Multimedia Accelerator*/ ++#define EM_PCP 55 /* Siemens PCP */ ++#define EM_NCPU 56 /* Sony nCPU embeeded RISC */ ++#define EM_NDR1 57 /* Denso NDR1 microprocessor */ ++#define EM_STARCORE 58 /* Motorola Start*Core processor */ ++#define EM_ME16 59 /* Toyota ME16 processor */ ++#define EM_ST100 60 /* STMicroelectronic ST100 processor */ ++#define EM_TINYJ 61 /* Advanced Logic Corp. 
Tinyj emb.fam*/ ++#define EM_X86_64 62 /* AMD x86-64 architecture */ ++#define EM_PDSP 63 /* Sony DSP Processor */ ++ ++#define EM_FX66 66 /* Siemens FX66 microcontroller */ ++#define EM_ST9PLUS 67 /* STMicroelectronics ST9+ 8/16 mc */ ++#define EM_ST7 68 /* STmicroelectronics ST7 8 bit mc */ ++#define EM_68HC16 69 /* Motorola MC68HC16 microcontroller */ ++#define EM_68HC11 70 /* Motorola MC68HC11 microcontroller */ ++#define EM_68HC08 71 /* Motorola MC68HC08 microcontroller */ ++#define EM_68HC05 72 /* Motorola MC68HC05 microcontroller */ ++#define EM_SVX 73 /* Silicon Graphics SVx */ ++#define EM_ST19 74 /* STMicroelectronics ST19 8 bit mc */ ++#define EM_VAX 75 /* Digital VAX */ ++#define EM_CRIS 76 /* Axis Communications 32-bit embedded processor */ ++#define EM_JAVELIN 77 /* Infineon Technologies 32-bit embedded processor */ ++#define EM_FIREPATH 78 /* Element 14 64-bit DSP Processor */ ++#define EM_ZSP 79 /* LSI Logic 16-bit DSP Processor */ ++#define EM_MMIX 80 /* Donald Knuth's educational 64-bit processor */ ++#define EM_HUANY 81 /* Harvard University machine-independent object files */ ++#define EM_PRISM 82 /* SiTera Prism */ ++#define EM_AVR 83 /* Atmel AVR 8-bit microcontroller */ ++#define EM_FR30 84 /* Fujitsu FR30 */ ++#define EM_D10V 85 /* Mitsubishi D10V */ ++#define EM_D30V 86 /* Mitsubishi D30V */ ++#define EM_V850 87 /* NEC v850 */ ++#define EM_M32R 88 /* Mitsubishi M32R */ ++#define EM_MN10300 89 /* Matsushita MN10300 */ ++#define EM_MN10200 90 /* Matsushita MN10200 */ ++#define EM_PJ 91 /* picoJava */ ++#define EM_OPENRISC 92 /* OpenRISC 32-bit embedded processor */ ++#define EM_ARC_A5 93 /* ARC Cores Tangent-A5 */ ++#define EM_XTENSA 94 /* Tensilica Xtensa Architecture */ ++#define EM_TILEPRO 188 /* Tilera TILEPro */ ++#define EM_TILEGX 191 /* Tilera TILE-Gx */ ++#define EM_NUM 192 ++ ++/* If it is necessary to assign new unofficial EM_* values, please ++ pick large random numbers (0x8523, 0xa7f2, etc.) to minimize the ++ chances of collision with official or non-GNU unofficial values. */ ++ ++#define EM_ALPHA 0x9026 ++ ++/* Legal values for e_version (version). */ ++ ++#define EV_NONE 0 /* Invalid ELF version */ ++#define EV_CURRENT 1 /* Current version */ ++#define EV_NUM 2 ++ ++/* Section header. */ ++ ++typedef struct ++{ ++ Elf32_Word sh_name; /* Section name (string tbl index) */ ++ Elf32_Word sh_type; /* Section type */ ++ Elf32_Word sh_flags; /* Section flags */ ++ Elf32_Addr sh_addr; /* Section virtual addr at execution */ ++ Elf32_Off sh_offset; /* Section file offset */ ++ Elf32_Word sh_size; /* Section size in bytes */ ++ Elf32_Word sh_link; /* Link to another section */ ++ Elf32_Word sh_info; /* Additional section information */ ++ Elf32_Word sh_addralign; /* Section alignment */ ++ Elf32_Word sh_entsize; /* Entry size if section holds table */ ++} Elf32_Shdr; ++ ++typedef struct ++{ ++ Elf64_Word sh_name; /* Section name (string tbl index) */ ++ Elf64_Word sh_type; /* Section type */ ++ Elf64_Xword sh_flags; /* Section flags */ ++ Elf64_Addr sh_addr; /* Section virtual addr at execution */ ++ Elf64_Off sh_offset; /* Section file offset */ ++ Elf64_Xword sh_size; /* Section size in bytes */ ++ Elf64_Word sh_link; /* Link to another section */ ++ Elf64_Word sh_info; /* Additional section information */ ++ Elf64_Xword sh_addralign; /* Section alignment */ ++ Elf64_Xword sh_entsize; /* Entry size if section holds table */ ++} Elf64_Shdr; ++ ++/* Special section indices. 
*/ ++ ++#define SHN_UNDEF 0 /* Undefined section */ ++#define SHN_LORESERVE 0xff00 /* Start of reserved indices */ ++#define SHN_LOPROC 0xff00 /* Start of processor-specific */ ++#define SHN_BEFORE 0xff00 /* Order section before all others ++ (Solaris). */ ++#define SHN_AFTER 0xff01 /* Order section after all others ++ (Solaris). */ ++#define SHN_HIPROC 0xff1f /* End of processor-specific */ ++#define SHN_LOOS 0xff20 /* Start of OS-specific */ ++#define SHN_HIOS 0xff3f /* End of OS-specific */ ++#define SHN_ABS 0xfff1 /* Associated symbol is absolute */ ++#define SHN_COMMON 0xfff2 /* Associated symbol is common */ ++#define SHN_XINDEX 0xffff /* Index is in extra table. */ ++#define SHN_HIRESERVE 0xffff /* End of reserved indices */ ++ ++/* Legal values for sh_type (section type). */ ++ ++#define SHT_NULL 0 /* Section header table entry unused */ ++#define SHT_PROGBITS 1 /* Program data */ ++#define SHT_SYMTAB 2 /* Symbol table */ ++#define SHT_STRTAB 3 /* String table */ ++#define SHT_RELA 4 /* Relocation entries with addends */ ++#define SHT_HASH 5 /* Symbol hash table */ ++#define SHT_DYNAMIC 6 /* Dynamic linking information */ ++#define SHT_NOTE 7 /* Notes */ ++#define SHT_NOBITS 8 /* Program space with no data (bss) */ ++#define SHT_REL 9 /* Relocation entries, no addends */ ++#define SHT_SHLIB 10 /* Reserved */ ++#define SHT_DYNSYM 11 /* Dynamic linker symbol table */ ++#define SHT_INIT_ARRAY 14 /* Array of constructors */ ++#define SHT_FINI_ARRAY 15 /* Array of destructors */ ++#define SHT_PREINIT_ARRAY 16 /* Array of pre-constructors */ ++#define SHT_GROUP 17 /* Section group */ ++#define SHT_SYMTAB_SHNDX 18 /* Extended section indeces */ ++#define SHT_NUM 19 /* Number of defined types. */ ++#define SHT_LOOS 0x60000000 /* Start OS-specific. */ ++#define SHT_GNU_ATTRIBUTES 0x6ffffff5 /* Object attributes. */ ++#define SHT_GNU_HASH 0x6ffffff6 /* GNU-style hash table. */ ++#define SHT_GNU_LIBLIST 0x6ffffff7 /* Prelink library list */ ++#define SHT_CHECKSUM 0x6ffffff8 /* Checksum for DSO content. */ ++#define SHT_LOSUNW 0x6ffffffa /* Sun-specific low bound. */ ++#define SHT_SUNW_move 0x6ffffffa ++#define SHT_SUNW_COMDAT 0x6ffffffb ++#define SHT_SUNW_syminfo 0x6ffffffc ++#define SHT_GNU_verdef 0x6ffffffd /* Version definition section. */ ++#define SHT_GNU_verneed 0x6ffffffe /* Version needs section. */ ++#define SHT_GNU_versym 0x6fffffff /* Version symbol table. */ ++#define SHT_HISUNW 0x6fffffff /* Sun-specific high bound. */ ++#define SHT_HIOS 0x6fffffff /* End OS-specific type */ ++#define SHT_LOPROC 0x70000000 /* Start of processor-specific */ ++#define SHT_HIPROC 0x7fffffff /* End of processor-specific */ ++#define SHT_LOUSER 0x80000000 /* Start of application-specific */ ++#define SHT_HIUSER 0x8fffffff /* End of application-specific */ ++ ++/* Legal values for sh_flags (section flags). */ ++ ++#define SHF_WRITE (1 << 0) /* Writable */ ++#define SHF_ALLOC (1 << 1) /* Occupies memory during execution */ ++#define SHF_EXECINSTR (1 << 2) /* Executable */ ++#define SHF_MERGE (1 << 4) /* Might be merged */ ++#define SHF_STRINGS (1 << 5) /* Contains nul-terminated strings */ ++#define SHF_INFO_LINK (1 << 6) /* `sh_info' contains SHT index */ ++#define SHF_LINK_ORDER (1 << 7) /* Preserve order after combining */ ++#define SHF_OS_NONCONFORMING (1 << 8) /* Non-standard OS specific handling ++ required */ ++#define SHF_GROUP (1 << 9) /* Section is member of a group. */ ++#define SHF_TLS (1 << 10) /* Section hold thread-local data. */ ++#define SHF_MASKOS 0x0ff00000 /* OS-specific. 
*/ ++#define SHF_MASKPROC 0xf0000000 /* Processor-specific */ ++#define SHF_ORDERED (1 << 30) /* Special ordering requirement ++ (Solaris). */ ++#define SHF_EXCLUDE (1 << 31) /* Section is excluded unless ++ referenced or allocated (Solaris).*/ ++ ++/* Section group handling. */ ++#define GRP_COMDAT 0x1 /* Mark group as COMDAT. */ ++ ++/* Symbol table entry. */ ++ ++typedef struct ++{ ++ Elf32_Word st_name; /* Symbol name (string tbl index) */ ++ Elf32_Addr st_value; /* Symbol value */ ++ Elf32_Word st_size; /* Symbol size */ ++ unsigned char st_info; /* Symbol type and binding */ ++ unsigned char st_other; /* Symbol visibility */ ++ Elf32_Section st_shndx; /* Section index */ ++} Elf32_Sym; ++ ++typedef struct ++{ ++ Elf64_Word st_name; /* Symbol name (string tbl index) */ ++ unsigned char st_info; /* Symbol type and binding */ ++ unsigned char st_other; /* Symbol visibility */ ++ Elf64_Section st_shndx; /* Section index */ ++ Elf64_Addr st_value; /* Symbol value */ ++ Elf64_Xword st_size; /* Symbol size */ ++} Elf64_Sym; ++ ++/* The syminfo section if available contains additional information about ++ every dynamic symbol. */ ++ ++typedef struct ++{ ++ Elf32_Half si_boundto; /* Direct bindings, symbol bound to */ ++ Elf32_Half si_flags; /* Per symbol flags */ ++} Elf32_Syminfo; ++ ++typedef struct ++{ ++ Elf64_Half si_boundto; /* Direct bindings, symbol bound to */ ++ Elf64_Half si_flags; /* Per symbol flags */ ++} Elf64_Syminfo; ++ ++/* Possible values for si_boundto. */ ++#define SYMINFO_BT_SELF 0xffff /* Symbol bound to self */ ++#define SYMINFO_BT_PARENT 0xfffe /* Symbol bound to parent */ ++#define SYMINFO_BT_LOWRESERVE 0xff00 /* Beginning of reserved entries */ ++ ++/* Possible bitmasks for si_flags. */ ++#define SYMINFO_FLG_DIRECT 0x0001 /* Direct bound symbol */ ++#define SYMINFO_FLG_PASSTHRU 0x0002 /* Pass-thru symbol for translator */ ++#define SYMINFO_FLG_COPY 0x0004 /* Symbol is a copy-reloc */ ++#define SYMINFO_FLG_LAZYLOAD 0x0008 /* Symbol bound to object to be lazy ++ loaded */ ++/* Syminfo version values. */ ++#define SYMINFO_NONE 0 ++#define SYMINFO_CURRENT 1 ++#define SYMINFO_NUM 2 ++ ++ ++/* How to extract and insert information held in the st_info field. */ ++ ++#define ELF32_ST_BIND(val) (((unsigned char) (val)) >> 4) ++#define ELF32_ST_TYPE(val) ((val) & 0xf) ++#define ELF32_ST_INFO(bind, type) (((bind) << 4) + ((type) & 0xf)) ++ ++/* Both Elf32_Sym and Elf64_Sym use the same one-byte st_info field. */ ++#define ELF64_ST_BIND(val) ELF32_ST_BIND (val) ++#define ELF64_ST_TYPE(val) ELF32_ST_TYPE (val) ++#define ELF64_ST_INFO(bind, type) ELF32_ST_INFO ((bind), (type)) ++ ++/* Legal values for ST_BIND subfield of st_info (symbol binding). */ ++ ++#define STB_LOCAL 0 /* Local symbol */ ++#define STB_GLOBAL 1 /* Global symbol */ ++#define STB_WEAK 2 /* Weak symbol */ ++#define STB_NUM 3 /* Number of defined types. */ ++#define STB_LOOS 10 /* Start of OS-specific */ ++#define STB_GNU_UNIQUE 10 /* Unique symbol. */ ++#define STB_HIOS 12 /* End of OS-specific */ ++#define STB_LOPROC 13 /* Start of processor-specific */ ++#define STB_HIPROC 15 /* End of processor-specific */ ++ ++/* Legal values for ST_TYPE subfield of st_info (symbol type). 
*/ ++ ++#define STT_NOTYPE 0 /* Symbol type is unspecified */ ++#define STT_OBJECT 1 /* Symbol is a data object */ ++#define STT_FUNC 2 /* Symbol is a code object */ ++#define STT_SECTION 3 /* Symbol associated with a section */ ++#define STT_FILE 4 /* Symbol's name is file name */ ++#define STT_COMMON 5 /* Symbol is a common data object */ ++#define STT_TLS 6 /* Symbol is thread-local data object*/ ++#define STT_NUM 7 /* Number of defined types. */ ++#define STT_LOOS 10 /* Start of OS-specific */ ++#define STT_GNU_IFUNC 10 /* Symbol is indirect code object */ ++#define STT_HIOS 12 /* End of OS-specific */ ++#define STT_LOPROC 13 /* Start of processor-specific */ ++#define STT_HIPROC 15 /* End of processor-specific */ ++ ++ ++/* Symbol table indices are found in the hash buckets and chain table ++ of a symbol hash table section. This special index value indicates ++ the end of a chain, meaning no further symbols are found in that bucket. */ ++ ++#define STN_UNDEF 0 /* End of a chain. */ ++ ++ ++/* How to extract and insert information held in the st_other field. */ ++ ++#define ELF32_ST_VISIBILITY(o) ((o) & 0x03) ++ ++/* For ELF64 the definitions are the same. */ ++#define ELF64_ST_VISIBILITY(o) ELF32_ST_VISIBILITY (o) ++ ++/* Symbol visibility specification encoded in the st_other field. */ ++#define STV_DEFAULT 0 /* Default symbol visibility rules */ ++#define STV_INTERNAL 1 /* Processor specific hidden class */ ++#define STV_HIDDEN 2 /* Sym unavailable in other modules */ ++#define STV_PROTECTED 3 /* Not preemptible, not exported */ ++ ++ ++/* Relocation table entry without addend (in section of type SHT_REL). */ ++ ++typedef struct ++{ ++ Elf32_Addr r_offset; /* Address */ ++ Elf32_Word r_info; /* Relocation type and symbol index */ ++} Elf32_Rel; ++ ++/* I have seen two different definitions of the Elf64_Rel and ++ Elf64_Rela structures, so we'll leave them out until Novell (or ++ whoever) gets their act together. */ ++/* The following, at least, is used on Sparc v9, MIPS, and Alpha. */ ++ ++typedef struct ++{ ++ Elf64_Addr r_offset; /* Address */ ++ Elf64_Xword r_info; /* Relocation type and symbol index */ ++} Elf64_Rel; ++ ++/* Relocation table entry with addend (in section of type SHT_RELA). */ ++ ++typedef struct ++{ ++ Elf32_Addr r_offset; /* Address */ ++ Elf32_Word r_info; /* Relocation type and symbol index */ ++ Elf32_Sword r_addend; /* Addend */ ++} Elf32_Rela; ++ ++typedef struct ++{ ++ Elf64_Addr r_offset; /* Address */ ++ Elf64_Xword r_info; /* Relocation type and symbol index */ ++ Elf64_Sxword r_addend; /* Addend */ ++} Elf64_Rela; ++ ++/* How to extract and insert information held in the r_info field. */ ++ ++#define ELF32_R_SYM(val) ((val) >> 8) ++#define ELF32_R_TYPE(val) ((val) & 0xff) ++#define ELF32_R_INFO(sym, type) (((sym) << 8) + ((type) & 0xff)) ++ ++#define ELF64_R_SYM(i) ((i) >> 32) ++#define ELF64_R_TYPE(i) ((i) & 0xffffffff) ++#define ELF64_R_INFO(sym,type) ((((Elf64_Xword) (sym)) << 32) + (type)) ++ ++/* Program segment header. 
*/ ++ ++typedef struct ++{ ++ Elf32_Word p_type; /* Segment type */ ++ Elf32_Off p_offset; /* Segment file offset */ ++ Elf32_Addr p_vaddr; /* Segment virtual address */ ++ Elf32_Addr p_paddr; /* Segment physical address */ ++ Elf32_Word p_filesz; /* Segment size in file */ ++ Elf32_Word p_memsz; /* Segment size in memory */ ++ Elf32_Word p_flags; /* Segment flags */ ++ Elf32_Word p_align; /* Segment alignment */ ++} Elf32_Phdr; ++ ++typedef struct ++{ ++ Elf64_Word p_type; /* Segment type */ ++ Elf64_Word p_flags; /* Segment flags */ ++ Elf64_Off p_offset; /* Segment file offset */ ++ Elf64_Addr p_vaddr; /* Segment virtual address */ ++ Elf64_Addr p_paddr; /* Segment physical address */ ++ Elf64_Xword p_filesz; /* Segment size in file */ ++ Elf64_Xword p_memsz; /* Segment size in memory */ ++ Elf64_Xword p_align; /* Segment alignment */ ++} Elf64_Phdr; ++ ++/* Special value for e_phnum. This indicates that the real number of ++ program headers is too large to fit into e_phnum. Instead the real ++ value is in the field sh_info of section 0. */ ++ ++#define PN_XNUM 0xffff ++ ++/* Legal values for p_type (segment type). */ ++ ++#define PT_NULL 0 /* Program header table entry unused */ ++#define PT_LOAD 1 /* Loadable program segment */ ++#define PT_DYNAMIC 2 /* Dynamic linking information */ ++#define PT_INTERP 3 /* Program interpreter */ ++#define PT_NOTE 4 /* Auxiliary information */ ++#define PT_SHLIB 5 /* Reserved */ ++#define PT_PHDR 6 /* Entry for header table itself */ ++#define PT_TLS 7 /* Thread-local storage segment */ ++#define PT_NUM 8 /* Number of defined types */ ++#define PT_LOOS 0x60000000 /* Start of OS-specific */ ++#define PT_GNU_EH_FRAME 0x6474e550 /* GCC .eh_frame_hdr segment */ ++#define PT_GNU_STACK 0x6474e551 /* Indicates stack executability */ ++#define PT_GNU_RELRO 0x6474e552 /* Read-only after relocation */ ++#define PT_LOSUNW 0x6ffffffa ++#define PT_SUNWBSS 0x6ffffffa /* Sun Specific segment */ ++#define PT_SUNWSTACK 0x6ffffffb /* Stack segment */ ++#define PT_HISUNW 0x6fffffff ++#define PT_HIOS 0x6fffffff /* End of OS-specific */ ++#define PT_LOPROC 0x70000000 /* Start of processor-specific */ ++#define PT_HIPROC 0x7fffffff /* End of processor-specific */ ++ ++/* Legal values for p_flags (segment flags). */ ++ ++#define PF_X (1 << 0) /* Segment is executable */ ++#define PF_W (1 << 1) /* Segment is writable */ ++#define PF_R (1 << 2) /* Segment is readable */ ++#define PF_MASKOS 0x0ff00000 /* OS-specific */ ++#define PF_MASKPROC 0xf0000000 /* Processor-specific */ ++ ++/* Legal values for note segment descriptor types for core files. 
*/ ++ ++#define NT_PRSTATUS 1 /* Contains copy of prstatus struct */ ++#define NT_FPREGSET 2 /* Contains copy of fpregset struct */ ++#define NT_PRPSINFO 3 /* Contains copy of prpsinfo struct */ ++#define NT_PRXREG 4 /* Contains copy of prxregset struct */ ++#define NT_TASKSTRUCT 4 /* Contains copy of task structure */ ++#define NT_PLATFORM 5 /* String from sysinfo(SI_PLATFORM) */ ++#define NT_AUXV 6 /* Contains copy of auxv array */ ++#define NT_GWINDOWS 7 /* Contains copy of gwindows struct */ ++#define NT_ASRS 8 /* Contains copy of asrset struct */ ++#define NT_PSTATUS 10 /* Contains copy of pstatus struct */ ++#define NT_PSINFO 13 /* Contains copy of psinfo struct */ ++#define NT_PRCRED 14 /* Contains copy of prcred struct */ ++#define NT_UTSNAME 15 /* Contains copy of utsname struct */ ++#define NT_LWPSTATUS 16 /* Contains copy of lwpstatus struct */ ++#define NT_LWPSINFO 17 /* Contains copy of lwpinfo struct */ ++#define NT_PRFPXREG 20 /* Contains copy of fprxregset struct */ ++#define NT_PRXFPREG 0x46e62b7f /* Contains copy of user_fxsr_struct */ ++#define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */ ++#define NT_PPC_SPE 0x101 /* PowerPC SPE/EVR registers */ ++#define NT_PPC_VSX 0x102 /* PowerPC VSX registers */ ++#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ ++#define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */ ++#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ ++ ++/* Legal values for the note segment descriptor types for object files. */ ++ ++#define NT_VERSION 1 /* Contains a version string. */ ++ ++ ++/* Dynamic section entry. */ ++ ++typedef struct ++{ ++ Elf32_Sword d_tag; /* Dynamic entry type */ ++ union ++ { ++ Elf32_Word d_val; /* Integer value */ ++ Elf32_Addr d_ptr; /* Address value */ ++ } d_un; ++} Elf32_Dyn; ++ ++typedef struct ++{ ++ Elf64_Sxword d_tag; /* Dynamic entry type */ ++ union ++ { ++ Elf64_Xword d_val; /* Integer value */ ++ Elf64_Addr d_ptr; /* Address value */ ++ } d_un; ++} Elf64_Dyn; ++ ++/* Legal values for d_tag (dynamic entry type). 
*/ ++ ++#define DT_NULL 0 /* Marks end of dynamic section */ ++#define DT_NEEDED 1 /* Name of needed library */ ++#define DT_PLTRELSZ 2 /* Size in bytes of PLT relocs */ ++#define DT_PLTGOT 3 /* Processor defined value */ ++#define DT_HASH 4 /* Address of symbol hash table */ ++#define DT_STRTAB 5 /* Address of string table */ ++#define DT_SYMTAB 6 /* Address of symbol table */ ++#define DT_RELA 7 /* Address of Rela relocs */ ++#define DT_RELASZ 8 /* Total size of Rela relocs */ ++#define DT_RELAENT 9 /* Size of one Rela reloc */ ++#define DT_STRSZ 10 /* Size of string table */ ++#define DT_SYMENT 11 /* Size of one symbol table entry */ ++#define DT_INIT 12 /* Address of init function */ ++#define DT_FINI 13 /* Address of termination function */ ++#define DT_SONAME 14 /* Name of shared object */ ++#define DT_RPATH 15 /* Library search path (deprecated) */ ++#define DT_SYMBOLIC 16 /* Start symbol search here */ ++#define DT_REL 17 /* Address of Rel relocs */ ++#define DT_RELSZ 18 /* Total size of Rel relocs */ ++#define DT_RELENT 19 /* Size of one Rel reloc */ ++#define DT_PLTREL 20 /* Type of reloc in PLT */ ++#define DT_DEBUG 21 /* For debugging; unspecified */ ++#define DT_TEXTREL 22 /* Reloc might modify .text */ ++#define DT_JMPREL 23 /* Address of PLT relocs */ ++#define DT_BIND_NOW 24 /* Process relocations of object */ ++#define DT_INIT_ARRAY 25 /* Array with addresses of init fct */ ++#define DT_FINI_ARRAY 26 /* Array with addresses of fini fct */ ++#define DT_INIT_ARRAYSZ 27 /* Size in bytes of DT_INIT_ARRAY */ ++#define DT_FINI_ARRAYSZ 28 /* Size in bytes of DT_FINI_ARRAY */ ++#define DT_RUNPATH 29 /* Library search path */ ++#define DT_FLAGS 30 /* Flags for the object being loaded */ ++#define DT_ENCODING 32 /* Start of encoded range */ ++#define DT_PREINIT_ARRAY 32 /* Array with addresses of preinit fct*/ ++#define DT_PREINIT_ARRAYSZ 33 /* size in bytes of DT_PREINIT_ARRAY */ ++#define DT_NUM 34 /* Number used */ ++#define DT_LOOS 0x6000000d /* Start of OS-specific */ ++#define DT_HIOS 0x6ffff000 /* End of OS-specific */ ++#define DT_LOPROC 0x70000000 /* Start of processor-specific */ ++#define DT_HIPROC 0x7fffffff /* End of processor-specific */ ++#define DT_PROCNUM DT_MIPS_NUM /* Most used by any processor */ ++ ++/* DT_* entries which fall between DT_VALRNGHI & DT_VALRNGLO use the ++ Dyn.d_un.d_val field of the Elf*_Dyn structure. This follows Sun's ++ approach. */ ++#define DT_VALRNGLO 0x6ffffd00 ++#define DT_GNU_PRELINKED 0x6ffffdf5 /* Prelinking timestamp */ ++#define DT_GNU_CONFLICTSZ 0x6ffffdf6 /* Size of conflict section */ ++#define DT_GNU_LIBLISTSZ 0x6ffffdf7 /* Size of library list */ ++#define DT_CHECKSUM 0x6ffffdf8 ++#define DT_PLTPADSZ 0x6ffffdf9 ++#define DT_MOVEENT 0x6ffffdfa ++#define DT_MOVESZ 0x6ffffdfb ++#define DT_FEATURE_1 0x6ffffdfc /* Feature selection (DTF_*). */ ++#define DT_POSFLAG_1 0x6ffffdfd /* Flags for DT_* entries, effecting ++ the following DT_* entry. */ ++#define DT_SYMINSZ 0x6ffffdfe /* Size of syminfo table (in bytes) */ ++#define DT_SYMINENT 0x6ffffdff /* Entry size of syminfo */ ++#define DT_VALRNGHI 0x6ffffdff ++#define DT_VALTAGIDX(tag) (DT_VALRNGHI - (tag)) /* Reverse order! */ ++#define DT_VALNUM 12 ++ ++/* DT_* entries which fall between DT_ADDRRNGHI & DT_ADDRRNGLO use the ++ Dyn.d_un.d_ptr field of the Elf*_Dyn structure. ++ ++ If any adjustment is made to the ELF object after it has been ++ built these entries will need to be adjusted. */ ++#define DT_ADDRRNGLO 0x6ffffe00 ++#define DT_GNU_HASH 0x6ffffef5 /* GNU-style hash table. 
*/ ++#define DT_TLSDESC_PLT 0x6ffffef6 ++#define DT_TLSDESC_GOT 0x6ffffef7 ++#define DT_GNU_CONFLICT 0x6ffffef8 /* Start of conflict section */ ++#define DT_GNU_LIBLIST 0x6ffffef9 /* Library list */ ++#define DT_CONFIG 0x6ffffefa /* Configuration information. */ ++#define DT_DEPAUDIT 0x6ffffefb /* Dependency auditing. */ ++#define DT_AUDIT 0x6ffffefc /* Object auditing. */ ++#define DT_PLTPAD 0x6ffffefd /* PLT padding. */ ++#define DT_MOVETAB 0x6ffffefe /* Move table. */ ++#define DT_SYMINFO 0x6ffffeff /* Syminfo table. */ ++#define DT_ADDRRNGHI 0x6ffffeff ++#define DT_ADDRTAGIDX(tag) (DT_ADDRRNGHI - (tag)) /* Reverse order! */ ++#define DT_ADDRNUM 11 ++ ++/* The versioning entry types. The next are defined as part of the ++ GNU extension. */ ++#define DT_VERSYM 0x6ffffff0 ++ ++#define DT_RELACOUNT 0x6ffffff9 ++#define DT_RELCOUNT 0x6ffffffa ++ ++/* These were chosen by Sun. */ ++#define DT_FLAGS_1 0x6ffffffb /* State flags, see DF_1_* below. */ ++#define DT_VERDEF 0x6ffffffc /* Address of version definition ++ table */ ++#define DT_VERDEFNUM 0x6ffffffd /* Number of version definitions */ ++#define DT_VERNEED 0x6ffffffe /* Address of table with needed ++ versions */ ++#define DT_VERNEEDNUM 0x6fffffff /* Number of needed versions */ ++#define DT_VERSIONTAGIDX(tag) (DT_VERNEEDNUM - (tag)) /* Reverse order! */ ++#define DT_VERSIONTAGNUM 16 ++ ++/* Sun added these machine-independent extensions in the "processor-specific" ++ range. Be compatible. */ ++#define DT_AUXILIARY 0x7ffffffd /* Shared object to load before self */ ++#define DT_FILTER 0x7fffffff /* Shared object to get values from */ ++#define DT_EXTRATAGIDX(tag) ((Elf32_Word)-((Elf32_Sword) (tag) <<1>>1)-1) ++#define DT_EXTRANUM 3 ++ ++/* Values of `d_un.d_val' in the DT_FLAGS entry. */ ++#define DF_ORIGIN 0x00000001 /* Object may use DF_ORIGIN */ ++#define DF_SYMBOLIC 0x00000002 /* Symbol resolutions starts here */ ++#define DF_TEXTREL 0x00000004 /* Object contains text relocations */ ++#define DF_BIND_NOW 0x00000008 /* No lazy binding for this object */ ++#define DF_STATIC_TLS 0x00000010 /* Module uses the static TLS model */ ++ ++/* State flags selectable in the `d_un.d_val' element of the DT_FLAGS_1 ++ entry in the dynamic section. */ ++#define DF_1_NOW 0x00000001 /* Set RTLD_NOW for this object. */ ++#define DF_1_GLOBAL 0x00000002 /* Set RTLD_GLOBAL for this object. */ ++#define DF_1_GROUP 0x00000004 /* Set RTLD_GROUP for this object. */ ++#define DF_1_NODELETE 0x00000008 /* Set RTLD_NODELETE for this object.*/ ++#define DF_1_LOADFLTR 0x00000010 /* Trigger filtee loading at runtime.*/ ++#define DF_1_INITFIRST 0x00000020 /* Set RTLD_INITFIRST for this object*/ ++#define DF_1_NOOPEN 0x00000040 /* Set RTLD_NOOPEN for this object. */ ++#define DF_1_ORIGIN 0x00000080 /* $ORIGIN must be handled. */ ++#define DF_1_DIRECT 0x00000100 /* Direct binding enabled. */ ++#define DF_1_TRANS 0x00000200 ++#define DF_1_INTERPOSE 0x00000400 /* Object is used to interpose. */ ++#define DF_1_NODEFLIB 0x00000800 /* Ignore default lib search path. */ ++#define DF_1_NODUMP 0x00001000 /* Object can't be dldump'ed. */ ++#define DF_1_CONFALT 0x00002000 /* Configuration alternative created.*/ ++#define DF_1_ENDFILTEE 0x00004000 /* Filtee terminates filters search. */ ++#define DF_1_DISPRELDNE 0x00008000 /* Disp reloc applied at build time. */ ++#define DF_1_DISPRELPND 0x00010000 /* Disp reloc applied at run-time. */ ++ ++/* Flags for the feature selection in DT_FEATURE_1. 
*/ ++#define DTF_1_PARINIT 0x00000001 ++#define DTF_1_CONFEXP 0x00000002 ++ ++/* Flags in the DT_POSFLAG_1 entry effecting only the next DT_* entry. */ ++#define DF_P1_LAZYLOAD 0x00000001 /* Lazyload following object. */ ++#define DF_P1_GROUPPERM 0x00000002 /* Symbols from next object are not ++ generally available. */ ++ ++/* Version definition sections. */ ++ ++typedef struct ++{ ++ Elf32_Half vd_version; /* Version revision */ ++ Elf32_Half vd_flags; /* Version information */ ++ Elf32_Half vd_ndx; /* Version Index */ ++ Elf32_Half vd_cnt; /* Number of associated aux entries */ ++ Elf32_Word vd_hash; /* Version name hash value */ ++ Elf32_Word vd_aux; /* Offset in bytes to verdaux array */ ++ Elf32_Word vd_next; /* Offset in bytes to next verdef ++ entry */ ++} Elf32_Verdef; ++ ++typedef struct ++{ ++ Elf64_Half vd_version; /* Version revision */ ++ Elf64_Half vd_flags; /* Version information */ ++ Elf64_Half vd_ndx; /* Version Index */ ++ Elf64_Half vd_cnt; /* Number of associated aux entries */ ++ Elf64_Word vd_hash; /* Version name hash value */ ++ Elf64_Word vd_aux; /* Offset in bytes to verdaux array */ ++ Elf64_Word vd_next; /* Offset in bytes to next verdef ++ entry */ ++} Elf64_Verdef; ++ ++ ++/* Legal values for vd_version (version revision). */ ++#define VER_DEF_NONE 0 /* No version */ ++#define VER_DEF_CURRENT 1 /* Current version */ ++#define VER_DEF_NUM 2 /* Given version number */ ++ ++/* Legal values for vd_flags (version information flags). */ ++#define VER_FLG_BASE 0x1 /* Version definition of file itself */ ++#define VER_FLG_WEAK 0x2 /* Weak version identifier */ ++ ++/* Versym symbol index values. */ ++#define VER_NDX_LOCAL 0 /* Symbol is local. */ ++#define VER_NDX_GLOBAL 1 /* Symbol is global. */ ++#define VER_NDX_LORESERVE 0xff00 /* Beginning of reserved entries. */ ++#define VER_NDX_ELIMINATE 0xff01 /* Symbol is to be eliminated. */ ++ ++/* Auxialiary version information. */ ++ ++typedef struct ++{ ++ Elf32_Word vda_name; /* Version or dependency names */ ++ Elf32_Word vda_next; /* Offset in bytes to next verdaux ++ entry */ ++} Elf32_Verdaux; ++ ++typedef struct ++{ ++ Elf64_Word vda_name; /* Version or dependency names */ ++ Elf64_Word vda_next; /* Offset in bytes to next verdaux ++ entry */ ++} Elf64_Verdaux; ++ ++ ++/* Version dependency section. */ ++ ++typedef struct ++{ ++ Elf32_Half vn_version; /* Version of structure */ ++ Elf32_Half vn_cnt; /* Number of associated aux entries */ ++ Elf32_Word vn_file; /* Offset of filename for this ++ dependency */ ++ Elf32_Word vn_aux; /* Offset in bytes to vernaux array */ ++ Elf32_Word vn_next; /* Offset in bytes to next verneed ++ entry */ ++} Elf32_Verneed; ++ ++typedef struct ++{ ++ Elf64_Half vn_version; /* Version of structure */ ++ Elf64_Half vn_cnt; /* Number of associated aux entries */ ++ Elf64_Word vn_file; /* Offset of filename for this ++ dependency */ ++ Elf64_Word vn_aux; /* Offset in bytes to vernaux array */ ++ Elf64_Word vn_next; /* Offset in bytes to next verneed ++ entry */ ++} Elf64_Verneed; ++ ++ ++/* Legal values for vn_version (version revision). */ ++#define VER_NEED_NONE 0 /* No version */ ++#define VER_NEED_CURRENT 1 /* Current version */ ++#define VER_NEED_NUM 2 /* Given version number */ ++ ++/* Auxiliary needed version information. 
*/ ++ ++typedef struct ++{ ++ Elf32_Word vna_hash; /* Hash value of dependency name */ ++ Elf32_Half vna_flags; /* Dependency specific information */ ++ Elf32_Half vna_other; /* Unused */ ++ Elf32_Word vna_name; /* Dependency name string offset */ ++ Elf32_Word vna_next; /* Offset in bytes to next vernaux ++ entry */ ++} Elf32_Vernaux; ++ ++typedef struct ++{ ++ Elf64_Word vna_hash; /* Hash value of dependency name */ ++ Elf64_Half vna_flags; /* Dependency specific information */ ++ Elf64_Half vna_other; /* Unused */ ++ Elf64_Word vna_name; /* Dependency name string offset */ ++ Elf64_Word vna_next; /* Offset in bytes to next vernaux ++ entry */ ++} Elf64_Vernaux; ++ ++ ++/* Legal values for vna_flags. */ ++#define VER_FLG_WEAK 0x2 /* Weak version identifier */ ++ ++ ++/* Auxiliary vector. */ ++ ++/* This vector is normally only used by the program interpreter. The ++ usual definition in an ABI supplement uses the name auxv_t. The ++ vector is not usually defined in a standard file, but it ++ can't hurt. We rename it to avoid conflicts. The sizes of these ++ types are an arrangement between the exec server and the program ++ interpreter, so we don't fully specify them here. */ ++ ++typedef struct ++{ ++ uint32_t a_type; /* Entry type */ ++ union ++ { ++ uint32_t a_val; /* Integer value */ ++ /* We use to have pointer elements added here. We cannot do that, ++ though, since it does not work when using 32-bit definitions ++ on 64-bit platforms and vice versa. */ ++ } a_un; ++} Elf32_auxv_t; ++ ++typedef struct ++{ ++ uint64_t a_type; /* Entry type */ ++ union ++ { ++ uint64_t a_val; /* Integer value */ ++ /* We use to have pointer elements added here. We cannot do that, ++ though, since it does not work when using 32-bit definitions ++ on 64-bit platforms and vice versa. */ ++ } a_un; ++} Elf64_auxv_t; ++ ++/* Legal values for a_type (entry type). */ ++ ++#define AT_NULL 0 /* End of vector */ ++#define AT_IGNORE 1 /* Entry should be ignored */ ++#define AT_EXECFD 2 /* File descriptor of program */ ++#define AT_PHDR 3 /* Program headers for program */ ++#define AT_PHENT 4 /* Size of program header entry */ ++#define AT_PHNUM 5 /* Number of program headers */ ++#define AT_PAGESZ 6 /* System page size */ ++#define AT_BASE 7 /* Base address of interpreter */ ++#define AT_FLAGS 8 /* Flags */ ++#define AT_ENTRY 9 /* Entry point of program */ ++#define AT_NOTELF 10 /* Program is not ELF */ ++#define AT_UID 11 /* Real uid */ ++#define AT_EUID 12 /* Effective uid */ ++#define AT_GID 13 /* Real gid */ ++#define AT_EGID 14 /* Effective gid */ ++#define AT_CLKTCK 17 /* Frequency of times() */ ++ ++/* Some more special a_type values describing the hardware. */ ++#define AT_PLATFORM 15 /* String identifying platform. */ ++#define AT_HWCAP 16 /* Machine dependent hints about ++ processor capabilities. */ ++ ++/* This entry gives some information about the FPU initialization ++ performed by the kernel. */ ++#define AT_FPUCW 18 /* Used FPU control word. */ ++ ++/* Cache block sizes. */ ++#define AT_DCACHEBSIZE 19 /* Data cache block size. */ ++#define AT_ICACHEBSIZE 20 /* Instruction cache block size. */ ++#define AT_UCACHEBSIZE 21 /* Unified cache block size. */ ++ ++/* A special ignored value for PPC, used by the kernel to control the ++ interpretation of the AUXV. Must be > 16. */ ++#define AT_IGNOREPPC 22 /* Entry should be ignored. */ ++ ++#define AT_SECURE 23 /* Boolean, was exec setuid-like? 
*/ ++ ++#define AT_BASE_PLATFORM 24 /* String identifying real platforms.*/ ++ ++#define AT_RANDOM 25 /* Address of 16 random bytes. */ ++ ++#define AT_EXECFN 31 /* Filename of executable. */ ++ ++/* Pointer to the global system page used for system calls and other ++ nice things. */ ++#define AT_SYSINFO 32 ++#define AT_SYSINFO_EHDR 33 ++ ++/* Shapes of the caches. Bits 0-3 contains associativity; bits 4-7 contains ++ log2 of line size; mask those to get cache size. */ ++#define AT_L1I_CACHESHAPE 34 ++#define AT_L1D_CACHESHAPE 35 ++#define AT_L2_CACHESHAPE 36 ++#define AT_L3_CACHESHAPE 37 ++ ++/* Note section contents. Each entry in the note section begins with ++ a header of a fixed form. */ ++ ++typedef struct ++{ ++ Elf32_Word n_namesz; /* Length of the note's name. */ ++ Elf32_Word n_descsz; /* Length of the note's descriptor. */ ++ Elf32_Word n_type; /* Type of the note. */ ++} Elf32_Nhdr; ++ ++typedef struct ++{ ++ Elf64_Word n_namesz; /* Length of the note's name. */ ++ Elf64_Word n_descsz; /* Length of the note's descriptor. */ ++ Elf64_Word n_type; /* Type of the note. */ ++} Elf64_Nhdr; ++ ++/* Known names of notes. */ ++ ++/* Solaris entries in the note section have this name. */ ++#define ELF_NOTE_SOLARIS "SUNW Solaris" ++ ++/* Note entries for GNU systems have this name. */ ++#define ELF_NOTE_GNU "GNU" ++ ++ ++/* Defined types of notes for Solaris. */ ++ ++/* Value of descriptor (one word) is desired pagesize for the binary. */ ++#define ELF_NOTE_PAGESIZE_HINT 1 ++ ++ ++/* Defined note types for GNU systems. */ ++ ++/* ABI information. The descriptor consists of words: ++ word 0: OS descriptor ++ word 1: major version of the ABI ++ word 2: minor version of the ABI ++ word 3: subminor version of the ABI ++*/ ++#define NT_GNU_ABI_TAG 1 ++#define ELF_NOTE_ABI NT_GNU_ABI_TAG /* Old name. */ ++ ++/* Known OSes. These values can appear in word 0 of an ++ NT_GNU_ABI_TAG note section entry. */ ++#define ELF_NOTE_OS_LINUX 0 ++#define ELF_NOTE_OS_GNU 1 ++#define ELF_NOTE_OS_SOLARIS2 2 ++#define ELF_NOTE_OS_FREEBSD 3 ++ ++/* Synthetic hwcap information. The descriptor begins with two words: ++ word 0: number of entries ++ word 1: bitmask of enabled entries ++ Then follow variable-length entries, one byte followed by a ++ '\0'-terminated hwcap name string. The byte gives the bit ++ number to test if enabled, (1U << bit) & bitmask. */ ++#define NT_GNU_HWCAP 2 ++ ++/* Build ID bits as generated by ld --build-id. ++ The descriptor consists of any nonzero number of bytes. */ ++#define NT_GNU_BUILD_ID 3 ++ ++/* Version note generated by GNU gold containing a version string. */ ++#define NT_GNU_GOLD_VERSION 4 ++ ++ ++/* Move records. */ ++typedef struct ++{ ++ Elf32_Xword m_value; /* Symbol value. */ ++ Elf32_Word m_info; /* Size and index. */ ++ Elf32_Word m_poffset; /* Symbol offset. */ ++ Elf32_Half m_repeat; /* Repeat count. */ ++ Elf32_Half m_stride; /* Stride info. */ ++} Elf32_Move; ++ ++typedef struct ++{ ++ Elf64_Xword m_value; /* Symbol value. */ ++ Elf64_Xword m_info; /* Size and index. */ ++ Elf64_Xword m_poffset; /* Symbol offset. */ ++ Elf64_Half m_repeat; /* Repeat count. */ ++ Elf64_Half m_stride; /* Stride info. */ ++} Elf64_Move; ++ ++/* Macro to construct move records. 
*/ ++#define ELF32_M_SYM(info) ((info) >> 8) ++#define ELF32_M_SIZE(info) ((unsigned char) (info)) ++#define ELF32_M_INFO(sym, size) (((sym) << 8) + (unsigned char) (size)) ++ ++#define ELF64_M_SYM(info) ELF32_M_SYM (info) ++#define ELF64_M_SIZE(info) ELF32_M_SIZE (info) ++#define ELF64_M_INFO(sym, size) ELF32_M_INFO (sym, size) ++ ++ ++/* Motorola 68k specific definitions. */ ++ ++/* Values for Elf32_Ehdr.e_flags. */ ++#define EF_CPU32 0x00810000 ++ ++/* m68k relocs. */ ++ ++#define R_68K_NONE 0 /* No reloc */ ++#define R_68K_32 1 /* Direct 32 bit */ ++#define R_68K_16 2 /* Direct 16 bit */ ++#define R_68K_8 3 /* Direct 8 bit */ ++#define R_68K_PC32 4 /* PC relative 32 bit */ ++#define R_68K_PC16 5 /* PC relative 16 bit */ ++#define R_68K_PC8 6 /* PC relative 8 bit */ ++#define R_68K_GOT32 7 /* 32 bit PC relative GOT entry */ ++#define R_68K_GOT16 8 /* 16 bit PC relative GOT entry */ ++#define R_68K_GOT8 9 /* 8 bit PC relative GOT entry */ ++#define R_68K_GOT32O 10 /* 32 bit GOT offset */ ++#define R_68K_GOT16O 11 /* 16 bit GOT offset */ ++#define R_68K_GOT8O 12 /* 8 bit GOT offset */ ++#define R_68K_PLT32 13 /* 32 bit PC relative PLT address */ ++#define R_68K_PLT16 14 /* 16 bit PC relative PLT address */ ++#define R_68K_PLT8 15 /* 8 bit PC relative PLT address */ ++#define R_68K_PLT32O 16 /* 32 bit PLT offset */ ++#define R_68K_PLT16O 17 /* 16 bit PLT offset */ ++#define R_68K_PLT8O 18 /* 8 bit PLT offset */ ++#define R_68K_COPY 19 /* Copy symbol at runtime */ ++#define R_68K_GLOB_DAT 20 /* Create GOT entry */ ++#define R_68K_JMP_SLOT 21 /* Create PLT entry */ ++#define R_68K_RELATIVE 22 /* Adjust by program base */ ++#define R_68K_TLS_GD32 25 /* 32 bit GOT offset for GD */ ++#define R_68K_TLS_GD16 26 /* 16 bit GOT offset for GD */ ++#define R_68K_TLS_GD8 27 /* 8 bit GOT offset for GD */ ++#define R_68K_TLS_LDM32 28 /* 32 bit GOT offset for LDM */ ++#define R_68K_TLS_LDM16 29 /* 16 bit GOT offset for LDM */ ++#define R_68K_TLS_LDM8 30 /* 8 bit GOT offset for LDM */ ++#define R_68K_TLS_LDO32 31 /* 32 bit module-relative offset */ ++#define R_68K_TLS_LDO16 32 /* 16 bit module-relative offset */ ++#define R_68K_TLS_LDO8 33 /* 8 bit module-relative offset */ ++#define R_68K_TLS_IE32 34 /* 32 bit GOT offset for IE */ ++#define R_68K_TLS_IE16 35 /* 16 bit GOT offset for IE */ ++#define R_68K_TLS_IE8 36 /* 8 bit GOT offset for IE */ ++#define R_68K_TLS_LE32 37 /* 32 bit offset relative to ++ static TLS block */ ++#define R_68K_TLS_LE16 38 /* 16 bit offset relative to ++ static TLS block */ ++#define R_68K_TLS_LE8 39 /* 8 bit offset relative to ++ static TLS block */ ++#define R_68K_TLS_DTPMOD32 40 /* 32 bit module number */ ++#define R_68K_TLS_DTPREL32 41 /* 32 bit module-relative offset */ ++#define R_68K_TLS_TPREL32 42 /* 32 bit TP-relative offset */ ++/* Keep this the last entry. */ ++#define R_68K_NUM 43 ++ ++/* Intel 80386 specific definitions. */ ++ ++/* i386 relocs. 
*/ ++ ++#define R_386_NONE 0 /* No reloc */ ++#define R_386_32 1 /* Direct 32 bit */ ++#define R_386_PC32 2 /* PC relative 32 bit */ ++#define R_386_GOT32 3 /* 32 bit GOT entry */ ++#define R_386_PLT32 4 /* 32 bit PLT address */ ++#define R_386_COPY 5 /* Copy symbol at runtime */ ++#define R_386_GLOB_DAT 6 /* Create GOT entry */ ++#define R_386_JMP_SLOT 7 /* Create PLT entry */ ++#define R_386_RELATIVE 8 /* Adjust by program base */ ++#define R_386_GOTOFF 9 /* 32 bit offset to GOT */ ++#define R_386_GOTPC 10 /* 32 bit PC relative offset to GOT */ ++#define R_386_32PLT 11 ++#define R_386_TLS_TPOFF 14 /* Offset in static TLS block */ ++#define R_386_TLS_IE 15 /* Address of GOT entry for static TLS ++ block offset */ ++#define R_386_TLS_GOTIE 16 /* GOT entry for static TLS block ++ offset */ ++#define R_386_TLS_LE 17 /* Offset relative to static TLS ++ block */ ++#define R_386_TLS_GD 18 /* Direct 32 bit for GNU version of ++ general dynamic thread local data */ ++#define R_386_TLS_LDM 19 /* Direct 32 bit for GNU version of ++ local dynamic thread local data ++ in LE code */ ++#define R_386_16 20 ++#define R_386_PC16 21 ++#define R_386_8 22 ++#define R_386_PC8 23 ++#define R_386_TLS_GD_32 24 /* Direct 32 bit for general dynamic ++ thread local data */ ++#define R_386_TLS_GD_PUSH 25 /* Tag for pushl in GD TLS code */ ++#define R_386_TLS_GD_CALL 26 /* Relocation for call to ++ __tls_get_addr() */ ++#define R_386_TLS_GD_POP 27 /* Tag for popl in GD TLS code */ ++#define R_386_TLS_LDM_32 28 /* Direct 32 bit for local dynamic ++ thread local data in LE code */ ++#define R_386_TLS_LDM_PUSH 29 /* Tag for pushl in LDM TLS code */ ++#define R_386_TLS_LDM_CALL 30 /* Relocation for call to ++ __tls_get_addr() in LDM code */ ++#define R_386_TLS_LDM_POP 31 /* Tag for popl in LDM TLS code */ ++#define R_386_TLS_LDO_32 32 /* Offset relative to TLS block */ ++#define R_386_TLS_IE_32 33 /* GOT entry for negated static TLS ++ block offset */ ++#define R_386_TLS_LE_32 34 /* Negated offset relative to static ++ TLS block */ ++#define R_386_TLS_DTPMOD32 35 /* ID of module containing symbol */ ++#define R_386_TLS_DTPOFF32 36 /* Offset in TLS block */ ++#define R_386_TLS_TPOFF32 37 /* Negated offset in static TLS block */ ++/* 38? */ ++#define R_386_TLS_GOTDESC 39 /* GOT offset for TLS descriptor. */ ++#define R_386_TLS_DESC_CALL 40 /* Marker of call through TLS ++ descriptor for ++ relaxation. */ ++#define R_386_TLS_DESC 41 /* TLS descriptor containing ++ pointer to code and to ++ argument, returning the TLS ++ offset for the symbol. */ ++#define R_386_IRELATIVE 42 /* Adjust indirectly by program base */ ++/* Keep this the last entry. */ ++#define R_386_NUM 43 ++ ++/* SUN SPARC specific definitions. */ ++ ++/* Legal values for ST_TYPE subfield of st_info (symbol type). */ ++ ++#define STT_SPARC_REGISTER 13 /* Global register reserved to app. */ ++ ++/* Values for Elf64_Ehdr.e_flags. */ ++ ++#define EF_SPARCV9_MM 3 ++#define EF_SPARCV9_TSO 0 ++#define EF_SPARCV9_PSO 1 ++#define EF_SPARCV9_RMO 2 ++#define EF_SPARC_LEDATA 0x800000 /* little endian data */ ++#define EF_SPARC_EXT_MASK 0xFFFF00 ++#define EF_SPARC_32PLUS 0x000100 /* generic V8+ features */ ++#define EF_SPARC_SUN_US1 0x000200 /* Sun UltraSPARC1 extensions */ ++#define EF_SPARC_HAL_R1 0x000400 /* HAL R1 extensions */ ++#define EF_SPARC_SUN_US3 0x000800 /* Sun UltraSPARCIII extensions */ ++ ++/* SPARC relocs. 
*/
++
++#define R_SPARC_NONE     0  /* No reloc */
++#define R_SPARC_8        1  /* Direct 8 bit */
++#define R_SPARC_16       2  /* Direct 16 bit */
++#define R_SPARC_32       3  /* Direct 32 bit */
++#define R_SPARC_DISP8    4  /* PC relative 8 bit */
++#define R_SPARC_DISP16   5  /* PC relative 16 bit */
++#define R_SPARC_DISP32   6  /* PC relative 32 bit */
++#define R_SPARC_WDISP30  7  /* PC relative 30 bit shifted */
++#define R_SPARC_WDISP22  8  /* PC relative 22 bit shifted */
++#define R_SPARC_HI22     9  /* High 22 bit */
++#define R_SPARC_22       10 /* Direct 22 bit */
++#define R_SPARC_13       11 /* Direct 13 bit */
++#define R_SPARC_LO10     12 /* Truncated 10 bit */
++#define R_SPARC_GOT10    13 /* Truncated 10 bit GOT entry */
++#define R_SPARC_GOT13    14 /* 13 bit GOT entry */
++#define R_SPARC_GOT22    15 /* 22 bit GOT entry shifted */
++#define R_SPARC_PC10     16 /* PC relative 10 bit truncated */
++#define R_SPARC_PC22     17 /* PC relative 22 bit shifted */
++#define R_SPARC_WPLT30   18 /* 30 bit PC relative PLT address */
++#define R_SPARC_COPY     19 /* Copy symbol at runtime */
++#define R_SPARC_GLOB_DAT 20 /* Create GOT entry */
++#define R_SPARC_JMP_SLOT 21 /* Create PLT entry */
++#define R_SPARC_RELATIVE 22 /* Adjust by program base */
++#define R_SPARC_UA32     23 /* Direct 32 bit unaligned */
++
++/* Additional Sparc64 relocs. */
++
++#define R_SPARC_PLT32    24 /* Direct 32 bit ref to PLT entry */
++#define R_SPARC_HIPLT22  25 /* High 22 bit PLT entry */
++#define R_SPARC_LOPLT10  26 /* Truncated 10 bit PLT entry */
++#define R_SPARC_PCPLT32  27 /* PC rel 32 bit ref to PLT entry */
++#define R_SPARC_PCPLT22  28 /* PC rel high 22 bit PLT entry */
++#define R_SPARC_PCPLT10  29 /* PC rel trunc 10 bit PLT entry */
++#define R_SPARC_10       30 /* Direct 10 bit */
++#define R_SPARC_11       31 /* Direct 11 bit */
++#define R_SPARC_64       32 /* Direct 64 bit */
++#define R_SPARC_OLO10    33 /* 10bit with secondary 13bit addend */
++#define R_SPARC_HH22     34 /* Top 22 bits of direct 64 bit */
++#define R_SPARC_HM10     35 /* High middle 10 bits of ... */
++#define R_SPARC_LM22     36 /* Low middle 22 bits of ... */
++#define R_SPARC_PC_HH22  37 /* Top 22 bits of pc rel 64 bit */
++#define R_SPARC_PC_HM10  38 /* High middle 10 bit of ... */
++#define R_SPARC_PC_LM22  39 /* Low middle 22 bits of ...
*/ ++#define R_SPARC_WDISP16 40 /* PC relative 16 bit shifted */ ++#define R_SPARC_WDISP19 41 /* PC relative 19 bit shifted */ ++#define R_SPARC_GLOB_JMP 42 /* was part of v9 ABI but was removed */ ++#define R_SPARC_7 43 /* Direct 7 bit */ ++#define R_SPARC_5 44 /* Direct 5 bit */ ++#define R_SPARC_6 45 /* Direct 6 bit */ ++#define R_SPARC_DISP64 46 /* PC relative 64 bit */ ++#define R_SPARC_PLT64 47 /* Direct 64 bit ref to PLT entry */ ++#define R_SPARC_HIX22 48 /* High 22 bit complemented */ ++#define R_SPARC_LOX10 49 /* Truncated 11 bit complemented */ ++#define R_SPARC_H44 50 /* Direct high 12 of 44 bit */ ++#define R_SPARC_M44 51 /* Direct mid 22 of 44 bit */ ++#define R_SPARC_L44 52 /* Direct low 10 of 44 bit */ ++#define R_SPARC_REGISTER 53 /* Global register usage */ ++#define R_SPARC_UA64 54 /* Direct 64 bit unaligned */ ++#define R_SPARC_UA16 55 /* Direct 16 bit unaligned */ ++#define R_SPARC_TLS_GD_HI22 56 ++#define R_SPARC_TLS_GD_LO10 57 ++#define R_SPARC_TLS_GD_ADD 58 ++#define R_SPARC_TLS_GD_CALL 59 ++#define R_SPARC_TLS_LDM_HI22 60 ++#define R_SPARC_TLS_LDM_LO10 61 ++#define R_SPARC_TLS_LDM_ADD 62 ++#define R_SPARC_TLS_LDM_CALL 63 ++#define R_SPARC_TLS_LDO_HIX22 64 ++#define R_SPARC_TLS_LDO_LOX10 65 ++#define R_SPARC_TLS_LDO_ADD 66 ++#define R_SPARC_TLS_IE_HI22 67 ++#define R_SPARC_TLS_IE_LO10 68 ++#define R_SPARC_TLS_IE_LD 69 ++#define R_SPARC_TLS_IE_LDX 70 ++#define R_SPARC_TLS_IE_ADD 71 ++#define R_SPARC_TLS_LE_HIX22 72 ++#define R_SPARC_TLS_LE_LOX10 73 ++#define R_SPARC_TLS_DTPMOD32 74 ++#define R_SPARC_TLS_DTPMOD64 75 ++#define R_SPARC_TLS_DTPOFF32 76 ++#define R_SPARC_TLS_DTPOFF64 77 ++#define R_SPARC_TLS_TPOFF32 78 ++#define R_SPARC_TLS_TPOFF64 79 ++#define R_SPARC_GOTDATA_HIX22 80 ++#define R_SPARC_GOTDATA_LOX10 81 ++#define R_SPARC_GOTDATA_OP_HIX22 82 ++#define R_SPARC_GOTDATA_OP_LOX10 83 ++#define R_SPARC_GOTDATA_OP 84 ++#define R_SPARC_H34 85 ++#define R_SPARC_SIZE32 86 ++#define R_SPARC_SIZE64 87 ++#define R_SPARC_WDISP10 88 ++#define R_SPARC_JMP_IREL 248 ++#define R_SPARC_IRELATIVE 249 ++#define R_SPARC_GNU_VTINHERIT 250 ++#define R_SPARC_GNU_VTENTRY 251 ++#define R_SPARC_REV32 252 ++/* Keep this the last entry. */ ++#define R_SPARC_NUM 253 ++ ++/* For Sparc64, legal values for d_tag of Elf64_Dyn. */ ++ ++#define DT_SPARC_REGISTER 0x70000001 ++#define DT_SPARC_NUM 2 ++ ++/* MIPS R3000 specific definitions. */ ++ ++/* Legal values for e_flags field of Elf32_Ehdr. */ ++ ++#define EF_MIPS_NOREORDER 1 /* A .noreorder directive was used */ ++#define EF_MIPS_PIC 2 /* Contains PIC code */ ++#define EF_MIPS_CPIC 4 /* Uses PIC calling sequence */ ++#define EF_MIPS_XGOT 8 ++#define EF_MIPS_64BIT_WHIRL 16 ++#define EF_MIPS_ABI2 32 ++#define EF_MIPS_ABI_ON32 64 ++#define EF_MIPS_ARCH 0xf0000000 /* MIPS architecture level */ ++ ++/* Legal values for MIPS architecture level. */ ++ ++#define EF_MIPS_ARCH_1 0x00000000 /* -mips1 code. */ ++#define EF_MIPS_ARCH_2 0x10000000 /* -mips2 code. */ ++#define EF_MIPS_ARCH_3 0x20000000 /* -mips3 code. */ ++#define EF_MIPS_ARCH_4 0x30000000 /* -mips4 code. */ ++#define EF_MIPS_ARCH_5 0x40000000 /* -mips5 code. */ ++#define EF_MIPS_ARCH_32 0x60000000 /* MIPS32 code. */ ++#define EF_MIPS_ARCH_64 0x70000000 /* MIPS64 code. */ ++ ++/* The following are non-official names and should not be used. */ ++ ++#define E_MIPS_ARCH_1 0x00000000 /* -mips1 code. */ ++#define E_MIPS_ARCH_2 0x10000000 /* -mips2 code. */ ++#define E_MIPS_ARCH_3 0x20000000 /* -mips3 code. */ ++#define E_MIPS_ARCH_4 0x30000000 /* -mips4 code. 
*/
++#define E_MIPS_ARCH_5  0x40000000 /* -mips5 code. */
++#define E_MIPS_ARCH_32 0x60000000 /* MIPS32 code. */
++#define E_MIPS_ARCH_64 0x70000000 /* MIPS64 code. */
++
++/* Special section indices. */
++
++#define SHN_MIPS_ACOMMON    0xff00 /* Allocated common symbols */
++#define SHN_MIPS_TEXT       0xff01 /* Allocated text symbols. */
++#define SHN_MIPS_DATA       0xff02 /* Allocated data symbols. */
++#define SHN_MIPS_SCOMMON    0xff03 /* Small common symbols */
++#define SHN_MIPS_SUNDEFINED 0xff04 /* Small undefined symbols */
++
++/* Legal values for sh_type field of Elf32_Shdr. */
++
++#define SHT_MIPS_LIBLIST  0x70000000 /* Shared objects used in link */
++#define SHT_MIPS_MSYM     0x70000001
++#define SHT_MIPS_CONFLICT 0x70000002 /* Conflicting symbols */
++#define SHT_MIPS_GPTAB    0x70000003 /* Global data area sizes */
++#define SHT_MIPS_UCODE    0x70000004 /* Reserved for SGI/MIPS compilers */
++#define SHT_MIPS_DEBUG    0x70000005 /* MIPS ECOFF debugging information. */
++#define SHT_MIPS_REGINFO  0x70000006 /* Register usage information */
++#define SHT_MIPS_PACKAGE  0x70000007
++#define SHT_MIPS_PACKSYM  0x70000008
++#define SHT_MIPS_RELD     0x70000009
++#define SHT_MIPS_IFACE    0x7000000b
++#define SHT_MIPS_CONTENT  0x7000000c
++#define SHT_MIPS_OPTIONS  0x7000000d /* Miscellaneous options. */
++#define SHT_MIPS_SHDR     0x70000010
++#define SHT_MIPS_FDESC    0x70000011
++#define SHT_MIPS_EXTSYM   0x70000012
++#define SHT_MIPS_DENSE    0x70000013
++#define SHT_MIPS_PDESC    0x70000014
++#define SHT_MIPS_LOCSYM   0x70000015
++#define SHT_MIPS_AUXSYM   0x70000016
++#define SHT_MIPS_OPTSYM   0x70000017
++#define SHT_MIPS_LOCSTR   0x70000018
++#define SHT_MIPS_LINE     0x70000019
++#define SHT_MIPS_RFDESC   0x7000001a
++#define SHT_MIPS_DELTASYM 0x7000001b
++#define SHT_MIPS_DELTAINST 0x7000001c
++#define SHT_MIPS_DELTACLASS 0x7000001d
++#define SHT_MIPS_DWARF    0x7000001e /* DWARF debugging information. */
++#define SHT_MIPS_DELTADECL 0x7000001f
++#define SHT_MIPS_SYMBOL_LIB 0x70000020
++#define SHT_MIPS_EVENTS   0x70000021 /* Event section. */
++#define SHT_MIPS_TRANSLATE 0x70000022
++#define SHT_MIPS_PIXIE    0x70000023
++#define SHT_MIPS_XLATE    0x70000024
++#define SHT_MIPS_XLATE_DEBUG 0x70000025
++#define SHT_MIPS_WHIRL    0x70000026
++#define SHT_MIPS_EH_REGION 0x70000027
++#define SHT_MIPS_XLATE_OLD 0x70000028
++#define SHT_MIPS_PDR_EXCEPTION 0x70000029
++
++/* Legal values for sh_flags field of Elf32_Shdr. */
++
++#define SHF_MIPS_GPREL   0x10000000 /* Must be part of global data area */
++#define SHF_MIPS_MERGE   0x20000000
++#define SHF_MIPS_ADDR    0x40000000
++#define SHF_MIPS_STRINGS 0x80000000
++#define SHF_MIPS_NOSTRIP 0x08000000
++#define SHF_MIPS_LOCAL   0x04000000
++#define SHF_MIPS_NAMES   0x02000000
++#define SHF_MIPS_NODUPE  0x01000000
++
++
++/* Symbol tables. */
++
++/* MIPS specific values for `st_other'. */
++#define STO_MIPS_DEFAULT   0x0
++#define STO_MIPS_INTERNAL  0x1
++#define STO_MIPS_HIDDEN    0x2
++#define STO_MIPS_PROTECTED 0x3
++#define STO_MIPS_PLT       0x8
++#define STO_MIPS_SC_ALIGN_UNUSED 0xff
++
++/* MIPS specific values for `st_info'. */
++#define STB_MIPS_SPLIT_COMMON 13
++
++/* Entries found in sections of type SHT_MIPS_GPTAB. */
++
++typedef union
++{
++  struct
++    {
++      Elf32_Word gt_current_g_value; /* -G value used for compilation */
++      Elf32_Word gt_unused;          /* Not used */
++    } gt_header;                     /* First entry in section */
++  struct
++    {
++      Elf32_Word gt_g_value;         /* If this value were used for -G */
++      Elf32_Word gt_bytes;           /* This many bytes would be used */
++    } gt_entry;                      /* Subsequent entries in section */
++} Elf32_gptab;
++
++/* Entry found in sections of type SHT_MIPS_REGINFO. */
++
++typedef struct
++{
++  Elf32_Word ri_gprmask;    /* General registers used */
++  Elf32_Word ri_cprmask[4]; /* Coprocessor registers used */
++  Elf32_Sword ri_gp_value;  /* $gp register value */
++} Elf32_RegInfo;
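++/* Illustrative sketch (editor's addition, not part of the upstream
++   header): a SHT_MIPS_GPTAB section is an array of Elf32_gptab where
++   entry 0 uses the gt_header variant and every later entry uses the
++   gt_entry variant. `tab' and `nent' (section size divided by
++   sizeof (Elf32_gptab)) are assumed to come from the section header. */
++static inline Elf32_Word example_gptab_bytes_for (const Elf32_gptab *tab,
++                                                  Elf32_Word nent,
++                                                  Elf32_Word gvalue)
++{
++  Elf32_Word i;
++
++  for (i = 1; i < nent; i++)  /* Skip the gt_header at index 0. */
++    if (tab[i].gt_entry.gt_g_value == gvalue)
++      return tab[i].gt_entry.gt_bytes;
++  return 0;
++}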
++
++/* Entries found in sections of type SHT_MIPS_OPTIONS. */
++
++typedef struct
++{
++  unsigned char kind;    /* Determines interpretation of the
++                            variable part of descriptor. */
++  unsigned char size;    /* Size of descriptor, including header. */
++  Elf32_Section section; /* Section header index of section affected,
++                            0 for global options. */
++  Elf32_Word info;       /* Kind-specific information. */
++} Elf_Options;
++
++/* Values for `kind' field in Elf_Options. */
++
++#define ODK_NULL       0 /* Undefined. */
++#define ODK_REGINFO    1 /* Register usage information. */
++#define ODK_EXCEPTIONS 2 /* Exception processing options. */
++#define ODK_PAD        3 /* Section padding options. */
++#define ODK_HWPATCH    4 /* Hardware workarounds performed */
++#define ODK_FILL       5 /* Record the fill value used by the linker. */
++#define ODK_TAGS       6 /* Reserve space for desktop tools to write. */
++#define ODK_HWAND      7 /* HW workarounds. 'AND' bits when merging. */
++#define ODK_HWOR       8 /* HW workarounds. 'OR' bits when merging. */
++
++/* Values for `info' in Elf_Options for ODK_EXCEPTIONS entries. */
++
++#define OEX_FPU_MIN 0x1f    /* FPE's which MUST be enabled. */
++#define OEX_FPU_MAX 0x1f00  /* FPE's which MAY be enabled. */
++#define OEX_PAGE0   0x10000 /* Page zero must be mapped. */
++#define OEX_SMM     0x20000 /* Force sequential memory mode? */
++#define OEX_FPDBUG  0x40000 /* Force floating point debug mode? */
++#define OEX_PRECISEFP OEX_FPDBUG
++#define OEX_DISMISS 0x80000 /* Dismiss invalid address faults? */
++
++#define OEX_FPU_INVAL 0x10
++#define OEX_FPU_DIV0  0x08
++#define OEX_FPU_OFLO  0x04
++#define OEX_FPU_UFLO  0x02
++#define OEX_FPU_INEX  0x01
++
++/* Masks for `info' in Elf_Options for an ODK_HWPATCH entry. */
++
++#define OHW_R4KEOP    0x1 /* R4000 end-of-page patch. */
++#define OHW_R8KPFETCH 0x2 /* May need R8000 prefetch patch. */
++#define OHW_R5KEOP    0x4 /* R5000 end-of-page patch. */
++#define OHW_R5KCVTL   0x8 /* R5000 cvt.[ds].l bug. clean=1. */
++
++#define OPAD_PREFIX  0x1
++#define OPAD_POSTFIX 0x2
++#define OPAD_SYMBOL  0x4
++
++/* Entry found in `.options' section. */
++
++typedef struct
++{
++  Elf32_Word hwp_flags1; /* Extra flags. */
++  Elf32_Word hwp_flags2; /* Extra flags. */
++} Elf_Options_Hw;
++
++/* Masks for `info' in Elf_Options for ODK_HWAND and ODK_HWOR entries. */
++
++#define OHWA0_R4KEOP_CHECKED 0x00000001
++#define OHWA1_R4KEOP_CLEAN   0x00000002
++
++/* MIPS relocs.
*/ ++ ++#define R_MIPS_NONE 0 /* No reloc */ ++#define R_MIPS_16 1 /* Direct 16 bit */ ++#define R_MIPS_32 2 /* Direct 32 bit */ ++#define R_MIPS_REL32 3 /* PC relative 32 bit */ ++#define R_MIPS_26 4 /* Direct 26 bit shifted */ ++#define R_MIPS_HI16 5 /* High 16 bit */ ++#define R_MIPS_LO16 6 /* Low 16 bit */ ++#define R_MIPS_GPREL16 7 /* GP relative 16 bit */ ++#define R_MIPS_LITERAL 8 /* 16 bit literal entry */ ++#define R_MIPS_GOT16 9 /* 16 bit GOT entry */ ++#define R_MIPS_PC16 10 /* PC relative 16 bit */ ++#define R_MIPS_CALL16 11 /* 16 bit GOT entry for function */ ++#define R_MIPS_GPREL32 12 /* GP relative 32 bit */ ++ ++#define R_MIPS_SHIFT5 16 ++#define R_MIPS_SHIFT6 17 ++#define R_MIPS_64 18 ++#define R_MIPS_GOT_DISP 19 ++#define R_MIPS_GOT_PAGE 20 ++#define R_MIPS_GOT_OFST 21 ++#define R_MIPS_GOT_HI16 22 ++#define R_MIPS_GOT_LO16 23 ++#define R_MIPS_SUB 24 ++#define R_MIPS_INSERT_A 25 ++#define R_MIPS_INSERT_B 26 ++#define R_MIPS_DELETE 27 ++#define R_MIPS_HIGHER 28 ++#define R_MIPS_HIGHEST 29 ++#define R_MIPS_CALL_HI16 30 ++#define R_MIPS_CALL_LO16 31 ++#define R_MIPS_SCN_DISP 32 ++#define R_MIPS_REL16 33 ++#define R_MIPS_ADD_IMMEDIATE 34 ++#define R_MIPS_PJUMP 35 ++#define R_MIPS_RELGOT 36 ++#define R_MIPS_JALR 37 ++#define R_MIPS_TLS_DTPMOD32 38 /* Module number 32 bit */ ++#define R_MIPS_TLS_DTPREL32 39 /* Module-relative offset 32 bit */ ++#define R_MIPS_TLS_DTPMOD64 40 /* Module number 64 bit */ ++#define R_MIPS_TLS_DTPREL64 41 /* Module-relative offset 64 bit */ ++#define R_MIPS_TLS_GD 42 /* 16 bit GOT offset for GD */ ++#define R_MIPS_TLS_LDM 43 /* 16 bit GOT offset for LDM */ ++#define R_MIPS_TLS_DTPREL_HI16 44 /* Module-relative offset, high 16 bits */ ++#define R_MIPS_TLS_DTPREL_LO16 45 /* Module-relative offset, low 16 bits */ ++#define R_MIPS_TLS_GOTTPREL 46 /* 16 bit GOT offset for IE */ ++#define R_MIPS_TLS_TPREL32 47 /* TP-relative offset, 32 bit */ ++#define R_MIPS_TLS_TPREL64 48 /* TP-relative offset, 64 bit */ ++#define R_MIPS_TLS_TPREL_HI16 49 /* TP-relative offset, high 16 bits */ ++#define R_MIPS_TLS_TPREL_LO16 50 /* TP-relative offset, low 16 bits */ ++#define R_MIPS_GLOB_DAT 51 ++#define R_MIPS_COPY 126 ++#define R_MIPS_JUMP_SLOT 127 ++/* Keep this the last entry. */ ++#define R_MIPS_NUM 128 ++ ++/* Legal values for p_type field of Elf32_Phdr. */ ++ ++#define PT_MIPS_REGINFO 0x70000000 /* Register usage information */ ++#define PT_MIPS_RTPROC 0x70000001 /* Runtime procedure table. */ ++#define PT_MIPS_OPTIONS 0x70000002 ++ ++/* Special program header types. */ ++ ++#define PF_MIPS_LOCAL 0x10000000 ++ ++/* Legal values for d_tag field of Elf32_Dyn. 
*/
++
++#define DT_MIPS_RLD_VERSION  0x70000001 /* Runtime linker interface version */
++#define DT_MIPS_TIME_STAMP   0x70000002 /* Timestamp */
++#define DT_MIPS_ICHECKSUM    0x70000003 /* Checksum */
++#define DT_MIPS_IVERSION     0x70000004 /* Version string (string tbl index) */
++#define DT_MIPS_FLAGS        0x70000005 /* Flags */
++#define DT_MIPS_BASE_ADDRESS 0x70000006 /* Base address */
++#define DT_MIPS_MSYM         0x70000007
++#define DT_MIPS_CONFLICT     0x70000008 /* Address of CONFLICT section */
++#define DT_MIPS_LIBLIST      0x70000009 /* Address of LIBLIST section */
++#define DT_MIPS_LOCAL_GOTNO  0x7000000a /* Number of local GOT entries */
++#define DT_MIPS_CONFLICTNO   0x7000000b /* Number of CONFLICT entries */
++#define DT_MIPS_LIBLISTNO    0x70000010 /* Number of LIBLIST entries */
++#define DT_MIPS_SYMTABNO     0x70000011 /* Number of DYNSYM entries */
++#define DT_MIPS_UNREFEXTNO   0x70000012 /* First external DYNSYM */
++#define DT_MIPS_GOTSYM       0x70000013 /* First GOT entry in DYNSYM */
++#define DT_MIPS_HIPAGENO     0x70000014 /* Number of GOT page table entries */
++#define DT_MIPS_RLD_MAP      0x70000016 /* Address of run time loader map. */
++#define DT_MIPS_DELTA_CLASS  0x70000017 /* Delta C++ class definition. */
++#define DT_MIPS_DELTA_CLASS_NO 0x70000018 /* Number of entries in
++                                             DT_MIPS_DELTA_CLASS. */
++#define DT_MIPS_DELTA_INSTANCE 0x70000019 /* Delta C++ class instances. */
++#define DT_MIPS_DELTA_INSTANCE_NO 0x7000001a /* Number of entries in
++                                                DT_MIPS_DELTA_INSTANCE. */
++#define DT_MIPS_DELTA_RELOC  0x7000001b /* Delta relocations. */
++#define DT_MIPS_DELTA_RELOC_NO 0x7000001c /* Number of entries in
++                                             DT_MIPS_DELTA_RELOC. */
++#define DT_MIPS_DELTA_SYM    0x7000001d /* Delta symbols that Delta
++                                           relocations refer to. */
++#define DT_MIPS_DELTA_SYM_NO 0x7000001e /* Number of entries in
++                                           DT_MIPS_DELTA_SYM. */
++#define DT_MIPS_DELTA_CLASSSYM 0x70000020 /* Delta symbols that hold the
++                                             class declaration. */
++#define DT_MIPS_DELTA_CLASSSYM_NO 0x70000021 /* Number of entries in
++                                                DT_MIPS_DELTA_CLASSSYM. */
++#define DT_MIPS_CXX_FLAGS    0x70000022 /* Flags indicating C++ flavor. */
++#define DT_MIPS_PIXIE_INIT   0x70000023
++#define DT_MIPS_SYMBOL_LIB   0x70000024
++#define DT_MIPS_LOCALPAGE_GOTIDX 0x70000025
++#define DT_MIPS_LOCAL_GOTIDX 0x70000026
++#define DT_MIPS_HIDDEN_GOTIDX 0x70000027
++#define DT_MIPS_PROTECTED_GOTIDX 0x70000028
++#define DT_MIPS_OPTIONS      0x70000029 /* Address of .options. */
++#define DT_MIPS_INTERFACE    0x7000002a /* Address of .interface. */
++#define DT_MIPS_DYNSTR_ALIGN 0x7000002b
++#define DT_MIPS_INTERFACE_SIZE 0x7000002c /* Size of the .interface section. */
++#define DT_MIPS_RLD_TEXT_RESOLVE_ADDR 0x7000002d /* Address of rld_text_resolve
++                                                    function stored in GOT. */
++#define DT_MIPS_PERF_SUFFIX  0x7000002e /* Default suffix of dso to be added
++                                           by rld on dlopen() calls. */
++#define DT_MIPS_COMPACT_SIZE 0x7000002f /* (O32) Size of compact rel section. */
++#define DT_MIPS_GP_VALUE     0x70000030 /* GP value for aux GOTs. */
++#define DT_MIPS_AUX_DYNAMIC  0x70000031 /* Address of aux .dynamic. */
++/* The address of .got.plt in an executable using the new non-PIC ABI. */
++#define DT_MIPS_PLTGOT       0x70000032
++/* The base of the PLT in an executable using the new non-PIC ABI if that
++   PLT is writable. For a non-writable PLT, this is omitted or has a zero
++   value. */
++#define DT_MIPS_RWPLT        0x70000034
++#define DT_MIPS_NUM          0x35
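++/* Illustrative sketch (editor's addition, not part of the upstream
++   header): the MIPS DT_* tags above live in the ordinary .dynamic
++   array, so they are found the same way as generic tags, by scanning
++   Elf32_Dyn entries until DT_NULL. `dyn' is assumed to point at the
++   contents of the PT_DYNAMIC segment. */
++static inline Elf32_Addr example_find_rld_map (const Elf32_Dyn *dyn)
++{
++  for (; dyn->d_tag != DT_NULL; dyn++)
++    if (dyn->d_tag == DT_MIPS_RLD_MAP)
++      return dyn->d_un.d_ptr; /* Address of the run-time loader map. */
++  return 0;
++}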
++
++/* Legal values for DT_MIPS_FLAGS Elf32_Dyn entry. */
++
++#define RHF_NONE                   0        /* No flags */
++#define RHF_QUICKSTART             (1 << 0) /* Use quickstart */
++#define RHF_NOTPOT                 (1 << 1) /* Hash size not power of 2 */
++#define RHF_NO_LIBRARY_REPLACEMENT (1 << 2) /* Ignore LD_LIBRARY_PATH */
++#define RHF_NO_MOVE                (1 << 3)
++#define RHF_SGI_ONLY               (1 << 4)
++#define RHF_GUARANTEE_INIT         (1 << 5)
++#define RHF_DELTA_C_PLUS_PLUS      (1 << 6)
++#define RHF_GUARANTEE_START_INIT   (1 << 7)
++#define RHF_PIXIE                  (1 << 8)
++#define RHF_DEFAULT_DELAY_LOAD     (1 << 9)
++#define RHF_REQUICKSTART           (1 << 10)
++#define RHF_REQUICKSTARTED         (1 << 11)
++#define RHF_CORD                   (1 << 12)
++#define RHF_NO_UNRES_UNDEF         (1 << 13)
++#define RHF_RLD_ORDER_SAFE         (1 << 14)
++
++/* Entries found in sections of type SHT_MIPS_LIBLIST. */
++
++typedef struct
++{
++  Elf32_Word l_name;       /* Name (string table index) */
++  Elf32_Word l_time_stamp; /* Timestamp */
++  Elf32_Word l_checksum;   /* Checksum */
++  Elf32_Word l_version;    /* Interface version */
++  Elf32_Word l_flags;      /* Flags */
++} Elf32_Lib;
++
++typedef struct
++{
++  Elf64_Word l_name;       /* Name (string table index) */
++  Elf64_Word l_time_stamp; /* Timestamp */
++  Elf64_Word l_checksum;   /* Checksum */
++  Elf64_Word l_version;    /* Interface version */
++  Elf64_Word l_flags;      /* Flags */
++} Elf64_Lib;
++
++
++/* Legal values for l_flags. */
++
++#define LL_NONE           0
++#define LL_EXACT_MATCH    (1 << 0) /* Require exact match */
++#define LL_IGNORE_INT_VER (1 << 1) /* Ignore interface version */
++#define LL_REQUIRE_MINOR  (1 << 2)
++#define LL_EXPORTS        (1 << 3)
++#define LL_DELAY_LOAD     (1 << 4)
++#define LL_DELTA          (1 << 5)
++
++/* Entries found in sections of type SHT_MIPS_CONFLICT. */
++
++typedef Elf32_Addr Elf32_Conflict;
++
++
++/* HPPA specific definitions. */
++
++/* Legal values for e_flags field of Elf32_Ehdr. */
++
++#define EF_PARISC_TRAPNIL  0x00010000 /* Trap nil pointer dereference. */
++#define EF_PARISC_EXT      0x00020000 /* Program uses arch. extensions. */
++#define EF_PARISC_LSB      0x00040000 /* Program expects little endian. */
++#define EF_PARISC_WIDE     0x00080000 /* Program expects wide mode. */
++#define EF_PARISC_NO_KABP  0x00100000 /* No kernel assisted branch
++                                         prediction. */
++#define EF_PARISC_LAZYSWAP 0x00400000 /* Allow lazy swapping. */
++#define EF_PARISC_ARCH     0x0000ffff /* Architecture version. */
++
++/* Defined values for `e_flags & EF_PARISC_ARCH' are: */
++
++#define EFA_PARISC_1_0 0x020b /* PA-RISC 1.0 big-endian. */
++#define EFA_PARISC_1_1 0x0210 /* PA-RISC 1.1 big-endian. */
++#define EFA_PARISC_2_0 0x0214 /* PA-RISC 2.0 big-endian. */
++
++/* Additional section indices. */
++
++#define SHN_PARISC_ANSI_COMMON 0xff00 /* Section for tentatively declared
++                                         symbols in ANSI C. */
++#define SHN_PARISC_HUGE_COMMON 0xff01 /* Common blocks in huge model. */
++
++/* Legal values for sh_type field of Elf32_Shdr. */
++
++#define SHT_PARISC_EXT    0x70000000 /* Contains product specific ext. */
++#define SHT_PARISC_UNWIND 0x70000001 /* Unwind information. */
++#define SHT_PARISC_DOC    0x70000002 /* Debug info for optimized code. */
++
++/* Legal values for sh_flags field of Elf32_Shdr. */
++
++#define SHF_PARISC_SHORT 0x20000000 /* Section with short addressing. */
++#define SHF_PARISC_HUGE  0x40000000 /* Section far from gp. */
++#define SHF_PARISC_SBP   0x80000000 /* Static branch prediction code. */
++
++/* Legal values for ST_TYPE subfield of st_info (symbol type). */
++
++#define STT_PARISC_MILLICODE 13 /* Millicode function entry point. */
++
++#define STT_HP_OPAQUE (STT_LOOS + 0x1)
++#define STT_HP_STUB   (STT_LOOS + 0x2)
++
++/* HPPA relocs.
*/ ++ ++#define R_PARISC_NONE 0 /* No reloc. */ ++#define R_PARISC_DIR32 1 /* Direct 32-bit reference. */ ++#define R_PARISC_DIR21L 2 /* Left 21 bits of eff. address. */ ++#define R_PARISC_DIR17R 3 /* Right 17 bits of eff. address. */ ++#define R_PARISC_DIR17F 4 /* 17 bits of eff. address. */ ++#define R_PARISC_DIR14R 6 /* Right 14 bits of eff. address. */ ++#define R_PARISC_PCREL32 9 /* 32-bit rel. address. */ ++#define R_PARISC_PCREL21L 10 /* Left 21 bits of rel. address. */ ++#define R_PARISC_PCREL17R 11 /* Right 17 bits of rel. address. */ ++#define R_PARISC_PCREL17F 12 /* 17 bits of rel. address. */ ++#define R_PARISC_PCREL14R 14 /* Right 14 bits of rel. address. */ ++#define R_PARISC_DPREL21L 18 /* Left 21 bits of rel. address. */ ++#define R_PARISC_DPREL14R 22 /* Right 14 bits of rel. address. */ ++#define R_PARISC_GPREL21L 26 /* GP-relative, left 21 bits. */ ++#define R_PARISC_GPREL14R 30 /* GP-relative, right 14 bits. */ ++#define R_PARISC_LTOFF21L 34 /* LT-relative, left 21 bits. */ ++#define R_PARISC_LTOFF14R 38 /* LT-relative, right 14 bits. */ ++#define R_PARISC_SECREL32 41 /* 32 bits section rel. address. */ ++#define R_PARISC_SEGBASE 48 /* No relocation, set segment base. */ ++#define R_PARISC_SEGREL32 49 /* 32 bits segment rel. address. */ ++#define R_PARISC_PLTOFF21L 50 /* PLT rel. address, left 21 bits. */ ++#define R_PARISC_PLTOFF14R 54 /* PLT rel. address, right 14 bits. */ ++#define R_PARISC_LTOFF_FPTR32 57 /* 32 bits LT-rel. function pointer. */ ++#define R_PARISC_LTOFF_FPTR21L 58 /* LT-rel. fct ptr, left 21 bits. */ ++#define R_PARISC_LTOFF_FPTR14R 62 /* LT-rel. fct ptr, right 14 bits. */ ++#define R_PARISC_FPTR64 64 /* 64 bits function address. */ ++#define R_PARISC_PLABEL32 65 /* 32 bits function address. */ ++#define R_PARISC_PLABEL21L 66 /* Left 21 bits of fdesc address. */ ++#define R_PARISC_PLABEL14R 70 /* Right 14 bits of fdesc address. */ ++#define R_PARISC_PCREL64 72 /* 64 bits PC-rel. address. */ ++#define R_PARISC_PCREL22F 74 /* 22 bits PC-rel. address. */ ++#define R_PARISC_PCREL14WR 75 /* PC-rel. address, right 14 bits. */ ++#define R_PARISC_PCREL14DR 76 /* PC rel. address, right 14 bits. */ ++#define R_PARISC_PCREL16F 77 /* 16 bits PC-rel. address. */ ++#define R_PARISC_PCREL16WF 78 /* 16 bits PC-rel. address. */ ++#define R_PARISC_PCREL16DF 79 /* 16 bits PC-rel. address. */ ++#define R_PARISC_DIR64 80 /* 64 bits of eff. address. */ ++#define R_PARISC_DIR14WR 83 /* 14 bits of eff. address. */ ++#define R_PARISC_DIR14DR 84 /* 14 bits of eff. address. */ ++#define R_PARISC_DIR16F 85 /* 16 bits of eff. address. */ ++#define R_PARISC_DIR16WF 86 /* 16 bits of eff. address. */ ++#define R_PARISC_DIR16DF 87 /* 16 bits of eff. address. */ ++#define R_PARISC_GPREL64 88 /* 64 bits of GP-rel. address. */ ++#define R_PARISC_GPREL14WR 91 /* GP-rel. address, right 14 bits. */ ++#define R_PARISC_GPREL14DR 92 /* GP-rel. address, right 14 bits. */ ++#define R_PARISC_GPREL16F 93 /* 16 bits GP-rel. address. */ ++#define R_PARISC_GPREL16WF 94 /* 16 bits GP-rel. address. */ ++#define R_PARISC_GPREL16DF 95 /* 16 bits GP-rel. address. */ ++#define R_PARISC_LTOFF64 96 /* 64 bits LT-rel. address. */ ++#define R_PARISC_LTOFF14WR 99 /* LT-rel. address, right 14 bits. */ ++#define R_PARISC_LTOFF14DR 100 /* LT-rel. address, right 14 bits. */ ++#define R_PARISC_LTOFF16F 101 /* 16 bits LT-rel. address. */ ++#define R_PARISC_LTOFF16WF 102 /* 16 bits LT-rel. address. */ ++#define R_PARISC_LTOFF16DF 103 /* 16 bits LT-rel. address. 
*/
++#define R_PARISC_SECREL64       104 /* 64 bits section rel. address. */
++#define R_PARISC_SEGREL64       112 /* 64 bits segment rel. address. */
++#define R_PARISC_PLTOFF14WR     115 /* PLT-rel. address, right 14 bits. */
++#define R_PARISC_PLTOFF14DR     116 /* PLT-rel. address, right 14 bits. */
++#define R_PARISC_PLTOFF16F      117 /* 16 bits PLT-rel. address. */
++#define R_PARISC_PLTOFF16WF     118 /* 16 bits PLT-rel. address. */
++#define R_PARISC_PLTOFF16DF     119 /* 16 bits PLT-rel. address. */
++#define R_PARISC_LTOFF_FPTR64   120 /* 64 bits LT-rel. function ptr. */
++#define R_PARISC_LTOFF_FPTR14WR 123 /* LT-rel. fct. ptr., right 14 bits. */
++#define R_PARISC_LTOFF_FPTR14DR 124 /* LT-rel. fct. ptr., right 14 bits. */
++#define R_PARISC_LTOFF_FPTR16F  125 /* 16 bits LT-rel. function ptr. */
++#define R_PARISC_LTOFF_FPTR16WF 126 /* 16 bits LT-rel. function ptr. */
++#define R_PARISC_LTOFF_FPTR16DF 127 /* 16 bits LT-rel. function ptr. */
++#define R_PARISC_LORESERVE      128
++#define R_PARISC_COPY           128 /* Copy relocation. */
++#define R_PARISC_IPLT           129 /* Dynamic reloc, imported PLT */
++#define R_PARISC_EPLT           130 /* Dynamic reloc, exported PLT */
++#define R_PARISC_TPREL32        153 /* 32 bits TP-rel. address. */
++#define R_PARISC_TPREL21L       154 /* TP-rel. address, left 21 bits. */
++#define R_PARISC_TPREL14R       158 /* TP-rel. address, right 14 bits. */
++#define R_PARISC_LTOFF_TP21L    162 /* LT-TP-rel. address, left 21 bits. */
++#define R_PARISC_LTOFF_TP14R    166 /* LT-TP-rel. address, right 14 bits. */
++#define R_PARISC_LTOFF_TP14F    167 /* 14 bits LT-TP-rel. address. */
++#define R_PARISC_TPREL64        216 /* 64 bits TP-rel. address. */
++#define R_PARISC_TPREL14WR      219 /* TP-rel. address, right 14 bits. */
++#define R_PARISC_TPREL14DR      220 /* TP-rel. address, right 14 bits. */
++#define R_PARISC_TPREL16F       221 /* 16 bits TP-rel. address. */
++#define R_PARISC_TPREL16WF      222 /* 16 bits TP-rel. address. */
++#define R_PARISC_TPREL16DF      223 /* 16 bits TP-rel. address. */
++#define R_PARISC_LTOFF_TP64     224 /* 64 bits LT-TP-rel. address. */
++#define R_PARISC_LTOFF_TP14WR   227 /* LT-TP-rel. address, right 14 bits. */
++#define R_PARISC_LTOFF_TP14DR   228 /* LT-TP-rel. address, right 14 bits. */
++#define R_PARISC_LTOFF_TP16F    229 /* 16 bits LT-TP-rel. address. */
++#define R_PARISC_LTOFF_TP16WF   230 /* 16 bits LT-TP-rel. address. */
++#define R_PARISC_LTOFF_TP16DF   231 /* 16 bits LT-TP-rel. address. */
++#define R_PARISC_GNU_VTENTRY    232
++#define R_PARISC_GNU_VTINHERIT  233
++#define R_PARISC_TLS_GD21L      234 /* GD 21-bit left. */
++#define R_PARISC_TLS_GD14R      235 /* GD 14-bit right. */
++#define R_PARISC_TLS_GDCALL     236 /* GD call to __t_g_a. */
++#define R_PARISC_TLS_LDM21L     237 /* LD module 21-bit left. */
++#define R_PARISC_TLS_LDM14R     238 /* LD module 14-bit right. */
++#define R_PARISC_TLS_LDMCALL    239 /* LD module call to __t_g_a. */
++#define R_PARISC_TLS_LDO21L     240 /* LD offset 21-bit left. */
++#define R_PARISC_TLS_LDO14R     241 /* LD offset 14-bit right. */
++#define R_PARISC_TLS_DTPMOD32   242 /* DTP module 32-bit. */
++#define R_PARISC_TLS_DTPMOD64   243 /* DTP module 64-bit. */
++#define R_PARISC_TLS_DTPOFF32   244 /* DTP offset 32-bit. */
++#define R_PARISC_TLS_DTPOFF64   245 /* DTP offset 64-bit.
*/
++#define R_PARISC_TLS_LE21L   R_PARISC_TPREL21L
++#define R_PARISC_TLS_LE14R   R_PARISC_TPREL14R
++#define R_PARISC_TLS_IE21L   R_PARISC_LTOFF_TP21L
++#define R_PARISC_TLS_IE14R   R_PARISC_LTOFF_TP14R
++#define R_PARISC_TLS_TPREL32 R_PARISC_TPREL32
++#define R_PARISC_TLS_TPREL64 R_PARISC_TPREL64
++#define R_PARISC_HIRESERVE   255
++
++/* Legal values for p_type field of Elf32_Phdr/Elf64_Phdr. */
++
++#define PT_HP_TLS           (PT_LOOS + 0x0)
++#define PT_HP_CORE_NONE     (PT_LOOS + 0x1)
++#define PT_HP_CORE_VERSION  (PT_LOOS + 0x2)
++#define PT_HP_CORE_KERNEL   (PT_LOOS + 0x3)
++#define PT_HP_CORE_COMM     (PT_LOOS + 0x4)
++#define PT_HP_CORE_PROC     (PT_LOOS + 0x5)
++#define PT_HP_CORE_LOADABLE (PT_LOOS + 0x6)
++#define PT_HP_CORE_STACK    (PT_LOOS + 0x7)
++#define PT_HP_CORE_SHM      (PT_LOOS + 0x8)
++#define PT_HP_CORE_MMF      (PT_LOOS + 0x9)
++#define PT_HP_PARALLEL      (PT_LOOS + 0x10)
++#define PT_HP_FASTBIND      (PT_LOOS + 0x11)
++#define PT_HP_OPT_ANNOT     (PT_LOOS + 0x12)
++#define PT_HP_HSL_ANNOT     (PT_LOOS + 0x13)
++#define PT_HP_STACK         (PT_LOOS + 0x14)
++
++#define PT_PARISC_ARCHEXT 0x70000000
++#define PT_PARISC_UNWIND  0x70000001
++
++/* Legal values for p_flags field of Elf32_Phdr/Elf64_Phdr. */
++
++#define PF_PARISC_SBP 0x08000000
++
++#define PF_HP_PAGE_SIZE   0x00100000
++#define PF_HP_FAR_SHARED  0x00200000
++#define PF_HP_NEAR_SHARED 0x00400000
++#define PF_HP_CODE        0x01000000
++#define PF_HP_MODIFY      0x02000000
++#define PF_HP_LAZYSWAP    0x04000000
++#define PF_HP_SBP         0x08000000
++
++
++/* Alpha specific definitions. */
++
++/* Legal values for e_flags field of Elf64_Ehdr. */
++
++#define EF_ALPHA_32BIT    1 /* All addresses must be < 2GB. */
++#define EF_ALPHA_CANRELAX 2 /* Relocations for relaxing exist. */
++
++/* Legal values for sh_type field of Elf64_Shdr. */
++
++/* These two are primarily concerned with ECOFF debugging info. */
++#define SHT_ALPHA_DEBUG   0x70000001
++#define SHT_ALPHA_REGINFO 0x70000002
++
++/* Legal values for sh_flags field of Elf64_Shdr. */
++
++#define SHF_ALPHA_GPREL 0x10000000
++
++/* Legal values for st_other field of Elf64_Sym. */
++#define STO_ALPHA_NOPV       0x80 /* No PV required. */
++#define STO_ALPHA_STD_GPLOAD 0x88 /* PV only used for initial ldgp. */
++
++/* Alpha relocs.
*/ ++ ++#define R_ALPHA_NONE 0 /* No reloc */ ++#define R_ALPHA_REFLONG 1 /* Direct 32 bit */ ++#define R_ALPHA_REFQUAD 2 /* Direct 64 bit */ ++#define R_ALPHA_GPREL32 3 /* GP relative 32 bit */ ++#define R_ALPHA_LITERAL 4 /* GP relative 16 bit w/optimization */ ++#define R_ALPHA_LITUSE 5 /* Optimization hint for LITERAL */ ++#define R_ALPHA_GPDISP 6 /* Add displacement to GP */ ++#define R_ALPHA_BRADDR 7 /* PC+4 relative 23 bit shifted */ ++#define R_ALPHA_HINT 8 /* PC+4 relative 16 bit shifted */ ++#define R_ALPHA_SREL16 9 /* PC relative 16 bit */ ++#define R_ALPHA_SREL32 10 /* PC relative 32 bit */ ++#define R_ALPHA_SREL64 11 /* PC relative 64 bit */ ++#define R_ALPHA_GPRELHIGH 17 /* GP relative 32 bit, high 16 bits */ ++#define R_ALPHA_GPRELLOW 18 /* GP relative 32 bit, low 16 bits */ ++#define R_ALPHA_GPREL16 19 /* GP relative 16 bit */ ++#define R_ALPHA_COPY 24 /* Copy symbol at runtime */ ++#define R_ALPHA_GLOB_DAT 25 /* Create GOT entry */ ++#define R_ALPHA_JMP_SLOT 26 /* Create PLT entry */ ++#define R_ALPHA_RELATIVE 27 /* Adjust by program base */ ++#define R_ALPHA_TLS_GD_HI 28 ++#define R_ALPHA_TLSGD 29 ++#define R_ALPHA_TLS_LDM 30 ++#define R_ALPHA_DTPMOD64 31 ++#define R_ALPHA_GOTDTPREL 32 ++#define R_ALPHA_DTPREL64 33 ++#define R_ALPHA_DTPRELHI 34 ++#define R_ALPHA_DTPRELLO 35 ++#define R_ALPHA_DTPREL16 36 ++#define R_ALPHA_GOTTPREL 37 ++#define R_ALPHA_TPREL64 38 ++#define R_ALPHA_TPRELHI 39 ++#define R_ALPHA_TPRELLO 40 ++#define R_ALPHA_TPREL16 41 ++/* Keep this the last entry. */ ++#define R_ALPHA_NUM 46 ++ ++/* Magic values of the LITUSE relocation addend. */ ++#define LITUSE_ALPHA_ADDR 0 ++#define LITUSE_ALPHA_BASE 1 ++#define LITUSE_ALPHA_BYTOFF 2 ++#define LITUSE_ALPHA_JSR 3 ++#define LITUSE_ALPHA_TLS_GD 4 ++#define LITUSE_ALPHA_TLS_LDM 5 ++ ++/* Legal values for d_tag of Elf64_Dyn. */ ++#define DT_ALPHA_PLTRO (DT_LOPROC + 0) ++#define DT_ALPHA_NUM 1 ++ ++/* PowerPC specific declarations */ ++ ++/* Values for Elf32/64_Ehdr.e_flags. */ ++#define EF_PPC_EMB 0x80000000 /* PowerPC embedded flag */ ++ ++/* Cygnus local bits below */ ++#define EF_PPC_RELOCATABLE 0x00010000 /* PowerPC -mrelocatable flag*/ ++#define EF_PPC_RELOCATABLE_LIB 0x00008000 /* PowerPC -mrelocatable-lib ++ flag */ ++ ++/* PowerPC relocations defined by the ABIs */ ++#define R_PPC_NONE 0 ++#define R_PPC_ADDR32 1 /* 32bit absolute address */ ++#define R_PPC_ADDR24 2 /* 26bit address, 2 bits ignored. 
*/ ++#define R_PPC_ADDR16 3 /* 16bit absolute address */ ++#define R_PPC_ADDR16_LO 4 /* lower 16bit of absolute address */ ++#define R_PPC_ADDR16_HI 5 /* high 16bit of absolute address */ ++#define R_PPC_ADDR16_HA 6 /* adjusted high 16bit */ ++#define R_PPC_ADDR14 7 /* 16bit address, 2 bits ignored */ ++#define R_PPC_ADDR14_BRTAKEN 8 ++#define R_PPC_ADDR14_BRNTAKEN 9 ++#define R_PPC_REL24 10 /* PC relative 26 bit */ ++#define R_PPC_REL14 11 /* PC relative 16 bit */ ++#define R_PPC_REL14_BRTAKEN 12 ++#define R_PPC_REL14_BRNTAKEN 13 ++#define R_PPC_GOT16 14 ++#define R_PPC_GOT16_LO 15 ++#define R_PPC_GOT16_HI 16 ++#define R_PPC_GOT16_HA 17 ++#define R_PPC_PLTREL24 18 ++#define R_PPC_COPY 19 ++#define R_PPC_GLOB_DAT 20 ++#define R_PPC_JMP_SLOT 21 ++#define R_PPC_RELATIVE 22 ++#define R_PPC_LOCAL24PC 23 ++#define R_PPC_UADDR32 24 ++#define R_PPC_UADDR16 25 ++#define R_PPC_REL32 26 ++#define R_PPC_PLT32 27 ++#define R_PPC_PLTREL32 28 ++#define R_PPC_PLT16_LO 29 ++#define R_PPC_PLT16_HI 30 ++#define R_PPC_PLT16_HA 31 ++#define R_PPC_SDAREL16 32 ++#define R_PPC_SECTOFF 33 ++#define R_PPC_SECTOFF_LO 34 ++#define R_PPC_SECTOFF_HI 35 ++#define R_PPC_SECTOFF_HA 36 ++ ++/* PowerPC relocations defined for the TLS access ABI. */ ++#define R_PPC_TLS 67 /* none (sym+add)@tls */ ++#define R_PPC_DTPMOD32 68 /* word32 (sym+add)@dtpmod */ ++#define R_PPC_TPREL16 69 /* half16* (sym+add)@tprel */ ++#define R_PPC_TPREL16_LO 70 /* half16 (sym+add)@tprel@l */ ++#define R_PPC_TPREL16_HI 71 /* half16 (sym+add)@tprel@h */ ++#define R_PPC_TPREL16_HA 72 /* half16 (sym+add)@tprel@ha */ ++#define R_PPC_TPREL32 73 /* word32 (sym+add)@tprel */ ++#define R_PPC_DTPREL16 74 /* half16* (sym+add)@dtprel */ ++#define R_PPC_DTPREL16_LO 75 /* half16 (sym+add)@dtprel@l */ ++#define R_PPC_DTPREL16_HI 76 /* half16 (sym+add)@dtprel@h */ ++#define R_PPC_DTPREL16_HA 77 /* half16 (sym+add)@dtprel@ha */ ++#define R_PPC_DTPREL32 78 /* word32 (sym+add)@dtprel */ ++#define R_PPC_GOT_TLSGD16 79 /* half16* (sym+add)@got@tlsgd */ ++#define R_PPC_GOT_TLSGD16_LO 80 /* half16 (sym+add)@got@tlsgd@l */ ++#define R_PPC_GOT_TLSGD16_HI 81 /* half16 (sym+add)@got@tlsgd@h */ ++#define R_PPC_GOT_TLSGD16_HA 82 /* half16 (sym+add)@got@tlsgd@ha */ ++#define R_PPC_GOT_TLSLD16 83 /* half16* (sym+add)@got@tlsld */ ++#define R_PPC_GOT_TLSLD16_LO 84 /* half16 (sym+add)@got@tlsld@l */ ++#define R_PPC_GOT_TLSLD16_HI 85 /* half16 (sym+add)@got@tlsld@h */ ++#define R_PPC_GOT_TLSLD16_HA 86 /* half16 (sym+add)@got@tlsld@ha */ ++#define R_PPC_GOT_TPREL16 87 /* half16* (sym+add)@got@tprel */ ++#define R_PPC_GOT_TPREL16_LO 88 /* half16 (sym+add)@got@tprel@l */ ++#define R_PPC_GOT_TPREL16_HI 89 /* half16 (sym+add)@got@tprel@h */ ++#define R_PPC_GOT_TPREL16_HA 90 /* half16 (sym+add)@got@tprel@ha */ ++#define R_PPC_GOT_DTPREL16 91 /* half16* (sym+add)@got@dtprel */ ++#define R_PPC_GOT_DTPREL16_LO 92 /* half16* (sym+add)@got@dtprel@l */ ++#define R_PPC_GOT_DTPREL16_HI 93 /* half16* (sym+add)@got@dtprel@h */ ++#define R_PPC_GOT_DTPREL16_HA 94 /* half16* (sym+add)@got@dtprel@ha */ ++ ++/* The remaining relocs are from the Embedded ELF ABI, and are not ++ in the SVR4 ELF ABI. 
*/ ++#define R_PPC_EMB_NADDR32 101 ++#define R_PPC_EMB_NADDR16 102 ++#define R_PPC_EMB_NADDR16_LO 103 ++#define R_PPC_EMB_NADDR16_HI 104 ++#define R_PPC_EMB_NADDR16_HA 105 ++#define R_PPC_EMB_SDAI16 106 ++#define R_PPC_EMB_SDA2I16 107 ++#define R_PPC_EMB_SDA2REL 108 ++#define R_PPC_EMB_SDA21 109 /* 16 bit offset in SDA */ ++#define R_PPC_EMB_MRKREF 110 ++#define R_PPC_EMB_RELSEC16 111 ++#define R_PPC_EMB_RELST_LO 112 ++#define R_PPC_EMB_RELST_HI 113 ++#define R_PPC_EMB_RELST_HA 114 ++#define R_PPC_EMB_BIT_FLD 115 ++#define R_PPC_EMB_RELSDA 116 /* 16 bit relative offset in SDA */ ++ ++/* Diab tool relocations. */ ++#define R_PPC_DIAB_SDA21_LO 180 /* like EMB_SDA21, but lower 16 bit */ ++#define R_PPC_DIAB_SDA21_HI 181 /* like EMB_SDA21, but high 16 bit */ ++#define R_PPC_DIAB_SDA21_HA 182 /* like EMB_SDA21, adjusted high 16 */ ++#define R_PPC_DIAB_RELSDA_LO 183 /* like EMB_RELSDA, but lower 16 bit */ ++#define R_PPC_DIAB_RELSDA_HI 184 /* like EMB_RELSDA, but high 16 bit */ ++#define R_PPC_DIAB_RELSDA_HA 185 /* like EMB_RELSDA, adjusted high 16 */ ++ ++/* GNU extension to support local ifunc. */ ++#define R_PPC_IRELATIVE 248 ++ ++/* GNU relocs used in PIC code sequences. */ ++#define R_PPC_REL16 249 /* half16 (sym+add-.) */ ++#define R_PPC_REL16_LO 250 /* half16 (sym+add-.)@l */ ++#define R_PPC_REL16_HI 251 /* half16 (sym+add-.)@h */ ++#define R_PPC_REL16_HA 252 /* half16 (sym+add-.)@ha */ ++ ++/* This is a phony reloc to handle any old fashioned TOC16 references ++ that may still be in object files. */ ++#define R_PPC_TOC16 255 ++ ++/* PowerPC specific values for the Dyn d_tag field. */ ++#define DT_PPC_GOT (DT_LOPROC + 0) ++#define DT_PPC_NUM 1 ++ ++/* PowerPC64 relocations defined by the ABIs */ ++#define R_PPC64_NONE R_PPC_NONE ++#define R_PPC64_ADDR32 R_PPC_ADDR32 /* 32bit absolute address */ ++#define R_PPC64_ADDR24 R_PPC_ADDR24 /* 26bit address, word aligned */ ++#define R_PPC64_ADDR16 R_PPC_ADDR16 /* 16bit absolute address */ ++#define R_PPC64_ADDR16_LO R_PPC_ADDR16_LO /* lower 16bits of address */ ++#define R_PPC64_ADDR16_HI R_PPC_ADDR16_HI /* high 16bits of address. */ ++#define R_PPC64_ADDR16_HA R_PPC_ADDR16_HA /* adjusted high 16bits. */ ++#define R_PPC64_ADDR14 R_PPC_ADDR14 /* 16bit address, word aligned */ ++#define R_PPC64_ADDR14_BRTAKEN R_PPC_ADDR14_BRTAKEN ++#define R_PPC64_ADDR14_BRNTAKEN R_PPC_ADDR14_BRNTAKEN ++#define R_PPC64_REL24 R_PPC_REL24 /* PC-rel. 
26 bit, word aligned */ ++#define R_PPC64_REL14 R_PPC_REL14 /* PC relative 16 bit */ ++#define R_PPC64_REL14_BRTAKEN R_PPC_REL14_BRTAKEN ++#define R_PPC64_REL14_BRNTAKEN R_PPC_REL14_BRNTAKEN ++#define R_PPC64_GOT16 R_PPC_GOT16 ++#define R_PPC64_GOT16_LO R_PPC_GOT16_LO ++#define R_PPC64_GOT16_HI R_PPC_GOT16_HI ++#define R_PPC64_GOT16_HA R_PPC_GOT16_HA ++ ++#define R_PPC64_COPY R_PPC_COPY ++#define R_PPC64_GLOB_DAT R_PPC_GLOB_DAT ++#define R_PPC64_JMP_SLOT R_PPC_JMP_SLOT ++#define R_PPC64_RELATIVE R_PPC_RELATIVE ++ ++#define R_PPC64_UADDR32 R_PPC_UADDR32 ++#define R_PPC64_UADDR16 R_PPC_UADDR16 ++#define R_PPC64_REL32 R_PPC_REL32 ++#define R_PPC64_PLT32 R_PPC_PLT32 ++#define R_PPC64_PLTREL32 R_PPC_PLTREL32 ++#define R_PPC64_PLT16_LO R_PPC_PLT16_LO ++#define R_PPC64_PLT16_HI R_PPC_PLT16_HI ++#define R_PPC64_PLT16_HA R_PPC_PLT16_HA ++ ++#define R_PPC64_SECTOFF R_PPC_SECTOFF ++#define R_PPC64_SECTOFF_LO R_PPC_SECTOFF_LO ++#define R_PPC64_SECTOFF_HI R_PPC_SECTOFF_HI ++#define R_PPC64_SECTOFF_HA R_PPC_SECTOFF_HA ++#define R_PPC64_ADDR30 37 /* word30 (S + A - P) >> 2 */ ++#define R_PPC64_ADDR64 38 /* doubleword64 S + A */ ++#define R_PPC64_ADDR16_HIGHER 39 /* half16 #higher(S + A) */ ++#define R_PPC64_ADDR16_HIGHERA 40 /* half16 #highera(S + A) */ ++#define R_PPC64_ADDR16_HIGHEST 41 /* half16 #highest(S + A) */ ++#define R_PPC64_ADDR16_HIGHESTA 42 /* half16 #highesta(S + A) */ ++#define R_PPC64_UADDR64 43 /* doubleword64 S + A */ ++#define R_PPC64_REL64 44 /* doubleword64 S + A - P */ ++#define R_PPC64_PLT64 45 /* doubleword64 L + A */ ++#define R_PPC64_PLTREL64 46 /* doubleword64 L + A - P */ ++#define R_PPC64_TOC16 47 /* half16* S + A - .TOC */ ++#define R_PPC64_TOC16_LO 48 /* half16 #lo(S + A - .TOC.) */ ++#define R_PPC64_TOC16_HI 49 /* half16 #hi(S + A - .TOC.) */ ++#define R_PPC64_TOC16_HA 50 /* half16 #ha(S + A - .TOC.) */ ++#define R_PPC64_TOC 51 /* doubleword64 .TOC */ ++#define R_PPC64_PLTGOT16 52 /* half16* M + A */ ++#define R_PPC64_PLTGOT16_LO 53 /* half16 #lo(M + A) */ ++#define R_PPC64_PLTGOT16_HI 54 /* half16 #hi(M + A) */ ++#define R_PPC64_PLTGOT16_HA 55 /* half16 #ha(M + A) */ ++ ++#define R_PPC64_ADDR16_DS 56 /* half16ds* (S + A) >> 2 */ ++#define R_PPC64_ADDR16_LO_DS 57 /* half16ds #lo(S + A) >> 2 */ ++#define R_PPC64_GOT16_DS 58 /* half16ds* (G + A) >> 2 */ ++#define R_PPC64_GOT16_LO_DS 59 /* half16ds #lo(G + A) >> 2 */ ++#define R_PPC64_PLT16_LO_DS 60 /* half16ds #lo(L + A) >> 2 */ ++#define R_PPC64_SECTOFF_DS 61 /* half16ds* (R + A) >> 2 */ ++#define R_PPC64_SECTOFF_LO_DS 62 /* half16ds #lo(R + A) >> 2 */ ++#define R_PPC64_TOC16_DS 63 /* half16ds* (S + A - .TOC.) >> 2 */ ++#define R_PPC64_TOC16_LO_DS 64 /* half16ds #lo(S + A - .TOC.) >> 2 */ ++#define R_PPC64_PLTGOT16_DS 65 /* half16ds* (M + A) >> 2 */ ++#define R_PPC64_PLTGOT16_LO_DS 66 /* half16ds #lo(M + A) >> 2 */ ++ ++/* PowerPC64 relocations defined for the TLS access ABI. 
*/ ++#define R_PPC64_TLS 67 /* none (sym+add)@tls */ ++#define R_PPC64_DTPMOD64 68 /* doubleword64 (sym+add)@dtpmod */ ++#define R_PPC64_TPREL16 69 /* half16* (sym+add)@tprel */ ++#define R_PPC64_TPREL16_LO 70 /* half16 (sym+add)@tprel@l */ ++#define R_PPC64_TPREL16_HI 71 /* half16 (sym+add)@tprel@h */ ++#define R_PPC64_TPREL16_HA 72 /* half16 (sym+add)@tprel@ha */ ++#define R_PPC64_TPREL64 73 /* doubleword64 (sym+add)@tprel */ ++#define R_PPC64_DTPREL16 74 /* half16* (sym+add)@dtprel */ ++#define R_PPC64_DTPREL16_LO 75 /* half16 (sym+add)@dtprel@l */ ++#define R_PPC64_DTPREL16_HI 76 /* half16 (sym+add)@dtprel@h */ ++#define R_PPC64_DTPREL16_HA 77 /* half16 (sym+add)@dtprel@ha */ ++#define R_PPC64_DTPREL64 78 /* doubleword64 (sym+add)@dtprel */ ++#define R_PPC64_GOT_TLSGD16 79 /* half16* (sym+add)@got@tlsgd */ ++#define R_PPC64_GOT_TLSGD16_LO 80 /* half16 (sym+add)@got@tlsgd@l */ ++#define R_PPC64_GOT_TLSGD16_HI 81 /* half16 (sym+add)@got@tlsgd@h */ ++#define R_PPC64_GOT_TLSGD16_HA 82 /* half16 (sym+add)@got@tlsgd@ha */ ++#define R_PPC64_GOT_TLSLD16 83 /* half16* (sym+add)@got@tlsld */ ++#define R_PPC64_GOT_TLSLD16_LO 84 /* half16 (sym+add)@got@tlsld@l */ ++#define R_PPC64_GOT_TLSLD16_HI 85 /* half16 (sym+add)@got@tlsld@h */ ++#define R_PPC64_GOT_TLSLD16_HA 86 /* half16 (sym+add)@got@tlsld@ha */ ++#define R_PPC64_GOT_TPREL16_DS 87 /* half16ds* (sym+add)@got@tprel */ ++#define R_PPC64_GOT_TPREL16_LO_DS 88 /* half16ds (sym+add)@got@tprel@l */ ++#define R_PPC64_GOT_TPREL16_HI 89 /* half16 (sym+add)@got@tprel@h */ ++#define R_PPC64_GOT_TPREL16_HA 90 /* half16 (sym+add)@got@tprel@ha */ ++#define R_PPC64_GOT_DTPREL16_DS 91 /* half16ds* (sym+add)@got@dtprel */ ++#define R_PPC64_GOT_DTPREL16_LO_DS 92 /* half16ds (sym+add)@got@dtprel@l */ ++#define R_PPC64_GOT_DTPREL16_HI 93 /* half16 (sym+add)@got@dtprel@h */ ++#define R_PPC64_GOT_DTPREL16_HA 94 /* half16 (sym+add)@got@dtprel@ha */ ++#define R_PPC64_TPREL16_DS 95 /* half16ds* (sym+add)@tprel */ ++#define R_PPC64_TPREL16_LO_DS 96 /* half16ds (sym+add)@tprel@l */ ++#define R_PPC64_TPREL16_HIGHER 97 /* half16 (sym+add)@tprel@higher */ ++#define R_PPC64_TPREL16_HIGHERA 98 /* half16 (sym+add)@tprel@highera */ ++#define R_PPC64_TPREL16_HIGHEST 99 /* half16 (sym+add)@tprel@highest */ ++#define R_PPC64_TPREL16_HIGHESTA 100 /* half16 (sym+add)@tprel@highesta */ ++#define R_PPC64_DTPREL16_DS 101 /* half16ds* (sym+add)@dtprel */ ++#define R_PPC64_DTPREL16_LO_DS 102 /* half16ds (sym+add)@dtprel@l */ ++#define R_PPC64_DTPREL16_HIGHER 103 /* half16 (sym+add)@dtprel@higher */ ++#define R_PPC64_DTPREL16_HIGHERA 104 /* half16 (sym+add)@dtprel@highera */ ++#define R_PPC64_DTPREL16_HIGHEST 105 /* half16 (sym+add)@dtprel@highest */ ++#define R_PPC64_DTPREL16_HIGHESTA 106 /* half16 (sym+add)@dtprel@highesta */ ++ ++/* GNU extension to support local ifunc. */ ++#define R_PPC64_JMP_IREL 247 ++#define R_PPC64_IRELATIVE 248 ++#define R_PPC64_REL16 249 /* half16 (sym+add-.) */ ++#define R_PPC64_REL16_LO 250 /* half16 (sym+add-.)@l */ ++#define R_PPC64_REL16_HI 251 /* half16 (sym+add-.)@h */ ++#define R_PPC64_REL16_HA 252 /* half16 (sym+add-.)@ha */ ++ ++/* PowerPC64 specific values for the Dyn d_tag field. */ ++#define DT_PPC64_GLINK (DT_LOPROC + 0) ++#define DT_PPC64_OPD (DT_LOPROC + 1) ++#define DT_PPC64_OPDSZ (DT_LOPROC + 2) ++#define DT_PPC64_NUM 3 ++ ++ ++/* ARM specific declarations */ ++ ++/* Processor specific flags for the ELF header e_flags field. 
*/ ++#define EF_ARM_RELEXEC 0x01 ++#define EF_ARM_HASENTRY 0x02 ++#define EF_ARM_INTERWORK 0x04 ++#define EF_ARM_APCS_26 0x08 ++#define EF_ARM_APCS_FLOAT 0x10 ++#define EF_ARM_PIC 0x20 ++#define EF_ARM_ALIGN8 0x40 /* 8-bit structure alignment is in use */ ++#define EF_ARM_NEW_ABI 0x80 ++#define EF_ARM_OLD_ABI 0x100 ++#define EF_ARM_SOFT_FLOAT 0x200 ++#define EF_ARM_VFP_FLOAT 0x400 ++#define EF_ARM_MAVERICK_FLOAT 0x800 ++ ++ ++/* Other constants defined in the ARM ELF spec. version B-01. */ ++/* NB. These conflict with values defined above. */ ++#define EF_ARM_SYMSARESORTED 0x04 ++#define EF_ARM_DYNSYMSUSESEGIDX 0x08 ++#define EF_ARM_MAPSYMSFIRST 0x10 ++#define EF_ARM_EABIMASK 0XFF000000 ++ ++/* Constants defined in AAELF. */ ++#define EF_ARM_BE8 0x00800000 ++#define EF_ARM_LE8 0x00400000 ++ ++#define EF_ARM_EABI_VERSION(flags) ((flags) & EF_ARM_EABIMASK) ++#define EF_ARM_EABI_UNKNOWN 0x00000000 ++#define EF_ARM_EABI_VER1 0x01000000 ++#define EF_ARM_EABI_VER2 0x02000000 ++#define EF_ARM_EABI_VER3 0x03000000 ++#define EF_ARM_EABI_VER4 0x04000000 ++#define EF_ARM_EABI_VER5 0x05000000 ++ ++/* Additional symbol types for Thumb. */ ++#define STT_ARM_TFUNC STT_LOPROC /* A Thumb function. */ ++#define STT_ARM_16BIT STT_HIPROC /* A Thumb label. */ ++ ++/* ARM-specific values for sh_flags */ ++#define SHF_ARM_ENTRYSECT 0x10000000 /* Section contains an entry point */ ++#define SHF_ARM_COMDEF 0x80000000 /* Section may be multiply defined ++ in the input to a link step. */ ++ ++/* ARM-specific program header flags */ ++#define PF_ARM_SB 0x10000000 /* Segment contains the location ++ addressed by the static base. */ ++#define PF_ARM_PI 0x20000000 /* Position-independent segment. */ ++#define PF_ARM_ABS 0x40000000 /* Absolute segment. */ ++ ++/* Processor specific values for the Phdr p_type field. */ ++#define PT_ARM_EXIDX (PT_LOPROC + 1) /* ARM unwind segment. */ ++ ++/* Processor specific values for the Shdr sh_type field. */ ++#define SHT_ARM_EXIDX (SHT_LOPROC + 1) /* ARM unwind section. */ ++#define SHT_ARM_PREEMPTMAP (SHT_LOPROC + 2) /* Preemption details. */ ++#define SHT_ARM_ATTRIBUTES (SHT_LOPROC + 3) /* ARM attributes section. */ ++ ++ ++/* ARM relocs. */ ++ ++#define R_ARM_NONE 0 /* No reloc */ ++#define R_ARM_PC24 1 /* PC relative 26 bit branch */ ++#define R_ARM_ABS32 2 /* Direct 32 bit */ ++#define R_ARM_REL32 3 /* PC relative 32 bit */ ++#define R_ARM_PC13 4 ++#define R_ARM_ABS16 5 /* Direct 16 bit */ ++#define R_ARM_ABS12 6 /* Direct 12 bit */ ++#define R_ARM_THM_ABS5 7 ++#define R_ARM_ABS8 8 /* Direct 8 bit */ ++#define R_ARM_SBREL32 9 ++#define R_ARM_THM_PC22 10 ++#define R_ARM_THM_PC8 11 ++#define R_ARM_AMP_VCALL9 12 ++#define R_ARM_SWI24 13 /* Obsolete static relocation. */ ++#define R_ARM_TLS_DESC 13 /* Dynamic relocation. 
*/ ++#define R_ARM_THM_SWI8 14 ++#define R_ARM_XPC25 15 ++#define R_ARM_THM_XPC22 16 ++#define R_ARM_TLS_DTPMOD32 17 /* ID of module containing symbol */ ++#define R_ARM_TLS_DTPOFF32 18 /* Offset in TLS block */ ++#define R_ARM_TLS_TPOFF32 19 /* Offset in static TLS block */ ++#define R_ARM_COPY 20 /* Copy symbol at runtime */ ++#define R_ARM_GLOB_DAT 21 /* Create GOT entry */ ++#define R_ARM_JUMP_SLOT 22 /* Create PLT entry */ ++#define R_ARM_RELATIVE 23 /* Adjust by program base */ ++#define R_ARM_GOTOFF 24 /* 32 bit offset to GOT */ ++#define R_ARM_GOTPC 25 /* 32 bit PC relative offset to GOT */ ++#define R_ARM_GOT32 26 /* 32 bit GOT entry */ ++#define R_ARM_PLT32 27 /* 32 bit PLT address */ ++#define R_ARM_ALU_PCREL_7_0 32 ++#define R_ARM_ALU_PCREL_15_8 33 ++#define R_ARM_ALU_PCREL_23_15 34 ++#define R_ARM_LDR_SBREL_11_0 35 ++#define R_ARM_ALU_SBREL_19_12 36 ++#define R_ARM_ALU_SBREL_27_20 37 ++#define R_ARM_TLS_GOTDESC 90 ++#define R_ARM_TLS_CALL 91 ++#define R_ARM_TLS_DESCSEQ 92 ++#define R_ARM_THM_TLS_CALL 93 ++#define R_ARM_GNU_VTENTRY 100 ++#define R_ARM_GNU_VTINHERIT 101 ++#define R_ARM_THM_PC11 102 /* thumb unconditional branch */ ++#define R_ARM_THM_PC9 103 /* thumb conditional branch */ ++#define R_ARM_TLS_GD32 104 /* PC-rel 32 bit for global dynamic ++ thread local data */ ++#define R_ARM_TLS_LDM32 105 /* PC-rel 32 bit for local dynamic ++ thread local data */ ++#define R_ARM_TLS_LDO32 106 /* 32 bit offset relative to TLS ++ block */ ++#define R_ARM_TLS_IE32 107 /* PC-rel 32 bit for GOT entry of ++ static TLS block offset */ ++#define R_ARM_TLS_LE32 108 /* 32 bit offset relative to static ++ TLS block */ ++#define R_ARM_THM_TLS_DESCSEQ 129 ++#define R_ARM_IRELATIVE 160 ++#define R_ARM_RXPC25 249 ++#define R_ARM_RSBREL32 250 ++#define R_ARM_THM_RPC22 251 ++#define R_ARM_RREL32 252 ++#define R_ARM_RABS22 253 ++#define R_ARM_RPC24 254 ++#define R_ARM_RBASE 255 ++/* Keep this the last entry. */ ++#define R_ARM_NUM 256 ++ ++/* IA-64 specific declarations. */ ++ ++/* Processor specific flags for the Ehdr e_flags field. */ ++#define EF_IA_64_MASKOS 0x0000000f /* os-specific flags */ ++#define EF_IA_64_ABI64 0x00000010 /* 64-bit ABI */ ++#define EF_IA_64_ARCH 0xff000000 /* arch. version mask */ ++ ++/* Processor specific values for the Phdr p_type field. */ ++#define PT_IA_64_ARCHEXT (PT_LOPROC + 0) /* arch extension bits */ ++#define PT_IA_64_UNWIND (PT_LOPROC + 1) /* ia64 unwind bits */ ++#define PT_IA_64_HP_OPT_ANOT (PT_LOOS + 0x12) ++#define PT_IA_64_HP_HSL_ANOT (PT_LOOS + 0x13) ++#define PT_IA_64_HP_STACK (PT_LOOS + 0x14) ++ ++/* Processor specific flags for the Phdr p_flags field. */ ++#define PF_IA_64_NORECOV 0x80000000 /* spec insns w/o recovery */ ++ ++/* Processor specific values for the Shdr sh_type field. */ ++#define SHT_IA_64_EXT (SHT_LOPROC + 0) /* extension bits */ ++#define SHT_IA_64_UNWIND (SHT_LOPROC + 1) /* unwind bits */ ++ ++/* Processor specific flags for the Shdr sh_flags field. */ ++#define SHF_IA_64_SHORT 0x10000000 /* section near gp */ ++#define SHF_IA_64_NORECOV 0x20000000 /* spec insns w/o recovery */ ++ ++/* Processor specific values for the Dyn d_tag field. */ ++#define DT_IA_64_PLT_RESERVE (DT_LOPROC + 0) ++#define DT_IA_64_NUM 1 ++ ++/* IA-64 relocations. 
*/ ++#define R_IA64_NONE 0x00 /* none */ ++#define R_IA64_IMM14 0x21 /* symbol + addend, add imm14 */ ++#define R_IA64_IMM22 0x22 /* symbol + addend, add imm22 */ ++#define R_IA64_IMM64 0x23 /* symbol + addend, mov imm64 */ ++#define R_IA64_DIR32MSB 0x24 /* symbol + addend, data4 MSB */ ++#define R_IA64_DIR32LSB 0x25 /* symbol + addend, data4 LSB */ ++#define R_IA64_DIR64MSB 0x26 /* symbol + addend, data8 MSB */ ++#define R_IA64_DIR64LSB 0x27 /* symbol + addend, data8 LSB */ ++#define R_IA64_GPREL22 0x2a /* @gprel(sym + add), add imm22 */ ++#define R_IA64_GPREL64I 0x2b /* @gprel(sym + add), mov imm64 */ ++#define R_IA64_GPREL32MSB 0x2c /* @gprel(sym + add), data4 MSB */ ++#define R_IA64_GPREL32LSB 0x2d /* @gprel(sym + add), data4 LSB */ ++#define R_IA64_GPREL64MSB 0x2e /* @gprel(sym + add), data8 MSB */ ++#define R_IA64_GPREL64LSB 0x2f /* @gprel(sym + add), data8 LSB */ ++#define R_IA64_LTOFF22 0x32 /* @ltoff(sym + add), add imm22 */ ++#define R_IA64_LTOFF64I 0x33 /* @ltoff(sym + add), mov imm64 */ ++#define R_IA64_PLTOFF22 0x3a /* @pltoff(sym + add), add imm22 */ ++#define R_IA64_PLTOFF64I 0x3b /* @pltoff(sym + add), mov imm64 */ ++#define R_IA64_PLTOFF64MSB 0x3e /* @pltoff(sym + add), data8 MSB */ ++#define R_IA64_PLTOFF64LSB 0x3f /* @pltoff(sym + add), data8 LSB */ ++#define R_IA64_FPTR64I 0x43 /* @fptr(sym + add), mov imm64 */ ++#define R_IA64_FPTR32MSB 0x44 /* @fptr(sym + add), data4 MSB */ ++#define R_IA64_FPTR32LSB 0x45 /* @fptr(sym + add), data4 LSB */ ++#define R_IA64_FPTR64MSB 0x46 /* @fptr(sym + add), data8 MSB */ ++#define R_IA64_FPTR64LSB 0x47 /* @fptr(sym + add), data8 LSB */ ++#define R_IA64_PCREL60B 0x48 /* @pcrel(sym + add), brl */ ++#define R_IA64_PCREL21B 0x49 /* @pcrel(sym + add), ptb, call */ ++#define R_IA64_PCREL21M 0x4a /* @pcrel(sym + add), chk.s */ ++#define R_IA64_PCREL21F 0x4b /* @pcrel(sym + add), fchkf */ ++#define R_IA64_PCREL32MSB 0x4c /* @pcrel(sym + add), data4 MSB */ ++#define R_IA64_PCREL32LSB 0x4d /* @pcrel(sym + add), data4 LSB */ ++#define R_IA64_PCREL64MSB 0x4e /* @pcrel(sym + add), data8 MSB */ ++#define R_IA64_PCREL64LSB 0x4f /* @pcrel(sym + add), data8 LSB */ ++#define R_IA64_LTOFF_FPTR22 0x52 /* @ltoff(@fptr(s+a)), imm22 */ ++#define R_IA64_LTOFF_FPTR64I 0x53 /* @ltoff(@fptr(s+a)), imm64 */ ++#define R_IA64_LTOFF_FPTR32MSB 0x54 /* @ltoff(@fptr(s+a)), data4 MSB */ ++#define R_IA64_LTOFF_FPTR32LSB 0x55 /* @ltoff(@fptr(s+a)), data4 LSB */ ++#define R_IA64_LTOFF_FPTR64MSB 0x56 /* @ltoff(@fptr(s+a)), data8 MSB */ ++#define R_IA64_LTOFF_FPTR64LSB 0x57 /* @ltoff(@fptr(s+a)), data8 LSB */ ++#define R_IA64_SEGREL32MSB 0x5c /* @segrel(sym + add), data4 MSB */ ++#define R_IA64_SEGREL32LSB 0x5d /* @segrel(sym + add), data4 LSB */ ++#define R_IA64_SEGREL64MSB 0x5e /* @segrel(sym + add), data8 MSB */ ++#define R_IA64_SEGREL64LSB 0x5f /* @segrel(sym + add), data8 LSB */ ++#define R_IA64_SECREL32MSB 0x64 /* @secrel(sym + add), data4 MSB */ ++#define R_IA64_SECREL32LSB 0x65 /* @secrel(sym + add), data4 LSB */ ++#define R_IA64_SECREL64MSB 0x66 /* @secrel(sym + add), data8 MSB */ ++#define R_IA64_SECREL64LSB 0x67 /* @secrel(sym + add), data8 LSB */ ++#define R_IA64_REL32MSB 0x6c /* data 4 + REL */ ++#define R_IA64_REL32LSB 0x6d /* data 4 + REL */ ++#define R_IA64_REL64MSB 0x6e /* data 8 + REL */ ++#define R_IA64_REL64LSB 0x6f /* data 8 + REL */ ++#define R_IA64_LTV32MSB 0x74 /* symbol + addend, data4 MSB */ ++#define R_IA64_LTV32LSB 0x75 /* symbol + addend, data4 LSB */ ++#define R_IA64_LTV64MSB 0x76 /* symbol + addend, data8 MSB */ ++#define R_IA64_LTV64LSB 0x77 
/* symbol + addend, data8 LSB */ ++#define R_IA64_PCREL21BI 0x79 /* @pcrel(sym + add), 21bit inst */ ++#define R_IA64_PCREL22 0x7a /* @pcrel(sym + add), 22bit inst */ ++#define R_IA64_PCREL64I 0x7b /* @pcrel(sym + add), 64bit inst */ ++#define R_IA64_IPLTMSB 0x80 /* dynamic reloc, imported PLT, MSB */ ++#define R_IA64_IPLTLSB 0x81 /* dynamic reloc, imported PLT, LSB */ ++#define R_IA64_COPY 0x84 /* copy relocation */ ++#define R_IA64_SUB 0x85 /* Addend and symbol difference */ ++#define R_IA64_LTOFF22X 0x86 /* LTOFF22, relaxable. */ ++#define R_IA64_LDXMOV 0x87 /* Use of LTOFF22X. */ ++#define R_IA64_TPREL14 0x91 /* @tprel(sym + add), imm14 */ ++#define R_IA64_TPREL22 0x92 /* @tprel(sym + add), imm22 */ ++#define R_IA64_TPREL64I 0x93 /* @tprel(sym + add), imm64 */ ++#define R_IA64_TPREL64MSB 0x96 /* @tprel(sym + add), data8 MSB */ ++#define R_IA64_TPREL64LSB 0x97 /* @tprel(sym + add), data8 LSB */ ++#define R_IA64_LTOFF_TPREL22 0x9a /* @ltoff(@tprel(s+a)), imm2 */ ++#define R_IA64_DTPMOD64MSB 0xa6 /* @dtpmod(sym + add), data8 MSB */ ++#define R_IA64_DTPMOD64LSB 0xa7 /* @dtpmod(sym + add), data8 LSB */ ++#define R_IA64_LTOFF_DTPMOD22 0xaa /* @ltoff(@dtpmod(sym + add)), imm22 */ ++#define R_IA64_DTPREL14 0xb1 /* @dtprel(sym + add), imm14 */ ++#define R_IA64_DTPREL22 0xb2 /* @dtprel(sym + add), imm22 */ ++#define R_IA64_DTPREL64I 0xb3 /* @dtprel(sym + add), imm64 */ ++#define R_IA64_DTPREL32MSB 0xb4 /* @dtprel(sym + add), data4 MSB */ ++#define R_IA64_DTPREL32LSB 0xb5 /* @dtprel(sym + add), data4 LSB */ ++#define R_IA64_DTPREL64MSB 0xb6 /* @dtprel(sym + add), data8 MSB */ ++#define R_IA64_DTPREL64LSB 0xb7 /* @dtprel(sym + add), data8 LSB */ ++#define R_IA64_LTOFF_DTPREL22 0xba /* @ltoff(@dtprel(s+a)), imm22 */ ++ ++/* SH specific declarations */ ++ ++/* Processor specific flags for the ELF header e_flags field. */ ++#define EF_SH_MACH_MASK 0x1f ++#define EF_SH_UNKNOWN 0x0 ++#define EF_SH1 0x1 ++#define EF_SH2 0x2 ++#define EF_SH3 0x3 ++#define EF_SH_DSP 0x4 ++#define EF_SH3_DSP 0x5 ++#define EF_SH4AL_DSP 0x6 ++#define EF_SH3E 0x8 ++#define EF_SH4 0x9 ++#define EF_SH2E 0xb ++#define EF_SH4A 0xc ++#define EF_SH2A 0xd ++#define EF_SH4_NOFPU 0x10 ++#define EF_SH4A_NOFPU 0x11 ++#define EF_SH4_NOMMU_NOFPU 0x12 ++#define EF_SH2A_NOFPU 0x13 ++#define EF_SH3_NOMMU 0x14 ++#define EF_SH2A_SH4_NOFPU 0x15 ++#define EF_SH2A_SH3_NOFPU 0x16 ++#define EF_SH2A_SH4 0x17 ++#define EF_SH2A_SH3E 0x18 ++ ++/* SH relocs. */ ++#define R_SH_NONE 0 ++#define R_SH_DIR32 1 ++#define R_SH_REL32 2 ++#define R_SH_DIR8WPN 3 ++#define R_SH_IND12W 4 ++#define R_SH_DIR8WPL 5 ++#define R_SH_DIR8WPZ 6 ++#define R_SH_DIR8BP 7 ++#define R_SH_DIR8W 8 ++#define R_SH_DIR8L 9 ++#define R_SH_SWITCH16 25 ++#define R_SH_SWITCH32 26 ++#define R_SH_USES 27 ++#define R_SH_COUNT 28 ++#define R_SH_ALIGN 29 ++#define R_SH_CODE 30 ++#define R_SH_DATA 31 ++#define R_SH_LABEL 32 ++#define R_SH_SWITCH8 33 ++#define R_SH_GNU_VTINHERIT 34 ++#define R_SH_GNU_VTENTRY 35 ++#define R_SH_TLS_GD_32 144 ++#define R_SH_TLS_LD_32 145 ++#define R_SH_TLS_LDO_32 146 ++#define R_SH_TLS_IE_32 147 ++#define R_SH_TLS_LE_32 148 ++#define R_SH_TLS_DTPMOD32 149 ++#define R_SH_TLS_DTPOFF32 150 ++#define R_SH_TLS_TPOFF32 151 ++#define R_SH_GOT32 160 ++#define R_SH_PLT32 161 ++#define R_SH_COPY 162 ++#define R_SH_GLOB_DAT 163 ++#define R_SH_JMP_SLOT 164 ++#define R_SH_RELATIVE 165 ++#define R_SH_GOTOFF 166 ++#define R_SH_GOTPC 167 ++/* Keep this the last entry. */ ++#define R_SH_NUM 256 ++ ++/* S/390 specific definitions. */ ++ ++/* Valid values for the e_flags field. 
*/ ++ ++#define EF_S390_HIGH_GPRS 0x00000001 /* High GPRs kernel facility needed. */ ++ ++/* Additional s390 relocs */ ++ ++#define R_390_NONE 0 /* No reloc. */ ++#define R_390_8 1 /* Direct 8 bit. */ ++#define R_390_12 2 /* Direct 12 bit. */ ++#define R_390_16 3 /* Direct 16 bit. */ ++#define R_390_32 4 /* Direct 32 bit. */ ++#define R_390_PC32 5 /* PC relative 32 bit. */ ++#define R_390_GOT12 6 /* 12 bit GOT offset. */ ++#define R_390_GOT32 7 /* 32 bit GOT offset. */ ++#define R_390_PLT32 8 /* 32 bit PC relative PLT address. */ ++#define R_390_COPY 9 /* Copy symbol at runtime. */ ++#define R_390_GLOB_DAT 10 /* Create GOT entry. */ ++#define R_390_JMP_SLOT 11 /* Create PLT entry. */ ++#define R_390_RELATIVE 12 /* Adjust by program base. */ ++#define R_390_GOTOFF32 13 /* 32 bit offset to GOT. */ ++#define R_390_GOTPC 14 /* 32 bit PC relative offset to GOT. */ ++#define R_390_GOT16 15 /* 16 bit GOT offset. */ ++#define R_390_PC16 16 /* PC relative 16 bit. */ ++#define R_390_PC16DBL 17 /* PC relative 16 bit shifted by 1. */ ++#define R_390_PLT16DBL 18 /* 16 bit PC rel. PLT shifted by 1. */ ++#define R_390_PC32DBL 19 /* PC relative 32 bit shifted by 1. */ ++#define R_390_PLT32DBL 20 /* 32 bit PC rel. PLT shifted by 1. */ ++#define R_390_GOTPCDBL 21 /* 32 bit PC rel. GOT shifted by 1. */ ++#define R_390_64 22 /* Direct 64 bit. */ ++#define R_390_PC64 23 /* PC relative 64 bit. */ ++#define R_390_GOT64 24 /* 64 bit GOT offset. */ ++#define R_390_PLT64 25 /* 64 bit PC relative PLT address. */ ++#define R_390_GOTENT 26 /* 32 bit PC rel. to GOT entry >> 1. */ ++#define R_390_GOTOFF16 27 /* 16 bit offset to GOT. */ ++#define R_390_GOTOFF64 28 /* 64 bit offset to GOT. */ ++#define R_390_GOTPLT12 29 /* 12 bit offset to jump slot. */ ++#define R_390_GOTPLT16 30 /* 16 bit offset to jump slot. */ ++#define R_390_GOTPLT32 31 /* 32 bit offset to jump slot. */ ++#define R_390_GOTPLT64 32 /* 64 bit offset to jump slot. */ ++#define R_390_GOTPLTENT 33 /* 32 bit rel. offset to jump slot. */ ++#define R_390_PLTOFF16 34 /* 16 bit offset from GOT to PLT. */ ++#define R_390_PLTOFF32 35 /* 32 bit offset from GOT to PLT. */ ++#define R_390_PLTOFF64 36 /* 16 bit offset from GOT to PLT. */ ++#define R_390_TLS_LOAD 37 /* Tag for load insn in TLS code. */ ++#define R_390_TLS_GDCALL 38 /* Tag for function call in general ++ dynamic TLS code. */ ++#define R_390_TLS_LDCALL 39 /* Tag for function call in local ++ dynamic TLS code. */ ++#define R_390_TLS_GD32 40 /* Direct 32 bit for general dynamic ++ thread local data. */ ++#define R_390_TLS_GD64 41 /* Direct 64 bit for general dynamic ++ thread local data. */ ++#define R_390_TLS_GOTIE12 42 /* 12 bit GOT offset for static TLS ++ block offset. */ ++#define R_390_TLS_GOTIE32 43 /* 32 bit GOT offset for static TLS ++ block offset. */ ++#define R_390_TLS_GOTIE64 44 /* 64 bit GOT offset for static TLS ++ block offset. */ ++#define R_390_TLS_LDM32 45 /* Direct 32 bit for local dynamic ++ thread local data in LE code. */ ++#define R_390_TLS_LDM64 46 /* Direct 64 bit for local dynamic ++ thread local data in LE code. */ ++#define R_390_TLS_IE32 47 /* 32 bit address of GOT entry for ++ negated static TLS block offset. */ ++#define R_390_TLS_IE64 48 /* 64 bit address of GOT entry for ++ negated static TLS block offset. */ ++#define R_390_TLS_IEENT 49 /* 32 bit rel. offset to GOT entry for ++ negated static TLS block offset. */ ++#define R_390_TLS_LE32 50 /* 32 bit negated offset relative to ++ static TLS block. 
*/ ++#define R_390_TLS_LE64 51 /* 64 bit negated offset relative to ++ static TLS block. */ ++#define R_390_TLS_LDO32 52 /* 32 bit offset relative to TLS ++ block. */ ++#define R_390_TLS_LDO64 53 /* 64 bit offset relative to TLS ++ block. */ ++#define R_390_TLS_DTPMOD 54 /* ID of module containing symbol. */ ++#define R_390_TLS_DTPOFF 55 /* Offset in TLS block. */ ++#define R_390_TLS_TPOFF 56 /* Negated offset in static TLS ++ block. */ ++#define R_390_20 57 /* Direct 20 bit. */ ++#define R_390_GOT20 58 /* 20 bit GOT offset. */ ++#define R_390_GOTPLT20 59 /* 20 bit offset to jump slot. */ ++#define R_390_TLS_GOTIE20 60 /* 20 bit GOT offset for static TLS ++ block offset. */ ++#define R_390_IRELATIVE 61 /* STT_GNU_IFUNC relocation. */ ++/* Keep this the last entry. */ ++#define R_390_NUM 62 ++ ++ ++/* CRIS relocations. */ ++#define R_CRIS_NONE 0 ++#define R_CRIS_8 1 ++#define R_CRIS_16 2 ++#define R_CRIS_32 3 ++#define R_CRIS_8_PCREL 4 ++#define R_CRIS_16_PCREL 5 ++#define R_CRIS_32_PCREL 6 ++#define R_CRIS_GNU_VTINHERIT 7 ++#define R_CRIS_GNU_VTENTRY 8 ++#define R_CRIS_COPY 9 ++#define R_CRIS_GLOB_DAT 10 ++#define R_CRIS_JUMP_SLOT 11 ++#define R_CRIS_RELATIVE 12 ++#define R_CRIS_16_GOT 13 ++#define R_CRIS_32_GOT 14 ++#define R_CRIS_16_GOTPLT 15 ++#define R_CRIS_32_GOTPLT 16 ++#define R_CRIS_32_GOTREL 17 ++#define R_CRIS_32_PLT_GOTREL 18 ++#define R_CRIS_32_PLT_PCREL 19 ++ ++#define R_CRIS_NUM 20 ++ ++ ++/* AMD x86-64 relocations. */ ++#define R_X86_64_NONE 0 /* No reloc */ ++#define R_X86_64_64 1 /* Direct 64 bit */ ++#define R_X86_64_PC32 2 /* PC relative 32 bit signed */ ++#define R_X86_64_GOT32 3 /* 32 bit GOT entry */ ++#define R_X86_64_PLT32 4 /* 32 bit PLT address */ ++#define R_X86_64_COPY 5 /* Copy symbol at runtime */ ++#define R_X86_64_GLOB_DAT 6 /* Create GOT entry */ ++#define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */ ++#define R_X86_64_RELATIVE 8 /* Adjust by program base */ ++#define R_X86_64_GOTPCREL 9 /* 32 bit signed PC relative ++ offset to GOT */ ++#define R_X86_64_32 10 /* Direct 32 bit zero extended */ ++#define R_X86_64_32S 11 /* Direct 32 bit sign extended */ ++#define R_X86_64_16 12 /* Direct 16 bit zero extended */ ++#define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */ ++#define R_X86_64_8 14 /* Direct 8 bit sign extended */ ++#define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */ ++#define R_X86_64_DTPMOD64 16 /* ID of module containing symbol */ ++#define R_X86_64_DTPOFF64 17 /* Offset in module's TLS block */ ++#define R_X86_64_TPOFF64 18 /* Offset in initial TLS block */ ++#define R_X86_64_TLSGD 19 /* 32 bit signed PC relative offset ++ to two GOT entries for GD symbol */ ++#define R_X86_64_TLSLD 20 /* 32 bit signed PC relative offset ++ to two GOT entries for LD symbol */ ++#define R_X86_64_DTPOFF32 21 /* Offset in TLS block */ ++#define R_X86_64_GOTTPOFF 22 /* 32 bit signed PC relative offset ++ to GOT entry for IE symbol */ ++#define R_X86_64_TPOFF32 23 /* Offset in initial TLS block */ ++#define R_X86_64_PC64 24 /* PC relative 64 bit */ ++#define R_X86_64_GOTOFF64 25 /* 64 bit offset to GOT */ ++#define R_X86_64_GOTPC32 26 /* 32 bit signed pc relative ++ offset to GOT */ ++#define R_X86_64_GOT64 27 /* 64-bit GOT entry offset */ ++#define R_X86_64_GOTPCREL64 28 /* 64-bit PC relative offset ++ to GOT entry */ ++#define R_X86_64_GOTPC64 29 /* 64-bit PC relative offset to GOT */ ++#define R_X86_64_GOTPLT64 30 /* like GOT64, says PLT entry needed */ ++#define R_X86_64_PLTOFF64 31 /* 64-bit GOT relative offset ++ to PLT entry */ ++#define 
R_X86_64_SIZE32 32 /* Size of symbol plus 32-bit addend */ ++#define R_X86_64_SIZE64 33 /* Size of symbol plus 64-bit addend */ ++#define R_X86_64_GOTPC32_TLSDESC 34 /* GOT offset for TLS descriptor. */ ++#define R_X86_64_TLSDESC_CALL 35 /* Marker for call through TLS ++ descriptor. */ ++#define R_X86_64_TLSDESC 36 /* TLS descriptor. */ ++#define R_X86_64_IRELATIVE 37 /* Adjust indirectly by program base */ ++#define R_X86_64_RELATIVE64 38 /* 64-bit adjust by program base */ ++ ++#define R_X86_64_NUM 39 ++ ++ ++/* AM33 relocations. */ ++#define R_MN10300_NONE 0 /* No reloc. */ ++#define R_MN10300_32 1 /* Direct 32 bit. */ ++#define R_MN10300_16 2 /* Direct 16 bit. */ ++#define R_MN10300_8 3 /* Direct 8 bit. */ ++#define R_MN10300_PCREL32 4 /* PC-relative 32-bit. */ ++#define R_MN10300_PCREL16 5 /* PC-relative 16-bit signed. */ ++#define R_MN10300_PCREL8 6 /* PC-relative 8-bit signed. */ ++#define R_MN10300_GNU_VTINHERIT 7 /* Ancient C++ vtable garbage... */ ++#define R_MN10300_GNU_VTENTRY 8 /* ... collection annotation. */ ++#define R_MN10300_24 9 /* Direct 24 bit. */ ++#define R_MN10300_GOTPC32 10 /* 32-bit PCrel offset to GOT. */ ++#define R_MN10300_GOTPC16 11 /* 16-bit PCrel offset to GOT. */ ++#define R_MN10300_GOTOFF32 12 /* 32-bit offset from GOT. */ ++#define R_MN10300_GOTOFF24 13 /* 24-bit offset from GOT. */ ++#define R_MN10300_GOTOFF16 14 /* 16-bit offset from GOT. */ ++#define R_MN10300_PLT32 15 /* 32-bit PCrel to PLT entry. */ ++#define R_MN10300_PLT16 16 /* 16-bit PCrel to PLT entry. */ ++#define R_MN10300_GOT32 17 /* 32-bit offset to GOT entry. */ ++#define R_MN10300_GOT24 18 /* 24-bit offset to GOT entry. */ ++#define R_MN10300_GOT16 19 /* 16-bit offset to GOT entry. */ ++#define R_MN10300_COPY 20 /* Copy symbol at runtime. */ ++#define R_MN10300_GLOB_DAT 21 /* Create GOT entry. */ ++#define R_MN10300_JMP_SLOT 22 /* Create PLT entry. */ ++#define R_MN10300_RELATIVE 23 /* Adjust by program base. */ ++ ++#define R_MN10300_NUM 24 ++ ++ ++/* M32R relocs. */ ++#define R_M32R_NONE 0 /* No reloc. */ ++#define R_M32R_16 1 /* Direct 16 bit. */ ++#define R_M32R_32 2 /* Direct 32 bit. */ ++#define R_M32R_24 3 /* Direct 24 bit. */ ++#define R_M32R_10_PCREL 4 /* PC relative 10 bit shifted. */ ++#define R_M32R_18_PCREL 5 /* PC relative 18 bit shifted. */ ++#define R_M32R_26_PCREL 6 /* PC relative 26 bit shifted. */ ++#define R_M32R_HI16_ULO 7 /* High 16 bit with unsigned low. */ ++#define R_M32R_HI16_SLO 8 /* High 16 bit with signed low. */ ++#define R_M32R_LO16 9 /* Low 16 bit. */ ++#define R_M32R_SDA16 10 /* 16 bit offset in SDA. */ ++#define R_M32R_GNU_VTINHERIT 11 ++#define R_M32R_GNU_VTENTRY 12 ++/* M32R relocs use SHT_RELA. */ ++#define R_M32R_16_RELA 33 /* Direct 16 bit. */ ++#define R_M32R_32_RELA 34 /* Direct 32 bit. */ ++#define R_M32R_24_RELA 35 /* Direct 24 bit. */ ++#define R_M32R_10_PCREL_RELA 36 /* PC relative 10 bit shifted. */ ++#define R_M32R_18_PCREL_RELA 37 /* PC relative 18 bit shifted. */ ++#define R_M32R_26_PCREL_RELA 38 /* PC relative 26 bit shifted. */ ++#define R_M32R_HI16_ULO_RELA 39 /* High 16 bit with unsigned low */ ++#define R_M32R_HI16_SLO_RELA 40 /* High 16 bit with signed low */ ++#define R_M32R_LO16_RELA 41 /* Low 16 bit */ ++#define R_M32R_SDA16_RELA 42 /* 16 bit offset in SDA */ ++#define R_M32R_RELA_GNU_VTINHERIT 43 ++#define R_M32R_RELA_GNU_VTENTRY 44 ++#define R_M32R_REL32 45 /* PC relative 32 bit. 
*/ ++ ++#define R_M32R_GOT24 48 /* 24 bit GOT entry */ ++#define R_M32R_26_PLTREL 49 /* 26 bit PC relative to PLT shifted */ ++#define R_M32R_COPY 50 /* Copy symbol at runtime */ ++#define R_M32R_GLOB_DAT 51 /* Create GOT entry */ ++#define R_M32R_JMP_SLOT 52 /* Create PLT entry */ ++#define R_M32R_RELATIVE 53 /* Adjust by program base */ ++#define R_M32R_GOTOFF 54 /* 24 bit offset to GOT */ ++#define R_M32R_GOTPC24 55 /* 24 bit PC relative offset to GOT */ ++#define R_M32R_GOT16_HI_ULO 56 /* High 16 bit GOT entry with unsigned ++ low */ ++#define R_M32R_GOT16_HI_SLO 57 /* High 16 bit GOT entry with signed ++ low */ ++#define R_M32R_GOT16_LO 58 /* Low 16 bit GOT entry */ ++#define R_M32R_GOTPC_HI_ULO 59 /* High 16 bit PC relative offset to ++ GOT with unsigned low */ ++#define R_M32R_GOTPC_HI_SLO 60 /* High 16 bit PC relative offset to ++ GOT with signed low */ ++#define R_M32R_GOTPC_LO 61 /* Low 16 bit PC relative offset to ++ GOT */ ++#define R_M32R_GOTOFF_HI_ULO 62 /* High 16 bit offset to GOT ++ with unsigned low */ ++#define R_M32R_GOTOFF_HI_SLO 63 /* High 16 bit offset to GOT ++ with signed low */ ++#define R_M32R_GOTOFF_LO 64 /* Low 16 bit offset to GOT */ ++#define R_M32R_NUM 256 /* Keep this the last entry. */ ++ ++ ++/* TILEPro relocations. */ ++#define R_TILEPRO_NONE 0 /* No reloc */ ++#define R_TILEPRO_32 1 /* Direct 32 bit */ ++#define R_TILEPRO_16 2 /* Direct 16 bit */ ++#define R_TILEPRO_8 3 /* Direct 8 bit */ ++#define R_TILEPRO_32_PCREL 4 /* PC relative 32 bit */ ++#define R_TILEPRO_16_PCREL 5 /* PC relative 16 bit */ ++#define R_TILEPRO_8_PCREL 6 /* PC relative 8 bit */ ++#define R_TILEPRO_LO16 7 /* Low 16 bit */ ++#define R_TILEPRO_HI16 8 /* High 16 bit */ ++#define R_TILEPRO_HA16 9 /* High 16 bit, adjusted */ ++#define R_TILEPRO_COPY 10 /* Copy relocation */ ++#define R_TILEPRO_GLOB_DAT 11 /* Create GOT entry */ ++#define R_TILEPRO_JMP_SLOT 12 /* Create PLT entry */ ++#define R_TILEPRO_RELATIVE 13 /* Adjust by program base */ ++#define R_TILEPRO_BROFF_X1 14 /* X1 pipe branch offset */ ++#define R_TILEPRO_JOFFLONG_X1 15 /* X1 pipe jump offset */ ++#define R_TILEPRO_JOFFLONG_X1_PLT 16 /* X1 pipe jump offset to PLT */ ++#define R_TILEPRO_IMM8_X0 17 /* X0 pipe 8-bit */ ++#define R_TILEPRO_IMM8_Y0 18 /* Y0 pipe 8-bit */ ++#define R_TILEPRO_IMM8_X1 19 /* X1 pipe 8-bit */ ++#define R_TILEPRO_IMM8_Y1 20 /* Y1 pipe 8-bit */ ++#define R_TILEPRO_MT_IMM15_X1 21 /* X1 pipe mtspr */ ++#define R_TILEPRO_MF_IMM15_X1 22 /* X1 pipe mfspr */ ++#define R_TILEPRO_IMM16_X0 23 /* X0 pipe 16-bit */ ++#define R_TILEPRO_IMM16_X1 24 /* X1 pipe 16-bit */ ++#define R_TILEPRO_IMM16_X0_LO 25 /* X0 pipe low 16-bit */ ++#define R_TILEPRO_IMM16_X1_LO 26 /* X1 pipe low 16-bit */ ++#define R_TILEPRO_IMM16_X0_HI 27 /* X0 pipe high 16-bit */ ++#define R_TILEPRO_IMM16_X1_HI 28 /* X1 pipe high 16-bit */ ++#define R_TILEPRO_IMM16_X0_HA 29 /* X0 pipe high 16-bit, adjusted */ ++#define R_TILEPRO_IMM16_X1_HA 30 /* X1 pipe high 16-bit, adjusted */ ++#define R_TILEPRO_IMM16_X0_PCREL 31 /* X0 pipe PC relative 16 bit */ ++#define R_TILEPRO_IMM16_X1_PCREL 32 /* X1 pipe PC relative 16 bit */ ++#define R_TILEPRO_IMM16_X0_LO_PCREL 33 /* X0 pipe PC relative low 16 bit */ ++#define R_TILEPRO_IMM16_X1_LO_PCREL 34 /* X1 pipe PC relative low 16 bit */ ++#define R_TILEPRO_IMM16_X0_HI_PCREL 35 /* X0 pipe PC relative high 16 bit */ ++#define R_TILEPRO_IMM16_X1_HI_PCREL 36 /* X1 pipe PC relative high 16 bit */ ++#define R_TILEPRO_IMM16_X0_HA_PCREL 37 /* X0 pipe PC relative ha() 16 bit */ ++#define R_TILEPRO_IMM16_X1_HA_PCREL 38 
/* X1 pipe PC relative ha() 16 bit */ ++#define R_TILEPRO_IMM16_X0_GOT 39 /* X0 pipe 16-bit GOT offset */ ++#define R_TILEPRO_IMM16_X1_GOT 40 /* X1 pipe 16-bit GOT offset */ ++#define R_TILEPRO_IMM16_X0_GOT_LO 41 /* X0 pipe low 16-bit GOT offset */ ++#define R_TILEPRO_IMM16_X1_GOT_LO 42 /* X1 pipe low 16-bit GOT offset */ ++#define R_TILEPRO_IMM16_X0_GOT_HI 43 /* X0 pipe high 16-bit GOT offset */ ++#define R_TILEPRO_IMM16_X1_GOT_HI 44 /* X1 pipe high 16-bit GOT offset */ ++#define R_TILEPRO_IMM16_X0_GOT_HA 45 /* X0 pipe ha() 16-bit GOT offset */ ++#define R_TILEPRO_IMM16_X1_GOT_HA 46 /* X1 pipe ha() 16-bit GOT offset */ ++#define R_TILEPRO_MMSTART_X0 47 /* X0 pipe mm "start" */ ++#define R_TILEPRO_MMEND_X0 48 /* X0 pipe mm "end" */ ++#define R_TILEPRO_MMSTART_X1 49 /* X1 pipe mm "start" */ ++#define R_TILEPRO_MMEND_X1 50 /* X1 pipe mm "end" */ ++#define R_TILEPRO_SHAMT_X0 51 /* X0 pipe shift amount */ ++#define R_TILEPRO_SHAMT_X1 52 /* X1 pipe shift amount */ ++#define R_TILEPRO_SHAMT_Y0 53 /* Y0 pipe shift amount */ ++#define R_TILEPRO_SHAMT_Y1 54 /* Y1 pipe shift amount */ ++#define R_TILEPRO_DEST_IMM8_X1 55 /* X1 pipe destination 8-bit */ ++/* Relocs 56-59 are currently not defined. */ ++#define R_TILEPRO_TLS_GD_CALL 60 /* "jal" for TLS GD */ ++#define R_TILEPRO_IMM8_X0_TLS_GD_ADD 61 /* X0 pipe "addi" for TLS GD */ ++#define R_TILEPRO_IMM8_X1_TLS_GD_ADD 62 /* X1 pipe "addi" for TLS GD */ ++#define R_TILEPRO_IMM8_Y0_TLS_GD_ADD 63 /* Y0 pipe "addi" for TLS GD */ ++#define R_TILEPRO_IMM8_Y1_TLS_GD_ADD 64 /* Y1 pipe "addi" for TLS GD */ ++#define R_TILEPRO_TLS_IE_LOAD 65 /* "lw_tls" for TLS IE */ ++#define R_TILEPRO_IMM16_X0_TLS_GD 66 /* X0 pipe 16-bit TLS GD offset */ ++#define R_TILEPRO_IMM16_X1_TLS_GD 67 /* X1 pipe 16-bit TLS GD offset */ ++#define R_TILEPRO_IMM16_X0_TLS_GD_LO 68 /* X0 pipe low 16-bit TLS GD offset */ ++#define R_TILEPRO_IMM16_X1_TLS_GD_LO 69 /* X1 pipe low 16-bit TLS GD offset */ ++#define R_TILEPRO_IMM16_X0_TLS_GD_HI 70 /* X0 pipe high 16-bit TLS GD offset */ ++#define R_TILEPRO_IMM16_X1_TLS_GD_HI 71 /* X1 pipe high 16-bit TLS GD offset */ ++#define R_TILEPRO_IMM16_X0_TLS_GD_HA 72 /* X0 pipe ha() 16-bit TLS GD offset */ ++#define R_TILEPRO_IMM16_X1_TLS_GD_HA 73 /* X1 pipe ha() 16-bit TLS GD offset */ ++#define R_TILEPRO_IMM16_X0_TLS_IE 74 /* X0 pipe 16-bit TLS IE offset */ ++#define R_TILEPRO_IMM16_X1_TLS_IE 75 /* X1 pipe 16-bit TLS IE offset */ ++#define R_TILEPRO_IMM16_X0_TLS_IE_LO 76 /* X0 pipe low 16-bit TLS IE offset */ ++#define R_TILEPRO_IMM16_X1_TLS_IE_LO 77 /* X1 pipe low 16-bit TLS IE offset */ ++#define R_TILEPRO_IMM16_X0_TLS_IE_HI 78 /* X0 pipe high 16-bit TLS IE offset */ ++#define R_TILEPRO_IMM16_X1_TLS_IE_HI 79 /* X1 pipe high 16-bit TLS IE offset */ ++#define R_TILEPRO_IMM16_X0_TLS_IE_HA 80 /* X0 pipe ha() 16-bit TLS IE offset */ ++#define R_TILEPRO_IMM16_X1_TLS_IE_HA 81 /* X1 pipe ha() 16-bit TLS IE offset */ ++#define R_TILEPRO_TLS_DTPMOD32 82 /* ID of module containing symbol */ ++#define R_TILEPRO_TLS_DTPOFF32 83 /* Offset in TLS block */ ++#define R_TILEPRO_TLS_TPOFF32 84 /* Offset in static TLS block */ ++#define R_TILEPRO_IMM16_X0_TLS_LE 85 /* X0 pipe 16-bit TLS LE offset */ ++#define R_TILEPRO_IMM16_X1_TLS_LE 86 /* X1 pipe 16-bit TLS LE offset */ ++#define R_TILEPRO_IMM16_X0_TLS_LE_LO 87 /* X0 pipe low 16-bit TLS LE offset */ ++#define R_TILEPRO_IMM16_X1_TLS_LE_LO 88 /* X1 pipe low 16-bit TLS LE offset */ ++#define R_TILEPRO_IMM16_X0_TLS_LE_HI 89 /* X0 pipe high 16-bit TLS LE offset */ ++#define R_TILEPRO_IMM16_X1_TLS_LE_HI 90 /* X1 pipe high 
16-bit TLS LE offset */ ++#define R_TILEPRO_IMM16_X0_TLS_LE_HA 91 /* X0 pipe ha() 16-bit TLS LE offset */ ++#define R_TILEPRO_IMM16_X1_TLS_LE_HA 92 /* X1 pipe ha() 16-bit TLS LE offset */ ++ ++#define R_TILEPRO_GNU_VTINHERIT 128 /* GNU C++ vtable hierarchy */ ++#define R_TILEPRO_GNU_VTENTRY 129 /* GNU C++ vtable member usage */ ++ ++#define R_TILEPRO_NUM 130 ++ ++ ++/* TILE-Gx relocations. */ ++#define R_TILEGX_NONE 0 /* No reloc */ ++#define R_TILEGX_64 1 /* Direct 64 bit */ ++#define R_TILEGX_32 2 /* Direct 32 bit */ ++#define R_TILEGX_16 3 /* Direct 16 bit */ ++#define R_TILEGX_8 4 /* Direct 8 bit */ ++#define R_TILEGX_64_PCREL 5 /* PC relative 64 bit */ ++#define R_TILEGX_32_PCREL 6 /* PC relative 32 bit */ ++#define R_TILEGX_16_PCREL 7 /* PC relative 16 bit */ ++#define R_TILEGX_8_PCREL 8 /* PC relative 8 bit */ ++#define R_TILEGX_HW0 9 /* hword 0 16-bit */ ++#define R_TILEGX_HW1 10 /* hword 1 16-bit */ ++#define R_TILEGX_HW2 11 /* hword 2 16-bit */ ++#define R_TILEGX_HW3 12 /* hword 3 16-bit */ ++#define R_TILEGX_HW0_LAST 13 /* last hword 0 16-bit */ ++#define R_TILEGX_HW1_LAST 14 /* last hword 1 16-bit */ ++#define R_TILEGX_HW2_LAST 15 /* last hword 2 16-bit */ ++#define R_TILEGX_COPY 16 /* Copy relocation */ ++#define R_TILEGX_GLOB_DAT 17 /* Create GOT entry */ ++#define R_TILEGX_JMP_SLOT 18 /* Create PLT entry */ ++#define R_TILEGX_RELATIVE 19 /* Adjust by program base */ ++#define R_TILEGX_BROFF_X1 20 /* X1 pipe branch offset */ ++#define R_TILEGX_JUMPOFF_X1 21 /* X1 pipe jump offset */ ++#define R_TILEGX_JUMPOFF_X1_PLT 22 /* X1 pipe jump offset to PLT */ ++#define R_TILEGX_IMM8_X0 23 /* X0 pipe 8-bit */ ++#define R_TILEGX_IMM8_Y0 24 /* Y0 pipe 8-bit */ ++#define R_TILEGX_IMM8_X1 25 /* X1 pipe 8-bit */ ++#define R_TILEGX_IMM8_Y1 26 /* Y1 pipe 8-bit */ ++#define R_TILEGX_DEST_IMM8_X1 27 /* X1 pipe destination 8-bit */ ++#define R_TILEGX_MT_IMM14_X1 28 /* X1 pipe mtspr */ ++#define R_TILEGX_MF_IMM14_X1 29 /* X1 pipe mfspr */ ++#define R_TILEGX_MMSTART_X0 30 /* X0 pipe mm "start" */ ++#define R_TILEGX_MMEND_X0 31 /* X0 pipe mm "end" */ ++#define R_TILEGX_SHAMT_X0 32 /* X0 pipe shift amount */ ++#define R_TILEGX_SHAMT_X1 33 /* X1 pipe shift amount */ ++#define R_TILEGX_SHAMT_Y0 34 /* Y0 pipe shift amount */ ++#define R_TILEGX_SHAMT_Y1 35 /* Y1 pipe shift amount */ ++#define R_TILEGX_IMM16_X0_HW0 36 /* X0 pipe hword 0 */ ++#define R_TILEGX_IMM16_X1_HW0 37 /* X1 pipe hword 0 */ ++#define R_TILEGX_IMM16_X0_HW1 38 /* X0 pipe hword 1 */ ++#define R_TILEGX_IMM16_X1_HW1 39 /* X1 pipe hword 1 */ ++#define R_TILEGX_IMM16_X0_HW2 40 /* X0 pipe hword 2 */ ++#define R_TILEGX_IMM16_X1_HW2 41 /* X1 pipe hword 2 */ ++#define R_TILEGX_IMM16_X0_HW3 42 /* X0 pipe hword 3 */ ++#define R_TILEGX_IMM16_X1_HW3 43 /* X1 pipe hword 3 */ ++#define R_TILEGX_IMM16_X0_HW0_LAST 44 /* X0 pipe last hword 0 */ ++#define R_TILEGX_IMM16_X1_HW0_LAST 45 /* X1 pipe last hword 0 */ ++#define R_TILEGX_IMM16_X0_HW1_LAST 46 /* X0 pipe last hword 1 */ ++#define R_TILEGX_IMM16_X1_HW1_LAST 47 /* X1 pipe last hword 1 */ ++#define R_TILEGX_IMM16_X0_HW2_LAST 48 /* X0 pipe last hword 2 */ ++#define R_TILEGX_IMM16_X1_HW2_LAST 49 /* X1 pipe last hword 2 */ ++#define R_TILEGX_IMM16_X0_HW0_PCREL 50 /* X0 pipe PC relative hword 0 */ ++#define R_TILEGX_IMM16_X1_HW0_PCREL 51 /* X1 pipe PC relative hword 0 */ ++#define R_TILEGX_IMM16_X0_HW1_PCREL 52 /* X0 pipe PC relative hword 1 */ ++#define R_TILEGX_IMM16_X1_HW1_PCREL 53 /* X1 pipe PC relative hword 1 */ ++#define R_TILEGX_IMM16_X0_HW2_PCREL 54 /* X0 pipe PC relative hword 2 */ ++#define 
R_TILEGX_IMM16_X1_HW2_PCREL 55 /* X1 pipe PC relative hword 2 */ ++#define R_TILEGX_IMM16_X0_HW3_PCREL 56 /* X0 pipe PC relative hword 3 */ ++#define R_TILEGX_IMM16_X1_HW3_PCREL 57 /* X1 pipe PC relative hword 3 */ ++#define R_TILEGX_IMM16_X0_HW0_LAST_PCREL 58 /* X0 pipe PC-rel last hword 0 */ ++#define R_TILEGX_IMM16_X1_HW0_LAST_PCREL 59 /* X1 pipe PC-rel last hword 0 */ ++#define R_TILEGX_IMM16_X0_HW1_LAST_PCREL 60 /* X0 pipe PC-rel last hword 1 */ ++#define R_TILEGX_IMM16_X1_HW1_LAST_PCREL 61 /* X1 pipe PC-rel last hword 1 */ ++#define R_TILEGX_IMM16_X0_HW2_LAST_PCREL 62 /* X0 pipe PC-rel last hword 2 */ ++#define R_TILEGX_IMM16_X1_HW2_LAST_PCREL 63 /* X1 pipe PC-rel last hword 2 */ ++#define R_TILEGX_IMM16_X0_HW0_GOT 64 /* X0 pipe hword 0 GOT offset */ ++#define R_TILEGX_IMM16_X1_HW0_GOT 65 /* X1 pipe hword 0 GOT offset */ ++/* Relocs 66-71 are currently not defined. */ ++#define R_TILEGX_IMM16_X0_HW0_LAST_GOT 72 /* X0 pipe last hword 0 GOT offset */ ++#define R_TILEGX_IMM16_X1_HW0_LAST_GOT 73 /* X1 pipe last hword 0 GOT offset */ ++#define R_TILEGX_IMM16_X0_HW1_LAST_GOT 74 /* X0 pipe last hword 1 GOT offset */ ++#define R_TILEGX_IMM16_X1_HW1_LAST_GOT 75 /* X1 pipe last hword 1 GOT offset */ ++/* Relocs 76-77 are currently not defined. */ ++#define R_TILEGX_IMM16_X0_HW0_TLS_GD 78 /* X0 pipe hword 0 TLS GD offset */ ++#define R_TILEGX_IMM16_X1_HW0_TLS_GD 79 /* X1 pipe hword 0 TLS GD offset */ ++#define R_TILEGX_IMM16_X0_HW0_TLS_LE 80 /* X0 pipe hword 0 TLS LE offset */ ++#define R_TILEGX_IMM16_X1_HW0_TLS_LE 81 /* X1 pipe hword 0 TLS LE offset */ ++#define R_TILEGX_IMM16_X0_HW0_LAST_TLS_LE 82 /* X0 pipe last hword 0 LE off */ ++#define R_TILEGX_IMM16_X1_HW0_LAST_TLS_LE 83 /* X1 pipe last hword 0 LE off */ ++#define R_TILEGX_IMM16_X0_HW1_LAST_TLS_LE 84 /* X0 pipe last hword 1 LE off */ ++#define R_TILEGX_IMM16_X1_HW1_LAST_TLS_LE 85 /* X1 pipe last hword 1 LE off */ ++#define R_TILEGX_IMM16_X0_HW0_LAST_TLS_GD 86 /* X0 pipe last hword 0 GD off */ ++#define R_TILEGX_IMM16_X1_HW0_LAST_TLS_GD 87 /* X1 pipe last hword 0 GD off */ ++#define R_TILEGX_IMM16_X0_HW1_LAST_TLS_GD 88 /* X0 pipe last hword 1 GD off */ ++#define R_TILEGX_IMM16_X1_HW1_LAST_TLS_GD 89 /* X1 pipe last hword 1 GD off */ ++/* Relocs 90-91 are currently not defined. */ ++#define R_TILEGX_IMM16_X0_HW0_TLS_IE 92 /* X0 pipe hword 0 TLS IE offset */ ++#define R_TILEGX_IMM16_X1_HW0_TLS_IE 93 /* X1 pipe hword 0 TLS IE offset */ ++/* Relocs 94-99 are currently not defined. */ ++#define R_TILEGX_IMM16_X0_HW0_LAST_TLS_IE 100 /* X0 pipe last hword 0 IE off */ ++#define R_TILEGX_IMM16_X1_HW0_LAST_TLS_IE 101 /* X1 pipe last hword 0 IE off */ ++#define R_TILEGX_IMM16_X0_HW1_LAST_TLS_IE 102 /* X0 pipe last hword 1 IE off */ ++#define R_TILEGX_IMM16_X1_HW1_LAST_TLS_IE 103 /* X1 pipe last hword 1 IE off */ ++/* Relocs 104-105 are currently not defined. 
*/
++#define R_TILEGX_TLS_DTPMOD64 106 /* 64-bit ID of symbol's module */
++#define R_TILEGX_TLS_DTPOFF64 107 /* 64-bit offset in TLS block */
++#define R_TILEGX_TLS_TPOFF64 108 /* 64-bit offset in static TLS block */
++#define R_TILEGX_TLS_DTPMOD32 109 /* 32-bit ID of symbol's module */
++#define R_TILEGX_TLS_DTPOFF32 110 /* 32-bit offset in TLS block */
++#define R_TILEGX_TLS_TPOFF32 111 /* 32-bit offset in static TLS block */
++#define R_TILEGX_TLS_GD_CALL 112 /* "jal" for TLS GD */
++#define R_TILEGX_IMM8_X0_TLS_GD_ADD 113 /* X0 pipe "addi" for TLS GD */
++#define R_TILEGX_IMM8_X1_TLS_GD_ADD 114 /* X1 pipe "addi" for TLS GD */
++#define R_TILEGX_IMM8_Y0_TLS_GD_ADD 115 /* Y0 pipe "addi" for TLS GD */
++#define R_TILEGX_IMM8_Y1_TLS_GD_ADD 116 /* Y1 pipe "addi" for TLS GD */
++#define R_TILEGX_TLS_IE_LOAD 117 /* "ld_tls" for TLS IE */
++#define R_TILEGX_IMM8_X0_TLS_ADD 118 /* X0 pipe "addi" for TLS GD/IE */
++#define R_TILEGX_IMM8_X1_TLS_ADD 119 /* X1 pipe "addi" for TLS GD/IE */
++#define R_TILEGX_IMM8_Y0_TLS_ADD 120 /* Y0 pipe "addi" for TLS GD/IE */
++#define R_TILEGX_IMM8_Y1_TLS_ADD 121 /* Y1 pipe "addi" for TLS GD/IE */
++
++#define R_TILEGX_GNU_VTINHERIT 128 /* GNU C++ vtable hierarchy */
++#define R_TILEGX_GNU_VTENTRY 129 /* GNU C++ vtable member usage */
++
++#define R_TILEGX_NUM 130
++
++#endif /* elf.h */
+--- a/scripts/mod/mk_elfconfig.c
++++ b/scripts/mod/mk_elfconfig.c
+@@ -2,7 +2,11 @@
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
++#ifndef __APPLE__
+ #include <elf.h>
++#else
++#include "elf.h"
++#endif
+
+ int
+ main(int argc, char **argv)
+--- a/scripts/mod/modpost.h
++++ b/scripts/mod/modpost.h
+@@ -8,7 +8,11 @@
+ #include <sys/mman.h>
+ #include <fcntl.h>
+ #include <unistd.h>
++#if !(defined(__APPLE__) || defined(__CYGWIN__))
+ #include <elf.h>
++#else
++#include "elf.h"
++#endif
+
+ #include "elfconfig.h"
+
diff --git a/root/target/linux/generic/hack-5.4/211-darwin-uuid-typedef-clash.patch b/root/target/linux/generic/hack-5.4/211-darwin-uuid-typedef-clash.patch
new file mode 100755
index 00000000..50a62271
--- /dev/null
+++ b/root/target/linux/generic/hack-5.4/211-darwin-uuid-typedef-clash.patch
@@ -0,0 +1,22 @@
+From e44fc2af1ddc452b6659d08c16973d65c73b7d0a Mon Sep 17 00:00:00 2001
+From: Kevin Darbyshire-Bryant
+Date: Wed, 5 Feb 2020 18:36:43 +0000
+Subject: [PATCH] file2alias: build on macos
+
+Signed-off-by: Kevin Darbyshire-Bryant
+---
+ scripts/mod/file2alias.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/scripts/mod/file2alias.c
++++ b/scripts/mod/file2alias.c
+@@ -38,6 +38,9 @@ typedef struct {
+ __u8 b[16];
+ } guid_t;
+
++#ifdef __APPLE__
++#define uuid_t compat_uuid_t
++#endif
+ /* backwards compatibility, don't use in new code */
+ typedef struct {
+ __u8 b[16];
diff --git a/root/target/linux/generic/hack-5.4/214-spidev_h_portability.patch b/root/target/linux/generic/hack-5.4/214-spidev_h_portability.patch
new file mode 100755
index 00000000..415e9a42
--- /dev/null
+++ b/root/target/linux/generic/hack-5.4/214-spidev_h_portability.patch
@@ -0,0 +1,24 @@
+From be9be95ff10e16a5b4ad36f903978d0cc5747024 Mon Sep 17 00:00:00 2001
+From: Felix Fietkau
+Date: Fri, 7 Jul 2017 17:04:08 +0200
+Subject: kernel: fix linux/spi/spidev.h portability issues with musl
+
+Felix will try to get this define included into musl
+
+lede-commit: 795e7cf60de19e7a076a46874fab7bb88b43bbff
+Signed-off-by: Felix Fietkau
+---
+ include/uapi/linux/spi/spidev.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/uapi/linux/spi/spidev.h
++++ b/include/uapi/linux/spi/spidev.h
+@@ -117,7 +117,7 @@ struct spi_ioc_transfer {
+
+ /* not all platforms use <asm-generic/ioctl.h> or _IOC_TYPECHECK() ... */
+ #define SPI_MSGSIZE(N) \
+- ((((N)*(sizeof (struct spi_ioc_transfer))) < (1 << _IOC_SIZEBITS)) \
++ ((((N)*(sizeof (struct spi_ioc_transfer))) < (1 << 13)) \
+ ? ((N)*(sizeof (struct spi_ioc_transfer))) : 0)
+ #define SPI_IOC_MESSAGE(N) _IOW(SPI_IOC_MAGIC, 0, char[SPI_MSGSIZE(N)])
+
diff --git a/root/target/linux/generic/hack-5.4/220-arm-gc_sections.patch b/root/target/linux/generic/hack-5.4/220-arm-gc_sections.patch
new file mode 100755
index 00000000..14e24617
--- /dev/null
+++ b/root/target/linux/generic/hack-5.4/220-arm-gc_sections.patch
@@ -0,0 +1,138 @@
+From e3d8676f5722b7622685581e06e8f53e6138e3ab Mon Sep 17 00:00:00 2001
+From: Felix Fietkau
+Date: Sat, 15 Jul 2017 23:42:36 +0200
+Subject: use -ffunction-sections, -fdata-sections and --gc-sections
+
+In combination with kernel symbol export stripping this significantly reduces
+the kernel image size. Used on both ARM and MIPS architectures.
+
+Signed-off-by: Felix Fietkau
+Signed-off-by: Jonas Gorski
+Signed-off-by: Gabor Juhos
+---
+ Makefile | 10 +++----
+ arch/arm/Kconfig | 1 +
+ arch/arm/boot/compressed/Makefile | 1 +
+ arch/arm/kernel/vmlinux.lds.S | 26 ++++++++--------
+ arch/mips/Kconfig | 1 +
+ arch/mips/kernel/vmlinux.lds.S | 4 +--
+ include/asm-generic/vmlinux.lds.h | 63 ++++++++++++++++++++-------------------
+ 7 files changed, 55 insertions(+), 51 deletions(-)
+
+--- a/arch/arm/Kconfig
++++ b/arch/arm/Kconfig
+@@ -112,6 +112,7 @@ config ARM
+ select HAVE_UID16
+ select HAVE_VIRT_CPU_ACCOUNTING_GEN
+ select IRQ_FORCED_THREADING
++ select HAVE_LD_DEAD_CODE_DATA_ELIMINATION
+ select MODULES_USE_ELF_REL
+ select NEED_DMA_MAP_STATE
+ select OF_EARLY_FLATTREE if OF
+--- a/arch/arm/boot/compressed/Makefile
++++ b/arch/arm/boot/compressed/Makefile
+@@ -108,6 +108,7 @@ ifeq ($(CONFIG_FUNCTION_TRACER),y)
+ ORIG_CFLAGS := $(KBUILD_CFLAGS)
+ KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
+ endif
++KBUILD_CFLAGS_KERNEL := $(patsubst -f%-sections,,$(KBUILD_CFLAGS_KERNEL))
+
+ # -fstack-protector-strong triggers protection checks in this code,
+ # but it is being used too early to link to meaningful stack_chk logic.
+--- a/arch/arm/kernel/vmlinux.lds.S
++++ b/arch/arm/kernel/vmlinux.lds.S
+@@ -73,7 +73,7 @@ SECTIONS
+ . = ALIGN(4);
+ __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
+ __start___ex_table = .;
+- ARM_MMU_KEEP(*(__ex_table))
++ KEEP(*(__ex_table))
+ __stop___ex_table = .;
+ }
+
+@@ -100,24 +100,24 @@ SECTIONS
+ }
+ .init.arch.info : {
+ __arch_info_begin = .;
+- *(.arch.info.init)
++ KEEP(*(.arch.info.init))
+ __arch_info_end = .;
+ }
+ .init.tagtable : {
+ __tagtable_begin = .;
+- *(.taglist.init)
++ KEEP(*(.taglist.init))
+ __tagtable_end = .;
+ }
+ #ifdef CONFIG_SMP_ON_UP
+ .init.smpalt : {
+ __smpalt_begin = .;
+- *(.alt.smp.init)
++ KEEP(*(.alt.smp.init))
+ __smpalt_end = .;
+ }
+ #endif
+ .init.pv_table : {
+ __pv_table_begin = .;
+- *(.pv_table)
++ KEEP(*(.pv_table))
+ __pv_table_end = .;
+ }
+
+--- a/arch/arm/kernel/vmlinux.lds.h
++++ b/arch/arm/kernel/vmlinux.lds.h
+@@ -28,7 +28,7 @@
+ #define PROC_INFO \
+ . = ALIGN(4); \
+ __proc_info_begin = .; \
+- *(.proc.info.init) \
++ KEEP(*(.proc.info.init)) \
+ __proc_info_end = .;
+
+ #define HYPERVISOR_TEXT \
+@@ -39,11 +39,11 @@
+ #define IDMAP_TEXT \
+ ALIGN_FUNCTION(); \
+ __idmap_text_start = .; \
+- *(.idmap.text) \
++ KEEP(*(.idmap.text)) \
+ __idmap_text_end = .; \
+ .
= ALIGN(PAGE_SIZE); \ + __hyp_idmap_text_start = .; \ +- *(.hyp.idmap.text) \ ++ KEEP(*(.hyp.idmap.text)) \ + __hyp_idmap_text_end = .; + + #define ARM_DISCARD \ +@@ -86,12 +86,12 @@ + . = ALIGN(8); \ + .ARM.unwind_idx : { \ + __start_unwind_idx = .; \ +- *(.ARM.exidx*) \ ++ KEEP(*(.ARM.exidx*)) \ + __stop_unwind_idx = .; \ + } \ + .ARM.unwind_tab : { \ + __start_unwind_tab = .; \ +- *(.ARM.extab*) \ ++ KEEP(*(.ARM.extab*)) \ + __stop_unwind_tab = .; \ + } + +@@ -102,14 +102,14 @@ + #define ARM_VECTORS \ + __vectors_start = .; \ + .vectors 0xffff0000 : AT(__vectors_start) { \ +- *(.vectors) \ ++ KEEP(*(.vectors)) \ + } \ + . = __vectors_start + SIZEOF(.vectors); \ + __vectors_end = .; \ + \ + __stubs_start = .; \ + .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) { \ +- *(.stubs) \ ++ KEEP(*(.stubs)) \ + } \ + . = __stubs_start + SIZEOF(.stubs); \ + __stubs_end = .; \ diff --git a/root/target/linux/generic/hack-5.4/221-module_exports.patch b/root/target/linux/generic/hack-5.4/221-module_exports.patch new file mode 100755 index 00000000..47f40ac5 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/221-module_exports.patch @@ -0,0 +1,109 @@ +From b14784e7883390c20ed3ff904892255404a5914b Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Fri, 7 Jul 2017 17:05:53 +0200 +Subject: add an optional config option for stripping all unnecessary symbol exports from the kernel image + +lede-commit: bb5a40c64b7c4f4848509fa0a6625055fc9e66cc +Signed-off-by: Felix Fietkau +--- + include/asm-generic/vmlinux.lds.h | 18 +++++++++++++++--- + include/linux/export.h | 9 ++++++++- + scripts/Makefile.build | 2 +- + 3 files changed, 24 insertions(+), 5 deletions(-) + +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -54,6 +54,16 @@ + #define LOAD_OFFSET 0 + #endif + ++#ifndef SYMTAB_KEEP ++#define SYMTAB_KEEP KEEP(*(SORT(___ksymtab+*))) ++#define SYMTAB_KEEP_GPL KEEP(*(SORT(___ksymtab_gpl+*))) ++#endif ++ ++#ifndef SYMTAB_DISCARD ++#define SYMTAB_DISCARD ++#define SYMTAB_DISCARD_GPL ++#endif ++ + /* Align . to a 8 byte boundary equals to maximum function alignment. */ + #define ALIGN_FUNCTION() . 
= ALIGN(8) + +@@ -407,14 +417,14 @@ + /* Kernel symbol table: Normal symbols */ \ + __ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \ + __start___ksymtab = .; \ +- KEEP(*(SORT(___ksymtab+*))) \ ++ SYMTAB_KEEP \ + __stop___ksymtab = .; \ + } \ + \ + /* Kernel symbol table: GPL-only symbols */ \ + __ksymtab_gpl : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) { \ + __start___ksymtab_gpl = .; \ +- KEEP(*(SORT(___ksymtab_gpl+*))) \ ++ SYMTAB_KEEP_GPL \ + __stop___ksymtab_gpl = .; \ + } \ + \ +@@ -476,7 +486,7 @@ + \ + /* Kernel symbol table: strings */ \ + __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) { \ +- *(__ksymtab_strings) \ ++ *(__ksymtab_strings+*) \ + } \ + \ + /* __*init sections */ \ +@@ -905,6 +915,8 @@ + EXIT_TEXT \ + EXIT_DATA \ + EXIT_CALL \ ++ SYMTAB_DISCARD \ ++ SYMTAB_DISCARD_GPL \ + *(.discard) \ + *(.discard.*) \ + *(.modinfo) \ +--- a/include/linux/export.h ++++ b/include/linux/export.h +@@ -98,18 +98,26 @@ struct kernel_symbol { + + #else + ++#ifdef MODULE ++#define __EXPORT_SUFFIX(sym) ++#else ++#define __EXPORT_SUFFIX(sym) "+" #sym ++#endif ++ + #define ___export_symbol_common(sym, sec) \ + extern typeof(sym) sym; \ + __CRC_SYMBOL(sym, sec); \ + static const char __kstrtab_##sym[] \ +- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \ ++ __attribute__((section("__ksymtab_strings" \ ++ __EXPORT_SUFFIX(sym)), used, aligned(1))) \ + = #sym \ + + /* For every exported symbol, place a struct in the __ksymtab section */ + #define ___EXPORT_SYMBOL_NS(sym, sec, ns) \ + ___export_symbol_common(sym, sec); \ + static const char __kstrtabns_##sym[] \ +- __attribute__((section("__ksymtab_strings"), used, aligned(1))) \ ++ __attribute__((section("__ksymtab_strings" \ ++ __EXPORT_SUFFIX(sym)), used, aligned(1))) \ + = #ns; \ + __KSYMTAB_ENTRY_NS(sym, sec) + +--- a/scripts/Makefile.build ++++ b/scripts/Makefile.build +@@ -350,7 +350,7 @@ targets += $(extra-y) $(MAKECMDGOALS) $( + # Linker scripts preprocessor (.lds.S -> .lds) + # --------------------------------------------------------------------------- + quiet_cmd_cpp_lds_S = LDS $@ +- cmd_cpp_lds_S = $(CPP) $(cpp_flags) -P -U$(ARCH) \ ++ cmd_cpp_lds_S = $(CPP) $(EXTRA_LDSFLAGS) $(cpp_flags) -P -U$(ARCH) \ + -D__ASSEMBLY__ -DLINKER_SCRIPT -o $@ $< + + $(obj)/%.lds: $(src)/%.lds.S FORCE diff --git a/root/target/linux/generic/hack-5.4/230-openwrt_lzma_options.patch b/root/target/linux/generic/hack-5.4/230-openwrt_lzma_options.patch new file mode 100755 index 00000000..809ccbc1 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/230-openwrt_lzma_options.patch @@ -0,0 +1,71 @@ +From b3d00b452467f621317953d9e4c6f9ae8dcfd271 Mon Sep 17 00:00:00 2001 +From: Imre Kaloz +Date: Fri, 7 Jul 2017 17:06:55 +0200 +Subject: use the openwrt lzma options for now + +lede-commit: 548de949f392049420a6a1feeef118b30ab8ea8c +Signed-off-by: Imre Kaloz +--- + lib/decompress.c | 1 + + scripts/Makefile.lib | 2 +- + usr/gen_initramfs_list.sh | 10 +++++----- + 3 files changed, 7 insertions(+), 6 deletions(-) + +--- a/lib/decompress.c ++++ b/lib/decompress.c +@@ -49,6 +49,7 @@ static const struct compress_format comp + { {0x1f, 0x9e}, "gzip", gunzip }, + { {0x42, 0x5a}, "bzip2", bunzip2 }, + { {0x5d, 0x00}, "lzma", unlzma }, ++ { {0x6d, 0x00}, "lzma-openwrt", unlzma }, + { {0xfd, 0x37}, "xz", unxz }, + { {0x89, 0x4c}, "lzo", unlzo }, + { {0x02, 0x21}, "lz4", unlz4 }, +--- a/scripts/Makefile.lib ++++ b/scripts/Makefile.lib +@@ -328,7 +328,7 @@ quiet_cmd_bzip2 = BZIP2 $@ + # 
--------------------------------------------------------------------------- + + quiet_cmd_lzma = LZMA $@ +- cmd_lzma = { cat $(real-prereqs) | $(LZMA) -9; $(size_append); } > $@ ++ cmd_lzma = { cat $(real-prereqs) | $(LZMA) e -d20 -lc1 -lp2 -pb2 -eos -si -so; $(size_append); } > $@ + + quiet_cmd_lzo = LZO $@ + cmd_lzo = { cat $(real-prereqs) | $(KLZOP) -9; $(size_append); } > $@ +--- a/usr/gen_initramfs_list.sh ++++ b/usr/gen_initramfs_list.sh +@@ -229,7 +229,7 @@ cpio_list= + output="/dev/stdout" + output_file="" + is_cpio_compressed= +-compr="gzip -n -9 -f" ++compr="gzip -n -9 -f -" + + arg="$1" + case "$arg" in +@@ -245,13 +245,13 @@ case "$arg" in + output=${cpio_list} + echo "$output_file" | grep -q "\.gz$" \ + && [ -x "`which gzip 2> /dev/null`" ] \ +- && compr="gzip -n -9 -f" ++ && compr="gzip -n -9 -f -" + echo "$output_file" | grep -q "\.bz2$" \ + && [ -x "`which bzip2 2> /dev/null`" ] \ +- && compr="bzip2 -9 -f" ++ && compr="bzip2 -9 -f -" + echo "$output_file" | grep -q "\.lzma$" \ + && [ -x "`which lzma 2> /dev/null`" ] \ +- && compr="lzma -9 -f" ++ && compr="lzma e -d20 -lc1 -lp2 -pb2 -eos -si -so" + echo "$output_file" | grep -q "\.xz$" \ + && [ -x "`which xz 2> /dev/null`" ] \ + && compr="xz --check=crc32 --lzma2=dict=1MiB" +@@ -320,7 +320,7 @@ if [ ! -z ${output_file} ]; then + if [ "${is_cpio_compressed}" = "compressed" ]; then + cat ${cpio_tfile} > ${output_file} + else +- (cat ${cpio_tfile} | ${compr} - > ${output_file}) \ ++ (cat ${cpio_tfile} | ${compr} > ${output_file}) \ + || (rm -f ${output_file} ; false) + fi + [ -z ${cpio_file} ] && rm ${cpio_tfile} diff --git a/root/target/linux/generic/hack-5.4/249-udp-tunnel-selection.patch b/root/target/linux/generic/hack-5.4/249-udp-tunnel-selection.patch new file mode 100755 index 00000000..2c74298d --- /dev/null +++ b/root/target/linux/generic/hack-5.4/249-udp-tunnel-selection.patch @@ -0,0 +1,11 @@ +--- a/net/ipv4/Kconfig ++++ b/net/ipv4/Kconfig +@@ -315,7 +315,7 @@ config NET_IPVTI + on top. + + config NET_UDP_TUNNEL +- tristate ++ tristate "IP: UDP tunneling support" + select NET_IP_TUNNEL + default n + diff --git a/root/target/linux/generic/hack-5.4/250-netfilter_depends.patch b/root/target/linux/generic/hack-5.4/250-netfilter_depends.patch new file mode 100755 index 00000000..d03cb531 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/250-netfilter_depends.patch @@ -0,0 +1,27 @@ +From: Felix Fietkau +Subject: hack: net: remove bogus netfilter dependencies + +lede-commit: 589d2a377dee27d206fc3725325309cf649e4df6 +Signed-off-by: Felix Fietkau +--- + net/netfilter/Kconfig | 2 -- + 1 file changed, 2 deletions(-) + +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -228,7 +228,6 @@ config NF_CONNTRACK_FTP + + config NF_CONNTRACK_H323 + tristate "H.323 protocol support" +- depends on IPV6 || IPV6=n + depends on NETFILTER_ADVANCED + help + H.323 is a VoIP signalling protocol from ITU-T. 
As one of the most +@@ -1088,7 +1087,6 @@ config NETFILTER_XT_TARGET_SECMARK + + config NETFILTER_XT_TARGET_TCPMSS + tristate '"TCPMSS" target support' +- depends on IPV6 || IPV6=n + default m if NETFILTER_ADVANCED=n + ---help--- + This option adds a `TCPMSS' target, which allows you to alter the diff --git a/root/target/linux/generic/hack-5.4/251-sound_kconfig.patch b/root/target/linux/generic/hack-5.4/251-sound_kconfig.patch new file mode 100755 index 00000000..f593417c --- /dev/null +++ b/root/target/linux/generic/hack-5.4/251-sound_kconfig.patch @@ -0,0 +1,199 @@ +From da3c50704f14132f4adf80d48e9a4cd5d46e54c9 Mon Sep 17 00:00:00 2001 +From: John Crispin +Date: Fri, 7 Jul 2017 17:09:21 +0200 +Subject: kconfig: owrt specifc dependencies + +Signed-off-by: John Crispin +--- + crypto/Kconfig | 10 +++++----- + drivers/bcma/Kconfig | 1 + + drivers/ssb/Kconfig | 3 ++- + lib/Kconfig | 8 ++++---- + net/netfilter/Kconfig | 2 +- + net/wireless/Kconfig | 17 ++++++++++------- + sound/core/Kconfig | 4 ++-- + 7 files changed, 25 insertions(+), 20 deletions(-) + +--- a/crypto/Kconfig ++++ b/crypto/Kconfig +@@ -33,7 +33,7 @@ config CRYPTO_FIPS + this is. + + config CRYPTO_ALGAPI +- tristate ++ tristate "ALGAPI" + select CRYPTO_ALGAPI2 + help + This option provides the API for cryptographic algorithms. +@@ -42,7 +42,7 @@ config CRYPTO_ALGAPI2 + tristate + + config CRYPTO_AEAD +- tristate ++ tristate "AEAD" + select CRYPTO_AEAD2 + select CRYPTO_ALGAPI + +@@ -53,7 +53,7 @@ config CRYPTO_AEAD2 + select CRYPTO_RNG2 + + config CRYPTO_BLKCIPHER +- tristate ++ tristate "BLKCIPHER" + select CRYPTO_BLKCIPHER2 + select CRYPTO_ALGAPI + +@@ -63,7 +63,7 @@ config CRYPTO_BLKCIPHER2 + select CRYPTO_RNG2 + + config CRYPTO_HASH +- tristate ++ tristate "HASH" + select CRYPTO_HASH2 + select CRYPTO_ALGAPI + +@@ -72,7 +72,7 @@ config CRYPTO_HASH2 + select CRYPTO_ALGAPI2 + + config CRYPTO_RNG +- tristate ++ tristate "RNG" + select CRYPTO_RNG2 + select CRYPTO_ALGAPI + +--- a/drivers/bcma/Kconfig ++++ b/drivers/bcma/Kconfig +@@ -16,6 +16,7 @@ if BCMA + # Support for Block-I/O. SELECT this from the driver that needs it. + config BCMA_BLOCKIO + bool ++ default y + + config BCMA_HOST_PCI_POSSIBLE + bool +--- a/drivers/ssb/Kconfig ++++ b/drivers/ssb/Kconfig +@@ -29,6 +29,7 @@ config SSB_SPROM + config SSB_BLOCKIO + bool + depends on SSB ++ default y + + config SSB_PCIHOST_POSSIBLE + bool +@@ -49,7 +50,7 @@ config SSB_PCIHOST + config SSB_B43_PCI_BRIDGE + bool + depends on SSB_PCIHOST +- default n ++ default y + + config SSB_PCMCIAHOST_POSSIBLE + bool +--- a/lib/Kconfig ++++ b/lib/Kconfig +@@ -402,16 +402,16 @@ config BCH_CONST_T + # Textsearch support is select'ed if needed + # + config TEXTSEARCH +- bool ++ bool "Textsearch support" + + config TEXTSEARCH_KMP +- tristate ++ tristate "Textsearch KMP" + + config TEXTSEARCH_BM +- tristate ++ tristate "Textsearch BM" + + config TEXTSEARCH_FSM +- tristate ++ tristate "Textsearch FSM" + + config BTREE + bool +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -11,7 +11,7 @@ config NETFILTER_INGRESS + infrastructure. 
+ + config NETFILTER_NETLINK +- tristate ++ tristate "Netfilter NFNETLINK interface" + + config NETFILTER_FAMILY_BRIDGE + bool +--- a/net/wireless/Kconfig ++++ b/net/wireless/Kconfig +@@ -1,6 +1,6 @@ + # SPDX-License-Identifier: GPL-2.0-only + config WIRELESS_EXT +- bool ++ bool "Wireless extensions" + + config WEXT_CORE + def_bool y +@@ -12,10 +12,10 @@ config WEXT_PROC + depends on WEXT_CORE + + config WEXT_SPY +- bool ++ bool "WEXT_SPY" + + config WEXT_PRIV +- bool ++ bool "WEXT_PRIV" + + config CFG80211 + tristate "cfg80211 - wireless configuration API" +@@ -203,7 +203,7 @@ config CFG80211_WEXT_EXPORT + endif # CFG80211 + + config LIB80211 +- tristate ++ tristate "LIB80211" + default n + help + This options enables a library of common routines used +@@ -212,17 +212,17 @@ config LIB80211 + Drivers should select this themselves if needed. + + config LIB80211_CRYPT_WEP +- tristate ++ tristate "LIB80211_CRYPT_WEP" + select CRYPTO_LIB_ARC4 + + config LIB80211_CRYPT_CCMP +- tristate ++ tristate "LIB80211_CRYPT_CCMP" + select CRYPTO + select CRYPTO_AES + select CRYPTO_CCM + + config LIB80211_CRYPT_TKIP +- tristate ++ tristate "LIB80211_CRYPT_TKIP" + select CRYPTO_LIB_ARC4 + + config LIB80211_DEBUG +--- a/sound/core/Kconfig ++++ b/sound/core/Kconfig +@@ -17,7 +17,7 @@ config SND_DMAENGINE_PCM + tristate + + config SND_HWDEP +- tristate ++ tristate "Sound hardware support" + + config SND_SEQ_DEVICE + tristate +@@ -27,7 +27,7 @@ config SND_RAWMIDI + select SND_SEQ_DEVICE if SND_SEQUENCER != n + + config SND_COMPRESS_OFFLOAD +- tristate ++ tristate "Compression offloading support" + + config SND_JACK + bool diff --git a/root/target/linux/generic/hack-5.4/259-regmap_dynamic.patch b/root/target/linux/generic/hack-5.4/259-regmap_dynamic.patch new file mode 100755 index 00000000..812e1824 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/259-regmap_dynamic.patch @@ -0,0 +1,125 @@ +From 811d9e2268a62b830cfe93cd8bc929afcb8b198b Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Sat, 15 Jul 2017 21:12:38 +0200 +Subject: kernel: move regmap bloat out of the kernel image if it is only being used in modules + +lede-commit: 96f39119815028073583e4fca3a9c5fe9141e998 +Signed-off-by: Felix Fietkau +--- + drivers/base/regmap/Kconfig | 15 ++++++++++----- + drivers/base/regmap/Makefile | 12 ++++++++---- + drivers/base/regmap/regmap.c | 3 +++ + include/linux/regmap.h | 2 +- + 4 files changed, 22 insertions(+), 10 deletions(-) + +--- a/drivers/base/regmap/Kconfig ++++ b/drivers/base/regmap/Kconfig +@@ -4,9 +4,8 @@ + # subsystems should select the appropriate symbols. 
+
+ config REGMAP
+- default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_W1 || REGMAP_AC97 || REGMAP_MMIO || REGMAP_IRQ || REGMAP_SCCB || REGMAP_I3C)
+ select IRQ_DOMAIN if REGMAP_IRQ
+- bool
++ tristate
+
+ config REGCACHE_COMPRESSED
+ select LZO_COMPRESS
+@@ -18,38 +17,49 @@ config REGMAP_AC97
+
+ config REGMAP_I2C
+ tristate
++ select REGMAP
+ depends on I2C
+
+ config REGMAP_SLIMBUS
+ tristate
++ select REGMAP
+ depends on SLIMBUS
+
+ config REGMAP_SPI
+ tristate
++ select REGMAP
++ depends on SPI_MASTER
+ depends on SPI
+
+ config REGMAP_SPMI
+ tristate
++ select REGMAP
+ depends on SPMI
+
+ config REGMAP_W1
+ tristate
++ select REGMAP
+ depends on W1
+
+ config REGMAP_MMIO
+ tristate
++ select REGMAP
+
+ config REGMAP_IRQ
+ bool
++ select REGMAP
+
+ config REGMAP_SOUNDWIRE
+ tristate
++ select REGMAP
+ depends on SOUNDWIRE
+
+ config REGMAP_SCCB
+ tristate
++ select REGMAP
+ depends on I2C
+
+ config REGMAP_I3C
+ tristate
++ select REGMAP
+ depends on I3C
+--- a/drivers/base/regmap/Makefile
++++ b/drivers/base/regmap/Makefile
+@@ -2,10 +2,14 @@
+ # For include/trace/define_trace.h to include trace.h
+ CFLAGS_regmap.o := -I$(src)
+
+-obj-$(CONFIG_REGMAP) += regmap.o regcache.o
+-obj-$(CONFIG_REGMAP) += regcache-rbtree.o regcache-flat.o
+-obj-$(CONFIG_REGCACHE_COMPRESSED) += regcache-lzo.o
+-obj-$(CONFIG_DEBUG_FS) += regmap-debugfs.o
++regmap-core-objs = regmap.o regcache.o regcache-rbtree.o regcache-flat.o
++ifdef CONFIG_DEBUG_FS
++regmap-core-objs += regmap-debugfs.o
++endif
++ifdef CONFIG_REGCACHE_COMPRESSED
++regmap-core-objs += regcache-lzo.o
++endif
++obj-$(CONFIG_REGMAP) += regmap-core.o
+ obj-$(CONFIG_REGMAP_AC97) += regmap-ac97.o
+ obj-$(CONFIG_REGMAP_I2C) += regmap-i2c.o
+ obj-$(CONFIG_REGMAP_SLIMBUS) += regmap-slimbus.o
+--- a/drivers/base/regmap/regmap.c
++++ b/drivers/base/regmap/regmap.c
+@@ -9,6 +9,7 @@
+ #include <linux/device.h>
+ #include <linux/slab.h>
+ #include <linux/export.h>
++#include <linux/module.h>
+ #include <linux/mutex.h>
+ #include <linux/err.h>
+ #include <linux/of.h>
+@@ -3118,3 +3119,5 @@ static int __init regmap_initcall(void)
+ return 0;
+ }
+ postcore_initcall(regmap_initcall);
++
++MODULE_LICENSE("GPL");
+--- a/include/linux/regmap.h
++++ b/include/linux/regmap.h
+@@ -185,7 +185,7 @@ struct reg_sequence {
+ pollret ?: ((cond) ? 0 : -ETIMEDOUT); \
+ })
+
+-#ifdef CONFIG_REGMAP
++#if IS_REACHABLE(CONFIG_REGMAP)
+
+ enum regmap_endian {
+ /* Unspecified -> 0 -> Backwards compatible default */
diff --git a/root/target/linux/generic/hack-5.4/260-crypto_test_dependencies.patch b/root/target/linux/generic/hack-5.4/260-crypto_test_dependencies.patch
new file mode 100755
index 00000000..c1b0b855
--- /dev/null
+++ b/root/target/linux/generic/hack-5.4/260-crypto_test_dependencies.patch
@@ -0,0 +1,52 @@
+From fd1799b0bf5efa46dd3e6dfbbf3955564807e508 Mon Sep 17 00:00:00 2001
+From: Felix Fietkau
+Date: Fri, 7 Jul 2017 17:12:51 +0200
+Subject: kernel: prevent cryptomgr from pulling in useless extra dependencies for tests that are not run
+
+Reduces kernel size after LZMA by about 5k on MIPS
+
+lede-commit: 044c316167e076479a344c59905e5b435b84a77f
+Signed-off-by: Felix Fietkau
+---
+ crypto/Kconfig | 13 ++++++-------
+ crypto/algboss.c | 4 ++++
+ 2 files changed, 10 insertions(+), 7 deletions(-)
+
+--- a/crypto/Kconfig
++++ b/crypto/Kconfig
+@@ -120,13 +120,13 @@ config CRYPTO_MANAGER
+ cbc(aes).
+ + config CRYPTO_MANAGER2 +- def_tristate CRYPTO_MANAGER || (CRYPTO_MANAGER!=n && CRYPTO_ALGAPI=y) +- select CRYPTO_AEAD2 +- select CRYPTO_HASH2 +- select CRYPTO_BLKCIPHER2 +- select CRYPTO_AKCIPHER2 +- select CRYPTO_KPP2 +- select CRYPTO_ACOMP2 ++ def_tristate CRYPTO_MANAGER || (CRYPTO_MANAGER!=n && CRYPTO_ALGAPI=y && !CRYPTO_MANAGER_DISABLE_TESTS) ++ select CRYPTO_AEAD2 if !CRYPTO_MANAGER_DISABLE_TESTS ++ select CRYPTO_HASH2 if !CRYPTO_MANAGER_DISABLE_TESTS ++ select CRYPTO_BLKCIPHER2 if !CRYPTO_MANAGER_DISABLE_TESTS ++ select CRYPTO_AKCIPHER2 if !CRYPTO_MANAGER_DISABLE_TESTS ++ select CRYPTO_KPP2 if !CRYPTO_MANAGER_DISABLE_TESTS ++ select CRYPTO_ACOMP2 if !CRYPTO_MANAGER_DISABLE_TESTS + + config CRYPTO_USER + tristate "Userspace cryptographic algorithm configuration" +--- a/crypto/algboss.c ++++ b/crypto/algboss.c +@@ -240,8 +240,12 @@ static int cryptomgr_schedule_test(struc + type = alg->cra_flags; + + /* Do not test internal algorithms. */ ++#ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS ++ type |= CRYPTO_ALG_TESTED; ++#else + if (type & CRYPTO_ALG_INTERNAL) + type |= CRYPTO_ALG_TESTED; ++#endif + + param->type = type; + diff --git a/root/target/linux/generic/hack-5.4/260-lib-arc4-unhide.patch b/root/target/linux/generic/hack-5.4/260-lib-arc4-unhide.patch new file mode 100755 index 00000000..a7668acf --- /dev/null +++ b/root/target/linux/generic/hack-5.4/260-lib-arc4-unhide.patch @@ -0,0 +1,15 @@ +This makes it possible to select CONFIG_CRYPTO_LIB_ARC4 directly. We +need this to be able to compile this into the kernel and make use of it +from backports. + +--- a/lib/crypto/Kconfig ++++ b/lib/crypto/Kconfig +@@ -6,7 +6,7 @@ config CRYPTO_LIB_AES + tristate + + config CRYPTO_LIB_ARC4 +- tristate ++ tristate "ARC4 cipher library" + + config CRYPTO_ARCH_HAVE_LIB_BLAKE2S + tristate diff --git a/root/target/linux/generic/hack-5.4/280-rfkill-stubs.patch b/root/target/linux/generic/hack-5.4/280-rfkill-stubs.patch new file mode 100755 index 00000000..2e48aea1 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/280-rfkill-stubs.patch @@ -0,0 +1,84 @@ +From 236c1acdfef5958010ac9814a9872e0a46fd78ee Mon Sep 17 00:00:00 2001 +From: John Crispin +Date: Fri, 7 Jul 2017 17:13:44 +0200 +Subject: rfkill: add fake rfkill support + +allow building of modules depending on RFKILL even if RFKILL is not enabled. 
+ +Signed-off-by: John Crispin +--- + include/linux/rfkill.h | 2 +- + net/Makefile | 2 +- + net/rfkill/Kconfig | 14 +++++++++----- + net/rfkill/Makefile | 2 +- + 4 files changed, 12 insertions(+), 8 deletions(-) + +--- a/include/linux/rfkill.h ++++ b/include/linux/rfkill.h +@@ -64,7 +64,7 @@ struct rfkill_ops { + int (*set_block)(void *data, bool blocked); + }; + +-#if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE) ++#if defined(CONFIG_RFKILL_FULL) || defined(CONFIG_RFKILL_FULL_MODULE) + /** + * rfkill_alloc - Allocate rfkill structure + * @name: name of the struct -- the string is not copied internally +--- a/net/Makefile ++++ b/net/Makefile +@@ -53,7 +53,7 @@ obj-$(CONFIG_TIPC) += tipc/ + obj-$(CONFIG_NETLABEL) += netlabel/ + obj-$(CONFIG_IUCV) += iucv/ + obj-$(CONFIG_SMC) += smc/ +-obj-$(CONFIG_RFKILL) += rfkill/ ++obj-$(CONFIG_RFKILL_FULL) += rfkill/ + obj-$(CONFIG_NET_9P) += 9p/ + obj-$(CONFIG_CAIF) += caif/ + ifneq ($(CONFIG_DCB),) +--- a/net/rfkill/Kconfig ++++ b/net/rfkill/Kconfig +@@ -2,7 +2,11 @@ + # + # RF switch subsystem configuration + # +-menuconfig RFKILL ++config RFKILL ++ bool ++ default y ++ ++menuconfig RFKILL_FULL + tristate "RF switch subsystem support" + help + Say Y here if you want to have control over RF switches +@@ -14,19 +18,19 @@ menuconfig RFKILL + # LED trigger support + config RFKILL_LEDS + bool +- depends on RFKILL ++ depends on RFKILL_FULL + depends on LEDS_TRIGGERS = y || RFKILL = LEDS_TRIGGERS + default y + + config RFKILL_INPUT + bool "RF switch input support" if EXPERT +- depends on RFKILL ++ depends on RFKILL_FULL + depends on INPUT = y || RFKILL = INPUT + default y if !EXPERT + + config RFKILL_GPIO + tristate "GPIO RFKILL driver" +- depends on RFKILL ++ depends on RFKILL_FULL + depends on GPIOLIB || COMPILE_TEST + default n + help +--- a/net/rfkill/Makefile ++++ b/net/rfkill/Makefile +@@ -5,5 +5,5 @@ + + rfkill-y += core.o + rfkill-$(CONFIG_RFKILL_INPUT) += input.o +-obj-$(CONFIG_RFKILL) += rfkill.o ++obj-$(CONFIG_RFKILL_FULL) += rfkill.o + obj-$(CONFIG_RFKILL_GPIO) += rfkill-gpio.o diff --git a/root/target/linux/generic/hack-5.4/300-MIPS-r4k_cache-use-more-efficient-cache-blast.patch b/root/target/linux/generic/hack-5.4/300-MIPS-r4k_cache-use-more-efficient-cache-blast.patch new file mode 100755 index 00000000..aed08a5e --- /dev/null +++ b/root/target/linux/generic/hack-5.4/300-MIPS-r4k_cache-use-more-efficient-cache-blast.patch @@ -0,0 +1,64 @@ +From: Ben Menchaca +Date: Fri, 7 Jun 2013 18:35:22 -0500 +Subject: MIPS: r4k_cache: use more efficient cache blast + +Optimize the compiler output for larger cache blast cases that are +common for DMA-based networking. 
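+
+Stripped of the prot##/hitop macro plumbing, the new loop is a plain
+unroll-by-eight with a binary remainder ladder, roughly (cache_op()
+standing in for the generated prot##cache_op(hitop, ...) calls):
+
+	unsigned long addr = start & ~(lsize - 1);
+	unsigned long aend = (end + lsize - 1) & ~(lsize - 1);
+	int lines = (aend - addr) / lsize;
+
+	while (lines >= 8) {		/* bulk: eight cache ops per trip */
+		cache_op(addr);
+		cache_op(addr + lsize);
+		cache_op(addr + lsize * 2);
+		cache_op(addr + lsize * 3);
+		cache_op(addr + lsize * 4);
+		cache_op(addr + lsize * 5);
+		cache_op(addr + lsize * 6);
+		cache_op(addr + lsize * 7);
+		addr += lsize * 8;
+		lines -= 8;
+	}
+	if (lines & 4) {		/* remainder: 4, then 2, then 1 */
+		cache_op(addr);
+		cache_op(addr + lsize);
+		cache_op(addr + lsize * 2);
+		cache_op(addr + lsize * 3);
+		addr += lsize * 4;
+	}
+	if (lines & 2) {
+		cache_op(addr);
+		cache_op(addr + lsize);
+		addr += lsize * 2;
+	}
+	if (lines & 1)
+		cache_op(addr);
+
+Note that aend now rounds end up to a full cache line and the walk is
+driven by a line count, where the old code stepped addr up to an
+inclusive aend.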
+ +Signed-off-by: Ben Menchaca +Signed-off-by: Felix Fietkau +--- +--- a/arch/mips/include/asm/r4kcache.h ++++ b/arch/mips/include/asm/r4kcache.h +@@ -617,14 +617,46 @@ static inline void prot##extra##blast_## + unsigned long end) \ + { \ + unsigned long lsize = cpu_##desc##_line_size(); \ ++ unsigned long lsize_2 = lsize * 2; \ ++ unsigned long lsize_3 = lsize * 3; \ ++ unsigned long lsize_4 = lsize * 4; \ ++ unsigned long lsize_5 = lsize * 5; \ ++ unsigned long lsize_6 = lsize * 6; \ ++ unsigned long lsize_7 = lsize * 7; \ ++ unsigned long lsize_8 = lsize * 8; \ + unsigned long addr = start & ~(lsize - 1); \ +- unsigned long aend = (end - 1) & ~(lsize - 1); \ ++ unsigned long aend = (end + lsize - 1) & ~(lsize - 1); \ ++ int lines = (aend - addr) / lsize; \ + \ +- while (1) { \ ++ while (lines >= 8) { \ ++ prot##cache_op(hitop, addr); \ ++ prot##cache_op(hitop, addr + lsize); \ ++ prot##cache_op(hitop, addr + lsize_2); \ ++ prot##cache_op(hitop, addr + lsize_3); \ ++ prot##cache_op(hitop, addr + lsize_4); \ ++ prot##cache_op(hitop, addr + lsize_5); \ ++ prot##cache_op(hitop, addr + lsize_6); \ ++ prot##cache_op(hitop, addr + lsize_7); \ ++ addr += lsize_8; \ ++ lines -= 8; \ ++ } \ ++ \ ++ if (lines & 0x4) { \ ++ prot##cache_op(hitop, addr); \ ++ prot##cache_op(hitop, addr + lsize); \ ++ prot##cache_op(hitop, addr + lsize_2); \ ++ prot##cache_op(hitop, addr + lsize_3); \ ++ addr += lsize_4; \ ++ } \ ++ \ ++ if (lines & 0x2) { \ ++ prot##cache_op(hitop, addr); \ ++ prot##cache_op(hitop, addr + lsize); \ ++ addr += lsize_2; \ ++ } \ ++ \ ++ if (lines & 0x1) { \ + prot##cache_op(hitop, addr); \ +- if (addr == aend) \ +- break; \ +- addr += lsize; \ + } \ + } + diff --git a/root/target/linux/generic/hack-5.4/301-mips_image_cmdline_hack.patch b/root/target/linux/generic/hack-5.4/301-mips_image_cmdline_hack.patch new file mode 100755 index 00000000..ddae75f6 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/301-mips_image_cmdline_hack.patch @@ -0,0 +1,38 @@ +From: John Crispin +Subject: hack: kernel: add generic image_cmdline hack to MIPS targets + +lede-commit: d59f5b3a987a48508257a0ddbaeadc7909f9f976 +Signed-off-by: Gabor Juhos +--- + arch/mips/Kconfig | 4 ++++ + arch/mips/kernel/head.S | 6 ++++++ + 2 files changed, 10 insertions(+) + +--- a/arch/mips/Kconfig ++++ b/arch/mips/Kconfig +@@ -1164,6 +1164,10 @@ config SYNC_R4K + config MIPS_MACHINE + def_bool n + ++config IMAGE_CMDLINE_HACK ++ bool "OpenWrt specific image command line hack" ++ default n ++ + config NO_IOPORT_MAP + def_bool n + +--- a/arch/mips/kernel/head.S ++++ b/arch/mips/kernel/head.S +@@ -79,6 +79,12 @@ FEXPORT(__kernel_entry) + j kernel_entry + #endif /* CONFIG_BOOT_RAW */ + ++#ifdef CONFIG_IMAGE_CMDLINE_HACK ++ .ascii "CMDLINE:" ++EXPORT(__image_cmdline) ++ .fill 0x400 ++#endif /* CONFIG_IMAGE_CMDLINE_HACK */ ++ + __REF + + NESTED(kernel_entry, 16, sp) # kernel entry point diff --git a/root/target/linux/generic/hack-5.4/321-powerpc_crtsavres_prereq.patch b/root/target/linux/generic/hack-5.4/321-powerpc_crtsavres_prereq.patch new file mode 100755 index 00000000..8591705e --- /dev/null +++ b/root/target/linux/generic/hack-5.4/321-powerpc_crtsavres_prereq.patch @@ -0,0 +1,39 @@ +From 107c0964cb8db7ca28ac5199426414fdab3c274d Mon Sep 17 00:00:00 2001 +From: "Alexandros C. Couloumbis" +Date: Fri, 7 Jul 2017 17:14:51 +0200 +Subject: hack: arch: powerpc: drop register save/restore library from modules + +Upstream GCC uses a libgcc function for saving/restoring registers. 
This +makes the code bigger, and upstream kernels need to carry that function +for every single kernel module. Our GCC is patched to avoid those +references, so we can drop the extra bloat for modules. + +lede-commit: e8e1084654f50904e6bf77b70b2de3f137d7b3ec +Signed-off-by: Alexandros C. Couloumbis +--- + arch/powerpc/Makefile | 1 - + 1 file changed, 1 deletion(-) + +--- a/arch/powerpc/Makefile ++++ b/arch/powerpc/Makefile +@@ -61,20 +61,6 @@ machine-$(CONFIG_PPC64) += 64 + machine-$(CONFIG_CPU_LITTLE_ENDIAN) += le + UTS_MACHINE := $(subst $(space),,$(machine-y)) + +-# XXX This needs to be before we override LD below +-ifdef CONFIG_PPC32 +-KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o +-else +-KBUILD_LDS_MODULE += $(srctree)/arch/powerpc/kernel/module.lds +-ifeq ($(call ld-ifversion, -ge, 225000000, y),y) +-# Have the linker provide sfpr if possible. +-# There is a corresponding test in arch/powerpc/lib/Makefile +-KBUILD_LDFLAGS_MODULE += --save-restore-funcs +-else +-KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o +-endif +-endif +- + ifdef CONFIG_CPU_LITTLE_ENDIAN + KBUILD_CFLAGS += -mlittle-endian + KBUILD_LDFLAGS += -EL diff --git a/root/target/linux/generic/hack-5.4/400-block-fit-partition-parser.patch b/root/target/linux/generic/hack-5.4/400-block-fit-partition-parser.patch new file mode 100755 index 00000000..6b3267ef --- /dev/null +++ b/root/target/linux/generic/hack-5.4/400-block-fit-partition-parser.patch @@ -0,0 +1,176 @@ +--- a/block/partitions/Kconfig ++++ b/block/partitions/Kconfig +@@ -101,6 +101,13 @@ config ATARI_PARTITION + Say Y here if you would like to use hard disks under Linux which + were partitioned under the Atari OS. + ++config FIT_PARTITION ++ bool "Flattened-Image-Tree (FIT) partition support" if PARTITION_ADVANCED ++ default n ++ help ++ Say Y here if your system needs to mount the filesystem part of ++ a Flattened-Image-Tree (FIT) image commonly used with Das U-Boot. 
++ + config IBM_PARTITION + bool "IBM disk label and partition support" + depends on PARTITION_ADVANCED && S390 +--- a/block/partitions/Makefile ++++ b/block/partitions/Makefile +@@ -9,6 +9,7 @@ obj-$(CONFIG_ACORN_PARTITION) += acorn.o + obj-$(CONFIG_AMIGA_PARTITION) += amiga.o + obj-$(CONFIG_ATARI_PARTITION) += atari.o + obj-$(CONFIG_AIX_PARTITION) += aix.o ++obj-$(CONFIG_FIT_PARTITION) += fit.o + obj-$(CONFIG_CMDLINE_PARTITION) += cmdline.o + obj-$(CONFIG_MAC_PARTITION) += mac.o + obj-$(CONFIG_LDM_PARTITION) += ldm.o +--- a/drivers/mtd/ubi/block.c ++++ b/drivers/mtd/ubi/block.c +@@ -396,7 +396,7 @@ int ubiblock_create(struct ubi_volume_in + dev->leb_size = vi->usable_leb_size; + + /* Initialize the gendisk of this ubiblock device */ +- gd = alloc_disk(1); ++ gd = alloc_disk(0); + if (!gd) { + pr_err("UBI: block: alloc_disk failed\n"); + ret = -ENODEV; +@@ -413,6 +413,7 @@ int ubiblock_create(struct ubi_volume_in + goto out_put_disk; + } + gd->private_data = dev; ++ gd->flags |= GENHD_FL_EXT_DEVT; + sprintf(gd->disk_name, "ubiblock%d_%d", dev->ubi_num, dev->vol_id); + set_capacity(gd, disk_capacity); + dev->gd = gd; +--- a/block/partition-generic.c ++++ b/block/partition-generic.c +@@ -18,6 +18,10 @@ + #include + #include + #include ++#ifdef CONFIG_FIT_PARTITION ++#include ++#endif ++ + + #include "partitions/check.h" + +@@ -180,6 +184,18 @@ ssize_t part_fail_store(struct device *d + } + #endif + ++static ssize_t part_name_show(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct hd_struct *p = dev_to_part(dev); ++ ++ if (p->info && p->info->volname) ++ return sprintf(buf, "%s\n", p->info->volname); ++ ++ buf[0] = '\0'; ++ return 0; ++} ++ + static DEVICE_ATTR(partition, 0444, part_partition_show, NULL); + static DEVICE_ATTR(start, 0444, part_start_show, NULL); + static DEVICE_ATTR(size, 0444, part_size_show, NULL); +@@ -188,6 +204,7 @@ static DEVICE_ATTR(alignment_offset, 044 + static DEVICE_ATTR(discard_alignment, 0444, part_discard_alignment_show, NULL); + static DEVICE_ATTR(stat, 0444, part_stat_show, NULL); + static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL); ++static DEVICE_ATTR(name, 0444, part_name_show, NULL); + #ifdef CONFIG_FAIL_MAKE_REQUEST + static struct device_attribute dev_attr_fail = + __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store); +@@ -202,6 +219,7 @@ static struct attribute *part_attrs[] = + &dev_attr_discard_alignment.attr, + &dev_attr_stat.attr, + &dev_attr_inflight.attr, ++ &dev_attr_name.attr, + #ifdef CONFIG_FAIL_MAKE_REQUEST + &dev_attr_fail.attr, + #endif +@@ -634,6 +652,10 @@ rescan: + if (state->parts[p].flags & ADDPART_FLAG_RAID) + md_autodetect_dev(part_to_dev(part)->devt); + #endif ++#ifdef CONFIG_FIT_PARTITION ++ if ((state->parts[p].flags & ADDPART_FLAG_ROOTDEV) && ROOT_DEV == 0) ++ ROOT_DEV = part_to_dev(part)->devt; ++#endif + } + free_partitions(state); + return 0; +--- a/block/partitions/check.c ++++ b/block/partitions/check.c +@@ -33,6 +33,7 @@ + #include "ibm.h" + #include "ultrix.h" + #include "efi.h" ++#include "fit.h" + #include "karma.h" + #include "sysv68.h" + #include "cmdline.h" +@@ -73,6 +74,9 @@ static int (*check_part[])(struct parsed + #ifdef CONFIG_EFI_PARTITION + efi_partition, /* this must come before msdos */ + #endif ++#ifdef CONFIG_FIT_PARTITION ++ fit_partition, ++#endif + #ifdef CONFIG_SGI_PARTITION + sgi_partition, + #endif +--- a/include/linux/genhd.h ++++ b/include/linux/genhd.h +@@ -614,6 +614,7 @@ struct unixware_disklabel { + #define ADDPART_FLAG_NONE 0 + #define 
ADDPART_FLAG_RAID 1 + #define ADDPART_FLAG_WHOLEDISK 2 ++#define ADDPART_FLAG_ROOTDEV 4 + + extern int blk_alloc_devt(struct hd_struct *part, dev_t *devt); + extern void blk_free_devt(dev_t devt); +--- /dev/null ++++ b/block/partitions/fit.h +@@ -0,0 +1,3 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++int fit_partition(struct parsed_partitions *); ++int parse_fit_partitions(struct parsed_partitions *state, u64 start_sector, u64 nr_sectors, int *slot, int add_remain); +--- a/block/partitions/efi.c ++++ b/block/partitions/efi.c +@@ -681,6 +681,9 @@ int efi_partition(struct parsed_partitio + gpt_entry *ptes = NULL; + u32 i; + unsigned ssz = bdev_logical_block_size(state->bdev) / 512; ++#ifdef CONFIG_FIT_PARTITION ++ u32 extra_slot = 64; ++#endif + + if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) { + kfree(gpt); +@@ -722,6 +725,11 @@ int efi_partition(struct parsed_partitio + label_count++; + } + state->parts[i + 1].has_info = true; ++#ifdef CONFIG_FIT_PARTITION ++ /* If this is a U-Boot FIT volume it may have subpartitions */ ++ if (!efi_guidcmp(ptes[i].partition_type_guid, PARTITION_LINUX_FIT_GUID)) ++ (void) parse_fit_partitions(state, start * ssz, size * ssz, &extra_slot, 1); ++#endif + } + kfree(ptes); + kfree(gpt); +--- a/block/partitions/efi.h ++++ b/block/partitions/efi.h +@@ -52,6 +52,9 @@ + #define PARTITION_LINUX_LVM_GUID \ + EFI_GUID( 0xe6d6d379, 0xf507, 0x44c2, \ + 0xa2, 0x3c, 0x23, 0x8f, 0x2a, 0x3d, 0xf9, 0x28) ++#define PARTITION_LINUX_FIT_GUID \ ++ EFI_GUID( 0xcae9be83, 0xb15f, 0x49cc, \ ++ 0x86, 0x3f, 0x08, 0x1b, 0x74, 0x4a, 0x2d, 0x93) + + typedef struct _gpt_header { + __le64 signature; diff --git a/root/target/linux/generic/hack-5.4/400-unlock_mx25l6406e_with_4bit_block_protect.patch b/root/target/linux/generic/hack-5.4/400-unlock_mx25l6406e_with_4bit_block_protect.patch new file mode 100755 index 00000000..8112fa7e --- /dev/null +++ b/root/target/linux/generic/hack-5.4/400-unlock_mx25l6406e_with_4bit_block_protect.patch @@ -0,0 +1,69 @@ +--- a/drivers/mtd/spi-nor/spi-nor.c ++++ b/drivers/mtd/spi-nor/spi-nor.c +@@ -196,7 +196,7 @@ struct flash_info { + u16 page_size; + u16 addr_width; + +- u16 flags; ++ u32 flags; + #define SECT_4K BIT(0) /* SPINOR_OP_BE_4K works uniformly */ + #define SPI_NOR_NO_ERASE BIT(1) /* No erase command needed */ + #define SST_WRITE BIT(2) /* use SST byte programming */ +@@ -233,6 +233,10 @@ struct flash_info { + #define SPI_NOR_SKIP_SFDP BIT(13) /* Skip parsing of SFDP tables */ + #define USE_CLSR BIT(14) /* use CLSR command */ + #define SPI_NOR_OCTAL_READ BIT(15) /* Flash supports Octal Read */ ++#define SPI_NOR_4BIT_BP BIT(17) /* ++ * Flash SR has 4 bit fields (BP0-3) ++ * for block protection. ++ */ + + /* Part specific fixup hooks. 
*/ + const struct spi_nor_fixups *fixups; +@@ -1985,6 +1989,9 @@ static int spi_nor_clear_sr_bp(struct sp + int ret; + u8 mask = SR_BP2 | SR_BP1 | SR_BP0; + ++ if (nor->flags & SNOR_F_HAS_4BIT_BP) ++ mask |= SR_BP3; ++ + ret = read_sr(nor); + if (ret < 0) { + dev_err(nor->dev, "error while reading status register\n"); +@@ -2338,7 +2345,7 @@ static const struct flash_info spi_nor_i + { "mx25l1606e", INFO(0xc22015, 0, 64 * 1024, 32, SECT_4K) }, + { "mx25l3205d", INFO(0xc22016, 0, 64 * 1024, 64, SECT_4K) }, + { "mx25l3255e", INFO(0xc29e16, 0, 64 * 1024, 64, SECT_4K) }, +- { "mx25l6405d", INFO(0xc22017, 0, 64 * 1024, 128, SECT_4K) }, ++ { "mx25l6405d", INFO(0xc22017, 0, 64 * 1024, 128, SECT_4K | SPI_NOR_4BIT_BP) }, + { "mx25u2033e", INFO(0xc22532, 0, 64 * 1024, 4, SECT_4K) }, + { "mx25u3235f", INFO(0xc22536, 0, 64 * 1024, 64, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, +@@ -5026,6 +5033,9 @@ int spi_nor_scan(struct spi_nor *nor, co + if (info->flags & USE_CLSR) + nor->flags |= SNOR_F_USE_CLSR; + ++ if (info->flags & SPI_NOR_4BIT_BP) ++ nor->flags |= SNOR_F_HAS_4BIT_BP; ++ + if (info->flags & SPI_NOR_NO_ERASE) + mtd->flags |= MTD_NO_ERASE; + +--- a/include/linux/mtd/spi-nor.h ++++ b/include/linux/mtd/spi-nor.h +@@ -127,6 +127,7 @@ + #define SR_BP0 BIT(2) /* Block protect 0 */ + #define SR_BP1 BIT(3) /* Block protect 1 */ + #define SR_BP2 BIT(4) /* Block protect 2 */ ++#define SR_BP3 BIT(5) /* Block protect 3 */ + #define SR_TB BIT(5) /* Top/Bottom protect */ + #define SR_SRWD BIT(7) /* SR write protect */ + /* Spansion/Cypress specific status bits */ +@@ -243,6 +244,7 @@ enum spi_nor_option_flags { + SNOR_F_4B_OPCODES = BIT(6), + SNOR_F_HAS_4BAIT = BIT(7), + SNOR_F_HAS_LOCK = BIT(8), ++ SNOR_F_HAS_4BIT_BP = BIT(12), + }; + + /** diff --git a/root/target/linux/generic/hack-5.4/531-debloat_lzma.patch b/root/target/linux/generic/hack-5.4/531-debloat_lzma.patch new file mode 100755 index 00000000..2f70eee3 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/531-debloat_lzma.patch @@ -0,0 +1,1040 @@ +From 3fd297761ac246c54d7723c57fca95c112b99465 Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Sat, 15 Jul 2017 21:15:44 +0200 +Subject: lzma: de-bloat the lzma library used by jffs2 + +lede-commit: 3fd1dd08fbcbb78b34efefd32c3032e5c99108d6 +Signed-off-by: Felix Fietkau +--- + include/linux/lzma/LzFind.h | 17 --- + include/linux/lzma/LzmaDec.h | 101 --------------- + include/linux/lzma/LzmaEnc.h | 20 --- + lib/lzma/LzFind.c | 287 ++++--------------------------------------- + lib/lzma/LzmaDec.c | 86 +------------ + lib/lzma/LzmaEnc.c | 172 ++------------------------ + 6 files changed, 42 insertions(+), 641 deletions(-) + +--- a/include/linux/lzma/LzFind.h ++++ b/include/linux/lzma/LzFind.h +@@ -55,11 +55,6 @@ typedef struct _CMatchFinder + + #define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos) + +-int MatchFinder_NeedMove(CMatchFinder *p); +-Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); +-void MatchFinder_MoveBlock(CMatchFinder *p); +-void MatchFinder_ReadIfRequired(CMatchFinder *p); +- + void MatchFinder_Construct(CMatchFinder *p); + + /* Conditions: +@@ -70,12 +65,6 @@ int MatchFinder_Create(CMatchFinder *p, + UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, + ISzAlloc *alloc); + void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc); +-void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems); +-void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); +- +-UInt32 * GetMatchesSpec1(UInt32 
lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son, +- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, +- UInt32 *distances, UInt32 maxLen); + + /* + Conditions: +@@ -102,12 +91,6 @@ typedef struct _IMatchFinder + + void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable); + +-void MatchFinder_Init(CMatchFinder *p); +-UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); +-UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); +-void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); +-void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); +- + #ifdef __cplusplus + } + #endif +--- a/include/linux/lzma/LzmaDec.h ++++ b/include/linux/lzma/LzmaDec.h +@@ -31,14 +31,6 @@ typedef struct _CLzmaProps + UInt32 dicSize; + } CLzmaProps; + +-/* LzmaProps_Decode - decodes properties +-Returns: +- SZ_OK +- SZ_ERROR_UNSUPPORTED - Unsupported properties +-*/ +- +-SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size); +- + + /* ---------- LZMA Decoder state ---------- */ + +@@ -70,8 +62,6 @@ typedef struct + + #define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; } + +-void LzmaDec_Init(CLzmaDec *p); +- + /* There are two types of LZMA streams: + 0) Stream with end mark. That end mark adds about 6 bytes to compressed size. + 1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */ +@@ -108,97 +98,6 @@ typedef enum + + /* ELzmaStatus is used only as output value for function call */ + +- +-/* ---------- Interfaces ---------- */ +- +-/* There are 3 levels of interfaces: +- 1) Dictionary Interface +- 2) Buffer Interface +- 3) One Call Interface +- You can select any of these interfaces, but don't mix functions from different +- groups for same object. */ +- +- +-/* There are two variants to allocate state for Dictionary Interface: +- 1) LzmaDec_Allocate / LzmaDec_Free +- 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs +- You can use variant 2, if you set dictionary buffer manually. +- For Buffer Interface you must always use variant 1. +- +-LzmaDec_Allocate* can return: +- SZ_OK +- SZ_ERROR_MEM - Memory allocation error +- SZ_ERROR_UNSUPPORTED - Unsupported properties +-*/ +- +-SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc); +-void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc); +- +-SRes LzmaDec_Allocate(CLzmaDec *state, const Byte *prop, unsigned propsSize, ISzAlloc *alloc); +-void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc); +- +-/* ---------- Dictionary Interface ---------- */ +- +-/* You can use it, if you want to eliminate the overhead for data copying from +- dictionary to some other external buffer. +- You must work with CLzmaDec variables directly in this interface. +- +- STEPS: +- LzmaDec_Constr() +- LzmaDec_Allocate() +- for (each new stream) +- { +- LzmaDec_Init() +- while (it needs more decompression) +- { +- LzmaDec_DecodeToDic() +- use data from CLzmaDec::dic and update CLzmaDec::dicPos +- } +- } +- LzmaDec_Free() +-*/ +- +-/* LzmaDec_DecodeToDic +- +- The decoding to internal dictionary buffer (CLzmaDec::dic). +- You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! +- +-finishMode: +- It has meaning only if the decoding reaches output limit (dicLimit). +- LZMA_FINISH_ANY - Decode just dicLimit bytes. +- LZMA_FINISH_END - Stream must be finished after dicLimit. 
+- +-Returns: +- SZ_OK +- status: +- LZMA_STATUS_FINISHED_WITH_MARK +- LZMA_STATUS_NOT_FINISHED +- LZMA_STATUS_NEEDS_MORE_INPUT +- LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK +- SZ_ERROR_DATA - Data error +-*/ +- +-SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, +- const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); +- +- +-/* ---------- Buffer Interface ---------- */ +- +-/* It's zlib-like interface. +- See LzmaDec_DecodeToDic description for information about STEPS and return results, +- but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need +- to work with CLzmaDec variables manually. +- +-finishMode: +- It has meaning only if the decoding reaches output limit (*destLen). +- LZMA_FINISH_ANY - Decode just destLen bytes. +- LZMA_FINISH_END - Stream must be finished after (*destLen). +-*/ +- +-SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, +- const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); +- +- + /* ---------- One Call Interface ---------- */ + + /* LzmaDecode +--- a/include/linux/lzma/LzmaEnc.h ++++ b/include/linux/lzma/LzmaEnc.h +@@ -31,9 +31,6 @@ typedef struct _CLzmaEncProps + } CLzmaEncProps; + + void LzmaEncProps_Init(CLzmaEncProps *p); +-void LzmaEncProps_Normalize(CLzmaEncProps *p); +-UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2); +- + + /* ---------- CLzmaEncHandle Interface ---------- */ + +@@ -53,26 +50,9 @@ CLzmaEncHandle LzmaEnc_Create(ISzAlloc * + void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig); + SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props); + SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size); +-SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, +- ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); + SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); + +-/* ---------- One Call Interface ---------- */ +- +-/* LzmaEncode +-Return code: +- SZ_OK - OK +- SZ_ERROR_MEM - Memory allocation error +- SZ_ERROR_PARAM - Incorrect paramater +- SZ_ERROR_OUTPUT_EOF - output buffer overflow +- SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) +-*/ +- +-SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, +- const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, +- ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); +- + #ifdef __cplusplus + } + #endif +--- a/lib/lzma/LzFind.c ++++ b/lib/lzma/LzFind.c +@@ -14,9 +14,15 @@ + + #define kStartMaxLen 3 + ++#if 0 ++#define DIRECT_INPUT p->directInput ++#else ++#define DIRECT_INPUT 1 ++#endif ++ + static void LzInWindow_Free(CMatchFinder *p, ISzAlloc *alloc) + { +- if (!p->directInput) ++ if (!DIRECT_INPUT) + { + alloc->Free(alloc, p->bufferBase); + p->bufferBase = 0; +@@ -28,7 +34,7 @@ static void LzInWindow_Free(CMatchFinder + static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAlloc *alloc) + { + UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv; +- if (p->directInput) ++ if (DIRECT_INPUT) + { + p->blockSize = blockSize; + return 1; +@@ -42,12 +48,12 @@ static int LzInWindow_Create(CMatchFinde + return (p->bufferBase != 0); + } + +-Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } +-Byte 
MatchFinder_GetIndexByte(CMatchFinder *p, Int32 index) { return p->buffer[index]; } ++static Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } ++static Byte MatchFinder_GetIndexByte(CMatchFinder *p, Int32 index) { return p->buffer[index]; } + +-UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; } ++static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; } + +-void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue) ++static void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue) + { + p->posLimit -= subValue; + p->pos -= subValue; +@@ -58,7 +64,7 @@ static void MatchFinder_ReadBlock(CMatch + { + if (p->streamEndWasReached || p->result != SZ_OK) + return; +- if (p->directInput) ++ if (DIRECT_INPUT) + { + UInt32 curSize = 0xFFFFFFFF - p->streamPos; + if (curSize > p->directInputRem) +@@ -89,7 +95,7 @@ static void MatchFinder_ReadBlock(CMatch + } + } + +-void MatchFinder_MoveBlock(CMatchFinder *p) ++static void MatchFinder_MoveBlock(CMatchFinder *p) + { + memmove(p->bufferBase, + p->buffer - p->keepSizeBefore, +@@ -97,22 +103,14 @@ void MatchFinder_MoveBlock(CMatchFinder + p->buffer = p->bufferBase + p->keepSizeBefore; + } + +-int MatchFinder_NeedMove(CMatchFinder *p) ++static int MatchFinder_NeedMove(CMatchFinder *p) + { +- if (p->directInput) ++ if (DIRECT_INPUT) + return 0; + /* if (p->streamEndWasReached) return 0; */ + return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); + } + +-void MatchFinder_ReadIfRequired(CMatchFinder *p) +-{ +- if (p->streamEndWasReached) +- return; +- if (p->keepSizeAfter >= p->streamPos - p->pos) +- MatchFinder_ReadBlock(p); +-} +- + static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p) + { + if (MatchFinder_NeedMove(p)) +@@ -268,7 +266,7 @@ static void MatchFinder_SetLimits(CMatch + p->posLimit = p->pos + limit; + } + +-void MatchFinder_Init(CMatchFinder *p) ++static void MatchFinder_Init(CMatchFinder *p) + { + UInt32 i; + for (i = 0; i < p->hashSizeSum; i++) +@@ -287,7 +285,7 @@ static UInt32 MatchFinder_GetSubValue(CM + return (p->pos - p->historySize - 1) & kNormalizeMask; + } + +-void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems) ++static void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems) + { + UInt32 i; + for (i = 0; i < numItems; i++) +@@ -319,38 +317,7 @@ static void MatchFinder_CheckLimits(CMat + MatchFinder_SetLimits(p); + } + +-static UInt32 * Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, +- UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, +- UInt32 *distances, UInt32 maxLen) +-{ +- son[_cyclicBufferPos] = curMatch; +- for (;;) +- { +- UInt32 delta = pos - curMatch; +- if (cutValue-- == 0 || delta >= _cyclicBufferSize) +- return distances; +- { +- const Byte *pb = cur - delta; +- curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? 
_cyclicBufferSize : 0)]; +- if (pb[maxLen] == cur[maxLen] && *pb == *cur) +- { +- UInt32 len = 0; +- while (++len != lenLimit) +- if (pb[len] != cur[len]) +- break; +- if (maxLen < len) +- { +- *distances++ = maxLen = len; +- *distances++ = delta - 1; +- if (len == lenLimit) +- return distances; +- } +- } +- } +- } +-} +- +-UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, ++static UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, + UInt32 *distances, UInt32 maxLen) + { +@@ -460,10 +427,10 @@ static void SkipMatchesSpec(UInt32 lenLi + p->buffer++; \ + if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p); + +-#define MOVE_POS_RET MOVE_POS return offset; +- + static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; } + ++#define MOVE_POS_RET MatchFinder_MovePos(p); return offset; ++ + #define GET_MATCHES_HEADER2(minLen, ret_op) \ + UInt32 lenLimit; UInt32 hashValue; const Byte *cur; UInt32 curMatch; \ + lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ +@@ -479,62 +446,7 @@ static void MatchFinder_MovePos(CMatchFi + distances + offset, maxLen) - distances); MOVE_POS_RET; + + #define SKIP_FOOTER \ +- SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS; +- +-static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +-{ +- UInt32 offset; +- GET_MATCHES_HEADER(2) +- HASH2_CALC; +- curMatch = p->hash[hashValue]; +- p->hash[hashValue] = p->pos; +- offset = 0; +- GET_MATCHES_FOOTER(offset, 1) +-} +- +-UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +-{ +- UInt32 offset; +- GET_MATCHES_HEADER(3) +- HASH_ZIP_CALC; +- curMatch = p->hash[hashValue]; +- p->hash[hashValue] = p->pos; +- offset = 0; +- GET_MATCHES_FOOTER(offset, 2) +-} +- +-static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +-{ +- UInt32 hash2Value, delta2, maxLen, offset; +- GET_MATCHES_HEADER(3) +- +- HASH3_CALC; +- +- delta2 = p->pos - p->hash[hash2Value]; +- curMatch = p->hash[kFix3HashSize + hashValue]; +- +- p->hash[hash2Value] = +- p->hash[kFix3HashSize + hashValue] = p->pos; +- +- +- maxLen = 2; +- offset = 0; +- if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) +- { +- for (; maxLen != lenLimit; maxLen++) +- if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) +- break; +- distances[0] = maxLen; +- distances[1] = delta2 - 1; +- offset = 2; +- if (maxLen == lenLimit) +- { +- SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); +- MOVE_POS_RET; +- } +- } +- GET_MATCHES_FOOTER(offset, maxLen) +-} ++ SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MatchFinder_MovePos(p); + + static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) + { +@@ -583,108 +495,6 @@ static UInt32 Bt4_MatchFinder_GetMatches + GET_MATCHES_FOOTER(offset, maxLen) + } + +-static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +-{ +- UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset; +- GET_MATCHES_HEADER(4) +- +- HASH4_CALC; +- +- delta2 = p->pos - p->hash[ hash2Value]; +- delta3 = p->pos - p->hash[kFix3HashSize + hash3Value]; +- curMatch = p->hash[kFix4HashSize + hashValue]; +- +- p->hash[ hash2Value] = +- p->hash[kFix3HashSize + hash3Value] = +- p->hash[kFix4HashSize + hashValue] = p->pos; +- +- maxLen = 1; +- offset = 0; +- if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) +- { +- distances[0] = 
maxLen = 2; +- distances[1] = delta2 - 1; +- offset = 2; +- } +- if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur) +- { +- maxLen = 3; +- distances[offset + 1] = delta3 - 1; +- offset += 2; +- delta2 = delta3; +- } +- if (offset != 0) +- { +- for (; maxLen != lenLimit; maxLen++) +- if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) +- break; +- distances[offset - 2] = maxLen; +- if (maxLen == lenLimit) +- { +- p->son[p->cyclicBufferPos] = curMatch; +- MOVE_POS_RET; +- } +- } +- if (maxLen < 3) +- maxLen = 3; +- offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), +- distances + offset, maxLen) - (distances)); +- MOVE_POS_RET +-} +- +-UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +-{ +- UInt32 offset; +- GET_MATCHES_HEADER(3) +- HASH_ZIP_CALC; +- curMatch = p->hash[hashValue]; +- p->hash[hashValue] = p->pos; +- offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), +- distances, 2) - (distances)); +- MOVE_POS_RET +-} +- +-static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +-{ +- do +- { +- SKIP_HEADER(2) +- HASH2_CALC; +- curMatch = p->hash[hashValue]; +- p->hash[hashValue] = p->pos; +- SKIP_FOOTER +- } +- while (--num != 0); +-} +- +-void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +-{ +- do +- { +- SKIP_HEADER(3) +- HASH_ZIP_CALC; +- curMatch = p->hash[hashValue]; +- p->hash[hashValue] = p->pos; +- SKIP_FOOTER +- } +- while (--num != 0); +-} +- +-static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +-{ +- do +- { +- UInt32 hash2Value; +- SKIP_HEADER(3) +- HASH3_CALC; +- curMatch = p->hash[kFix3HashSize + hashValue]; +- p->hash[hash2Value] = +- p->hash[kFix3HashSize + hashValue] = p->pos; +- SKIP_FOOTER +- } +- while (--num != 0); +-} +- + static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) + { + do +@@ -701,61 +511,12 @@ static void Bt4_MatchFinder_Skip(CMatchF + while (--num != 0); + } + +-static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +-{ +- do +- { +- UInt32 hash2Value, hash3Value; +- SKIP_HEADER(4) +- HASH4_CALC; +- curMatch = p->hash[kFix4HashSize + hashValue]; +- p->hash[ hash2Value] = +- p->hash[kFix3HashSize + hash3Value] = +- p->hash[kFix4HashSize + hashValue] = p->pos; +- p->son[p->cyclicBufferPos] = curMatch; +- MOVE_POS +- } +- while (--num != 0); +-} +- +-void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +-{ +- do +- { +- SKIP_HEADER(3) +- HASH_ZIP_CALC; +- curMatch = p->hash[hashValue]; +- p->hash[hashValue] = p->pos; +- p->son[p->cyclicBufferPos] = curMatch; +- MOVE_POS +- } +- while (--num != 0); +-} +- + void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable) + { + vTable->Init = (Mf_Init_Func)MatchFinder_Init; + vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte; + vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; + vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; +- if (!p->btMode) +- { +- vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; +- vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; +- } +- else if (p->numHashBytes == 2) +- { +- vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; +- vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; +- } +- else if (p->numHashBytes == 3) +- { +- vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; +- vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; +- } +- else +- { +- vTable->GetMatches = 
(Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; +- vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; +- } ++ vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; ++ vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; + } +--- a/lib/lzma/LzmaDec.c ++++ b/lib/lzma/LzmaDec.c +@@ -682,7 +682,7 @@ static void LzmaDec_InitRc(CLzmaDec *p, + p->needFlush = 0; + } + +-void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState) ++static void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState) + { + p->needFlush = 1; + p->remainLen = 0; +@@ -698,7 +698,7 @@ void LzmaDec_InitDicAndState(CLzmaDec *p + p->needInitState = 1; + } + +-void LzmaDec_Init(CLzmaDec *p) ++static void LzmaDec_Init(CLzmaDec *p) + { + p->dicPos = 0; + LzmaDec_InitDicAndState(p, True, True); +@@ -716,7 +716,7 @@ static void LzmaDec_InitStateReal(CLzmaD + p->needInitState = 0; + } + +-SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, ++static SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, + ELzmaFinishMode finishMode, ELzmaStatus *status) + { + SizeT inSize = *srcLen; +@@ -837,65 +837,13 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, Si + return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA; + } + +-SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) +-{ +- SizeT outSize = *destLen; +- SizeT inSize = *srcLen; +- *srcLen = *destLen = 0; +- for (;;) +- { +- SizeT inSizeCur = inSize, outSizeCur, dicPos; +- ELzmaFinishMode curFinishMode; +- SRes res; +- if (p->dicPos == p->dicBufSize) +- p->dicPos = 0; +- dicPos = p->dicPos; +- if (outSize > p->dicBufSize - dicPos) +- { +- outSizeCur = p->dicBufSize; +- curFinishMode = LZMA_FINISH_ANY; +- } +- else +- { +- outSizeCur = dicPos + outSize; +- curFinishMode = finishMode; +- } +- +- res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status); +- src += inSizeCur; +- inSize -= inSizeCur; +- *srcLen += inSizeCur; +- outSizeCur = p->dicPos - dicPos; +- memcpy(dest, p->dic + dicPos, outSizeCur); +- dest += outSizeCur; +- outSize -= outSizeCur; +- *destLen += outSizeCur; +- if (res != 0) +- return res; +- if (outSizeCur == 0 || outSize == 0) +- return SZ_OK; +- } +-} +- +-void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc) ++static void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc) + { + alloc->Free(alloc, p->probs); + p->probs = 0; + } + +-static void LzmaDec_FreeDict(CLzmaDec *p, ISzAlloc *alloc) +-{ +- alloc->Free(alloc, p->dic); +- p->dic = 0; +-} +- +-void LzmaDec_Free(CLzmaDec *p, ISzAlloc *alloc) +-{ +- LzmaDec_FreeProbs(p, alloc); +- LzmaDec_FreeDict(p, alloc); +-} +- +-SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size) ++static SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size) + { + UInt32 dicSize; + Byte d; +@@ -935,7 +883,7 @@ static SRes LzmaDec_AllocateProbs2(CLzma + return SZ_OK; + } + +-SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc) ++static SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc) + { + CLzmaProps propNew; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)); +@@ -943,28 +891,6 @@ SRes LzmaDec_AllocateProbs(CLzmaDec *p, + p->prop = propNew; + return SZ_OK; + } +- +-SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc) +-{ +- CLzmaProps propNew; +- SizeT dicBufSize; +- 
RINOK(LzmaProps_Decode(&propNew, props, propsSize)); +- RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); +- dicBufSize = propNew.dicSize; +- if (p->dic == 0 || dicBufSize != p->dicBufSize) +- { +- LzmaDec_FreeDict(p, alloc); +- p->dic = (Byte *)alloc->Alloc(alloc, dicBufSize); +- if (p->dic == 0) +- { +- LzmaDec_FreeProbs(p, alloc); +- return SZ_ERROR_MEM; +- } +- } +- p->dicBufSize = dicBufSize; +- p->prop = propNew; +- return SZ_OK; +-} + + SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, +--- a/lib/lzma/LzmaEnc.c ++++ b/lib/lzma/LzmaEnc.c +@@ -53,7 +53,7 @@ void LzmaEncProps_Init(CLzmaEncProps *p) + p->writeEndMark = 0; + } + +-void LzmaEncProps_Normalize(CLzmaEncProps *p) ++static void LzmaEncProps_Normalize(CLzmaEncProps *p) + { + int level = p->level; + if (level < 0) level = 5; +@@ -76,7 +76,7 @@ void LzmaEncProps_Normalize(CLzmaEncProp + #endif + } + +-UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2) ++static UInt32 __maybe_unused LzmaEncProps_GetDictSize(const CLzmaEncProps *props2) + { + CLzmaEncProps props = *props2; + LzmaEncProps_Normalize(&props); +@@ -93,7 +93,7 @@ UInt32 LzmaEncProps_GetDictSize(const CL + + #define BSR2_RET(pos, res) { unsigned long i; _BitScanReverse(&i, (pos)); res = (i + i) + ((pos >> (i - 1)) & 1); } + +-UInt32 GetPosSlot1(UInt32 pos) ++static UInt32 GetPosSlot1(UInt32 pos) + { + UInt32 res; + BSR2_RET(pos, res); +@@ -107,7 +107,7 @@ UInt32 GetPosSlot1(UInt32 pos) + #define kNumLogBits (9 + (int)sizeof(size_t) / 2) + #define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7) + +-void LzmaEnc_FastPosInit(Byte *g_FastPos) ++static void LzmaEnc_FastPosInit(Byte *g_FastPos) + { + int c = 2, slotFast; + g_FastPos[0] = 0; +@@ -339,58 +339,6 @@ typedef struct + CSaveState saveState; + } CLzmaEnc; + +-void LzmaEnc_SaveState(CLzmaEncHandle pp) +-{ +- CLzmaEnc *p = (CLzmaEnc *)pp; +- CSaveState *dest = &p->saveState; +- int i; +- dest->lenEnc = p->lenEnc; +- dest->repLenEnc = p->repLenEnc; +- dest->state = p->state; +- +- for (i = 0; i < kNumStates; i++) +- { +- memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i])); +- memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i])); +- } +- for (i = 0; i < kNumLenToPosStates; i++) +- memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i])); +- memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); +- memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); +- memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); +- memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); +- memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders)); +- memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder)); +- memcpy(dest->reps, p->reps, sizeof(p->reps)); +- memcpy(dest->litProbs, p->litProbs, (0x300 << p->lclp) * sizeof(CLzmaProb)); +-} +- +-void LzmaEnc_RestoreState(CLzmaEncHandle pp) +-{ +- CLzmaEnc *dest = (CLzmaEnc *)pp; +- const CSaveState *p = &dest->saveState; +- int i; +- dest->lenEnc = p->lenEnc; +- dest->repLenEnc = p->repLenEnc; +- dest->state = p->state; +- +- for (i = 0; i < kNumStates; i++) +- { +- memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i])); +- memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i])); +- } +- for (i = 0; i < kNumLenToPosStates; i++) +- memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i])); +- memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); +- 
memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); +- memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); +- memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); +- memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders)); +- memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder)); +- memcpy(dest->reps, p->reps, sizeof(p->reps)); +- memcpy(dest->litProbs, p->litProbs, (0x300 << dest->lclp) * sizeof(CLzmaProb)); +-} +- + SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) + { + CLzmaEnc *p = (CLzmaEnc *)pp; +@@ -600,7 +548,7 @@ static void LitEnc_EncodeMatched(CRangeE + while (symbol < 0x10000); + } + +-void LzmaEnc_InitPriceTables(UInt32 *ProbPrices) ++static void LzmaEnc_InitPriceTables(UInt32 *ProbPrices) + { + UInt32 i; + for (i = (1 << kNumMoveReducingBits) / 2; i < kBitModelTotal; i += (1 << kNumMoveReducingBits)) +@@ -1676,7 +1624,7 @@ static void FillDistancesPrices(CLzmaEnc + p->matchPriceCount = 0; + } + +-void LzmaEnc_Construct(CLzmaEnc *p) ++static void LzmaEnc_Construct(CLzmaEnc *p) + { + RangeEnc_Construct(&p->rc); + MatchFinder_Construct(&p->matchFinderBase); +@@ -1709,7 +1657,7 @@ CLzmaEncHandle LzmaEnc_Create(ISzAlloc * + return p; + } + +-void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAlloc *alloc) ++static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAlloc *alloc) + { + alloc->Free(alloc, p->litProbs); + alloc->Free(alloc, p->saveState.litProbs); +@@ -1717,7 +1665,7 @@ void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAl + p->saveState.litProbs = 0; + } + +-void LzmaEnc_Destruct(CLzmaEnc *p, ISzAlloc *alloc, ISzAlloc *allocBig) ++static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAlloc *alloc, ISzAlloc *allocBig) + { + #ifndef _7ZIP_ST + MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); +@@ -1947,7 +1895,7 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, U + return SZ_OK; + } + +-void LzmaEnc_Init(CLzmaEnc *p) ++static void LzmaEnc_Init(CLzmaEnc *p) + { + UInt32 i; + p->state = 0; +@@ -2005,7 +1953,7 @@ void LzmaEnc_Init(CLzmaEnc *p) + p->lpMask = (1 << p->lp) - 1; + } + +-void LzmaEnc_InitPrices(CLzmaEnc *p) ++static void LzmaEnc_InitPrices(CLzmaEnc *p) + { + if (!p->fastMode) + { +@@ -2037,26 +1985,6 @@ static SRes LzmaEnc_AllocAndInit(CLzmaEn + return SZ_OK; + } + +-static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, +- ISzAlloc *alloc, ISzAlloc *allocBig) +-{ +- CLzmaEnc *p = (CLzmaEnc *)pp; +- p->matchFinderBase.stream = inStream; +- p->needInit = 1; +- p->rc.outStream = outStream; +- return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); +-} +- +-SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, +- ISeqInStream *inStream, UInt32 keepWindowSize, +- ISzAlloc *alloc, ISzAlloc *allocBig) +-{ +- CLzmaEnc *p = (CLzmaEnc *)pp; +- p->matchFinderBase.stream = inStream; +- p->needInit = 1; +- return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); +-} +- + static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen) + { + p->matchFinderBase.directInput = 1; +@@ -2064,7 +1992,7 @@ static void LzmaEnc_SetInputBuf(CLzmaEnc + p->matchFinderBase.directInputRem = srcLen; + } + +-SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, ++static SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, + UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig) + { + CLzmaEnc *p = (CLzmaEnc *)pp; +@@ -2074,7 +2002,7 @@ SRes LzmaEnc_MemPrepare(CLzmaEncHandle p + return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); + } + +-void LzmaEnc_Finish(CLzmaEncHandle 
pp) ++static void LzmaEnc_Finish(CLzmaEncHandle pp) + { + #ifndef _7ZIP_ST + CLzmaEnc *p = (CLzmaEnc *)pp; +@@ -2107,53 +2035,6 @@ static size_t MyWrite(void *pp, const vo + return size; + } + +- +-UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) +-{ +- const CLzmaEnc *p = (CLzmaEnc *)pp; +- return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); +-} +- +-const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp) +-{ +- const CLzmaEnc *p = (CLzmaEnc *)pp; +- return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; +-} +- +-SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit, +- Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize) +-{ +- CLzmaEnc *p = (CLzmaEnc *)pp; +- UInt64 nowPos64; +- SRes res; +- CSeqOutStreamBuf outStream; +- +- outStream.funcTable.Write = MyWrite; +- outStream.data = dest; +- outStream.rem = *destLen; +- outStream.overflow = False; +- +- p->writeEndMark = False; +- p->finished = False; +- p->result = SZ_OK; +- +- if (reInit) +- LzmaEnc_Init(p); +- LzmaEnc_InitPrices(p); +- nowPos64 = p->nowPos64; +- RangeEnc_Init(&p->rc); +- p->rc.outStream = &outStream.funcTable; +- +- res = LzmaEnc_CodeOneBlock(p, True, desiredPackSize, *unpackSize); +- +- *unpackSize = (UInt32)(p->nowPos64 - nowPos64); +- *destLen -= outStream.rem; +- if (outStream.overflow) +- return SZ_ERROR_OUTPUT_EOF; +- +- return res; +-} +- + static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) + { + SRes res = SZ_OK; +@@ -2184,13 +2065,6 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, + return res; + } + +-SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress, +- ISzAlloc *alloc, ISzAlloc *allocBig) +-{ +- RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig)); +- return LzmaEnc_Encode2((CLzmaEnc *)pp, progress); +-} +- + SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) + { + CLzmaEnc *p = (CLzmaEnc *)pp; +@@ -2247,25 +2121,3 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp + return SZ_ERROR_OUTPUT_EOF; + return res; + } +- +-SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, +- const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, +- ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig) +-{ +- CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc); +- SRes res; +- if (p == 0) +- return SZ_ERROR_MEM; +- +- res = LzmaEnc_SetProps(p, props); +- if (res == SZ_OK) +- { +- res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize); +- if (res == SZ_OK) +- res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen, +- writeEndMark, progress, alloc, allocBig); +- } +- +- LzmaEnc_Destroy(p, alloc, allocBig); +- return res; +-} diff --git a/root/target/linux/generic/hack-5.4/550-loop-Report-EOPNOTSUPP-properly.patch b/root/target/linux/generic/hack-5.4/550-loop-Report-EOPNOTSUPP-properly.patch new file mode 100755 index 00000000..0e5447d4 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/550-loop-Report-EOPNOTSUPP-properly.patch @@ -0,0 +1,41 @@ +From 2e864386e62e702a343be2507062ee08d5dfc810 Mon Sep 17 00:00:00 2001 +From: Evan Green +Date: Thu, 14 Nov 2019 15:50:07 -0800 +Subject: loop: Report EOPNOTSUPP properly + +Properly plumb out EOPNOTSUPP from loop driver operations, which may +get returned when for instance a discard operation is attempted but not +supported by the underlying block device. 
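+
+Condensed to the two hunks below, the fix is to stop flattening the
+errno at completion time (a sketch, not the full functions):
+
+	/* loop_handle_cmd(): preserve -EOPNOTSUPP, map the rest to -EIO */
+	if (ret == -EOPNOTSUPP)
+		cmd->ret = ret;
+	else
+		cmd->ret = ret ? -EIO : 0;
+
+	/* lo_complete_rq(): convert the saved errno to a blk_status_t */
+	if (cmd->ret < 0)
+		ret = errno_to_blk_status(cmd->ret);
+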
Before this change, everything +was reported in the log as an I/O error, which is scary and not +helpful in debugging. + +Signed-off-by: Evan Green +Reviewed-by: Gwendal Grignou +Reviewed-by: Bart Van Assche +--- + drivers/block/loop.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/drivers/block/loop.c ++++ b/drivers/block/loop.c +@@ -462,7 +462,7 @@ static void lo_complete_rq(struct reques + if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) || + req_op(rq) != REQ_OP_READ) { + if (cmd->ret < 0) +- ret = BLK_STS_IOERR; ++ ret = errno_to_blk_status(cmd->ret); + goto end_io; + } + +@@ -1973,7 +1973,10 @@ static void loop_handle_cmd(struct loop_ + failed: + /* complete non-aio request */ + if (!cmd->use_aio || ret) { +- cmd->ret = ret ? -EIO : 0; ++ if (ret == -EOPNOTSUPP) ++ cmd->ret = ret; ++ else ++ cmd->ret = ret ? -EIO : 0; + blk_mq_complete_request(rq); + } + } diff --git a/root/target/linux/generic/hack-5.4/640-bridge-only-accept-EAP-locally.patch b/root/target/linux/generic/hack-5.4/640-bridge-only-accept-EAP-locally.patch new file mode 100755 index 00000000..a713aa3c --- /dev/null +++ b/root/target/linux/generic/hack-5.4/640-bridge-only-accept-EAP-locally.patch @@ -0,0 +1,82 @@ +From: Felix Fietkau +Date: Fri, 7 Jul 2017 17:18:54 +0200 +Subject: bridge: only accept EAP locally + +When bridging, do not forward EAP frames to other ports, only deliver +them locally, regardless of the state. + +Signed-off-by: Felix Fietkau +[add disable_eap_hack sysfs attribute] +Signed-off-by: Etienne Champetier +--- + +--- a/net/bridge/br_input.c ++++ b/net/bridge/br_input.c +@@ -103,10 +103,14 @@ int br_handle_frame_finish(struct net *n + } + } + ++ BR_INPUT_SKB_CB(skb)->brdev = br->dev; ++ ++ if (skb->protocol == htons(ETH_P_PAE) && !br->disable_eap_hack) ++ return br_pass_frame_up(skb); ++ + if (p->state == BR_STATE_LEARNING) + goto drop; + +- BR_INPUT_SKB_CB(skb)->brdev = br->dev; + BR_INPUT_SKB_CB(skb)->src_port_isolated = !!(p->flags & BR_ISOLATED); + + if (IS_ENABLED(CONFIG_INET) && +--- a/net/bridge/br_private.h ++++ b/net/bridge/br_private.h +@@ -345,6 +345,8 @@ struct net_bridge { + u16 group_fwd_mask; + u16 group_fwd_mask_required; + ++ bool disable_eap_hack; ++ + /* STP */ + bridge_id designated_root; + bridge_id bridge_id; +--- a/net/bridge/br_sysfs_br.c ++++ b/net/bridge/br_sysfs_br.c +@@ -166,6 +166,30 @@ static ssize_t group_fwd_mask_store(stru + } + static DEVICE_ATTR_RW(group_fwd_mask); + ++static ssize_t disable_eap_hack_show(struct device *d, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct net_bridge *br = to_bridge(d); ++ return sprintf(buf, "%u\n", br->disable_eap_hack); ++} ++ ++static int set_disable_eap_hack(struct net_bridge *br, unsigned long val) ++{ ++ br->disable_eap_hack = !!val; ++ ++ return 0; ++} ++ ++static ssize_t disable_eap_hack_store(struct device *d, ++ struct device_attribute *attr, ++ const char *buf, ++ size_t len) ++{ ++ return store_bridge_parm(d, buf, len, set_disable_eap_hack); ++} ++static DEVICE_ATTR_RW(disable_eap_hack); ++ + static ssize_t priority_show(struct device *d, struct device_attribute *attr, + char *buf) + { +@@ -851,6 +875,7 @@ static struct attribute *bridge_attrs[] + &dev_attr_ageing_time.attr, + &dev_attr_stp_state.attr, + &dev_attr_group_fwd_mask.attr, ++ &dev_attr_disable_eap_hack.attr, + &dev_attr_priority.attr, + &dev_attr_bridge_id.attr, + &dev_attr_root_id.attr, diff --git a/root/target/linux/generic/hack-5.4/645-netfilter-connmark-introduce-set-dscpmark.patch 
b/root/target/linux/generic/hack-5.4/645-netfilter-connmark-introduce-set-dscpmark.patch new file mode 100755 index 00000000..2d3fe01a --- /dev/null +++ b/root/target/linux/generic/hack-5.4/645-netfilter-connmark-introduce-set-dscpmark.patch @@ -0,0 +1,212 @@ +From eda40b8c8c82e0f2789d6bc8bf63846dce2e8f32 Mon Sep 17 00:00:00 2001 +From: Kevin Darbyshire-Bryant +Date: Sat, 23 Mar 2019 09:29:49 +0000 +Subject: [PATCH] netfilter: connmark: introduce set-dscpmark + +set-dscpmark is a method of storing the DSCP of an ip packet into +conntrack mark. In combination with a suitable tc filter action +(act_ctinfo) DSCP values are able to be stored in the mark on egress and +restored on ingress across links that otherwise alter or bleach DSCP. + +This is useful for qdiscs such as CAKE which are able to shape according +to policies based on DSCP. + +Ingress classification is traditionally a challenging task since +iptables rules haven't yet run and tc filter/eBPF programs are pre-NAT +lookups, hence are unable to see internal IPv4 addresses as used on the +typical home masquerading gateway. + +x_tables CONNMARK set-dscpmark target solves the problem of storing the +DSCP to the conntrack mark in a way suitable for the new act_ctinfo tc +action to restore. + +The set-dscpmark option accepts 2 parameters, a 32bit 'dscpmask' and a +32bit 'statemask'. The dscp mask must be 6 contiguous bits and +represents the area where the DSCP will be stored in the connmark. The +state mask is a minimum 1 bit length mask that must not overlap with the +dscpmask. It represents a flag which is set when the DSCP has been +stored in the conntrack mark. This is useful to implement a 'one shot' +iptables based classification where the 'complicated' iptables rules are +only run once to classify the connection on initial (egress) packet and +subsequent packets are all marked/restored with the same DSCP. A state +mask of zero disables the setting of a status bit/s. + +example syntax with a suitably modified iptables user space application: + +iptables -A QOS_MARK_eth0 -t mangle -j CONNMARK --set-dscpmark 0xfc000000/0x01000000 + +Would store the DSCP in the top 6 bits of the 32bit mark field, and use +the LSB of the top byte as the 'DSCP has been stored' marker. 
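+
+In C the store amounts to roughly the following (paraphrasing the
+connmark_tg_shift() hunk below; dscpmask and statemask travel as
+info->ctmark and info->ctmask in the v3 target info, and for the
+example masks the shift would be 26):
+
+	/* DSCP is the top six bits of the IPv4 TOS / IPv6 traffic class */
+	dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+
+	/* clear the DSCP area, store the value, latch the state flag */
+	newmark = (ct->mark & ~info->ctmark) |
+		  (info->ctmask | (dscp << info->shift_bits));
+
+The bit layout this produces for the example masks: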
+ +|----0xFC----conntrack mark----000000---| +| Bits 31-26 | bit 25 | bit24 |~~~ Bit 0| +| DSCP | unused | flag |unused | +|-----------------------0x01---000000---| + ^ ^ + | | + ---| Conditional flag + | set this when dscp +|-ip diffserv-| stored in mark +| 6 bits | +|-------------| + +an identically configured tc action to restore looks like: + +tc filter show dev eth0 ingress +filter parent ffff: protocol all pref 10 u32 chain 0 +filter parent ffff: protocol all pref 10 u32 chain 0 fh 800: ht divisor 1 +filter parent ffff: protocol all pref 10 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1: not_in_hw + match 00000000/00000000 at 0 + action order 1: ctinfo zone 0 pipe + index 2 ref 1 bind 1 dscp 0xfc000000/0x1000000 + + action order 2: mirred (Egress Redirect to device ifb4eth0) stolen + index 1 ref 1 bind 1 + +|----0xFC----conntrack mark----000000---| +| Bits 31-26 | bit 25 | bit24 |~~~ Bit 0| +| DSCP | unused | flag |unused | +|-----------------------0x01---000000---| + | | + | | + ---| Conditional flag + v only restore if set +|-ip diffserv-| +| 6 bits | +|-------------| + +Signed-off-by: Kevin Darbyshire-Bryant +--- + include/uapi/linux/netfilter/xt_connmark.h | 10 ++++ + net/netfilter/xt_connmark.c | 55 ++++++++++++++++++---- + 2 files changed, 57 insertions(+), 8 deletions(-) + +--- a/include/uapi/linux/netfilter/xt_connmark.h ++++ b/include/uapi/linux/netfilter/xt_connmark.h +@@ -20,6 +20,11 @@ enum { + }; + + enum { ++ XT_CONNMARK_VALUE = (1 << 0), ++ XT_CONNMARK_DSCP = (1 << 1) ++}; ++ ++enum { + D_SHIFT_LEFT = 0, + D_SHIFT_RIGHT, + }; +@@ -34,6 +39,11 @@ struct xt_connmark_tginfo2 { + __u8 shift_dir, shift_bits, mode; + }; + ++struct xt_connmark_tginfo3 { ++ __u32 ctmark, ctmask, nfmask; ++ __u8 shift_dir, shift_bits, mode, func; ++}; ++ + struct xt_connmark_mtinfo1 { + __u32 mark, mask; + __u8 invert; +--- a/net/netfilter/xt_connmark.c ++++ b/net/netfilter/xt_connmark.c +@@ -24,12 +24,13 @@ MODULE_ALIAS("ipt_connmark"); + MODULE_ALIAS("ip6t_connmark"); + + static unsigned int +-connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) ++connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo3 *info) + { + enum ip_conntrack_info ctinfo; + u_int32_t new_targetmark; + struct nf_conn *ct; + u_int32_t newmark; ++ u_int8_t dscp; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) +@@ -37,12 +38,24 @@ connmark_tg_shift(struct sk_buff *skb, c + + switch (info->mode) { + case XT_CONNMARK_SET: +- newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; +- if (info->shift_dir == D_SHIFT_RIGHT) +- newmark >>= info->shift_bits; +- else +- newmark <<= info->shift_bits; ++ newmark = ct->mark; ++ if (info->func & XT_CONNMARK_VALUE) { ++ newmark = (newmark & ~info->ctmask) ^ info->ctmark; ++ if (info->shift_dir == D_SHIFT_RIGHT) ++ newmark >>= info->shift_bits; ++ else ++ newmark <<= info->shift_bits; ++ } else if (info->func & XT_CONNMARK_DSCP) { ++ if (skb->protocol == htons(ETH_P_IP)) ++ dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; ++ else if (skb->protocol == htons(ETH_P_IPV6)) ++ dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; ++ else /* protocol doesn't have diffserv */ ++ break; + ++ newmark = (newmark & ~info->ctmark) | ++ (info->ctmask | (dscp << info->shift_bits)); ++ } + if (ct->mark != newmark) { + ct->mark = newmark; + nf_conntrack_event_cache(IPCT_MARK, ct); +@@ -81,20 +94,36 @@ static unsigned int + connmark_tg(struct sk_buff *skb, const struct xt_action_param *par) + { + const struct xt_connmark_tginfo1 *info = par->targinfo; +- const 
struct xt_connmark_tginfo2 info2 = { ++ const struct xt_connmark_tginfo3 info3 = { + .ctmark = info->ctmark, + .ctmask = info->ctmask, + .nfmask = info->nfmask, + .mode = info->mode, ++ .func = XT_CONNMARK_VALUE + }; + +- return connmark_tg_shift(skb, &info2); ++ return connmark_tg_shift(skb, &info3); + } + + static unsigned int + connmark_tg_v2(struct sk_buff *skb, const struct xt_action_param *par) + { + const struct xt_connmark_tginfo2 *info = par->targinfo; ++ const struct xt_connmark_tginfo3 info3 = { ++ .ctmark = info->ctmark, ++ .ctmask = info->ctmask, ++ .nfmask = info->nfmask, ++ .mode = info->mode, ++ .func = XT_CONNMARK_VALUE ++ }; ++ ++ return connmark_tg_shift(skb, &info3); ++} ++ ++static unsigned int ++connmark_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) ++{ ++ const struct xt_connmark_tginfo3 *info = par->targinfo; + + return connmark_tg_shift(skb, info); + } +@@ -165,6 +194,16 @@ static struct xt_target connmark_tg_reg[ + .targetsize = sizeof(struct xt_connmark_tginfo2), + .destroy = connmark_tg_destroy, + .me = THIS_MODULE, ++ }, ++ { ++ .name = "CONNMARK", ++ .revision = 3, ++ .family = NFPROTO_UNSPEC, ++ .checkentry = connmark_tg_check, ++ .target = connmark_tg_v3, ++ .targetsize = sizeof(struct xt_connmark_tginfo3), ++ .destroy = connmark_tg_destroy, ++ .me = THIS_MODULE, + } + }; + diff --git a/root/target/linux/generic/hack-5.4/647-netfilter-flow-acct.patch b/root/target/linux/generic/hack-5.4/647-netfilter-flow-acct.patch new file mode 100755 index 00000000..f9480d59 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/647-netfilter-flow-acct.patch @@ -0,0 +1,70 @@ +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -160,6 +160,8 @@ struct nf_flow_table_hw { + int nf_flow_table_hw_register(const struct nf_flow_table_hw *offload); + void nf_flow_table_hw_unregister(const struct nf_flow_table_hw *offload); + ++void nf_flow_table_acct(struct flow_offload *flow, struct sk_buff *skb, int dir); ++ + extern struct work_struct nf_flow_offload_hw_work; + + #define MODULE_ALIAS_NF_FLOWTABLE(family) \ +--- a/net/netfilter/nf_flow_table_core.c ++++ b/net/netfilter/nf_flow_table_core.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + + struct flow_offload_entry { + struct flow_offload flow; +@@ -164,6 +165,22 @@ void flow_offload_free(struct flow_offlo + } + EXPORT_SYMBOL_GPL(flow_offload_free); + ++void nf_flow_table_acct(struct flow_offload *flow, struct sk_buff *skb, int dir) ++{ ++ struct flow_offload_entry *entry; ++ struct nf_conn_acct *acct; ++ ++ entry = container_of(flow, struct flow_offload_entry, flow); ++ acct = nf_conn_acct_find(entry->ct); ++ if (acct) { ++ struct nf_conn_counter *counter = acct->counter; ++ ++ atomic64_inc(&counter[dir].packets); ++ atomic64_add(skb->len, &counter[dir].bytes); ++ } ++} ++EXPORT_SYMBOL_GPL(nf_flow_table_acct); ++ + static u32 flow_offload_hash(const void *data, u32 len, u32 seed) + { + const struct flow_offload_tuple *tuple = data; +--- a/net/netfilter/nf_flow_table_ip.c ++++ b/net/netfilter/nf_flow_table_ip.c +@@ -12,6 +12,7 @@ + #include + #include + #include ++ + /* For layer 4 checksum field offset. 
*/ + #include + #include +@@ -296,6 +297,7 @@ nf_flow_offload_ip_hook(void *priv, stru + skb->dev = outdev; + nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); + skb_dst_set_noref(skb, &rt->dst); ++ nf_flow_table_acct(flow, skb, dir); + neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb); + + return NF_STOLEN; +@@ -526,6 +528,7 @@ nf_flow_offload_ipv6_hook(void *priv, st + skb->dev = outdev; + nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); + skb_dst_set_noref(skb, &rt->dst); ++ nf_flow_table_acct(flow, skb, dir); + neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb); + + return NF_STOLEN; diff --git a/root/target/linux/generic/hack-5.4/650-netfilter-add-xt_OFFLOAD-target.patch b/root/target/linux/generic/hack-5.4/650-netfilter-add-xt_OFFLOAD-target.patch new file mode 100755 index 00000000..d584cb5c --- /dev/null +++ b/root/target/linux/generic/hack-5.4/650-netfilter-add-xt_OFFLOAD-target.patch @@ -0,0 +1,589 @@ +From: Felix Fietkau +Date: Tue, 20 Feb 2018 15:56:02 +0100 +Subject: [PATCH] netfilter: add xt_OFFLOAD target + +Signed-off-by: Felix Fietkau +--- + create mode 100644 net/netfilter/xt_OFFLOAD.c + +--- a/net/ipv4/netfilter/Kconfig ++++ b/net/ipv4/netfilter/Kconfig +@@ -56,8 +56,6 @@ config NF_TABLES_ARP + help + This option enables the ARP support for nf_tables. + +-endif # NF_TABLES +- + config NF_FLOW_TABLE_IPV4 + tristate "Netfilter flow table IPv4 module" + depends on NF_FLOW_TABLE +@@ -66,6 +64,8 @@ config NF_FLOW_TABLE_IPV4 + + To compile it as a module, choose M here. + ++endif # NF_TABLES ++ + config NF_DUP_IPV4 + tristate "Netfilter IPv4 packet duplication to alternate destination" + depends on !NF_CONNTRACK || NF_CONNTRACK +--- a/net/ipv6/netfilter/Kconfig ++++ b/net/ipv6/netfilter/Kconfig +@@ -45,7 +45,6 @@ config NFT_FIB_IPV6 + multicast or blackhole. + + endif # NF_TABLES_IPV6 +-endif # NF_TABLES + + config NF_FLOW_TABLE_IPV6 + tristate "Netfilter flow table IPv6 module" +@@ -55,6 +54,8 @@ config NF_FLOW_TABLE_IPV6 + + To compile it as a module, choose M here. + ++endif # NF_TABLES ++ + config NF_DUP_IPV6 + tristate "Netfilter IPv6 packet duplication to alternate destination" + depends on !NF_CONNTRACK || NF_CONNTRACK +--- a/net/netfilter/Kconfig ++++ b/net/netfilter/Kconfig +@@ -690,8 +690,6 @@ config NFT_FIB_NETDEV + + endif # NF_TABLES_NETDEV + +-endif # NF_TABLES +- + config NF_FLOW_TABLE_INET + tristate "Netfilter flow table mixed IPv4/IPv6 module" + depends on NF_FLOW_TABLE +@@ -700,11 +698,12 @@ config NF_FLOW_TABLE_INET + + To compile it as a module, choose M here. + ++endif # NF_TABLES ++ + config NF_FLOW_TABLE + tristate "Netfilter flow table module" + depends on NETFILTER_INGRESS + depends on NF_CONNTRACK +- depends on NF_TABLES + help + This option adds the flow table core infrastructure. 
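Combined with the flow-acct patch above, a minimal way to exercise the new target looks like this (a sketch: the ruleset is an assumption, and the FLOWOFFLOAD extension needs OpenWrt's patched iptables userspace plus conntrack-tools):

# keep per-connection packet/byte counters, which nf_flow_table_acct()
# continues to update even while packets bypass the netfilter chains
sysctl -w net.netfilter.nf_conntrack_acct=1
# offload established forwarded flows to the software fast path
iptables -I FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j FLOWOFFLOAD
# counters for offloaded connections remain visible
conntrack -L | head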
+ +@@ -993,6 +992,15 @@ config NETFILTER_XT_TARGET_NOTRACK + depends on NETFILTER_ADVANCED + select NETFILTER_XT_TARGET_CT + ++config NETFILTER_XT_TARGET_FLOWOFFLOAD ++ tristate '"FLOWOFFLOAD" target support' ++ depends on NF_FLOW_TABLE ++ depends on NETFILTER_INGRESS ++ help ++ This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload ++ module to speed up processing of packets by bypassing the usual ++ netfilter chains ++ + config NETFILTER_XT_TARGET_RATEEST + tristate '"RATEEST" target support' + depends on NETFILTER_ADVANCED +--- a/net/netfilter/Makefile ++++ b/net/netfilter/Makefile +@@ -141,6 +141,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIF + obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o + obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o + obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o ++obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o + obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o + obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o + obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o +--- /dev/null ++++ b/net/netfilter/xt_FLOWOFFLOAD.c +@@ -0,0 +1,427 @@ ++/* ++ * Copyright (C) 2018 Felix Fietkau ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static struct nf_flowtable nf_flowtable; ++static HLIST_HEAD(hooks); ++static DEFINE_SPINLOCK(hooks_lock); ++static struct delayed_work hook_work; ++ ++struct xt_flowoffload_hook { ++ struct hlist_node list; ++ struct nf_hook_ops ops; ++ struct net *net; ++ bool registered; ++ bool used; ++}; ++ ++static unsigned int ++xt_flowoffload_net_hook(void *priv, struct sk_buff *skb, ++ const struct nf_hook_state *state) ++{ ++ switch (skb->protocol) { ++ case htons(ETH_P_IP): ++ return nf_flow_offload_ip_hook(priv, skb, state); ++ case htons(ETH_P_IPV6): ++ return nf_flow_offload_ipv6_hook(priv, skb, state); ++ } ++ ++ return NF_ACCEPT; ++} ++ ++int nf_flow_table_iterate(struct nf_flowtable *flow_table, ++ void (*iter)(struct flow_offload *flow, void *data), ++ void *data); ++ ++static int ++xt_flowoffload_create_hook(struct net_device *dev) ++{ ++ struct xt_flowoffload_hook *hook; ++ struct nf_hook_ops *ops; ++ ++ hook = kzalloc(sizeof(*hook), GFP_ATOMIC); ++ if (!hook) ++ return -ENOMEM; ++ ++ ops = &hook->ops; ++ ops->pf = NFPROTO_NETDEV; ++ ops->hooknum = NF_NETDEV_INGRESS; ++ ops->priority = 10; ++ ops->priv = &nf_flowtable; ++ ops->hook = xt_flowoffload_net_hook; ++ ops->dev = dev; ++ ++ hlist_add_head(&hook->list, &hooks); ++ mod_delayed_work(system_power_efficient_wq, &hook_work, 0); ++ ++ return 0; ++} ++ ++static struct xt_flowoffload_hook * ++flow_offload_lookup_hook(struct net_device *dev) ++{ ++ struct xt_flowoffload_hook *hook; ++ ++ hlist_for_each_entry(hook, &hooks, list) { ++ if (hook->ops.dev == dev) ++ return hook; ++ } ++ ++ return NULL; ++} ++ ++static void ++xt_flowoffload_check_device(struct net_device *dev) ++{ ++ struct xt_flowoffload_hook *hook; ++ ++ spin_lock_bh(&hooks_lock); ++ hook = flow_offload_lookup_hook(dev); ++ if (hook) ++ hook->used = true; ++ else ++ xt_flowoffload_create_hook(dev); ++ spin_unlock_bh(&hooks_lock); ++} ++ ++static void ++xt_flowoffload_register_hooks(void) ++{ ++ struct xt_flowoffload_hook *hook; ++ ++restart: ++ hlist_for_each_entry(hook, &hooks, list) { ++ if (hook->registered) 
++ continue; ++ ++ hook->registered = true; ++ hook->net = dev_net(hook->ops.dev); ++ spin_unlock_bh(&hooks_lock); ++ nf_register_net_hook(hook->net, &hook->ops); ++ spin_lock_bh(&hooks_lock); ++ goto restart; ++ } ++ ++} ++ ++static void ++xt_flowoffload_cleanup_hooks(void) ++{ ++ struct xt_flowoffload_hook *hook; ++ ++restart: ++ hlist_for_each_entry(hook, &hooks, list) { ++ if (hook->used || !hook->registered) ++ continue; ++ ++ hlist_del(&hook->list); ++ spin_unlock_bh(&hooks_lock); ++ nf_unregister_net_hook(hook->net, &hook->ops); ++ kfree(hook); ++ spin_lock_bh(&hooks_lock); ++ goto restart; ++ } ++ ++} ++ ++static void ++xt_flowoffload_check_hook(struct flow_offload *flow, void *data) ++{ ++ struct flow_offload_tuple *tuple = &flow->tuplehash[0].tuple; ++ struct xt_flowoffload_hook *hook; ++ bool *found = data; ++ struct rtable *rt = (struct rtable *)tuple->dst_cache; ++ ++ spin_lock_bh(&hooks_lock); ++ hlist_for_each_entry(hook, &hooks, list) { ++ if (hook->ops.dev->ifindex != tuple->iifidx && ++ hook->ops.dev->ifindex != rt->dst.dev->ifindex) ++ continue; ++ ++ hook->used = true; ++ *found = true; ++ } ++ spin_unlock_bh(&hooks_lock); ++} ++ ++static void ++xt_flowoffload_hook_work(struct work_struct *work) ++{ ++ struct xt_flowoffload_hook *hook; ++ bool found = false; ++ int err; ++ ++ spin_lock_bh(&hooks_lock); ++ xt_flowoffload_register_hooks(); ++ hlist_for_each_entry(hook, &hooks, list) ++ hook->used = false; ++ spin_unlock_bh(&hooks_lock); ++ ++ err = nf_flow_table_iterate(&nf_flowtable, xt_flowoffload_check_hook, ++ &found); ++ if (err && err != -EAGAIN) ++ goto out; ++ ++ spin_lock_bh(&hooks_lock); ++ xt_flowoffload_cleanup_hooks(); ++ spin_unlock_bh(&hooks_lock); ++ ++out: ++ if (found) ++ queue_delayed_work(system_power_efficient_wq, &hook_work, HZ); ++} ++ ++static bool ++xt_flowoffload_skip(struct sk_buff *skb, int family) ++{ ++ if (skb_sec_path(skb)) ++ return true; ++ ++ if (family == NFPROTO_IPV4) { ++ const struct ip_options *opt = &(IPCB(skb)->opt); ++ ++ if (unlikely(opt->optlen)) ++ return true; ++ } ++ ++ return false; ++} ++ ++static struct dst_entry * ++xt_flowoffload_dst(const struct nf_conn *ct, enum ip_conntrack_dir dir, ++ const struct xt_action_param *par, int ifindex) ++{ ++ struct dst_entry *dst = NULL; ++ struct flowi fl; ++ ++ memset(&fl, 0, sizeof(fl)); ++ switch (xt_family(par)) { ++ case NFPROTO_IPV4: ++ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip; ++ fl.u.ip4.flowi4_oif = ifindex; ++ break; ++ case NFPROTO_IPV6: ++ fl.u.ip6.saddr = ct->tuplehash[dir].tuple.dst.u3.in6; ++ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6; ++ fl.u.ip6.flowi6_oif = ifindex; ++ break; ++ } ++ ++ nf_route(xt_net(par), &dst, &fl, false, xt_family(par)); ++ ++ return dst; ++} ++ ++static int ++xt_flowoffload_route(struct sk_buff *skb, const struct nf_conn *ct, ++ const struct xt_action_param *par, ++ struct nf_flow_route *route, enum ip_conntrack_dir dir) ++{ ++ struct dst_entry *this_dst, *other_dst; ++ ++ this_dst = xt_flowoffload_dst(ct, !dir, par, xt_out(par)->ifindex); ++ other_dst = xt_flowoffload_dst(ct, dir, par, xt_in(par)->ifindex); ++ ++ route->tuple[dir].dst = this_dst; ++ route->tuple[!dir].dst = other_dst; ++ ++ if (!this_dst || !other_dst) ++ return -ENOENT; ++ ++ if (dst_xfrm(this_dst) || dst_xfrm(other_dst)) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static unsigned int ++flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par) ++{ ++ const struct xt_flowoffload_target_info *info = par->targinfo; ++ struct tcphdr _tcph, *tcph 
= NULL; ++ enum ip_conntrack_info ctinfo; ++ enum ip_conntrack_dir dir; ++ struct nf_flow_route route; ++ struct flow_offload *flow = NULL; ++ struct nf_conn *ct; ++ struct net *net; ++ ++ if (xt_flowoffload_skip(skb, xt_family(par))) ++ return XT_CONTINUE; ++ ++ ct = nf_ct_get(skb, &ctinfo); ++ if (ct == NULL) ++ return XT_CONTINUE; ++ ++ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { ++ case IPPROTO_TCP: ++ if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) ++ return XT_CONTINUE; ++ ++ tcph = skb_header_pointer(skb, par->thoff, ++ sizeof(_tcph), &_tcph); ++ if (unlikely(!tcph || tcph->fin || tcph->rst)) ++ return XT_CONTINUE; ++ break; ++ case IPPROTO_UDP: ++ break; ++ default: ++ return XT_CONTINUE; ++ } ++ ++ if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) || ++ ct->status & IPS_SEQ_ADJUST) ++ return XT_CONTINUE; ++ ++ if (!nf_ct_is_confirmed(ct)) ++ return XT_CONTINUE; ++ ++ if (!xt_in(par) || !xt_out(par)) ++ return XT_CONTINUE; ++ ++ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status)) ++ return XT_CONTINUE; ++ ++ dir = CTINFO2DIR(ctinfo); ++ ++ if (xt_flowoffload_route(skb, ct, par, &route, dir) == 0) ++ flow = flow_offload_alloc(ct, &route); ++ ++ dst_release(route.tuple[dir].dst); ++ dst_release(route.tuple[!dir].dst); ++ ++ if (!flow) ++ goto err_flow_route; ++ ++ if (tcph) { ++ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; ++ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; ++ } ++ ++ if (flow_offload_add(&nf_flowtable, flow) < 0) ++ goto err_flow_add; ++ ++ xt_flowoffload_check_device(xt_in(par)); ++ xt_flowoffload_check_device(xt_out(par)); ++ ++ net = read_pnet(&nf_flowtable.ft_net); ++ if (!net) ++ write_pnet(&nf_flowtable.ft_net, xt_net(par)); ++ ++ if (info->flags & XT_FLOWOFFLOAD_HW) ++ nf_flow_offload_hw_add(xt_net(par), flow, ct); ++ ++ return XT_CONTINUE; ++ ++err_flow_add: ++ flow_offload_free(flow); ++err_flow_route: ++ clear_bit(IPS_OFFLOAD_BIT, &ct->status); ++ return XT_CONTINUE; ++} ++ ++ ++static int flowoffload_chk(const struct xt_tgchk_param *par) ++{ ++ struct xt_flowoffload_target_info *info = par->targinfo; ++ ++ if (info->flags & ~XT_FLOWOFFLOAD_MASK) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static struct xt_target offload_tg_reg __read_mostly = { ++ .family = NFPROTO_UNSPEC, ++ .name = "FLOWOFFLOAD", ++ .revision = 0, ++ .targetsize = sizeof(struct xt_flowoffload_target_info), ++ .usersize = sizeof(struct xt_flowoffload_target_info), ++ .checkentry = flowoffload_chk, ++ .target = flowoffload_tg, ++ .me = THIS_MODULE, ++}; ++ ++static int xt_flowoffload_table_init(struct nf_flowtable *table) ++{ ++ table->flags = NF_FLOWTABLE_F_HW; ++ nf_flow_table_init(table); ++ return 0; ++} ++ ++static void xt_flowoffload_table_cleanup(struct nf_flowtable *table) ++{ ++ nf_flow_table_free(table); ++} ++ ++static int flow_offload_netdev_event(struct notifier_block *this, ++ unsigned long event, void *ptr) ++{ ++ struct xt_flowoffload_hook *hook = NULL; ++ struct net_device *dev = netdev_notifier_info_to_dev(ptr); ++ ++ if (event != NETDEV_UNREGISTER) ++ return NOTIFY_DONE; ++ ++ spin_lock_bh(&hooks_lock); ++ hook = flow_offload_lookup_hook(dev); ++ if (hook) { ++ hlist_del(&hook->list); ++ } ++ spin_unlock_bh(&hooks_lock); ++ if (hook) { ++ nf_unregister_net_hook(hook->net, &hook->ops); ++ kfree(hook); ++ } ++ ++ nf_flow_table_cleanup(dev); ++ ++ return NOTIFY_DONE; ++} ++ ++static struct notifier_block flow_offload_netdev_notifier = { ++ .notifier_call = flow_offload_netdev_event, ++}; ++ ++static int __init 
xt_flowoffload_tg_init(void) ++{ ++ int ret; ++ ++ register_netdevice_notifier(&flow_offload_netdev_notifier); ++ ++ INIT_DELAYED_WORK(&hook_work, xt_flowoffload_hook_work); ++ ++ ret = xt_flowoffload_table_init(&nf_flowtable); ++ if (ret) ++ return ret; ++ ++ ret = xt_register_target(&offload_tg_reg); ++ if (ret) ++ xt_flowoffload_table_cleanup(&nf_flowtable); ++ ++ return ret; ++} ++ ++static void __exit xt_flowoffload_tg_exit(void) ++{ ++ xt_unregister_target(&offload_tg_reg); ++ xt_flowoffload_table_cleanup(&nf_flowtable); ++ unregister_netdevice_notifier(&flow_offload_netdev_notifier); ++} ++ ++MODULE_LICENSE("GPL"); ++module_init(xt_flowoffload_tg_init); ++module_exit(xt_flowoffload_tg_exit); +--- a/net/netfilter/nf_flow_table_core.c ++++ b/net/netfilter/nf_flow_table_core.c +@@ -7,7 +7,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -338,8 +337,7 @@ flow_offload_lookup(struct nf_flowtable + } + EXPORT_SYMBOL_GPL(flow_offload_lookup); + +-static int +-nf_flow_table_iterate(struct nf_flowtable *flow_table, ++int nf_flow_table_iterate(struct nf_flowtable *flow_table, + void (*iter)(struct flow_offload *flow, void *data), + void *data) + { +@@ -372,6 +370,7 @@ nf_flow_table_iterate(struct nf_flowtabl + + return err; + } ++EXPORT_SYMBOL_GPL(nf_flow_table_iterate); + + static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data) + { +--- /dev/null ++++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h +@@ -0,0 +1,17 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef _XT_FLOWOFFLOAD_H ++#define _XT_FLOWOFFLOAD_H ++ ++#include ++ ++enum { ++ XT_FLOWOFFLOAD_HW = 1 << 0, ++ ++ XT_FLOWOFFLOAD_MASK = XT_FLOWOFFLOAD_HW ++}; ++ ++struct xt_flowoffload_target_info { ++ __u32 flags; ++}; ++ ++#endif /* _XT_FLOWOFFLOAD_H */ +--- a/include/net/netfilter/nf_flow_table.h ++++ b/include/net/netfilter/nf_flow_table.h +@@ -130,6 +130,10 @@ static inline void flow_offload_dead(str + flow->flags |= FLOW_OFFLOAD_DYING; + } + ++int nf_flow_table_iterate(struct nf_flowtable *flow_table, ++ void (*iter)(struct flow_offload *flow, void *data), ++ void *data); ++ + int nf_flow_snat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir); diff --git a/root/target/linux/generic/hack-5.4/651-wireless_mesh_header.patch b/root/target/linux/generic/hack-5.4/651-wireless_mesh_header.patch new file mode 100755 index 00000000..f545d8eb --- /dev/null +++ b/root/target/linux/generic/hack-5.4/651-wireless_mesh_header.patch @@ -0,0 +1,24 @@ +From 6d3bc769657b0ee7c7506dad9911111c4226a7ea Mon Sep 17 00:00:00 2001 +From: Imre Kaloz +Date: Fri, 7 Jul 2017 17:21:05 +0200 +Subject: mac80211: increase wireless mesh header size + +lede-commit 3d4466cfd8f75f717efdb1f96fdde3c70d865fc1 +Signed-off-by: Imre Kaloz +--- + include/linux/netdevice.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -138,8 +138,8 @@ static inline bool dev_xmit_complete(int + + #if defined(CONFIG_HYPERV_NET) + # define LL_MAX_HEADER 128 +-#elif defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25) +-# if defined(CONFIG_MAC80211_MESH) ++#elif defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25) || 1 ++# if defined(CONFIG_MAC80211_MESH) || 1 + # define LL_MAX_HEADER 128 + # else + # define LL_MAX_HEADER 96 diff --git a/root/target/linux/generic/hack-5.4/660-fq_codel_defaults.patch b/root/target/linux/generic/hack-5.4/660-fq_codel_defaults.patch 
new file mode 100755 index 00000000..46bf0e3b --- /dev/null +++ b/root/target/linux/generic/hack-5.4/660-fq_codel_defaults.patch @@ -0,0 +1,27 @@ +From a6ccb238939b25851474a279b20367fd24a0e816 Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Fri, 7 Jul 2017 17:21:53 +0200 +Subject: hack: net: fq_codel: tune defaults for small devices + +Assume that x86_64 devices always have a big memory and do not need this +optimization compared to devices with only 32 MB or 64 MB RAM. + +Signed-off-by: Felix Fietkau +--- + net/sched/sch_fq_codel.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sched/sch_fq_codel.c ++++ b/net/sched/sch_fq_codel.c +@@ -470,7 +470,11 @@ static int fq_codel_init(struct Qdisc *s + + sch->limit = 10*1024; + q->flows_cnt = 1024; ++#ifdef CONFIG_X86_64 + q->memory_limit = 32 << 20; /* 32 MBytes */ ++#else ++ q->memory_limit = 4 << 20; /* 4 MBytes */ ++#endif + q->drop_batch_size = 64; + q->quantum = psched_mtu(qdisc_dev(sch)); + INIT_LIST_HEAD(&q->new_flows); diff --git a/root/target/linux/generic/hack-5.4/661-use_fq_codel_by_default.patch b/root/target/linux/generic/hack-5.4/661-use_fq_codel_by_default.patch new file mode 100755 index 00000000..11f1a25b --- /dev/null +++ b/root/target/linux/generic/hack-5.4/661-use_fq_codel_by_default.patch @@ -0,0 +1,100 @@ +From 1d418f7e88035ed7a94073f6354246c66e9193e9 Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Fri, 7 Jul 2017 17:22:58 +0200 +Subject: fq_codel: switch default qdisc from pfifo_fast to fq_codel and remove pfifo_fast + +Signed-off-by: Felix Fietkau +--- + include/net/sch_generic.h | 3 ++- + net/sched/Kconfig | 3 ++- + net/sched/sch_api.c | 2 +- + net/sched/sch_fq_codel.c | 3 ++- + net/sched/sch_generic.c | 4 ++-- + 5 files changed, 9 insertions(+), 6 deletions(-) + +--- a/include/net/sch_generic.h ++++ b/include/net/sch_generic.h +@@ -617,12 +617,13 @@ extern struct Qdisc_ops noop_qdisc_ops; + extern struct Qdisc_ops pfifo_fast_ops; + extern struct Qdisc_ops mq_qdisc_ops; + extern struct Qdisc_ops noqueue_qdisc_ops; ++extern struct Qdisc_ops fq_codel_qdisc_ops; + extern const struct Qdisc_ops *default_qdisc_ops; + static inline const struct Qdisc_ops * + get_default_qdisc_ops(const struct net_device *dev, int ntx) + { + return ntx < dev->real_num_tx_queues ? 
+- default_qdisc_ops : &pfifo_fast_ops; ++ default_qdisc_ops : &fq_codel_qdisc_ops; + } + + struct Qdisc_class_common { +--- a/net/sched/Kconfig ++++ b/net/sched/Kconfig +@@ -4,8 +4,9 @@ + # + + menuconfig NET_SCHED +- bool "QoS and/or fair queueing" ++ def_bool y + select NET_SCH_FIFO ++ select NET_SCH_FQ_CODEL + ---help--- + When the kernel has several packets to send out over a network + device, it has to decide which ones to send first, which ones to +--- a/net/sched/sch_api.c ++++ b/net/sched/sch_api.c +@@ -2278,7 +2278,7 @@ static int __init pktsched_init(void) + return err; + } + +- register_qdisc(&pfifo_fast_ops); ++ register_qdisc(&fq_codel_qdisc_ops); + register_qdisc(&pfifo_qdisc_ops); + register_qdisc(&bfifo_qdisc_ops); + register_qdisc(&pfifo_head_drop_qdisc_ops); +--- a/net/sched/sch_fq_codel.c ++++ b/net/sched/sch_fq_codel.c +@@ -710,7 +710,7 @@ static const struct Qdisc_class_ops fq_c + .walk = fq_codel_walk, + }; + +-static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = { ++struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = { + .cl_ops = &fq_codel_class_ops, + .id = "fq_codel", + .priv_size = sizeof(struct fq_codel_sched_data), +@@ -725,6 +725,7 @@ static struct Qdisc_ops fq_codel_qdisc_o + .dump_stats = fq_codel_dump_stats, + .owner = THIS_MODULE, + }; ++EXPORT_SYMBOL(fq_codel_qdisc_ops); + + static int __init fq_codel_module_init(void) + { +--- a/net/sched/sch_generic.c ++++ b/net/sched/sch_generic.c +@@ -32,7 +32,7 @@ + #include + + /* Qdisc to use by default */ +-const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops; ++const struct Qdisc_ops *default_qdisc_ops = &fq_codel_qdisc_ops; + EXPORT_SYMBOL(default_qdisc_ops); + + static void qdisc_maybe_clear_missed(struct Qdisc *q, +@@ -1079,12 +1079,12 @@ static void attach_one_default_qdisc(str + void *_unused) + { + struct Qdisc *qdisc; +- const struct Qdisc_ops *ops = default_qdisc_ops; ++ const struct Qdisc_ops *ops = &fq_codel_qdisc_ops; + + if (dev->priv_flags & IFF_NO_QUEUE) + ops = &noqueue_qdisc_ops; + else if(dev->type == ARPHRD_CAN) +- ops = &pfifo_fast_ops; ++ ops = &fq_codel_qdisc_ops; + + qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL); + if (!qdisc) { diff --git a/root/target/linux/generic/hack-5.4/662-remove_pfifo_fast.patch b/root/target/linux/generic/hack-5.4/662-remove_pfifo_fast.patch new file mode 100755 index 00000000..9df3a825 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/662-remove_pfifo_fast.patch @@ -0,0 +1,243 @@ +From b531d492d5ef1cf9dba0f4888eb5fd8624a6d762 Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Fri, 7 Jul 2017 17:23:42 +0200 +Subject: net: sched: switch default qdisc from pfifo_fast to fq_codel and remove pfifo_fast + +Signed-off-by: Felix Fietkau +--- + net/sched/sch_generic.c | 140 ------------------------------------------------ + 1 file changed, 140 deletions(-) + +--- a/net/sched/sch_generic.c ++++ b/net/sched/sch_generic.c +@@ -620,230 +620,6 @@ struct Qdisc_ops noqueue_qdisc_ops __rea + .owner = THIS_MODULE, + }; + +-static const u8 prio2band[TC_PRIO_MAX + 1] = { +- 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 +-}; +- +-/* 3-band FIFO queue: old style, but should be a bit faster than +- generic prio+fifo combination. 
+- */ +- +-#define PFIFO_FAST_BANDS 3 +- +-/* +- * Private data for a pfifo_fast scheduler containing: +- * - rings for priority bands +- */ +-struct pfifo_fast_priv { +- struct skb_array q[PFIFO_FAST_BANDS]; +-}; +- +-static inline struct skb_array *band2list(struct pfifo_fast_priv *priv, +- int band) +-{ +- return &priv->q[band]; +-} +- +-static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, +- struct sk_buff **to_free) +-{ +- int band = prio2band[skb->priority & TC_PRIO_MAX]; +- struct pfifo_fast_priv *priv = qdisc_priv(qdisc); +- struct skb_array *q = band2list(priv, band); +- unsigned int pkt_len = qdisc_pkt_len(skb); +- int err; +- +- err = skb_array_produce(q, skb); +- +- if (unlikely(err)) { +- if (qdisc_is_percpu_stats(qdisc)) +- return qdisc_drop_cpu(skb, qdisc, to_free); +- else +- return qdisc_drop(skb, qdisc, to_free); +- } +- +- qdisc_update_stats_at_enqueue(qdisc, pkt_len); +- return NET_XMIT_SUCCESS; +-} +- +-static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) +-{ +- struct pfifo_fast_priv *priv = qdisc_priv(qdisc); +- struct sk_buff *skb = NULL; +- bool need_retry = true; +- int band; +- +-retry: +- for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) { +- struct skb_array *q = band2list(priv, band); +- +- if (__skb_array_empty(q)) +- continue; +- +- skb = __skb_array_consume(q); +- } +- if (likely(skb)) { +- qdisc_update_stats_at_dequeue(qdisc, skb); +- } else if (need_retry && +- test_bit(__QDISC_STATE_MISSED, &qdisc->state)) { +- /* Delay clearing the STATE_MISSED here to reduce +- * the overhead of the second spin_trylock() in +- * qdisc_run_begin() and __netif_schedule() calling +- * in qdisc_run_end(). +- */ +- clear_bit(__QDISC_STATE_MISSED, &qdisc->state); +- +- /* Make sure dequeuing happens after clearing +- * STATE_MISSED. +- */ +- smp_mb__after_atomic(); +- +- need_retry = false; +- +- goto retry; +- } else { +- WRITE_ONCE(qdisc->empty, true); +- } +- +- return skb; +-} +- +-static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc) +-{ +- struct pfifo_fast_priv *priv = qdisc_priv(qdisc); +- struct sk_buff *skb = NULL; +- int band; +- +- for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) { +- struct skb_array *q = band2list(priv, band); +- +- skb = __skb_array_peek(q); +- } +- +- return skb; +-} +- +-static void pfifo_fast_reset(struct Qdisc *qdisc) +-{ +- int i, band; +- struct pfifo_fast_priv *priv = qdisc_priv(qdisc); +- +- for (band = 0; band < PFIFO_FAST_BANDS; band++) { +- struct skb_array *q = band2list(priv, band); +- struct sk_buff *skb; +- +- /* NULL ring is possible if destroy path is due to a failed +- * skb_array_init() in pfifo_fast_init() case. 
+- */ +- if (!q->ring.queue) +- continue; +- +- while ((skb = __skb_array_consume(q)) != NULL) +- kfree_skb(skb); +- } +- +- if (qdisc_is_percpu_stats(qdisc)) { +- for_each_possible_cpu(i) { +- struct gnet_stats_queue *q; +- +- q = per_cpu_ptr(qdisc->cpu_qstats, i); +- q->backlog = 0; +- q->qlen = 0; +- } +- } +-} +- +-static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) +-{ +- struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; +- +- memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1); +- if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) +- goto nla_put_failure; +- return skb->len; +- +-nla_put_failure: +- return -1; +-} +- +-static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt, +- struct netlink_ext_ack *extack) +-{ +- unsigned int qlen = qdisc_dev(qdisc)->tx_queue_len; +- struct pfifo_fast_priv *priv = qdisc_priv(qdisc); +- int prio; +- +- /* guard against zero length rings */ +- if (!qlen) +- return -EINVAL; +- +- for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { +- struct skb_array *q = band2list(priv, prio); +- int err; +- +- err = skb_array_init(q, qlen, GFP_KERNEL); +- if (err) +- return -ENOMEM; +- } +- +- /* Can by-pass the queue discipline */ +- qdisc->flags |= TCQ_F_CAN_BYPASS; +- return 0; +-} +- +-static void pfifo_fast_destroy(struct Qdisc *sch) +-{ +- struct pfifo_fast_priv *priv = qdisc_priv(sch); +- int prio; +- +- for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { +- struct skb_array *q = band2list(priv, prio); +- +- /* NULL ring is possible if destroy path is due to a failed +- * skb_array_init() in pfifo_fast_init() case. +- */ +- if (!q->ring.queue) +- continue; +- /* Destroy ring but no need to kfree_skb because a call to +- * pfifo_fast_reset() has already done that work. +- */ +- ptr_ring_cleanup(&q->ring, NULL); +- } +-} +- +-static int pfifo_fast_change_tx_queue_len(struct Qdisc *sch, +- unsigned int new_len) +-{ +- struct pfifo_fast_priv *priv = qdisc_priv(sch); +- struct skb_array *bands[PFIFO_FAST_BANDS]; +- int prio; +- +- for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { +- struct skb_array *q = band2list(priv, prio); +- +- bands[prio] = q; +- } +- +- return skb_array_resize_multiple(bands, PFIFO_FAST_BANDS, new_len, +- GFP_KERNEL); +-} +- +-struct Qdisc_ops pfifo_fast_ops __read_mostly = { +- .id = "pfifo_fast", +- .priv_size = sizeof(struct pfifo_fast_priv), +- .enqueue = pfifo_fast_enqueue, +- .dequeue = pfifo_fast_dequeue, +- .peek = pfifo_fast_peek, +- .init = pfifo_fast_init, +- .destroy = pfifo_fast_destroy, +- .reset = pfifo_fast_reset, +- .dump = pfifo_fast_dump, +- .change_tx_queue_len = pfifo_fast_change_tx_queue_len, +- .owner = THIS_MODULE, +- .static_flags = TCQ_F_NOLOCK | TCQ_F_CPUSTATS, +-}; +-EXPORT_SYMBOL(pfifo_fast_ops); +- + struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, + const struct Qdisc_ops *ops, + struct netlink_ext_ack *extack) diff --git a/root/target/linux/generic/hack-5.4/690-mptcp_v0.96.patch b/root/target/linux/generic/hack-5.4/690-mptcp_v0.96.patch index 1c2d2734..850613b5 100755 --- a/root/target/linux/generic/hack-5.4/690-mptcp_v0.96.patch +++ b/root/target/linux/generic/hack-5.4/690-mptcp_v0.96.patch @@ -1,24123 +1,47 @@ -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index dbb68067ba4e..b6c32a29789e 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -2742,6 +2742,10 @@ - allocations which rules out almost all kernel - allocations. Use with caution! 
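The long run of '-' lines below drops the previous revision of the MPTCP mega-patch; among them is the in-patch documentation of the mptcp_* sysctls. On a kernel carrying the multipath-tcp.org v0.96 series those knobs are tunable at runtime, e.g. (a sketch; the values are as documented in the removed text below):

sysctl net.mptcp.mptcp_enabled        # 0, 1 or 2, as documented below
sysctl -w net.mptcp.mptcp_enabled=2   # MPTCP only for apps that request it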
- -+ mptcp_htable_entries= -+ [KNL,NET] Set number of hash buckets for MPTCP token -+ hashtables. -+ - MTD_Partition= [MTD] - Format: ,,, - -diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt -index 8af3771a3ebf..e8fecb8f6370 100644 ---- a/Documentation/networking/ip-sysctl.txt -+++ b/Documentation/networking/ip-sysctl.txt -@@ -818,6 +818,18 @@ tcp_rx_skb_cache - BOOLEAN - - Default: 0 (disabled) - -+MPTCP variables: -+ -+mptcp_enabled - INTEGER -+ Enable or disable Multipath TCP for new connections. -+ Possible values are: -+ -+ 0: Multipath TCP is disabled on all TCP-sockets that are newly created. -+ 1: Multipath TCP is enabled by default on all new TCP-sockets. Note that -+ existing sockets in LISTEN-state will still use regular TCP. -+ 2: Enables Multipath TCP only upon the request of the application -+ throught the socket-option MPTCP_ENABLED. -+ - UDP variables: - - udp_l3mdev_accept - BOOLEAN -diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c -index 535ee41ee421..9f82f93e6e77 100644 ---- a/drivers/infiniband/hw/cxgb4/cm.c -+++ b/drivers/infiniband/hw/cxgb4/cm.c -@@ -3950,7 +3950,7 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) - */ - memset(&tmp_opt, 0, sizeof(tmp_opt)); - tcp_clear_options(&tmp_opt); -- tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL); -+ tcp_parse_options(&init_net, skb, &tmp_opt, NULL, 0, NULL, NULL); - - req = __skb_push(skb, sizeof(*req)); - memset(req, 0, sizeof(*req)); -diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h -index b04b5bd43f54..57e35d51db8c 100644 ---- a/include/linux/skbuff.h -+++ b/include/linux/skbuff.h -@@ -717,7 +717,7 @@ struct sk_buff { - * want to keep them across layers you have to do a skb_clone() - * first. This is owned by whoever has the skb queued ATM. - */ -- char cb[48] __aligned(8); -+ char cb[80] __aligned(8); - - union { - struct { -diff --git a/include/linux/tcp.h b/include/linux/tcp.h -index 358deb4ff830..aebfedba9838 100644 ---- a/include/linux/tcp.h -+++ b/include/linux/tcp.h -@@ -54,7 +54,7 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) - /* TCP Fast Open */ - #define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */ - #define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */ --#define TCP_FASTOPEN_COOKIE_SIZE 8 /* the size employed by this impl. */ -+#define TCP_FASTOPEN_COOKIE_SIZE 4 /* the size employed by this impl. */ - - /* TCP Fast Open Cookie as stored in memory */ - struct tcp_fastopen_cookie { -@@ -74,6 +74,56 @@ struct tcp_sack_block { - u32 end_seq; - }; - -+struct tcp_out_options { -+ u16 options; /* bit field of OPTION_* */ -+ u16 mss; /* 0 to disable */ -+ u8 ws; /* window scale, 0 to disable */ -+ u8 num_sack_blocks; /* number of SACK blocks to include */ -+ u8 hash_size; /* bytes in hash_location */ -+ __u8 *hash_location; /* temporary pointer, overloaded */ -+ __u32 tsval, tsecr; /* need to include OPTION_TS */ -+ struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ -+#ifdef CONFIG_MPTCP -+ u16 mptcp_options; /* bit field of MPTCP related OPTION_* */ -+ u8 dss_csum:1, /* dss-checksum required? 
*/ -+ add_addr_v4:1, -+ add_addr_v6:1, -+ mptcp_ver:4; -+ -+ union { -+ struct { -+ __u64 sender_key; /* sender's key for mptcp */ -+ __u64 receiver_key; /* receiver's key for mptcp */ -+ } mp_capable; -+ -+ struct { -+ __u64 sender_truncated_mac; -+ __u32 sender_nonce; -+ /* random number of the sender */ -+ __u32 token; /* token for mptcp */ -+ u8 low_prio:1; -+ } mp_join_syns; -+ }; -+ -+ struct { -+ __u64 trunc_mac; -+ struct in_addr addr; -+ u16 port; -+ u8 addr_id; -+ } add_addr4; -+ -+ struct { -+ __u64 trunc_mac; -+ struct in6_addr addr; -+ u16 port; -+ u8 addr_id; -+ } add_addr6; -+ -+ u16 remove_addrs; /* list of address id */ -+ u8 addr_id; /* address id (mp_join or add_address) */ -+#endif /* CONFIG_MPTCP */ -+}; -+ - /*These are used to set the sack_ok field in struct tcp_options_received */ - #define TCP_SACK_SEEN (1 << 0) /*1 = peer is SACK capable, */ - #define TCP_DSACK_SEEN (1 << 2) /*1 = DSACK was received from peer*/ -@@ -97,6 +147,9 @@ struct tcp_options_received { - u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ - }; - -+struct mptcp_cb; -+struct mptcp_tcp_sock; -+ - static inline void tcp_clear_options(struct tcp_options_received *rx_opt) - { - rx_opt->tstamp_ok = rx_opt->sack_ok = 0; -@@ -135,6 +188,8 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) - return (struct tcp_request_sock *)req; - } - -+struct tcp_md5sig_key; -+ - struct tcp_sock { - /* inet_connection_sock has to be the first member of tcp_sock */ - struct inet_connection_sock inet_conn; -@@ -295,6 +350,7 @@ struct tcp_sock { - u32 rate_interval_us; /* saved rate sample: time elapsed */ - - u32 rcv_wnd; /* Current receiver window */ -+ u32 rcv_right_edge; /* Highest announced right edge */ - u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ - u32 notsent_lowat; /* TCP_NOTSENT_LOWAT */ - u32 pushed_seq; /* Last pushed seq, required to talk to windows */ -@@ -397,6 +453,44 @@ struct tcp_sock { - */ - struct request_sock __rcu *fastopen_rsk; - u32 *saved_syn; -+ -+ /* MPTCP/TCP-specific callbacks */ -+ const struct tcp_sock_ops *ops; -+ -+ struct mptcp_cb *mpcb; -+ struct sock *meta_sk; -+ /* We keep these flags even if CONFIG_MPTCP is not checked, because -+ * it allows checking MPTCP capability just by checking the mpc flag, -+ * rather than adding ifdefs everywhere. -+ */ -+ u32 mpc:1, /* Other end is multipath capable */ -+ inside_tk_table:1, /* Is the tcp_sock inside the token-table? */ -+ send_mp_fclose:1, -+ request_mptcp:1, /* Did we send out an MP_CAPABLE? -+ * (this speeds up mptcp_doit() in tcp_recvmsg) -+ */ -+ pf:1, /* Potentially Failed state: when this flag is set, we -+ * stop using the subflow -+ */ -+ mp_killed:1, /* Killed with a tcp_done in mptcp? */ -+ is_master_sk:1, -+ close_it:1, /* Must close socket in mptcp_data_ready? 
*/ -+ closing:1, -+ mptcp_ver:4, -+ mptcp_sched_setsockopt:1, -+ mptcp_pm_setsockopt:1, -+ record_master_info:1, -+ tcp_disconnect:1; -+ struct mptcp_tcp_sock *mptcp; -+#ifdef CONFIG_MPTCP -+#define MPTCP_SCHED_NAME_MAX 16 -+#define MPTCP_PM_NAME_MAX 16 -+ struct hlist_nulls_node tk_table; -+ u32 mptcp_loc_token; -+ u64 mptcp_loc_key; -+ char mptcp_sched_name[MPTCP_SCHED_NAME_MAX]; -+ char mptcp_pm_name[MPTCP_PM_NAME_MAX]; -+#endif /* CONFIG_MPTCP */ - }; - - enum tsq_enum { -@@ -408,6 +502,8 @@ enum tsq_enum { - TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call - * tcp_v{4|6}_mtu_reduced() - */ -+ MPTCP_PATH_MANAGER_DEFERRED, /* MPTCP deferred creation of new subflows */ -+ MPTCP_SUB_DEFERRED, /* A subflow got deferred - process them */ - }; - - enum tsq_flags { -@@ -417,6 +513,8 @@ enum tsq_flags { - TCPF_WRITE_TIMER_DEFERRED = (1UL << TCP_WRITE_TIMER_DEFERRED), - TCPF_DELACK_TIMER_DEFERRED = (1UL << TCP_DELACK_TIMER_DEFERRED), - TCPF_MTU_REDUCED_DEFERRED = (1UL << TCP_MTU_REDUCED_DEFERRED), -+ TCPF_PATH_MANAGER_DEFERRED = (1UL << MPTCP_PATH_MANAGER_DEFERRED), -+ TCPF_SUB_DEFERRED = (1UL << MPTCP_SUB_DEFERRED), - }; - - static inline struct tcp_sock *tcp_sk(const struct sock *sk) -@@ -440,6 +538,7 @@ struct tcp_timewait_sock { - #ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *tw_md5_key; - #endif -+ struct mptcp_tw *mptcp_tw; - }; - - static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) -diff --git a/include/net/inet_common.h b/include/net/inet_common.h -index ae2ba897675c..aa91a56bd7af 100644 ---- a/include/net/inet_common.h -+++ b/include/net/inet_common.h -@@ -2,6 +2,7 @@ - #ifndef _INET_COMMON_H - #define _INET_COMMON_H - -+#include - #include - - extern const struct proto_ops inet_stream_ops; -@@ -16,6 +17,8 @@ struct sock; - struct sockaddr; - struct socket; - -+int inet_create(struct net *net, struct socket *sock, int protocol, int kern); -+int inet6_create(struct net *net, struct socket *sock, int protocol, int kern); - int inet_release(struct socket *sock); - int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags); -diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h -index 13792c0ef46e..e99cc510610f 100644 ---- a/include/net/inet_connection_sock.h -+++ b/include/net/inet_connection_sock.h -@@ -25,6 +25,7 @@ - - struct inet_bind_bucket; - struct tcp_congestion_ops; -+struct tcp_options_received; - - /* - * Pointers to address related TCP functions -diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h -index 34c4436fd18f..828f79528b32 100644 ---- a/include/net/inet_sock.h -+++ b/include/net/inet_sock.h -@@ -79,7 +79,7 @@ struct inet_request_sock { - #define ireq_state req.__req_common.skc_state - #define ireq_family req.__req_common.skc_family - -- u16 snd_wscale : 4, -+ u32 snd_wscale : 4, - rcv_wscale : 4, - tstamp_ok : 1, - sack_ok : 1, -@@ -87,6 +87,8 @@ struct inet_request_sock { - ecn_ok : 1, - acked : 1, - no_srccheck: 1, -+ mptcp_rqsk : 1, -+ saw_mpc : 1, - smc_ok : 1; - u32 ir_mark; - union { -diff --git a/include/net/mptcp.h b/include/net/mptcp.h -new file mode 100644 -index 000000000000..196b8939cbab ---- /dev/null -+++ b/include/net/mptcp.h -@@ -0,0 +1,1577 @@ -+/* -+ * MPTCP implementation -+ * -+ * Initial Design & Implementation: -+ * Sébastien Barré -+ * -+ * Current Maintainer & Author: -+ * Christoph Paasch -+ * -+ * Additional authors: -+ * Jaakko Korkeaniemi -+ * Gregory Detal -+ * Fabien Duchêne -+ * Andreas Seelinger -+ * Lavkesh Lahngir -+ * 
Andreas Ripke -+ * Vlad Dogaru -+ * Octavian Purdila -+ * John Ronan -+ * Catalin Nicutar -+ * Brandon Heller -+ * -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ */ -+ -+#ifndef _MPTCP_H -+#define _MPTCP_H -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+ -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ #define ntohll(x) be64_to_cpu(x) -+ #define htonll(x) cpu_to_be64(x) -+#elif defined(__BIG_ENDIAN_BITFIELD) -+ #define ntohll(x) (x) -+ #define htonll(x) (x) -+#endif -+ -+struct mptcp_loc4 { -+ u8 loc4_id; -+ u8 low_prio:1; -+ int if_idx; -+ struct in_addr addr; -+}; -+ -+struct mptcp_rem4 { -+ u8 rem4_id; -+ __be16 port; -+ struct in_addr addr; -+}; -+ -+struct mptcp_loc6 { -+ u8 loc6_id; -+ u8 low_prio:1; -+ int if_idx; -+ struct in6_addr addr; -+}; -+ -+struct mptcp_rem6 { -+ u8 rem6_id; -+ __be16 port; -+ struct in6_addr addr; -+}; -+ -+struct mptcp_request_sock { -+ struct tcp_request_sock req; -+ struct hlist_nulls_node hash_entry; -+ -+ union { -+ struct { -+ /* Only on initial subflows */ -+ u64 mptcp_loc_key; -+ u64 mptcp_rem_key; -+ u32 mptcp_loc_token; -+ }; -+ -+ struct { -+ /* Only on additional subflows */ -+ u32 mptcp_rem_nonce; -+ u32 mptcp_loc_nonce; -+ u64 mptcp_hash_tmac; -+ }; -+ }; -+ -+ u8 loc_id; -+ u8 rem_id; /* Address-id in the MP_JOIN */ -+ u16 dss_csum:1, -+ rem_key_set:1, -+ is_sub:1, /* Is this a new subflow? */ -+ low_prio:1, /* Interface set to low-prio? */ -+ rcv_low_prio:1, -+ mptcp_ver:4; -+}; -+ -+struct mptcp_options_received { -+ u16 saw_mpc:1, -+ dss_csum:1, -+ drop_me:1, -+ -+ is_mp_join:1, -+ join_ack:1, -+ -+ saw_low_prio:2, /* 0x1 - low-prio set for this subflow -+ * 0x2 - low-prio set for another subflow -+ */ -+ low_prio:1, -+ -+ saw_add_addr:2, /* Saw at least one add_addr option: -+ * 0x1: IPv4 - 0x2: IPv6 -+ */ -+ more_add_addr:1, /* Saw one more add-addr. */ -+ -+ saw_rem_addr:1, /* Saw at least one rem_addr option */ -+ more_rem_addr:1, /* Saw one more rem-addr. 
*/ -+ -+ mp_fail:1, -+ mp_fclose:1; -+ u8 rem_id; /* Address-id in the MP_JOIN */ -+ u8 prio_addr_id; /* Address-id in the MP_PRIO */ -+ -+ const unsigned char *add_addr_ptr; /* Pointer to add-address option */ -+ const unsigned char *rem_addr_ptr; /* Pointer to rem-address option */ -+ -+ u32 data_ack; -+ u32 data_seq; -+ u16 data_len; -+ -+ u8 mptcp_ver; /* MPTCP version */ -+ -+ /* Key inside the option (from mp_capable or fast_close) */ -+ u64 mptcp_sender_key; -+ u64 mptcp_receiver_key; -+ -+ u32 mptcp_rem_token; /* Remote token */ -+ -+ u32 mptcp_recv_nonce; -+ u64 mptcp_recv_tmac; -+ u8 mptcp_recv_mac[20]; -+}; -+ -+struct mptcp_tcp_sock { -+ struct hlist_node node; -+ struct hlist_node cb_list; -+ struct mptcp_options_received rx_opt; -+ -+ /* Those three fields record the current mapping */ -+ u64 map_data_seq; -+ u32 map_subseq; -+ u16 map_data_len; -+ u16 slave_sk:1, -+ fully_established:1, -+ second_packet:1, -+ attached:1, -+ send_mp_fail:1, -+ include_mpc:1, -+ mapping_present:1, -+ map_data_fin:1, -+ low_prio:1, /* use this socket as backup */ -+ rcv_low_prio:1, /* Peer sent low-prio option to us */ -+ send_mp_prio:1, /* Trigger to send mp_prio on this socket */ -+ pre_established:1; /* State between sending 3rd ACK and -+ * receiving the fourth ack of new subflows. -+ */ -+ -+ /* isn: needed to translate abs to relative subflow seqnums */ -+ u32 snt_isn; -+ u32 rcv_isn; -+ u8 path_index; -+ u8 loc_id; -+ u8 rem_id; -+ u8 sk_err; -+ -+#define MPTCP_SCHED_SIZE 16 -+ u8 mptcp_sched[MPTCP_SCHED_SIZE] __aligned(8); -+ -+ int init_rcv_wnd; -+ u32 infinite_cutoff_seq; -+ struct delayed_work work; -+ u32 mptcp_loc_nonce; -+ struct tcp_sock *tp; -+ u32 last_end_data_seq; -+ -+ /* MP_JOIN subflow: timer for retransmitting the 3rd ack */ -+ struct timer_list mptcp_ack_timer; -+ -+ /* HMAC of the third ack */ -+ char sender_mac[SHA256_DIGEST_SIZE]; -+}; -+ -+struct mptcp_tw { -+ struct list_head list; -+ u64 loc_key; -+ u64 rcv_nxt; -+ struct mptcp_cb __rcu *mpcb; -+ u8 meta_tw:1, -+ in_list:1; -+}; -+ -+#define MPTCP_PM_NAME_MAX 16 -+struct mptcp_pm_ops { -+ struct list_head list; -+ -+ /* Signal the creation of a new MPTCP-session. 
*/ -+ void (*new_session)(const struct sock *meta_sk); -+ void (*release_sock)(struct sock *meta_sk); -+ void (*fully_established)(struct sock *meta_sk); -+ void (*close_session)(struct sock *meta_sk); -+ void (*new_remote_address)(struct sock *meta_sk); -+ int (*get_local_id)(const struct sock *meta_sk, sa_family_t family, -+ union inet_addr *addr, bool *low_prio); -+ void (*addr_signal)(struct sock *sk, unsigned *size, -+ struct tcp_out_options *opts, struct sk_buff *skb); -+ void (*add_raddr)(struct mptcp_cb *mpcb, const union inet_addr *addr, -+ sa_family_t family, __be16 port, u8 id); -+ void (*rem_raddr)(struct mptcp_cb *mpcb, u8 rem_id); -+ void (*init_subsocket_v4)(struct sock *sk, struct in_addr addr); -+ void (*init_subsocket_v6)(struct sock *sk, struct in6_addr addr); -+ void (*established_subflow)(struct sock *sk); -+ void (*delete_subflow)(struct sock *sk); -+ void (*prio_changed)(struct sock *sk, int low_prio); -+ -+ char name[MPTCP_PM_NAME_MAX]; -+ struct module *owner; -+}; -+ -+struct mptcp_sched_ops { -+ struct list_head list; -+ -+ struct sock * (*get_subflow)(struct sock *meta_sk, -+ struct sk_buff *skb, -+ bool zero_wnd_test); -+ struct sk_buff * (*next_segment)(struct sock *meta_sk, -+ int *reinject, -+ struct sock **subsk, -+ unsigned int *limit); -+ void (*init)(struct sock *sk); -+ void (*release)(struct sock *sk); -+ -+ char name[MPTCP_SCHED_NAME_MAX]; -+ struct module *owner; -+}; -+ -+struct mptcp_cb { -+ /* list of sockets in this multipath connection */ -+ struct hlist_head conn_list; -+ /* list of sockets that need a call to release_cb */ -+ struct hlist_head callback_list; -+ -+ /* Lock used for protecting the different rcu-lists of mptcp_cb */ -+ spinlock_t mpcb_list_lock; -+ -+ /* High-order bits of 64-bit sequence numbers */ -+ u32 snd_high_order[2]; -+ u32 rcv_high_order[2]; -+ -+ u16 send_infinite_mapping:1, -+ send_mptcpv1_mpcapable:1, -+ rem_key_set:1, -+ in_time_wait:1, -+ list_rcvd:1, /* XXX TO REMOVE */ -+ addr_signal:1, /* Path-manager wants us to call addr_signal */ -+ dss_csum:1, -+ server_side:1, -+ infinite_mapping_rcv:1, -+ infinite_mapping_snd:1, -+ dfin_combined:1, /* Was the DFIN combined with subflow-fin? */ -+ passive_close:1, -+ snd_hiseq_index:1, /* Index in snd_high_order of snd_nxt */ -+ rcv_hiseq_index:1, /* Index in rcv_high_order of rcv_nxt */ -+ tcp_ca_explicit_set:1; /* was meta CC set by app? */ -+ -+#define MPTCP_SCHED_DATA_SIZE 8 -+ u8 mptcp_sched[MPTCP_SCHED_DATA_SIZE] __aligned(8); -+ const struct mptcp_sched_ops *sched_ops; -+ -+ struct sk_buff_head reinject_queue; -+ /* First cache-line boundary is here minus 8 bytes. But from the -+ * reinject-queue only the next and prev pointers are regularly -+ * accessed. Thus, the whole data-path is on a single cache-line. -+ */ -+ -+ u64 csum_cutoff_seq; -+ u64 infinite_rcv_seq; -+ -+ /***** Start of fields, used for connection closure */ -+ unsigned char mptw_state; -+ u8 dfin_path_index; -+ -+ struct list_head tw_list; -+ -+ /***** Start of fields, used for subflow establishment and closure */ -+ refcount_t mpcb_refcnt; -+ -+ /* Mutex needed, because otherwise mptcp_close will complain that the -+ * socket is owned by the user. -+ * E.g., mptcp_sub_close_wq is taking the meta-lock. -+ */ -+ struct mutex mpcb_mutex; -+ -+ /***** Start of fields, used for subflow establishment */ -+ struct sock *meta_sk; -+ -+ /* Master socket, also part of the conn_list, this -+ * socket is the one that the application sees. 
-+ */ -+ struct sock *master_sk; -+ -+ __u64 mptcp_loc_key; -+ __u64 mptcp_rem_key; -+ __u32 mptcp_loc_token; -+ __u32 mptcp_rem_token; -+ -+#define MPTCP_PM_SIZE 608 -+ u8 mptcp_pm[MPTCP_PM_SIZE] __aligned(8); -+ const struct mptcp_pm_ops *pm_ops; -+ -+ unsigned long path_index_bits; -+ -+ __u8 mptcp_ver; -+ -+ /* Original snd/rcvbuf of the initial subflow. -+ * Used for the new subflows on the server-side to allow correct -+ * autotuning -+ */ -+ int orig_sk_rcvbuf; -+ int orig_sk_sndbuf; -+ u32 orig_window_clamp; -+ -+ struct tcp_info *master_info; -+}; -+ -+#define MPTCP_VERSION_0 0 -+#define MPTCP_VERSION_1 1 -+ -+#define MPTCP_SUB_CAPABLE 0 -+#define MPTCP_SUB_LEN_CAPABLE_SYN 12 -+#define MPTCP_SUB_LEN_CAPABLE_SYN_ALIGN 12 -+#define MPTCP_SUB_LEN_CAPABLE_ACK 20 -+#define MPTCP_SUB_LEN_CAPABLE_ACK_ALIGN 20 -+ -+#define MPTCPV1_SUB_LEN_CAPABLE_SYN 4 -+#define MPTCPV1_SUB_LEN_CAPABLE_SYN_ALIGN 4 -+#define MPTCPV1_SUB_LEN_CAPABLE_SYNACK 12 -+#define MPTCPV1_SUB_LEN_CAPABLE_SYNACK_ALIGN 12 -+#define MPTCPV1_SUB_LEN_CAPABLE_ACK 20 -+#define MPTCPV1_SUB_LEN_CAPABLE_ACK_ALIGN 20 -+#define MPTCPV1_SUB_LEN_CAPABLE_DATA 22 -+#define MPTCPV1_SUB_LEN_CAPABLE_DATA_CSUM 24 -+#define MPTCPV1_SUB_LEN_CAPABLE_DATA_ALIGN 24 -+ -+#define MPTCP_SUB_JOIN 1 -+#define MPTCP_SUB_LEN_JOIN_SYN 12 -+#define MPTCP_SUB_LEN_JOIN_SYN_ALIGN 12 -+#define MPTCP_SUB_LEN_JOIN_SYNACK 16 -+#define MPTCP_SUB_LEN_JOIN_SYNACK_ALIGN 16 -+#define MPTCP_SUB_LEN_JOIN_ACK 24 -+#define MPTCP_SUB_LEN_JOIN_ACK_ALIGN 24 -+ -+#define MPTCP_SUB_DSS 2 -+#define MPTCP_SUB_LEN_DSS 4 -+#define MPTCP_SUB_LEN_DSS_ALIGN 4 -+ -+/* Lengths for seq and ack are the ones without the generic MPTCP-option header, -+ * as they are part of the DSS-option. -+ * To get the total length, just add the different options together. -+ */ -+#define MPTCP_SUB_LEN_SEQ 10 -+#define MPTCP_SUB_LEN_SEQ_CSUM 12 -+#define MPTCP_SUB_LEN_SEQ_ALIGN 12 -+ -+#define MPTCP_SUB_LEN_SEQ_64 14 -+#define MPTCP_SUB_LEN_SEQ_CSUM_64 16 -+#define MPTCP_SUB_LEN_SEQ_64_ALIGN 16 -+ -+#define MPTCP_SUB_LEN_ACK 4 -+#define MPTCP_SUB_LEN_ACK_ALIGN 4 -+ -+#define MPTCP_SUB_LEN_ACK_64 8 -+#define MPTCP_SUB_LEN_ACK_64_ALIGN 8 -+ -+/* This is the "default" option-length we will send out most often. -+ * MPTCP DSS-header -+ * 32-bit data sequence number -+ * 32-bit data ack -+ * -+ * It is necessary to calculate the effective MSS we will be using when -+ * sending data. 
-+ */ -+#define MPTCP_SUB_LEN_DSM_ALIGN (MPTCP_SUB_LEN_DSS_ALIGN + \ -+ MPTCP_SUB_LEN_SEQ_ALIGN + \ -+ MPTCP_SUB_LEN_ACK_ALIGN) -+ -+#define MPTCP_SUB_ADD_ADDR 3 -+#define MPTCP_SUB_LEN_ADD_ADDR4 8 -+#define MPTCP_SUB_LEN_ADD_ADDR4_VER1 16 -+#define MPTCP_SUB_LEN_ADD_ADDR6 20 -+#define MPTCP_SUB_LEN_ADD_ADDR6_VER1 28 -+#define MPTCP_SUB_LEN_ADD_ADDR4_ALIGN 8 -+#define MPTCP_SUB_LEN_ADD_ADDR4_ALIGN_VER1 16 -+#define MPTCP_SUB_LEN_ADD_ADDR6_ALIGN 20 -+#define MPTCP_SUB_LEN_ADD_ADDR6_ALIGN_VER1 28 -+ -+#define MPTCP_SUB_REMOVE_ADDR 4 -+#define MPTCP_SUB_LEN_REMOVE_ADDR 4 -+ -+#define MPTCP_SUB_PRIO 5 -+#define MPTCP_SUB_LEN_PRIO 3 -+#define MPTCP_SUB_LEN_PRIO_ADDR 4 -+#define MPTCP_SUB_LEN_PRIO_ALIGN 4 -+ -+#define MPTCP_SUB_FAIL 6 -+#define MPTCP_SUB_LEN_FAIL 12 -+#define MPTCP_SUB_LEN_FAIL_ALIGN 12 -+ -+#define MPTCP_SUB_FCLOSE 7 -+#define MPTCP_SUB_LEN_FCLOSE 12 -+#define MPTCP_SUB_LEN_FCLOSE_ALIGN 12 -+ -+ -+#define OPTION_MPTCP (1 << 5) -+ -+/* Max number of fastclose retransmissions */ -+#define MPTCP_FASTCLOSE_RETRIES 3 -+ -+#ifdef CONFIG_MPTCP -+ -+/* Used for checking if the mptcp initialization has been successful */ -+extern bool mptcp_init_failed; -+ -+/* MPTCP options */ -+#define OPTION_TYPE_SYN (1 << 0) -+#define OPTION_TYPE_SYNACK (1 << 1) -+#define OPTION_TYPE_ACK (1 << 2) -+#define OPTION_MP_CAPABLE (1 << 3) -+#define OPTION_DATA_ACK (1 << 4) -+#define OPTION_ADD_ADDR (1 << 5) -+#define OPTION_MP_JOIN (1 << 6) -+#define OPTION_MP_FAIL (1 << 7) -+#define OPTION_MP_FCLOSE (1 << 8) -+#define OPTION_REMOVE_ADDR (1 << 9) -+#define OPTION_MP_PRIO (1 << 10) -+ -+/* MPTCP flags: both TX and RX */ -+#define MPTCPHDR_SEQ 0x01 /* DSS.M option is present */ -+#define MPTCPHDR_FIN 0x02 /* DSS.F option is present */ -+#define MPTCPHDR_SEQ64_INDEX 0x04 /* index of seq in mpcb->snd_high_order */ -+#define MPTCPHDR_MPC_DATA 0x08 -+/* MPTCP flags: RX only */ -+#define MPTCPHDR_ACK 0x10 -+#define MPTCPHDR_SEQ64_SET 0x20 /* Did we received a 64-bit seq number? */ -+#define MPTCPHDR_SEQ64_OFO 0x40 /* Is it not in our circular array? */ -+#define MPTCPHDR_DSS_CSUM 0x80 -+/* MPTCP flags: TX only */ -+#define MPTCPHDR_INF 0x10 -+#define MPTCP_REINJECT 0x20 /* Did we reinject this segment? 
*/ -+ -+struct mptcp_option { -+ __u8 kind; -+ __u8 len; -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u8 ver:4, -+ sub:4; -+#elif defined(__BIG_ENDIAN_BITFIELD) -+ __u8 sub:4, -+ ver:4; -+#else -+#error "Adjust your defines" -+#endif -+}; -+ -+struct mp_capable { -+ __u8 kind; -+ __u8 len; -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u8 ver:4, -+ sub:4; -+ __u8 h:1, -+ rsv:5, -+ b:1, -+ a:1; -+#elif defined(__BIG_ENDIAN_BITFIELD) -+ __u8 sub:4, -+ ver:4; -+ __u8 a:1, -+ b:1, -+ rsv:5, -+ h:1; -+#else -+#error "Adjust your defines" -+#endif -+ __u64 sender_key; -+ __u64 receiver_key; -+} __attribute__((__packed__)); -+ -+struct mp_join { -+ __u8 kind; -+ __u8 len; -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u8 b:1, -+ rsv:3, -+ sub:4; -+#elif defined(__BIG_ENDIAN_BITFIELD) -+ __u8 sub:4, -+ rsv:3, -+ b:1; -+#else -+#error "Adjust your defines" -+#endif -+ __u8 addr_id; -+ union { -+ struct { -+ u32 token; -+ u32 nonce; -+ } syn; -+ struct { -+ __u64 mac; -+ u32 nonce; -+ } synack; -+ struct { -+ __u8 mac[20]; -+ } ack; -+ } u; -+} __attribute__((__packed__)); -+ -+struct mp_dss { -+ __u8 kind; -+ __u8 len; -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u16 rsv1:4, -+ sub:4, -+ A:1, -+ a:1, -+ M:1, -+ m:1, -+ F:1, -+ rsv2:3; -+#elif defined(__BIG_ENDIAN_BITFIELD) -+ __u16 sub:4, -+ rsv1:4, -+ rsv2:3, -+ F:1, -+ m:1, -+ M:1, -+ a:1, -+ A:1; -+#else -+#error "Adjust your defines" -+#endif -+}; -+ -+struct mp_add_addr { -+ __u8 kind; -+ __u8 len; -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ union { -+ struct { -+ __u8 ipver:4, -+ sub:4; -+ } v0; -+ struct { -+ __u8 echo:1, -+ rsv:3, -+ sub:4; -+ } v1; -+ } u_bit; -+#elif defined(__BIG_ENDIAN_BITFIELD) -+ union { -+ struct { -+ __u8 sub:4, -+ ipver:4; -+ } v0; -+ struct { -+ __u8 sub:4, -+ rsv:3, -+ echo:1; -+ } v1; -+ } u_bit; -+#else -+#error "Adjust your defines" -+#endif -+ __u8 addr_id; -+ union { -+ struct { -+ struct in_addr addr; -+ __be16 port; -+ __u8 mac[8]; -+ } v4; -+ struct { -+ struct in6_addr addr; -+ __be16 port; -+ __u8 mac[8]; -+ } v6; -+ } u; -+} __attribute__((__packed__)); -+ -+struct mp_remove_addr { -+ __u8 kind; -+ __u8 len; -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u8 rsv:4, -+ sub:4; -+#elif defined(__BIG_ENDIAN_BITFIELD) -+ __u8 sub:4, -+ rsv:4; -+#else -+#error "Adjust your defines" -+#endif -+ /* list of addr_id */ -+ __u8 addrs_id; -+}; -+ -+struct mp_fail { -+ __u8 kind; -+ __u8 len; -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u16 rsv1:4, -+ sub:4, -+ rsv2:8; -+#elif defined(__BIG_ENDIAN_BITFIELD) -+ __u16 sub:4, -+ rsv1:4, -+ rsv2:8; -+#else -+#error "Adjust your defines" -+#endif -+ __be64 data_seq; -+} __attribute__((__packed__)); -+ -+struct mp_fclose { -+ __u8 kind; -+ __u8 len; -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u16 rsv1:4, -+ sub:4, -+ rsv2:8; -+#elif defined(__BIG_ENDIAN_BITFIELD) -+ __u16 sub:4, -+ rsv1:4, -+ rsv2:8; -+#else -+#error "Adjust your defines" -+#endif -+ __u64 key; -+} __attribute__((__packed__)); -+ -+struct mp_prio { -+ __u8 kind; -+ __u8 len; -+#if defined(__LITTLE_ENDIAN_BITFIELD) -+ __u8 b:1, -+ rsv:3, -+ sub:4; -+#elif defined(__BIG_ENDIAN_BITFIELD) -+ __u8 sub:4, -+ rsv:3, -+ b:1; -+#else -+#error "Adjust your defines" -+#endif -+ __u8 addr_id; -+} __attribute__((__packed__)); -+ -+struct mptcp_hashtable { -+ struct hlist_nulls_head *hashtable; -+ unsigned int mask; -+}; -+ -+static inline int mptcp_sub_len_dss(const struct mp_dss *m, const int csum) -+{ -+ return 4 + m->A * (4 + m->a * 4) + m->M * (10 + m->m * 4 + csum * 2); -+} -+ -+#define MPTCP_ENABLE 0x01 -+#define 
-+
-+#define MPTCP_ENABLE 0x01
-+#define MPTCP_SOCKOPT 0x02
-+#define MPTCP_CLIENT_DISABLE 0x04
-+#define MPTCP_SERVER_DISABLE 0x08
-+
-+extern int sysctl_mptcp_enabled;
-+extern int sysctl_mptcp_version;
-+extern int sysctl_mptcp_checksum;
-+extern int sysctl_mptcp_debug;
-+extern int sysctl_mptcp_syn_retries;
-+
-+extern struct workqueue_struct *mptcp_wq;
-+
-+#define mptcp_debug(fmt, args...) \
-+ do { \
-+ if (unlikely(sysctl_mptcp_debug)) \
-+ pr_err(fmt, ##args); \
-+ } while (0)
-+
-+static inline struct sock *mptcp_to_sock(const struct mptcp_tcp_sock *mptcp)
-+{
-+ return (struct sock *)mptcp->tp;
-+}
-+
-+#define mptcp_for_each_sub(__mpcb, __mptcp) \
-+ hlist_for_each_entry_rcu(__mptcp, &((__mpcb)->conn_list), node)
-+
-+/* Must be called with the appropriate lock held */
-+#define mptcp_for_each_sub_safe(__mpcb, __mptcp, __tmp) \
-+ hlist_for_each_entry_safe(__mptcp, __tmp, &((__mpcb)->conn_list), node)
-+
-+/* Iterates over all bits set to 1 in a bitset */
-+#define mptcp_for_each_bit_set(b, i) \
-+ for (i = ffs(b) - 1; i >= 0; i = ffs(b >> (i + 1) << (i + 1)) - 1)
-+
-+#define mptcp_for_each_bit_unset(b, i) \
-+ mptcp_for_each_bit_set(~b, i)
-+
-+#define MPTCP_INC_STATS(net, field) SNMP_INC_STATS((net)->mptcp.mptcp_statistics, field)
-+#define MPTCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mptcp.mptcp_statistics, field)
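The bit-iteration macro above walks the indices of all set bits, lowest first, by masking off the bits already visited before calling ffs() again. A small illustrative fragment (the mask value is hypothetical):

	int i;
	u32 path_mask = 0x16; /* bits 1, 2 and 4 set */

	mptcp_for_each_bit_set(path_mask, i)
		pr_info("path bit %d is set\n", i); /* prints 1, then 2, then 4 */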
-+
-+enum
-+{
-+ MPTCP_MIB_NUM = 0,
-+ MPTCP_MIB_MPCAPABLEPASSIVE, /* Received SYN with MP_CAPABLE */
-+ MPTCP_MIB_MPCAPABLEACTIVE, /* Sent SYN with MP_CAPABLE */
-+ MPTCP_MIB_MPCAPABLEACTIVEACK, /* Received SYN/ACK with MP_CAPABLE */
-+ MPTCP_MIB_MPCAPABLEPASSIVEACK, /* Received third ACK with MP_CAPABLE */
-+ MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK,/* Server-side fallback during 3-way handshake */
-+ MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */
-+ MPTCP_MIB_MPCAPABLERETRANSFALLBACK,/* Client-side stopped sending MP_CAPABLE after too many SYN-retransmissions */
-+ MPTCP_MIB_CSUMENABLED, /* Created MPTCP-connection with DSS-checksum enabled */
-+ MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */
-+ MPTCP_MIB_MPFAILRX, /* Received an MP_FAIL */
-+ MPTCP_MIB_CSUMFAIL, /* Received segment with invalid checksum */
-+ MPTCP_MIB_FASTCLOSERX, /* Received a FAST_CLOSE */
-+ MPTCP_MIB_FASTCLOSETX, /* Sent a FAST_CLOSE */
-+ MPTCP_MIB_FBACKSUB, /* Fallback upon ack without data-ack on new subflow */
-+ MPTCP_MIB_FBACKINIT, /* Fallback upon ack without data-ack on initial subflow */
-+ MPTCP_MIB_FBDATASUB, /* Fallback upon data without DSS at the beginning on new subflow */
-+ MPTCP_MIB_FBDATAINIT, /* Fallback upon data without DSS at the beginning on initial subflow */
-+ MPTCP_MIB_REMADDRSUB, /* Remove subflow due to REMOVE_ADDR */
-+ MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */
-+ MPTCP_MIB_JOINFALLBACK, /* Received MP_JOIN on session that has fallen back to reg. TCP */
-+ MPTCP_MIB_JOINSYNTX, /* Sent a SYN + MP_JOIN */
-+ MPTCP_MIB_JOINSYNRX, /* Received a SYN + MP_JOIN */
-+ MPTCP_MIB_JOINSYNACKRX, /* Received a SYN/ACK + MP_JOIN */
-+ MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */
-+ MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */
-+ MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
-+ MPTCP_MIB_JOINACKFAIL, /* Third ACK on new subflow did not contain an MP_JOIN */
-+ MPTCP_MIB_JOINACKRTO, /* Retransmission timer for third ACK + MP_JOIN timed out */
-+ MPTCP_MIB_JOINACKRXMIT, /* Retransmitted an ACK + MP_JOIN */
-+ MPTCP_MIB_NODSSWINDOW, /* Received too many packets without a DSS-option */
-+ MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
-+ MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
-+ MPTCP_MIB_DSSTCPMISMATCH, /* DSS-mapping did not map with TCP's sequence numbers */
-+ MPTCP_MIB_DSSTRIMHEAD, /* Trimmed segment at the head (coalescing middlebox) */
-+ MPTCP_MIB_DSSSPLITTAIL, /* Trimmed segment at the tail (coalescing middlebox) */
-+ MPTCP_MIB_PURGEOLD, /* Removed old skb from the rcv-queue due to missing DSS-mapping */
-+ MPTCP_MIB_ADDADDRRX, /* Received an ADD_ADDR */
-+ MPTCP_MIB_ADDADDRTX, /* Sent an ADD_ADDR */
-+ MPTCP_MIB_REMADDRRX, /* Received a REMOVE_ADDR */
-+ MPTCP_MIB_REMADDRTX, /* Sent a REMOVE_ADDR */
-+ MPTCP_MIB_JOINALTERNATEPORT, /* Established a subflow on a different destination port-number */
-+ MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */
-+ __MPTCP_MIB_MAX
-+};
-+
-+#define MPTCP_MIB_MAX __MPTCP_MIB_MAX
-+struct mptcp_mib {
-+ unsigned long mibs[MPTCP_MIB_MAX];
-+};
-+
-+extern struct lock_class_key meta_key;
-+extern char *meta_key_name;
-+extern struct lock_class_key meta_slock_key;
-+extern char *meta_slock_key_name;
-+
-+extern siphash_key_t mptcp_secret;
-+
-+/* This is needed to ensure that two subsequent key/nonce-generations result in
-+ * different keys/nonces if the IPs and ports are the same.
-+ */
-+extern u32 mptcp_seed;
-+
-+extern struct mptcp_hashtable mptcp_tk_htable;
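The MIB counters enumerated above are bumped through the MPTCP_INC_STATS()/MPTCP_DEC_STATS() macros defined earlier. A representative, purely illustrative call site, as it could look in the receive path when a SYN + MP_JOIN arrives:

	MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNRX);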
-+
-+/* Request-sockets can be hashed in the tk_htb for collision-detection or in
-+ * the regular htb for join-connections. We need to define different NULLS
-+ * values so that we can correctly detect a request-socket that has been
-+ * recycled. See also c25eb3bfb9729.
-+ */
-+#define MPTCP_REQSK_NULLS_BASE (1U << 29)
-+
-+
-+void mptcp_data_ready(struct sock *sk);
-+void mptcp_write_space(struct sock *sk);
-+
-+void mptcp_add_meta_ofo_queue(const struct sock *meta_sk, struct sk_buff *skb,
-+ struct sock *sk);
-+void mptcp_cleanup_rbuf(struct sock *meta_sk, int copied);
-+int mptcp_add_sock(struct sock *meta_sk, struct sock *sk, u8 loc_id, u8 rem_id,
-+ gfp_t flags);
-+void mptcp_del_sock(struct sock *sk);
-+void mptcp_update_metasocket(const struct sock *meta_sk);
-+void mptcp_reinject_data(struct sock *orig_sk, int clone_it);
-+void mptcp_update_sndbuf(const struct tcp_sock *tp);
-+void mptcp_send_fin(struct sock *meta_sk);
-+void mptcp_send_active_reset(struct sock *meta_sk, gfp_t priority);
-+bool mptcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
-+ int push_one, gfp_t gfp);
-+void tcp_parse_mptcp_options(const struct sk_buff *skb,
-+ struct mptcp_options_received *mopt);
-+void mptcp_parse_options(const uint8_t *ptr, int opsize,
-+ struct mptcp_options_received *mopt,
-+ const struct sk_buff *skb,
-+ struct tcp_sock *tp);
-+void mptcp_syn_options(const struct sock *sk, struct tcp_out_options *opts,
-+ unsigned *remaining);
-+void mptcp_synack_options(struct request_sock *req,
-+ struct tcp_out_options *opts,
-+ unsigned *remaining);
-+void mptcp_established_options(struct sock *sk, struct sk_buff *skb,
-+ struct tcp_out_options *opts, unsigned *size);
-+void mptcp_options_write(__be32 *ptr, struct tcp_sock *tp,
-+ const struct tcp_out_options *opts,
-+ struct sk_buff *skb);
-+void mptcp_close(struct sock *meta_sk, long timeout);
-+bool mptcp_doit(struct sock *sk);
-+int mptcp_create_master_sk(struct sock *meta_sk, __u64 remote_key,
-+ int rem_key_set, __u8 mptcp_ver, u32 window);
-+int mptcp_check_req_fastopen(struct sock *child, struct request_sock *req);
-+int mptcp_check_req_master(struct sock *sk, struct sock *child,
-+ struct request_sock *req, const struct sk_buff *skb,
-+ const struct mptcp_options_received *mopt,
-+ int drop, u32 tsoff);
-+struct sock *mptcp_check_req_child(struct sock *meta_sk,
-+ struct sock *child,
-+ struct request_sock *req,
-+ struct sk_buff *skb,
-+ const struct mptcp_options_received *mopt);
-+u32 __mptcp_select_window(struct sock *sk);
-+void mptcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
-+ __u32 *rcv_wnd, __u32 *window_clamp,
-+ int wscale_ok, __u8 *rcv_wscale,
-+ __u32 init_rcv_wnd);
-+unsigned int mptcp_current_mss(struct sock *meta_sk);
-+void mptcp_hmac(u8 ver, const u8 *key_1, const u8 *key_2, u8 *hash_out,
-+ int arg_num, ...);
-+void mptcp_clean_rtx_infinite(const struct sk_buff *skb, struct sock *sk);
-+void mptcp_fin(struct sock *meta_sk);
-+void mptcp_meta_retransmit_timer(struct sock *meta_sk);
-+void mptcp_sub_retransmit_timer(struct sock *sk);
-+int mptcp_write_wakeup(struct sock *meta_sk, int mib);
-+void mptcp_sub_close_wq(struct work_struct *work);
-+void mptcp_sub_close(struct sock *sk, unsigned long delay);
-+struct sock *mptcp_select_ack_sock(const struct sock *meta_sk);
-+void mptcp_prepare_for_backlog(struct sock *sk, struct sk_buff *skb);
-+void mptcp_initialize_recv_vars(struct tcp_sock *meta_tp, struct mptcp_cb *mpcb,
-+ __u64 remote_key);
-+int mptcp_backlog_rcv(struct sock *meta_sk, struct sk_buff *skb);
-+void mptcp_ack_handler(struct timer_list *t);
-+bool mptcp_check_rtt(const struct tcp_sock *tp, int time);
-+int mptcp_check_snd_buf(const struct tcp_sock *tp);
-+bool mptcp_handle_options(struct sock *sk, const struct tcphdr *th,
-+ const struct sk_buff *skb);
-+void __init mptcp_init(void);
-+void mptcp_destroy_sock(struct sock *sk);
-+int mptcp_rcv_synsent_state_process(struct sock *sk, struct sock **skptr,
-+ const struct sk_buff *skb,
-+ const struct mptcp_options_received *mopt);
-+unsigned int mptcp_xmit_size_goal(const struct sock *meta_sk, u32 mss_now,
-+ int large_allowed);
-+int mptcp_init_tw_sock(struct sock *sk, struct tcp_timewait_sock *tw);
-+void mptcp_twsk_destructor(struct tcp_timewait_sock *tw);
-+void mptcp_time_wait(struct sock *sk, int state, int timeo);
-+void mptcp_disconnect(struct sock *meta_sk);
-+bool mptcp_should_expand_sndbuf(const struct sock *sk);
-+int mptcp_retransmit_skb(struct sock *meta_sk, struct sk_buff *skb);
-+void mptcp_tsq_flags(struct sock *sk);
-+void mptcp_tsq_sub_deferred(struct sock *meta_sk);
-+struct mp_join *mptcp_find_join(const struct sk_buff *skb);
-+void mptcp_hash_remove_bh(struct tcp_sock *meta_tp);
-+struct sock *mptcp_hash_find(const struct net *net, const u32 token);
-+int mptcp_lookup_join(struct sk_buff *skb, struct inet_timewait_sock *tw);
-+int mptcp_do_join_short(struct sk_buff *skb,
-+ const struct mptcp_options_received *mopt,
-+ struct net *net);
-+void mptcp_reqsk_destructor(struct request_sock *req);
-+void mptcp_connect_init(struct sock *sk);
-+void mptcp_sub_force_close(struct sock *sk);
-+int mptcp_sub_len_remove_addr_align(u16 bitfield);
-+void mptcp_join_reqsk_init(const struct mptcp_cb *mpcb,
-+ const struct request_sock *req,
-+ struct sk_buff *skb);
-+void mptcp_reqsk_init(struct request_sock *req, const struct sock *sk,
-+ const struct sk_buff *skb, bool want_cookie);
-+int mptcp_conn_request(struct sock *sk, struct sk_buff *skb);
-+void mptcp_enable_sock(struct sock *sk);
-+void mptcp_disable_sock(struct sock *sk);
-+void mptcp_disable_static_key(void);
-+void mptcp_cookies_reqsk_init(struct request_sock *req,
-+ struct mptcp_options_received *mopt,
-+ struct sk_buff *skb);
-+void mptcp_mpcb_put(struct mptcp_cb *mpcb);
-+int mptcp_finish_handshake(struct sock *child, struct sk_buff *skb);
-+int mptcp_get_info(const struct sock *meta_sk, char __user *optval, int optlen);
-+void mptcp_clear_sk(struct sock *sk, int size);
-+
-+/* MPTCP-path-manager registration/initialization functions */
-+int mptcp_register_path_manager(struct mptcp_pm_ops *pm);
-+void mptcp_unregister_path_manager(struct mptcp_pm_ops *pm);
-+void mptcp_init_path_manager(struct mptcp_cb *mpcb);
-+void mptcp_cleanup_path_manager(struct mptcp_cb *mpcb);
-+void mptcp_fallback_default(struct mptcp_cb *mpcb);
-+void mptcp_get_default_path_manager(char *name);
-+int mptcp_set_scheduler(struct sock *sk, const char *name);
-+int mptcp_set_path_manager(struct sock *sk, const char *name);
-+int mptcp_set_default_path_manager(const char *name);
-+extern struct mptcp_pm_ops mptcp_pm_default;
-+
-+/* MPTCP-scheduler registration/initialization functions */
-+int mptcp_register_scheduler(struct mptcp_sched_ops *sched);
-+void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched);
-+void mptcp_init_scheduler(struct mptcp_cb *mpcb);
-+void mptcp_cleanup_scheduler(struct mptcp_cb *mpcb);
-+void mptcp_get_default_scheduler(char *name);
-+int mptcp_set_default_scheduler(const char *name);
-+bool mptcp_is_available(struct sock *sk, const struct sk_buff *skb,
-+ bool zero_wnd_test);
-+bool mptcp_is_def_unavailable(struct sock *sk);
-+bool subflow_is_active(const struct tcp_sock *tp);
-+bool subflow_is_backup(const struct tcp_sock *tp);
-+struct sock *get_available_subflow(struct sock *meta_sk, struct sk_buff *skb,
-+ bool zero_wnd_test);
-+struct sk_buff *mptcp_next_segment(struct sock *meta_sk,
-+ int *reinject,
-+ struct sock **subsk,
-+ unsigned int *limit);
-+extern struct mptcp_sched_ops mptcp_sched_default;
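The registration hooks above are the entry points an out-of-tree scheduler module would use. A minimal, hypothetical module skeleton (my_sched is a placeholder; the members of struct mptcp_sched_ops are defined elsewhere in this patch):

	static struct mptcp_sched_ops my_sched; /* ops filled in elsewhere */

	static int __init my_sched_init(void)
	{
		return mptcp_register_scheduler(&my_sched);
	}

	static void __exit my_sched_exit(void)
	{
		mptcp_unregister_scheduler(&my_sched);
	}

	module_init(my_sched_init);
	module_exit(my_sched_exit);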
-+
-+/* Initializes function-pointers and MPTCP-flags */
-+static inline void mptcp_init_tcp_sock(struct sock *sk)
-+{
-+ if (!mptcp_init_failed && sysctl_mptcp_enabled == MPTCP_ENABLE)
-+ mptcp_enable_sock(sk);
-+}
-+
-+static inline void mptcp_init_listen(struct sock *sk)
-+{
-+ if (!mptcp_init_failed &&
-+ sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP &&
-+#ifdef CONFIG_TCP_MD5SIG
-+ !rcu_access_pointer(tcp_sk(sk)->md5sig_info) &&
-+#endif
-+ sysctl_mptcp_enabled & MPTCP_ENABLE &&
-+ !(sysctl_mptcp_enabled & MPTCP_SERVER_DISABLE))
-+ mptcp_enable_sock(sk);
-+}
-+
-+static inline void mptcp_init_connect(struct sock *sk)
-+{
-+ if (!mptcp_init_failed &&
-+ sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP &&
-+#ifdef CONFIG_TCP_MD5SIG
-+ !rcu_access_pointer(tcp_sk(sk)->md5sig_info) &&
-+#endif
-+ sysctl_mptcp_enabled & MPTCP_ENABLE &&
-+ !(sysctl_mptcp_enabled & MPTCP_CLIENT_DISABLE))
-+ mptcp_enable_sock(sk);
-+}
-+
-+static inline int mptcp_pi_to_flag(int pi)
-+{
-+ return 1 << (pi - 1);
-+}
-+
-+static inline
-+struct mptcp_request_sock *mptcp_rsk(const struct request_sock *req)
-+{
-+ return (struct mptcp_request_sock *)req;
-+}
-+
-+static inline
-+struct request_sock *rev_mptcp_rsk(const struct mptcp_request_sock *req)
-+{
-+ return (struct request_sock *)req;
-+}
-+
-+static inline bool mptcp_can_sendpage(struct sock *sk)
-+{
-+ struct mptcp_tcp_sock *mptcp;
-+
-+ if (tcp_sk(sk)->mpcb->dss_csum)
-+ return false;
-+
-+ mptcp_for_each_sub(tcp_sk(sk)->mpcb, mptcp) {
-+ struct sock *sk_it = mptcp_to_sock(mptcp);
-+
-+ if (!(sk_it->sk_route_caps & NETIF_F_SG))
-+ return false;
-+ }
-+
-+ return true;
-+}
-+
-+static inline void mptcp_push_pending_frames(struct sock *meta_sk)
-+{
-+ /* We check packets out and send-head here. TCP only checks the
-+ * send-head. But, MPTCP also checks packets_out, as this is an
-+ * indication that we might want to do opportunistic reinjection.
-+ */
-+ if (tcp_sk(meta_sk)->packets_out || tcp_send_head(meta_sk)) {
-+ struct tcp_sock *tp = tcp_sk(meta_sk);
-+
-+ /* We don't care about the MSS, because it will be set in
-+ * mptcp_write_xmit.
-+ */
-+ __tcp_push_pending_frames(meta_sk, 0, tp->nonagle);
-+ }
-+}
-+
-+static inline void mptcp_send_reset(struct sock *sk)
-+{
-+ if (tcp_need_reset(sk->sk_state))
-+ tcp_sk(sk)->ops->send_active_reset(sk, GFP_ATOMIC);
-+ mptcp_sub_force_close(sk);
-+}
-+
-+static inline void mptcp_sub_force_close_all(struct mptcp_cb *mpcb,
-+ struct sock *except)
-+{
-+ struct mptcp_tcp_sock *mptcp;
-+ struct hlist_node *tmp;
-+
-+ mptcp_for_each_sub_safe(mpcb, mptcp, tmp) {
-+ struct sock *sk_it = mptcp_to_sock(mptcp);
-+
-+ if (sk_it != except)
-+ mptcp_send_reset(sk_it);
-+ }
-+}
-+
-+static inline bool mptcp_is_data_mpcapable(const struct sk_buff *skb)
-+{
-+ return TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_MPC_DATA;
-+}
-+
-+static inline bool mptcp_is_data_seq(const struct sk_buff *skb)
-+{
-+ return TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_SEQ;
-+}
-+
-+static inline bool mptcp_is_data_fin(const struct sk_buff *skb)
-+{
-+ return TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_FIN;
-+}
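The three gating helpers above give sysctl_mptcp_enabled its bitmask semantics: MPTCP_ENABLE switches the feature on, while MPTCP_CLIENT_DISABLE and MPTCP_SERVER_DISABLE veto it for outgoing connections and for listeners respectively. A worked, illustrative evaluation:

	int enabled = MPTCP_ENABLE | MPTCP_CLIENT_DISABLE; /* hypothetical sysctl value 5 */
	bool listener_gets_mptcp = (enabled & MPTCP_ENABLE) &&
				   !(enabled & MPTCP_SERVER_DISABLE); /* true */
	bool connect_gets_mptcp = (enabled & MPTCP_ENABLE) &&
				  !(enabled & MPTCP_CLIENT_DISABLE); /* false */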
-+
-+/* Is it a data-fin while in infinite mapping mode?
-+ * In infinite mode, a subflow-fin is in fact a data-fin.
-+ */
-+static inline bool mptcp_is_data_fin2(const struct sk_buff *skb,
-+ const struct tcp_sock *tp)
-+{
-+ return mptcp_is_data_fin(skb) ||
-+ (tp->mpcb->infinite_mapping_rcv &&
-+ (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN));
-+}
-+
-+static inline u8 mptcp_get_64_bit(u64 data_seq, struct mptcp_cb *mpcb)
-+{
-+ u64 data_seq_high = (u32)(data_seq >> 32);
-+
-+ if (mpcb->rcv_high_order[0] == data_seq_high)
-+ return 0;
-+ else if (mpcb->rcv_high_order[1] == data_seq_high)
-+ return MPTCPHDR_SEQ64_INDEX;
-+ else
-+ return MPTCPHDR_SEQ64_OFO;
-+}
-+
-+/* Sets the data_seq and returns pointer to the in-skb field of the data_seq.
-+ * If the packet has a 64-bit dseq, the pointer points to the last 32 bits.
-+ */
-+static inline __u32 *mptcp_skb_set_data_seq(const struct sk_buff *skb,
-+ u32 *data_seq,
-+ struct mptcp_cb *mpcb)
-+{
-+ __u32 *ptr = (__u32 *)(skb_transport_header(skb) + TCP_SKB_CB(skb)->dss_off);
-+
-+ if (TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_SEQ64_SET) {
-+ u64 data_seq64 = get_unaligned_be64(ptr);
-+
-+ if (mpcb)
-+ TCP_SKB_CB(skb)->mptcp_flags |= mptcp_get_64_bit(data_seq64, mpcb);
-+
-+ *data_seq = (u32)data_seq64;
-+ ptr++;
-+ } else {
-+ *data_seq = get_unaligned_be32(ptr);
-+ }
-+
-+ return ptr;
-+}
-+
-+static inline struct sock *mptcp_meta_sk(const struct sock *sk)
-+{
-+ return tcp_sk(sk)->meta_sk;
-+}
-+
-+static inline struct tcp_sock *mptcp_meta_tp(const struct tcp_sock *tp)
-+{
-+ return tcp_sk(tp->meta_sk);
-+}
-+
-+static inline int is_meta_tp(const struct tcp_sock *tp)
-+{
-+ return tp->mpcb && mptcp_meta_tp(tp) == tp;
-+}
-+
-+static inline int is_meta_sk(const struct sock *sk)
-+{
-+ return sk->sk_state != TCP_NEW_SYN_RECV &&
-+ sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP &&
-+ mptcp(tcp_sk(sk)) && mptcp_meta_sk(sk) == sk;
-+}
-+
-+static inline int is_master_tp(const struct tcp_sock *tp)
-+{
-+ return !mptcp(tp) || (!tp->mptcp->slave_sk && !is_meta_tp(tp));
-+}
-+
-+static inline void mptcp_init_mp_opt(struct mptcp_options_received *mopt)
-+{
-+ mopt->saw_mpc = 0;
-+ mopt->dss_csum = 0;
-+ mopt->drop_me = 0;
-+
-+ mopt->is_mp_join = 0;
-+ mopt->join_ack = 0;
-+
-+ mopt->saw_low_prio = 0;
-+ mopt->low_prio = 0;
-+
-+ mopt->saw_add_addr = 0;
-+ mopt->more_add_addr = 0;
-+
-+ mopt->saw_rem_addr = 0;
-+ mopt->more_rem_addr = 0;
-+
-+ mopt->mp_fail = 0;
-+ mopt->mp_fclose = 0;
-+}
-+
-+static inline void mptcp_reset_mopt(struct tcp_sock *tp)
-+{
-+ struct mptcp_options_received *mopt = &tp->mptcp->rx_opt;
-+
-+ mopt->saw_low_prio = 0;
-+ mopt->saw_add_addr = 0;
-+ mopt->more_add_addr = 0;
-+ mopt->saw_rem_addr = 0;
-+ mopt->more_rem_addr = 0;
-+ mopt->join_ack = 0;
-+ mopt->mp_fail = 0;
-+ mopt->mp_fclose = 0;
-+}
-+
-+static inline __be32 mptcp_get_highorder_sndbits(const struct sk_buff *skb,
-+ const struct mptcp_cb *mpcb)
-+{
-+ return htonl(mpcb->snd_high_order[(TCP_SKB_CB(skb)->mptcp_flags &
-+ MPTCPHDR_SEQ64_INDEX) ? 1 : 0]);
-+}
-+
-+static inline u64 mptcp_get_data_seq_64(const struct mptcp_cb *mpcb, int index,
-+ u32 data_seq_32)
-+{
-+ return ((u64)mpcb->rcv_high_order[index] << 32) | data_seq_32;
-+}
-+
-+static inline u64 mptcp_get_rcv_nxt_64(const struct tcp_sock *meta_tp)
-+{
-+ struct mptcp_cb *mpcb = meta_tp->mpcb;
-+ return mptcp_get_data_seq_64(mpcb, mpcb->rcv_hiseq_index,
-+ meta_tp->rcv_nxt);
-+}
-+
-+static inline void mptcp_check_sndseq_wrap(struct tcp_sock *meta_tp, int inc)
-+{
-+ if (unlikely(meta_tp->snd_nxt > meta_tp->snd_nxt + inc)) {
-+ struct mptcp_cb *mpcb = meta_tp->mpcb;
-+ mpcb->snd_hiseq_index = mpcb->snd_hiseq_index ? 0 : 1;
-+ mpcb->snd_high_order[mpcb->snd_hiseq_index] += 2;
-+ }
-+}
-+
-+static inline void mptcp_check_rcvseq_wrap(struct tcp_sock *meta_tp,
-+ u32 old_rcv_nxt)
-+{
-+ if (unlikely(old_rcv_nxt > meta_tp->rcv_nxt)) {
-+ struct mptcp_cb *mpcb = meta_tp->mpcb;
-+ mpcb->rcv_high_order[mpcb->rcv_hiseq_index] += 2;
-+ mpcb->rcv_hiseq_index = mpcb->rcv_hiseq_index ? 0 : 1;
-+ }
-+}
-+
-+static inline int mptcp_sk_can_send(const struct sock *sk)
-+{
-+ return tcp_passive_fastopen(sk) ||
-+ ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
-+ !tcp_sk(sk)->mptcp->pre_established);
-+}
-+
-+static inline int mptcp_sk_can_recv(const struct sock *sk)
-+{
-+ return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2);
-+}
-+
-+static inline int mptcp_sk_can_send_ack(const struct sock *sk)
-+{
-+ return !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV |
-+ TCPF_CLOSE | TCPF_LISTEN)) &&
-+ !tcp_sk(sk)->mptcp->pre_established;
-+}
-+
-+static inline bool mptcp_can_sg(const struct sock *meta_sk)
-+{
-+ struct mptcp_tcp_sock *mptcp;
-+
-+ if (tcp_sk(meta_sk)->mpcb->dss_csum)
-+ return false;
-+
-+ mptcp_for_each_sub(tcp_sk(meta_sk)->mpcb, mptcp) {
-+ struct sock *sk = mptcp_to_sock(mptcp);
-+
-+ if (!mptcp_sk_can_send(sk))
-+ continue;
-+ if (!(sk->sk_route_caps & NETIF_F_SG))
-+ return false;
-+ }
-+ return true;
-+}
-+
-+static inline void mptcp_set_rto(struct sock *sk)
-+{
-+ struct inet_connection_sock *micsk = inet_csk(mptcp_meta_sk(sk));
-+ struct tcp_sock *tp = tcp_sk(sk);
-+ struct mptcp_tcp_sock *mptcp;
-+ __u32 max_rto = 0;
-+
-+ /* We are in recovery-phase on the MPTCP-level. Do not update the
-+ * RTO, because this would kill exponential backoff.
-+ */
-+ if (micsk->icsk_retransmits)
-+ return;
-+
-+ mptcp_for_each_sub(tp->mpcb, mptcp) {
-+ struct sock *sk_it = mptcp_to_sock(mptcp);
-+
-+ if ((mptcp_sk_can_send(sk_it) || sk_it->sk_state == TCP_SYN_RECV) &&
-+ inet_csk(sk_it)->icsk_retransmits == 0 &&
-+ inet_csk(sk_it)->icsk_backoff == 0 &&
-+ inet_csk(sk_it)->icsk_rto > max_rto)
-+ max_rto = inet_csk(sk_it)->icsk_rto;
-+ }
-+ if (max_rto) {
-+ micsk->icsk_rto = max_rto << 1;
-+
-+ /* A successful rto-measurement - reset backoff counter */
-+ micsk->icsk_backoff = 0;
-+ }
-+}
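The meta-level RTO above is taken from the slowest healthy subflow and then doubled. With illustrative numbers:

	/* two subflows, icsk_rto of 200 ms and 350 ms, neither backing off nor
	 * retransmitting: max_rto = 350 ms, so the meta-socket ends up with
	 * icsk_rto = 350 ms << 1 = 700 ms
	 */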
-+
-+static inline void mptcp_sub_close_passive(struct sock *sk)
-+{
-+ struct sock *meta_sk = mptcp_meta_sk(sk);
-+ struct tcp_sock *tp = tcp_sk(sk), *meta_tp = tcp_sk(meta_sk);
-+
-+ /* Only close if the app did a send-shutdown (passive close), and we
-+ * received the data-ack of the data-fin.
-+ */
-+ if (tp->mpcb->passive_close && meta_tp->snd_una == meta_tp->write_seq)
-+ mptcp_sub_close(sk, 0);
-+}
-+
-+static inline void mptcp_fallback_close(struct mptcp_cb *mpcb,
-+ struct sock *except)
-+{
-+ mptcp_sub_force_close_all(mpcb, except);
-+
-+ if (mpcb->pm_ops->close_session)
-+ mpcb->pm_ops->close_session(mptcp_meta_sk(except));
-+}
-+
-+static inline bool mptcp_fallback_infinite(struct sock *sk, int flag)
-+{
-+ struct tcp_sock *tp = tcp_sk(sk);
-+ struct mptcp_cb *mpcb = tp->mpcb;
-+
-+ /* If data has been acknowledged on the meta-level, fully_established
-+ * will have been set before and thus we will not fall back to infinite
-+ * mapping.
-+ */
-+ if (likely(tp->mptcp->fully_established))
-+ return false;
-+
-+ if (!(flag & MPTCP_FLAG_DATA_ACKED))
-+ return false;
-+
-+ /* Don't fallback twice ;) */
-+ if (mpcb->infinite_mapping_snd)
-+ return false;
-+
-+ pr_debug("%s %#x will fallback - pi %d, src %pI4:%u dst %pI4:%u rcv_nxt %u from %pS\n",
-+ __func__, mpcb->mptcp_loc_token, tp->mptcp->path_index,
-+ &inet_sk(sk)->inet_saddr, ntohs(inet_sk(sk)->inet_sport),
-+ &inet_sk(sk)->inet_daddr, ntohs(inet_sk(sk)->inet_dport),
-+ tp->rcv_nxt, __builtin_return_address(0));
-+ if (!is_master_tp(tp)) {
-+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_FBACKSUB);
-+ return true;
-+ }
-+
-+ mpcb->infinite_mapping_snd = 1;
-+ mpcb->infinite_mapping_rcv = 1;
-+ mpcb->infinite_rcv_seq = mptcp_get_rcv_nxt_64(mptcp_meta_tp(tp));
-+ tp->mptcp->fully_established = 1;
-+
-+ mptcp_fallback_close(mpcb, sk);
-+
-+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_FBACKINIT);
-+
-+ return false;
-+}
-+
-+static inline bool mptcp_v6_is_v4_mapped(const struct sock *sk)
-+{
-+ return sk->sk_family == AF_INET6 &&
-+ ipv6_addr_type(&inet6_sk(sk)->saddr) == IPV6_ADDR_MAPPED;
-+}
-+
-+/* We are in, or are about to enter, infinite mapping mode */
-+static inline bool mptcp_in_infinite_mapping_weak(const struct mptcp_cb *mpcb)
-+{
-+ return mpcb->infinite_mapping_rcv ||
-+ mpcb->infinite_mapping_snd ||
-+ mpcb->send_infinite_mapping;
-+}
-+
-+static inline bool mptcp_can_new_subflow(const struct sock *meta_sk)
-+{
-+ /* Has been removed from the tk-table. Thus, no new subflows.
-+ *
-+ * Check for close-state is necessary, because we may have been closed
-+ * without passing by mptcp_close().
-+ *
-+ * When falling back, no new subflows are allowed either.
-+ */
-+ return meta_sk->sk_state != TCP_CLOSE &&
-+ tcp_sk(meta_sk)->inside_tk_table &&
-+ !tcp_sk(meta_sk)->mpcb->infinite_mapping_rcv &&
-+ !tcp_sk(meta_sk)->mpcb->send_infinite_mapping;
-+}
-+
-+static inline int mptcp_subflow_count(const struct mptcp_cb *mpcb)
-+{
-+ struct mptcp_tcp_sock *mptcp;
-+ int i = 0;
-+
-+ mptcp_for_each_sub(mpcb, mptcp)
-+ i++;
-+
-+ return i;
-+}
-+
-+/* TCP and MPTCP mpc flag-depending functions */
-+u16 mptcp_select_window(struct sock *sk);
-+void mptcp_tcp_set_rto(struct sock *sk);
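Before the CONFIG_MPTCP=n stubs below, a small sketch of how the 64-bit helpers defined earlier compose (illustrative only; meta_seq64 is a hypothetical helper name, not part of the patch):

	/* map a 32-bit DSS value back into the 64-bit meta sequence space */
	static inline u64 meta_seq64(const struct tcp_sock *meta_tp, u32 seq32)
	{
		const struct mptcp_cb *mpcb = meta_tp->mpcb;

		return mptcp_get_data_seq_64(mpcb, mpcb->rcv_hiseq_index, seq32);
	}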
-+
-+#else /* CONFIG_MPTCP */
-+#define mptcp_debug(fmt, args...) \
-+ do { \
-+ } while (0)
-+
-+static inline struct sock *mptcp_to_sock(const struct mptcp_tcp_sock *mptcp)
-+{
-+ return NULL;
-+}
-+
-+#define mptcp_for_each_sub(__mpcb, __mptcp) \
-+ if (0)
-+
-+#define MPTCP_INC_STATS(net, field) \
-+ do { \
-+ } while(0)
-+
-+#define MPTCP_DEC_STATS(net, field) \
-+ do { \
-+ } while(0)
-+
-+static inline bool mptcp_is_data_fin(const struct sk_buff *skb)
-+{
-+ return false;
-+}
-+static inline bool mptcp_is_data_seq(const struct sk_buff *skb)
-+{
-+ return false;
-+}
-+static inline struct sock *mptcp_meta_sk(const struct sock *sk)
-+{
-+ return NULL;
-+}
-+static inline struct tcp_sock *mptcp_meta_tp(const struct tcp_sock *tp)
-+{
-+ return NULL;
-+}
-+static inline int is_meta_sk(const struct sock *sk)
-+{
-+ return 0;
-+}
-+static inline int is_master_tp(const struct tcp_sock *tp)
-+{
-+ return 0;
-+}
-+static inline void mptcp_del_sock(const struct sock *sk) {}
-+static inline void mptcp_update_metasocket(const struct sock *meta_sk) {}
-+static inline void mptcp_reinject_data(struct sock *orig_sk, int clone_it) {}
-+static inline void mptcp_update_sndbuf(const struct tcp_sock *tp) {}
-+static inline void mptcp_clean_rtx_infinite(const struct sk_buff *skb,
-+ const struct sock *sk) {}
-+static inline void mptcp_sub_close(struct sock *sk, unsigned long delay) {}
-+static inline void mptcp_set_rto(const struct sock *sk) {}
-+static inline void mptcp_send_fin(const struct sock *meta_sk) {}
-+static inline void mptcp_parse_options(const uint8_t *ptr, const int opsize,
-+ struct mptcp_options_received *mopt,
-+ const struct sk_buff *skb,
-+ const struct tcp_sock *tp) {}
-+static inline void mptcp_syn_options(const struct sock *sk,
-+ struct tcp_out_options *opts,
-+ unsigned *remaining) {}
-+static inline void mptcp_synack_options(struct request_sock *req,
-+ struct tcp_out_options *opts,
-+ unsigned *remaining) {}
-+
-+static inline void mptcp_established_options(struct sock *sk,
-+ struct sk_buff *skb,
-+ struct tcp_out_options *opts,
-+ unsigned *size) {}
-+static inline void mptcp_options_write(__be32 *ptr, struct tcp_sock *tp,
-+ const struct tcp_out_options *opts,
-+ struct sk_buff *skb) {}
-+static inline void mptcp_close(struct sock *meta_sk, long timeout) {}
-+static inline bool mptcp_doit(struct sock *sk)
-+{
-+ return false;
-+}
-+static inline int mptcp_check_req_fastopen(struct sock *child,
-+ struct request_sock *req)
-+{
-+ return 1;
-+}
-+static inline int mptcp_check_req_master(const struct sock *sk,
-+ const struct sock *child,
-+ const struct request_sock *req,
-+ const struct sk_buff *skb,
-+ const struct mptcp_options_received *mopt,
-+ int drop,
-+ u32 tsoff)
-+{
-+ return 1;
-+}
-+static inline struct sock *mptcp_check_req_child(const struct sock *meta_sk,
-+ const struct sock *child,
-+ const struct request_sock *req,
-+ struct sk_buff *skb,
-+ const struct mptcp_options_received *mopt)
-+{
-+ return NULL;
-+}
-+static inline unsigned int mptcp_current_mss(struct sock *meta_sk)
-+{
-+ return 0;
-+}
-+static inline void mptcp_sub_close_passive(struct sock *sk) {}
-+static inline bool mptcp_fallback_infinite(const struct sock *sk, int flag)
-+{
-+ return false;
-+}
-+static inline void mptcp_init_mp_opt(const struct mptcp_options_received *mopt) {}
-+static inline void mptcp_prepare_for_backlog(struct sock *sk, struct sk_buff *skb) {}
-+static inline bool mptcp_check_rtt(const struct tcp_sock *tp, int time)
-+{
-+ return false;
-+}
-+static inline int mptcp_check_snd_buf(const struct tcp_sock *tp)
-+{
-+ return 0;
-+}
-+static inline void mptcp_push_pending_frames(struct sock *meta_sk) {}
-+static inline void mptcp_send_reset(const struct sock *sk) {}
-+static inline void mptcp_sub_force_close_all(struct mptcp_cb *mpcb,
-+ struct sock *except) {}
-+static inline bool mptcp_handle_options(struct sock *sk,
-+ const struct tcphdr *th,
-+ struct sk_buff *skb)
-+{
-+ return false;
-+}
-+static inline void mptcp_reset_mopt(struct tcp_sock *tp) {}
-+static inline void __init mptcp_init(void) {}
-+static inline bool mptcp_can_sg(const struct sock *meta_sk)
-+{
-+ return false;
-+}
-+static inline unsigned int mptcp_xmit_size_goal(const struct sock *meta_sk,
-+ u32 mss_now, int large_allowed)
-+{
-+ return 0;
-+}
-+static inline void mptcp_destroy_sock(struct sock *sk) {}
-+static inline int mptcp_rcv_synsent_state_process(struct sock *sk,
-+ struct sock **skptr,
-+ struct sk_buff *skb,
-+ const struct mptcp_options_received *mopt)
-+{
-+ return 0;
-+}
-+static inline bool mptcp_can_sendpage(struct sock *sk)
-+{
-+ return false;
-+}
-+static inline int mptcp_init_tw_sock(struct sock *sk,
-+ struct tcp_timewait_sock *tw)
-+{
-+ return 0;
-+}
-+static inline void mptcp_twsk_destructor(struct tcp_timewait_sock *tw) {}
-+static inline void mptcp_disconnect(struct sock *meta_sk) {}
-+static inline void mptcp_tsq_flags(struct sock *sk) {}
-+static inline void mptcp_tsq_sub_deferred(struct sock *meta_sk) {}
-+static inline void mptcp_hash_remove_bh(struct tcp_sock *meta_tp) {}
-+static inline void mptcp_remove_shortcuts(const struct mptcp_cb *mpcb,
-+ const struct sk_buff *skb) {}
-+static inline void mptcp_init_tcp_sock(struct sock *sk) {}
-+static inline void mptcp_init_listen(struct sock *sk) {}
-+static inline void mptcp_init_connect(struct sock *sk) {}
-+static inline void mptcp_disable_static_key(void) {}
-+static inline void mptcp_cookies_reqsk_init(struct request_sock *req,
-+ struct mptcp_options_received *mopt,
-+ struct sk_buff *skb) {}
-+static inline void mptcp_mpcb_put(struct mptcp_cb *mpcb) {}
-+static inline void mptcp_fin(struct sock *meta_sk) {}
-+static inline bool mptcp_in_infinite_mapping_weak(const struct mptcp_cb *mpcb)
-+{
-+ return false;
-+}
-+static inline bool mptcp_can_new_subflow(const struct sock *meta_sk)
-+{
-+ return false;
-+}
-+
-+#endif /* CONFIG_MPTCP */
-+
-+#endif /* _MPTCP_H */
-diff --git a/include/net/mptcp_v4.h b/include/net/mptcp_v4.h
-new file mode 100644
-index 000000000000..c58d42b11f6a
---- /dev/null
-+++ b/include/net/mptcp_v4.h
-@@ -0,0 +1,76 @@
-+/*
-+ * MPTCP implementation
-+ *
-+ * Initial Design & Implementation:
-+ * Sébastien Barré
-+ *
-+ * Current Maintainer & Author:
-+ * Christoph Paasch
-+ *
-+ * Additional authors:
-+ * Jaakko Korkeaniemi
-+ * Gregory Detal
-+ * Fabien Duchêne
-+ * Andreas Seelinger
-+ * Lavkesh Lahngir
-+ * Andreas Ripke
-+ * Vlad Dogaru
-+ * Octavian Purdila
-+ * John Ronan
-+ * Catalin Nicutar
-+ * Brandon Heller
-+ *
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License
-+ * as published by the Free Software Foundation; either version
-+ * 2 of the License, or (at your option) any later version.
-+ */
-+
-+#ifndef MPTCP_V4_H_
-+#define MPTCP_V4_H_
-+
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+
-+extern struct request_sock_ops mptcp_request_sock_ops;
-+extern const struct inet_connection_sock_af_ops mptcp_v4_specific;
-+extern struct tcp_request_sock_ops mptcp_request_sock_ipv4_ops;
-+extern struct tcp_request_sock_ops mptcp_join_request_sock_ipv4_ops;
-+
-+#ifdef CONFIG_MPTCP
-+
-+int mptcp_v4_do_rcv(struct sock *meta_sk, struct sk_buff *skb);
-+struct sock *mptcp_v4_search_req(const __be16 rport, const __be32 raddr,
-+ const __be32 laddr, const struct net *net);
-+int __mptcp_init4_subsockets(struct sock *meta_sk, const struct mptcp_loc4 *loc,
-+ __be16 sport, struct mptcp_rem4 *rem,
-+ struct sock **subsk);
-+int mptcp_pm_v4_init(void);
-+void mptcp_pm_v4_undo(void);
-+u32 mptcp_v4_get_nonce(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport);
-+u64 mptcp_v4_get_key(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
-+ u32 seed);
-+
-+static inline int mptcp_init4_subsockets(struct sock *meta_sk,
-+ const struct mptcp_loc4 *loc,
-+ struct mptcp_rem4 *rem)
-+{
-+ return __mptcp_init4_subsockets(meta_sk, loc, 0, rem, NULL);
-+}
-+
-+#else
-+
-+static inline int mptcp_v4_do_rcv(const struct sock *meta_sk,
-+ const struct sk_buff *skb)
-+{
-+ return 0;
-+}
-+
-+#endif /* CONFIG_MPTCP */
-+
-+#endif /* MPTCP_V4_H_ */
-diff --git a/include/net/mptcp_v6.h b/include/net/mptcp_v6.h
-new file mode 100644
-index 000000000000..93e8c87c2eb1
---- /dev/null
-+++ b/include/net/mptcp_v6.h
-@@ -0,0 +1,77 @@
-+/*
-+ * MPTCP implementation
-+ *
-+ * Initial Design & Implementation:
-+ * Sébastien Barré
-+ *
-+ * Current Maintainer & Author:
-+ * Jaakko Korkeaniemi
-+ *
-+ * Additional authors:
-+ * Jaakko Korkeaniemi
-+ * Gregory Detal
-+ * Fabien Duchêne
-+ * Andreas Seelinger
-+ * Lavkesh Lahngir
-+ * Andreas Ripke
-+ * Vlad Dogaru
-+ * Octavian Purdila
-+ * John Ronan
-+ * Catalin Nicutar
-+ * Brandon Heller
-+ *
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License
-+ * as published by the Free Software Foundation; either version
-+ * 2 of the License, or (at your option) any later version.
-+ */
-+
-+#ifndef _MPTCP_V6_H
-+#define _MPTCP_V6_H
-+
-+#include
-+#include
-+
-+#include
-+
-+
-+#ifdef CONFIG_MPTCP
-+extern const struct inet_connection_sock_af_ops mptcp_v6_mapped;
-+extern const struct inet_connection_sock_af_ops mptcp_v6_specific;
-+extern struct request_sock_ops mptcp6_request_sock_ops;
-+extern struct tcp_request_sock_ops mptcp_request_sock_ipv6_ops;
-+extern struct tcp_request_sock_ops mptcp_join_request_sock_ipv6_ops;
-+
-+int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb);
-+struct sock *mptcp_v6_search_req(const __be16 rport, const struct in6_addr *raddr,
-+ const struct in6_addr *laddr, const struct net *net);
-+int __mptcp_init6_subsockets(struct sock *meta_sk, const struct mptcp_loc6 *loc,
-+ __be16 sport, struct mptcp_rem6 *rem,
-+ struct sock **subsk);
-+int mptcp_pm_v6_init(void);
-+void mptcp_pm_v6_undo(void);
-+__u32 mptcp_v6_get_nonce(const __be32 *saddr, const __be32 *daddr,
-+ __be16 sport, __be16 dport);
-+u64 mptcp_v6_get_key(const __be32 *saddr, const __be32 *daddr,
-+ __be16 sport, __be16 dport, u32 seed);
-+
-+static inline int mptcp_init6_subsockets(struct sock *meta_sk,
-+ const struct mptcp_loc6 *loc,
-+ struct mptcp_rem6 *rem)
-+{
-+ return __mptcp_init6_subsockets(meta_sk, loc, 0, rem, NULL);
-+}
-+
-+#else /* CONFIG_MPTCP */
-+
-+#define mptcp_v6_mapped ipv6_mapped
-+
-+static inline int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb)
-+{
-+ return 0;
-+}
-+
-+#endif /* CONFIG_MPTCP */
-+
-+#endif /* _MPTCP_V6_H */
-diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
-index 167e390ac9d4..7233acfcdb4d 100644
---- a/include/net/net_namespace.h
-+++ b/include/net/net_namespace.h
-@@ -19,6 +19,7 @@
- #include
- #include
- #include
-+#include
- #include
- #include
- #include
-@@ -123,6 +124,9 @@ struct net {
- #if IS_ENABLED(CONFIG_IPV6)
- struct netns_ipv6 ipv6;
- #endif
-+#if IS_ENABLED(CONFIG_MPTCP)
-+ struct netns_mptcp mptcp;
-+#endif
- #if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
- struct netns_ieee802154_lowpan ieee802154_lowpan;
- #endif
-diff --git a/include/net/netns/mptcp.h b/include/net/netns/mptcp.h
-new file mode 100644
-index 000000000000..6680f3bbcfc8
---- /dev/null
-+++ b/include/net/netns/mptcp.h
-@@ -0,0 +1,52 @@
-+/*
-+ * MPTCP implementation - MPTCP namespace
-+ *
-+ * Initial Design & Implementation:
-+ * Sébastien Barré
-+ *
-+ * Current Maintainer:
-+ * Christoph Paasch
-+ *
-+ * Additional authors:
-+ * Jaakko Korkeaniemi
-+ * Gregory Detal
-+ * Fabien Duchêne
-+ * Andreas Seelinger
-+ * Lavkesh Lahngir
-+ * Andreas Ripke
-+ * Vlad Dogaru
-+ * Octavian Purdila
-+ * John Ronan
-+ * Catalin Nicutar
-+ * Brandon Heller
-+ *
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License
-+ * as published by the Free Software Foundation; either version
-+ * 2 of the License, or (at your option) any later version.
-+ */
-+
-+#ifndef __NETNS_MPTCP_H__
-+#define __NETNS_MPTCP_H__
-+
-+#include
-+
-+enum {
-+ MPTCP_PM_FULLMESH = 0,
-+ MPTCP_PM_MAX
-+};
-+
-+struct mptcp_mib;
-+
-+struct netns_mptcp {
-+ DEFINE_SNMP_STAT(struct mptcp_mib, mptcp_statistics);
-+
-+#ifdef CONFIG_PROC_FS
-+ struct proc_dir_entry *proc_net_mptcp;
-+#endif
-+
-+ void *path_managers[MPTCP_PM_MAX];
-+};
-+
-+#endif /* __NETNS_MPTCP_H__ */
-diff --git a/include/net/snmp.h b/include/net/snmp.h
-index cb8ced4380a6..0aa0d10af2ce 100644
---- a/include/net/snmp.h
-+++ b/include/net/snmp.h
-@@ -86,7 +86,6 @@ struct icmpv6msg_mib_device {
- atomic_long_t mibs[ICMP6MSG_MIB_MAX];
- };
-
--
- /* TCP */
- #define TCP_MIB_MAX __TCP_MIB_MAX
- struct tcp_mib {
-diff --git a/include/net/sock.h b/include/net/sock.h
-index 079b5f6f13d8..8ae33ecd9d0a 100644
---- a/include/net/sock.h
-+++ b/include/net/sock.h
-@@ -821,6 +821,7 @@ enum sock_flags {
- SOCK_TXTIME,
- SOCK_XDP, /* XDP is attached */
- SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */
-+ SOCK_MPTCP, /* MPTCP set on this socket */
- };
-
- #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
-@@ -1133,6 +1134,7 @@ struct proto {
- void (*unhash)(struct sock *sk);
- void (*rehash)(struct sock *sk);
- int (*get_port)(struct sock *sk, unsigned short snum);
-+ void (*clear_sk)(struct sock *sk, int size);
-
- /* Keeping track of sockets in use */
- #ifdef CONFIG_PROC_FS
-diff --git a/include/net/tcp.h b/include/net/tcp.h
-index b914959cd2c6..b290be3e510c 100644
---- a/include/net/tcp.h
-+++ b/include/net/tcp.h
-@@ -182,6 +182,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
- #define TCPOPT_SACK 5 /* SACK Block */
- #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
- #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
-+#define TCPOPT_MPTCP 30
- #define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */
- #define TCPOPT_EXP 254 /* Experimental */
- /* Magic number to be after the option value for sharing TCP
-@@ -238,6 +239,31 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
- */
- #define TFO_SERVER_WO_SOCKOPT1 0x400
-
-+/* Flags from tcp_input.c for tcp_ack */
-+#define FLAG_DATA 0x01 /* Incoming frame contained data. */
-+#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
-+#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */
-+#define FLAG_RETRANS_DATA_ACKED 0x08 /* "" "" some of which was retransmitted. */
-+#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */
-+#define FLAG_DATA_SACKED 0x20 /* New SACK. */
-+#define FLAG_ECE 0x40 /* ECE in this ACK */
-+#define FLAG_LOST_RETRANS 0x80 /* This ACK marks some retransmission lost */
-+#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
-+#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */
-+#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
-+#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
-+#define FLAG_SET_XMIT_TIMER 0x1000 /* Set TLP or RTO timer */
-+#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
-+#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
-+#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */
-+#define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */
-+
-+#define MPTCP_FLAG_DATA_ACKED 0x20000
-+
-+#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
-+#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
-+#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK)
-+#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
-
- /* sysctl variables for tcp */
- extern int sysctl_tcp_max_orphans;
-@@ -310,6 +336,98 @@ extern struct proto tcp_prot;
- #define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
- #define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
-
-+/**** START - Exports needed for MPTCP ****/
-+extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
-+extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops;
-+
-+struct mptcp_options_received;
-+
-+void tcp_cleanup_rbuf(struct sock *sk, int copied);
-+int tcp_close_state(struct sock *sk);
-+void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
-+ const struct sk_buff *skb);
-+int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib);
-+void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb);
-+int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
-+ gfp_t gfp_mask);
-+u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now);
-+unsigned int tcp_mss_split_point(const struct sock *sk,
-+ const struct sk_buff *skb,
-+ unsigned int mss_now,
-+ unsigned int max_segs,
-+ int nonagle);
-+bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
-+ unsigned int cur_mss, int nonagle);
-+bool tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb,
-+ unsigned int cur_mss);
-+unsigned int tcp_cwnd_test(const struct tcp_sock *tp, const struct sk_buff *skb);
-+int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now);
-+int __pskb_trim_head(struct sk_buff *skb, int len);
-+void tcp_queue_skb(struct sock *sk, struct sk_buff *skb);
-+void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags);
-+void tcp_reset(struct sock *sk);
-+bool tcp_may_update_window(const struct tcp_sock *tp, const u32 ack,
-+ const u32 ack_seq, const u32 nwin);
-+bool tcp_urg_mode(const struct tcp_sock *tp);
-+void tcp_ack_probe(struct sock *sk);
-+void tcp_rearm_rto(struct sock *sk);
-+int tcp_write_timeout(struct sock *sk);
-+bool retransmits_timed_out(struct sock *sk,
-+ unsigned int boundary,
-+ unsigned int timeout);
-+void tcp_write_err(struct sock *sk);
-+void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr);
-+void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
-+ u64 prior_wstamp);
-+void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now);
-+
-+void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
-+ struct request_sock *req);
-+void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb);
-+struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb);
-+void tcp_v4_reqsk_destructor(struct request_sock *req);
-+
-+void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
-+ struct request_sock *req);
-+void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
-+struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb);
-+int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
-+int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
-+void tcp_v6_destroy_sock(struct sock *sk);
-+void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb);
-+void tcp_v6_hash(struct sock *sk);
-+struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb);
-+struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
-+ struct request_sock *req,
-+ struct dst_entry *dst,
-+ struct request_sock *req_unhash,
-+ bool *own_req);
-+void tcp_v6_reqsk_destructor(struct request_sock *req);
-+
-+unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
-+ int large_allowed);
-+u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb);
-+void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, u32 prior_snd_una);
-+
-+void skb_clone_fraglist(struct sk_buff *skb);
-+
-+void inet_twsk_free(struct inet_timewait_sock *tw);
-+int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb);
-+/* These states need RST on ABORT according to RFC793 */
-+static inline bool tcp_need_reset(int state)
-+{
-+ return (1 << state) &
-+ (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
-+ TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
-+}
-+
-+int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
-+ bool *fragstolen);
-+void tcp_ofo_queue(struct sock *sk);
-+void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb);
-+int linear_payload_sz(bool first_skb);
-+/**** END - Exports needed for MPTCP ****/
-+
- void tcp_tasklet_init(void);
-
- int tcp_v4_err(struct sk_buff *skb, u32);
-@@ -411,7 +529,9 @@ int tcp_mmap(struct file *file, struct socket *sock,
- #endif
- void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
- struct tcp_options_received *opt_rx,
-- int estab, struct tcp_fastopen_cookie *foc);
-+ struct mptcp_options_received *mopt_rx,
-+ int estab, struct tcp_fastopen_cookie *foc,
-+ struct tcp_sock *tp);
- const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
-
- /*
-@@ -430,6 +550,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
-
- void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
- void tcp_v4_mtu_reduced(struct sock *sk);
-+void tcp_v6_mtu_reduced(struct sock *sk);
- void tcp_req_err(struct sock *sk, u32 seq, bool abort);
- int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
- struct sock *tcp_create_openreq_child(const struct sock *sk,
-@@ -453,6 +574,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
- struct request_sock *req,
- struct tcp_fastopen_cookie *foc,
- enum tcp_synack_type synack_type);
-+void tcp_reset_vars(struct sock *sk);
- int tcp_disconnect(struct sock *sk, int flags);
-
- void tcp_finish_connect(struct sock *sk, struct sk_buff *skb);
-@@ -462,6 +584,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb);
- /* From syncookies.c */
- struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
- struct request_sock *req,
-+ const struct mptcp_options_received *mopt,
- struct dst_entry *dst, u32 tsoff);
- int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
- u32 cookie);
-@@ -547,7 +670,8 @@ static inline u32 tcp_cookie_time(void)
-
- u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
- u16 *mssp);
---__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
-+__u32 cookie_v4_init_sequence(struct request_sock *req, const struct sock *sk,
-+ const struct sk_buff *skb, __u16 *mss);
- u64 cookie_init_timestamp(struct request_sock *req);
- bool cookie_timestamp_decode(const struct net *net,
- struct tcp_options_received *opt);
-@@ -561,7 +685,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
-
- u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
- const struct tcphdr *th, u16 *mssp);
---__u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
-+__u32 cookie_v6_init_sequence(struct request_sock *req, const struct sock *sk,
-+ const struct sk_buff *skb, __u16 *mss);
- #endif
- /* tcp_output.c */
-
-@@ -597,10 +722,16 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto);
- void tcp_skb_collapse_tstamp(struct sk_buff *skb,
- const struct sk_buff *next_skb);
-
-+u16 tcp_select_window(struct sock *sk);
-+bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
-+ int push_one, gfp_t gfp);
-+
- /* tcp_input.c */
- void tcp_rearm_rto(struct sock *sk);
- void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
- void tcp_reset(struct sock *sk);
-+void tcp_set_rto(struct sock *sk);
-+bool tcp_should_expand_sndbuf(const struct sock *sk);
- void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
- void tcp_fin(struct sock *sk);
-
-@@ -645,7 +776,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
- }
-
- /* tcp.c */
---void tcp_get_info(struct sock *, struct tcp_info *);
-+void tcp_get_info(struct sock *, struct tcp_info *, bool no_lock);
-
- /* Read 'sendfile()'-style from a TCP socket */
- int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
-@@ -723,7 +854,7 @@ static inline u32 tcp_min_rtt(const struct tcp_sock *tp)
- * Rcv_nxt can be after the window if our peer push more data
- * than the offered window.
- */
---static inline u32 tcp_receive_window(const struct tcp_sock *tp)
-+static inline u32 tcp_receive_window_now(const struct tcp_sock *tp)
- {
- s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;
-
-@@ -732,6 +863,32 @@ static inline u32 tcp_receive_window(const struct tcp_sock *tp)
- return (u32) win;
- }
-
-+/* right edge only moves forward, even if window shrinks due
-+ * to mptcp meta
-+ */
-+static inline void tcp_update_rcv_right_edge(struct tcp_sock *tp)
-+{
-+ if (after(tp->rcv_wup + tp->rcv_wnd, tp->rcv_right_edge))
-+ tp->rcv_right_edge = tp->rcv_wup + tp->rcv_wnd;
-+}
-+
-+/* Compute receive window which will never shrink. The way MPTCP handles
-+ * the receive window can cause the effective right edge to shrink,
-+ * causing valid segments to become out of window.
-+ * This function should be used when checking if a segment is valid for
-+ * the max right edge announced.
-+ */
-+static inline u32 tcp_receive_window_no_shrink(const struct tcp_sock *tp)
-+{
-+ s32 win = tp->rcv_right_edge - tp->rcv_nxt;
-+
-+ win = max_t(s32, win, tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt);
-+
-+ if (unlikely(win < 0))
-+ win = 0;
-+ return (u32) win;
-+}
-+
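A worked example (illustrative numbers) of why the non-shrinking variant matters: with rcv_nxt = 1000, a meta-level shrink so that rcv_wup + rcv_wnd = 900, and rcv_right_edge = 1100, tcp_receive_window_now() reports 0 while the helper above still accepts data up to the previously announced edge:

	s32 win = 1100 - 1000; /* rcv_right_edge - rcv_nxt = 100 */
	win = max_t(s32, win, 900 - 1000); /* max(100, -100) = 100 */
	/* so a segment ending at sequence 1100 is still in-window */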
- /* Choose a new window, without checks for shrinking, and without
- * scaling applied to the result. The caller does these things
- * if necessary. This is a "raw" window selection.
-@@ -829,6 +986,12 @@ struct tcp_skb_cb {
- u16 tcp_gso_size;
- };
- };
-+
-+#ifdef CONFIG_MPTCP
-+ __u8 mptcp_flags; /* flags for the MPTCP layer */
-+ __u8 dss_off; /* Number of 4-byte words until
-+ * seq-number */
-+#endif
- __u8 tcp_flags; /* TCP header flags. (tcp[13]) */
-
- __u8 sacked; /* State flags for SACK. */
-@@ -847,6 +1010,14 @@ struct tcp_skb_cb {
- has_rxtstamp:1, /* SKB has a RX timestamp */
- unused:5;
- __u32 ack_seq; /* Sequence number ACK'd */
-+
-+#ifdef CONFIG_MPTCP
-+ union { /* For MPTCP outgoing frames */
-+ __u32 path_mask; /* paths that tried to send this skb */
-+ __u32 dss[6]; /* DSS options */
-+ };
-+#endif
-+
- union {
- struct {
- /* There is space for up to 24 bytes */
-@@ -1088,6 +1259,8 @@ void tcp_get_allowed_congestion_control(char *buf, size_t len);
- int tcp_set_allowed_congestion_control(char *allowed);
- int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
- bool reinit, bool cap_net_admin);
-+int __tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
-+ bool reinit, bool cap_net_admin);
- u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
- void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
-
-@@ -1389,6 +1562,19 @@ static inline int tcp_win_from_space(const struct sock *sk, int space)
- space - (space>>tcp_adv_win_scale);
- }
-
-+#ifdef CONFIG_MPTCP
-+extern struct static_key mptcp_static_key;
-+static inline bool mptcp(const struct tcp_sock *tp)
-+{
-+ return static_key_false(&mptcp_static_key) && tp->mpc;
-+}
-+#else
-+static inline bool mptcp(const struct tcp_sock *tp)
-+{
-+ return 0;
-+}
-+#endif
-+
- /* Note: caller must be prepared to deal with negative returns */
- static inline int tcp_space(const struct sock *sk)
- {
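The mptcp() predicate above pairs a static key (a patched-out branch while no MPTCP connection exists) with the per-socket mpc flag, so the plain-TCP fast path pays essentially nothing. The typical guard, sketched (illustrative fragment only):

	struct tcp_sock *tp = tcp_sk(sk);
	struct sock *meta_sk = mptcp(tp) ? mptcp_meta_sk(sk) : sk;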
-@@ -1981,6 +2167,30 @@ struct tcp_sock_af_ops {
- #endif
- };
-
-+/* TCP/MPTCP-specific functions */
-+struct tcp_sock_ops {
-+ u32 (*__select_window)(struct sock *sk);
-+ u16 (*select_window)(struct sock *sk);
-+ void (*select_initial_window)(const struct sock *sk, int __space,
-+ __u32 mss, __u32 *rcv_wnd,
-+ __u32 *window_clamp, int wscale_ok,
-+ __u8 *rcv_wscale, __u32 init_rcv_wnd);
-+ void (*init_buffer_space)(struct sock *sk);
-+ void (*set_rto)(struct sock *sk);
-+ bool (*should_expand_sndbuf)(const struct sock *sk);
-+ void (*send_fin)(struct sock *sk);
-+ bool (*write_xmit)(struct sock *sk, unsigned int mss_now, int nonagle,
-+ int push_one, gfp_t gfp);
-+ void (*send_active_reset)(struct sock *sk, gfp_t priority);
-+ int (*write_wakeup)(struct sock *sk, int mib);
-+ void (*retransmit_timer)(struct sock *sk);
-+ void (*time_wait)(struct sock *sk, int state, int timeo);
-+ void (*cleanup_rbuf)(struct sock *sk, int copied);
-+ int (*set_cong_ctrl)(struct sock *sk, const char *name, bool load,
-+ bool reinit, bool cap_net_admin);
-+};
-+extern const struct tcp_sock_ops tcp_specific;
-+
- struct tcp_request_sock_ops {
- u16 mss_clamp;
- #ifdef CONFIG_TCP_MD5SIG
-@@ -1991,12 +2201,13 @@ struct tcp_request_sock_ops {
- const struct sock *sk,
- const struct sk_buff *skb);
- #endif
-- void (*init_req)(struct request_sock *req,
-- const struct sock *sk_listener,
-- struct sk_buff *skb);
-+ int (*init_req)(struct request_sock *req,
-+ const struct sock *sk_listener,
-+ struct sk_buff *skb,
-+ bool want_cookie);
- #ifdef CONFIG_SYN_COOKIES
-- __u32 (*cookie_init_seq)(const struct sk_buff *skb,
-- __u16 *mss);
-+ __u32 (*cookie_init_seq)(struct request_sock *req, const struct sock *sk,
-+ const struct sk_buff *skb, __u16 *mss);
- #endif
- struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl,
- const struct request_sock *req);
-@@ -2010,15 +2221,17 @@ struct tcp_request_sock_ops {
-
- #ifdef CONFIG_SYN_COOKIES
- static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
-+ struct request_sock *req,
- const struct sock *sk, struct sk_buff *skb,
- __u16 *mss)
- {
- tcp_synq_overflow(sk);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
-- return ops->cookie_init_seq(skb, mss);
-+ return ops->cookie_init_seq(req, sk, skb, mss);
- }
- #else
- static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
-+ struct request_sock *req,
- const struct sock *sk, struct sk_buff *skb,
- __u16 *mss)
- {
-diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h
-index cc00118acca1..11084091e798 100644
---- a/include/net/tcp_states.h
-+++ b/include/net/tcp_states.h
-@@ -22,6 +22,7 @@ enum {
- TCP_LISTEN,
- TCP_CLOSING, /* Now a valid state */
- TCP_NEW_SYN_RECV,
-+ TCP_RST_WAIT,
-
- TCP_MAX_STATES /* Leave at the end! */
- };
-@@ -43,6 +44,7 @@ enum {
- TCPF_LISTEN = (1 << TCP_LISTEN),
- TCPF_CLOSING = (1 << TCP_CLOSING),
- TCPF_NEW_SYN_RECV = (1 << TCP_NEW_SYN_RECV),
-+ TCPF_RST_WAIT = (1 << TCP_RST_WAIT),
- };
-
- #endif /* _LINUX_TCP_STATES_H */
-diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
-index a8f6020f1196..5e70b086fdfb 100644
---- a/include/net/transp_v6.h
-+++ b/include/net/transp_v6.h
-@@ -58,6 +58,8 @@ ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, __u16 srcp,
-
- /* address family specific functions */
- extern const struct inet_connection_sock_af_ops ipv4_specific;
-+extern const struct inet_connection_sock_af_ops ipv6_mapped;
-+extern const struct inet_connection_sock_af_ops ipv6_specific;
-
- void inet6_destroy_sock(struct sock *sk);
-
-diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
-index cf97f6339acb..cf48dc87a734 100644
---- a/include/trace/events/tcp.h
-+++ b/include/trace/events/tcp.h
-@@ -10,6 +10,7 @@
- #include
- #include
- #include
-+#include
- #include
-
- #define TP_STORE_V4MAPPED(__entry, saddr, daddr) \
-@@ -181,6 +182,13 @@ DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust,
- TP_ARGS(sk)
- );
-
-+DEFINE_EVENT(tcp_event_sk_skb, mptcp_retransmit,
-+
-+ TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
-+
-+ TP_ARGS(sk, skb)
-+);
-+
- TRACE_EVENT(tcp_retransmit_synack,
-
- TP_PROTO(const struct sock *sk, const struct request_sock *req),
-@@ -248,6 +256,7 @@ TRACE_EVENT(tcp_probe,
- __field(__u32, srtt)
- __field(__u32, rcv_wnd)
- __field(__u64, sock_cookie)
-+ __field(__u8, mptcp)
- ),
-
- TP_fast_assign(
-@@ -274,13 +283,15 @@ TRACE_EVENT(tcp_probe,
- __entry->ssthresh = tcp_current_ssthresh(sk);
- __entry->srtt = tp->srtt_us >> 3;
- __entry->sock_cookie = sock_gen_cookie(sk);
-+ __entry->mptcp = mptcp(tp) ? tp->mptcp->path_index : 0;
-+ ),
-
-- TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx",
-+ TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx mptcp=%d",
- __entry->saddr, __entry->daddr, __entry->mark,
- __entry->data_len, __entry->snd_nxt, __entry->snd_una,
- __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
-- __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie)
-+ __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie,
-+ __entry->mptcp)
- );
-
- #endif /* _TRACE_TCP_H */
-diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
-index 63038eb23560..7150eb62db86 100644
---- a/include/uapi/linux/bpf.h
-+++ b/include/uapi/linux/bpf.h
-@@ -3438,6 +3438,7 @@ enum {
- BPF_TCP_LISTEN,
- BPF_TCP_CLOSING, /* Now a valid state */
- BPF_TCP_NEW_SYN_RECV,
-+ BPF_TCP_RST_WAIT,
-
- BPF_TCP_MAX_STATES /* Leave at the end! */
- };
-diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h
-index 7fea0fd7d6f5..7255e08393db 100644
---- a/include/uapi/linux/if.h
-+++ b/include/uapi/linux/if.h
-@@ -132,6 +132,9 @@ enum net_device_flags {
- #define IFF_ECHO IFF_ECHO
- #endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */
-
-+#define IFF_NOMULTIPATH 0x80000 /* Disable for MPTCP */
-+#define IFF_MPBACKUP 0x100000 /* Use as backup path for MPTCP */
-+
- #define IFF_VOLATILE (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_ECHO|\
- IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT)
-
-diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
-index 60e1241d4b77..ff6185b1d79f 100644
---- a/include/uapi/linux/in.h
-+++ b/include/uapi/linux/in.h
-@@ -76,6 +76,8 @@ enum {
- #define IPPROTO_MPLS IPPROTO_MPLS
- IPPROTO_RAW = 255, /* Raw IP packets */
- #define IPPROTO_RAW IPPROTO_RAW
-+ IPPROTO_MPTCP = 262, /* Multipath TCP connection */
-+#define IPPROTO_MPTCP IPPROTO_MPTCP
- IPPROTO_MAX
- };
- #endif
-diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
-new file mode 100644
-index 000000000000..02078c80c846
---- /dev/null
-+++ b/include/uapi/linux/mptcp.h
-@@ -0,0 +1,151 @@
-+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-+/*
-+ * Netlink API for Multipath TCP
-+ *
-+ * Author: Gregory Detal
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License
-+ * as published by the Free Software Foundation; either version
-+ * 2 of the License, or (at your option) any later version.
-+ */
-+
-+#ifndef _LINUX_MPTCP_H
-+#define _LINUX_MPTCP_H
-+
-+#define MPTCP_GENL_NAME "mptcp"
-+#define MPTCP_GENL_EV_GRP_NAME "mptcp_events"
-+#define MPTCP_GENL_CMD_GRP_NAME "mptcp_commands"
-+#define MPTCP_GENL_VER 0x1
-+
-+/*
-+ * ATTR types defined for MPTCP
-+ */
-+enum {
-+ MPTCP_ATTR_UNSPEC = 0,
-+
-+ MPTCP_ATTR_TOKEN, /* u32 */
-+ MPTCP_ATTR_FAMILY, /* u16 */
-+ MPTCP_ATTR_LOC_ID, /* u8 */
-+ MPTCP_ATTR_REM_ID, /* u8 */
-+ MPTCP_ATTR_SADDR4, /* u32 */
-+ MPTCP_ATTR_SADDR6, /* struct in6_addr */
-+ MPTCP_ATTR_DADDR4, /* u32 */
-+ MPTCP_ATTR_DADDR6, /* struct in6_addr */
-+ MPTCP_ATTR_SPORT, /* u16 */
-+ MPTCP_ATTR_DPORT, /* u16 */
-+ MPTCP_ATTR_BACKUP, /* u8 */
-+ MPTCP_ATTR_ERROR, /* u8 */
-+ MPTCP_ATTR_FLAGS, /* u16 */
-+ MPTCP_ATTR_TIMEOUT, /* u32 */
-+ MPTCP_ATTR_IF_IDX, /* s32 */
-+
-+ __MPTCP_ATTR_AFTER_LAST
-+};
-+
-+#define MPTCP_ATTR_MAX (__MPTCP_ATTR_AFTER_LAST - 1)
-+
-+/*
-+ * Events generated by MPTCP:
-+ * - MPTCP_EVENT_CREATED: token, family, saddr4 | saddr6, daddr4 | daddr6,
-+ * sport, dport
-+ * A new connection has been created. It is a good time to allocate
-+ * memory and send ADD_ADDR if needed. Depending on the traffic patterns
-+ * it can take a long time until the MPTCP_EVENT_ESTABLISHED is sent.
-+ *
-+ * - MPTCP_EVENT_ESTABLISHED: token, family, saddr4 | saddr6, daddr4 | daddr6,
-+ * sport, dport
-+ * A connection is established (can start new subflows).
-+ *
-+ * - MPTCP_EVENT_CLOSED: token
-+ * A connection has stopped.
-+ *
-+ * - MPTCP_EVENT_ANNOUNCED: token, rem_id, family, daddr4 | daddr6 [, dport]
-+ * A new address has been announced by the peer.
-+ *
-+ * - MPTCP_EVENT_REMOVED: token, rem_id
-+ * An address has been lost by the peer.
-+ *
-+ * - MPTCP_EVENT_SUB_ESTABLISHED: token, family, loc_id, rem_id,
-+ * saddr4 | saddr6, daddr4 | daddr6, sport,
-+ * dport, backup, if_idx [, error]
-+ * A new subflow has been established. 'error' should not be set.
-+ *
-+ * - MPTCP_EVENT_SUB_CLOSED: token, family, loc_id, rem_id, saddr4 | saddr6,
-+ * daddr4 | daddr6, sport, dport, backup, if_idx
-+ * [, error]
-+ * A subflow has been closed. An error (copy of sk_err) could be set if an
-+ * error has been detected for this subflow.
-+ *
-+ * - MPTCP_EVENT_SUB_PRIORITY: token, family, loc_id, rem_id, saddr4 | saddr6,
-+ * daddr4 | daddr6, sport, dport, backup, if_idx
-+ * [, error]
-+ * The priority of a subflow has changed. 'error' should not be set.
-+ *
-+ * Commands for MPTCP:
-+ * - MPTCP_CMD_ANNOUNCE: token, loc_id, family, saddr4 | saddr6 [, sport]
-+ * Announce a new address to the peer.
-+ *
-+ * - MPTCP_CMD_REMOVE: token, loc_id
-+ * Announce that an address has been lost to the peer.
-+ *
-+ * - MPTCP_CMD_SUB_CREATE: token, family, loc_id, rem_id, daddr4 | daddr6,
-+ * dport [, saddr4 | saddr6, sport, backup, if_idx]
-+ * Create a new subflow.
-+ *
-+ * - MPTCP_CMD_SUB_DESTROY: token, family, saddr4 | saddr6, daddr4 | daddr6,
-+ * sport, dport
-+ * Close a subflow.
-+ *
-+ * - MPTCP_CMD_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6,
-+ * sport, dport, backup
-+ * Change the priority of a subflow.
-+ *
-+ * - MPTCP_CMD_SET_FILTER: flags
-+ * Set the filter on events. Set MPTCPF_* flags to only receive specific
-+ * events. Default is to receive all events.
-+ *
-+ * - MPTCP_CMD_EXIST: token
-+ * Check if this token is linked to an existing socket.
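The catalogue of events and commands above is served over plain generic netlink, so the userspace half of a path manager can be built with libnl. A minimal listener sketch, assuming libnl-genl-3 and the attribute numbering from the enum above (MPTCP_ATTR_TOKEN = 1, MPTCP_ATTR_MAX = 15); it only prints the connection token of each event:

/* Sketch: subscribe to the "mptcp_events" multicast group and print
 * the MPTCP_ATTR_TOKEN of every event.
 * Build: cc ev.c $(pkg-config --cflags --libs libnl-genl-3.0)
 */
#include <stdio.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>

#define MPTCP_ATTR_TOKEN 1	/* from the enum above */
#define MPTCP_ATTR_MAX   15

static int on_event(struct nl_msg *msg, void *arg)
{
	struct nlattr *attrs[MPTCP_ATTR_MAX + 1];
	struct genlmsghdr *ghdr = nlmsg_data(nlmsg_hdr(msg));

	/* no fixed header after genlmsghdr, attributes start directly */
	if (nla_parse(attrs, MPTCP_ATTR_MAX, genlmsg_attrdata(ghdr, 0),
		      genlmsg_attrlen(ghdr, 0), NULL) < 0)
		return NL_SKIP;

	if (attrs[MPTCP_ATTR_TOKEN])
		printf("event %u, token %08x\n", ghdr->cmd,
		       nla_get_u32(attrs[MPTCP_ATTR_TOKEN]));
	return NL_OK;
}

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	int grp;

	if (!sk || genl_connect(sk))
		return 1;

	grp = genl_ctrl_resolve_grp(sk, "mptcp", "mptcp_events");
	if (grp < 0 || nl_socket_add_membership(sk, grp))
		return 1;

	/* multicast events are unsolicited: disable sequence checking */
	nl_socket_disable_seq_check(sk);
	nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, on_event, NULL);

	for (;;)
		nl_recvmsgs_default(sk);
}

Commands such as MPTCP_CMD_SUB_CREATE go the other way on the "mptcp_commands" group, with the same attribute set.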
-+ */ -+enum { -+ MPTCP_CMD_UNSPEC = 0, -+ -+ MPTCP_EVENT_CREATED, -+ MPTCP_EVENT_ESTABLISHED, -+ MPTCP_EVENT_CLOSED, -+ -+ MPTCP_CMD_ANNOUNCE, -+ MPTCP_CMD_REMOVE, -+ MPTCP_EVENT_ANNOUNCED, -+ MPTCP_EVENT_REMOVED, -+ -+ MPTCP_CMD_SUB_CREATE, -+ MPTCP_CMD_SUB_DESTROY, -+ MPTCP_EVENT_SUB_ESTABLISHED, -+ MPTCP_EVENT_SUB_CLOSED, -+ -+ MPTCP_CMD_SUB_PRIORITY, -+ MPTCP_EVENT_SUB_PRIORITY, -+ -+ MPTCP_CMD_SET_FILTER, -+ -+ MPTCP_CMD_EXIST, -+ -+ __MPTCP_CMD_AFTER_LAST -+}; -+ -+#define MPTCP_CMD_MAX (__MPTCP_CMD_AFTER_LAST - 1) -+ -+enum { -+ MPTCPF_EVENT_CREATED = (1 << 1), -+ MPTCPF_EVENT_ESTABLISHED = (1 << 2), -+ MPTCPF_EVENT_CLOSED = (1 << 3), -+ MPTCPF_EVENT_ANNOUNCED = (1 << 4), -+ MPTCPF_EVENT_REMOVED = (1 << 5), -+ MPTCPF_EVENT_SUB_ESTABLISHED = (1 << 6), -+ MPTCPF_EVENT_SUB_CLOSED = (1 << 7), -+ MPTCPF_EVENT_SUB_PRIORITY = (1 << 8), -+}; -+ -+#endif /* _LINUX_MPTCP_H */ -diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h -index 81e697978e8b..09ef515261d2 100644 ---- a/include/uapi/linux/tcp.h -+++ b/include/uapi/linux/tcp.h -@@ -18,9 +18,15 @@ - #ifndef _UAPI_LINUX_TCP_H - #define _UAPI_LINUX_TCP_H - --#include -+#ifndef __KERNEL__ -+#include -+#endif -+ - #include -+#include -+#include - #include -+#include - - struct tcphdr { - __be16 source; -@@ -134,6 +140,13 @@ enum { - #define TCP_REPAIR_OFF 0 - #define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */ - -+#define MPTCP_ENABLED 42 -+#define MPTCP_SCHEDULER 43 -+#define MPTCP_PATH_MANAGER 44 -+#define MPTCP_INFO 45 -+ -+#define MPTCP_INFO_FLAG_SAVE_MASTER 0x01 -+ - struct tcp_repair_opt { - __u32 opt_code; - __u32 opt_val; -@@ -305,6 +318,53 @@ enum { - TCP_NLA_SRTT, /* smoothed RTT in usecs */ - }; - -+struct mptcp_meta_info { -+ __u8 mptcpi_state; -+ __u8 mptcpi_retransmits; -+ __u8 mptcpi_probes; -+ __u8 mptcpi_backoff; -+ -+ __u32 mptcpi_rto; -+ __u32 mptcpi_unacked; -+ -+ /* Times. 
*/ -+ __u32 mptcpi_last_data_sent; -+ __u32 mptcpi_last_data_recv; -+ __u32 mptcpi_last_ack_recv; -+ -+ __u32 mptcpi_total_retrans; -+ -+ __u64 mptcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */ -+ __u64 mptcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */ -+}; -+ -+struct mptcp_sub_info { -+ union { -+ struct sockaddr src; -+ struct sockaddr_in src_v4; -+ struct sockaddr_in6 src_v6; -+ }; -+ -+ union { -+ struct sockaddr dst; -+ struct sockaddr_in dst_v4; -+ struct sockaddr_in6 dst_v6; -+ }; -+}; -+ -+struct mptcp_info { -+ __u32 tcp_info_len; /* Length of each struct tcp_info in subflows pointer */ -+ __u32 sub_len; /* Total length of memory pointed to by subflows pointer */ -+ __u32 meta_len; /* Length of memory pointed to by meta_info */ -+ __u32 sub_info_len; /* Length of each struct mptcp_sub_info in subflow_info pointer */ -+ __u32 total_sub_info_len; /* Total length of memory pointed to by subflow_info */ -+ -+ struct mptcp_meta_info *meta_info; -+ struct tcp_info *initial; -+ struct tcp_info *subflows; /* Pointer to array of tcp_info structs */ -+ struct mptcp_sub_info *subflow_info; -+}; -+ - /* for TCP_MD5SIG socket option */ - #define TCP_MD5SIG_MAXKEYLEN 80 - -diff --git a/net/Kconfig b/net/Kconfig -index 0b2fecc83452..66f9158a3040 100644 ---- a/net/Kconfig -+++ b/net/Kconfig -@@ -94,6 +94,7 @@ if INET - source "net/ipv4/Kconfig" - source "net/ipv6/Kconfig" - source "net/netlabel/Kconfig" -+source "net/mptcp/Kconfig" - - endif # if INET - -diff --git a/net/Makefile b/net/Makefile -index 449fc0b221f8..08683343642e 100644 ---- a/net/Makefile -+++ b/net/Makefile -@@ -20,6 +20,7 @@ obj-$(CONFIG_TLS) += tls/ - obj-$(CONFIG_XFRM) += xfrm/ - obj-$(CONFIG_UNIX_SCM) += unix/ - obj-$(CONFIG_NET) += ipv6/ -+obj-$(CONFIG_MPTCP) += mptcp/ - obj-$(CONFIG_BPFILTER) += bpfilter/ - obj-$(CONFIG_PACKET) += packet/ - obj-$(CONFIG_NET_KEY) += key/ -diff --git a/net/core/dev.c b/net/core/dev.c -index 3810eaf89b26..a8a1fba9b4ec 100644 ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -7880,7 +7880,7 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags, - - dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | - IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL | -- IFF_AUTOMEDIA)) | -+ IFF_AUTOMEDIA | IFF_NOMULTIPATH | IFF_MPBACKUP)) | - (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC | - IFF_ALLMULTI)); - -diff --git a/net/core/filter.c b/net/core/filter.c -index 0e161a6dff7e..431996bd5a16 100644 ---- a/net/core/filter.c -+++ b/net/core/filter.c -@@ -73,6 +73,7 @@ - #include - #include - #include -+#include - - /** - * sk_filter_trim_cap - run a packet through a socket filter -@@ -4280,6 +4281,19 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, - if (sk->sk_mark != val) { - sk->sk_mark = val; - sk_dst_reset(sk); -+ -+ if (is_meta_sk(sk)) { -+ struct mptcp_tcp_sock *mptcp; -+ -+ mptcp_for_each_sub(tcp_sk(sk)->mpcb, mptcp) { -+ struct sock *sk_it = mptcp_to_sock(mptcp); -+ -+ if (val != sk_it->sk_mark) { -+ sk_it->sk_mark = val; -+ sk_dst_reset(sk_it); -+ } -+ } -+ } - } - break; - default: -@@ -4302,6 +4316,14 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, - if (val == -1) - val = 0; - inet->tos = val; -+ -+ /* Update TOS on mptcp subflow */ -+ if (is_meta_sk(sk)) { -+ struct mptcp_tcp_sock *mptcp; -+ -+ mptcp_for_each_sub(tcp_sk(sk)->mpcb, mptcp) -+ inet_sk(mptcp_to_sock(mptcp))->tos = val; -+ } - } - break; - default: -@@ -4324,6 +4346,17 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, - 
if (val == -1) - val = 0; - np->tclass = val; -+ -+ if (is_meta_sk(sk)) { -+ struct mptcp_tcp_sock *mptcp; -+ -+ mptcp_for_each_sub(tcp_sk(sk)->mpcb, mptcp) { -+ struct sock *sk_it = mptcp_to_sock(mptcp); -+ -+ if (sk_it->sk_family == AF_INET6) -+ inet6_sk(sk_it)->tclass = val; -+ } -+ } - } - break; - default: -diff --git a/net/core/net-traces.c b/net/core/net-traces.c -index 283ddb2dbc7d..8f526a0d1912 100644 ---- a/net/core/net-traces.c -+++ b/net/core/net-traces.c -@@ -60,3 +60,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); - EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll); - - EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset); -+ -+EXPORT_TRACEPOINT_SYMBOL_GPL(mptcp_retransmit); -diff --git a/net/core/skbuff.c b/net/core/skbuff.c -index 7dba091bc861..79ed7efe1c0c 100644 ---- a/net/core/skbuff.c -+++ b/net/core/skbuff.c -@@ -582,7 +582,7 @@ static inline void skb_drop_fraglist(struct sk_buff *skb) - skb_drop_list(&skb_shinfo(skb)->frag_list); - } - --static void skb_clone_fraglist(struct sk_buff *skb) -+void skb_clone_fraglist(struct sk_buff *skb) - { - struct sk_buff *list; - -diff --git a/net/core/sock.c b/net/core/sock.c -index 57b7a10703c3..8d716113e273 100644 ---- a/net/core/sock.c -+++ b/net/core/sock.c -@@ -135,6 +135,11 @@ - - #include - -+#ifdef CONFIG_MPTCP -+#include -+#include -+#endif -+ - #include - #include - -@@ -1063,6 +1068,19 @@ int sock_setsockopt(struct socket *sock, int level, int optname, - } else if (val != sk->sk_mark) { - sk->sk_mark = val; - sk_dst_reset(sk); -+ -+ if (is_meta_sk(sk)) { -+ struct mptcp_tcp_sock *mptcp; -+ -+ mptcp_for_each_sub(tcp_sk(sk)->mpcb, mptcp) { -+ struct sock *sk_it = mptcp_to_sock(mptcp); -+ -+ if (val != sk_it->sk_mark) { -+ sk_it->sk_mark = val; -+ sk_dst_reset(sk_it); -+ } -+ } -+ } - } - break; - -@@ -1563,6 +1581,23 @@ int sock_getsockopt(struct socket *sock, int level, int optname, - */ - static inline void sock_lock_init(struct sock *sk) - { -+#ifdef CONFIG_MPTCP -+ /* Reclassify the lock-class for subflows */ -+ if (sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP) -+ if (mptcp(tcp_sk(sk)) || tcp_sk(sk)->is_master_sk) { -+ sock_lock_init_class_and_name(sk, meta_slock_key_name, -+ &meta_slock_key, -+ meta_key_name, -+ &meta_key); -+ -+ /* We don't yet have the mptcp-point. -+ * Thus we still need inet_sock_destruct -+ */ -+ sk->sk_destruct = inet_sock_destruct; -+ return; -+ } -+#endif -+ - if (sk->sk_kern_sock) - sock_lock_init_class_and_name( - sk, -@@ -1611,8 +1646,12 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, - sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO); - if (!sk) - return sk; -- if (want_init_on_alloc(priority)) -- sk_prot_clear_nulls(sk, prot->obj_size); -+ if (want_init_on_alloc(priority)) { -+ if (prot->clear_sk) -+ prot->clear_sk(sk, prot->obj_size); -+ else -+ sk_prot_clear_nulls(sk, prot->obj_size); -+ } - } else - sk = kmalloc(prot->obj_size, priority); - -@@ -1846,6 +1885,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) - atomic_set(&newsk->sk_zckey, 0); - - sock_reset_flag(newsk, SOCK_DONE); -+ sock_reset_flag(newsk, SOCK_MPTCP); - - /* sk->sk_memcg will be populated at accept() time */ - newsk->sk_memcg = NULL; -diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig -index a926de2e42b5..6d73dc6e2586 100644 ---- a/net/ipv4/Kconfig -+++ b/net/ipv4/Kconfig -@@ -655,6 +655,51 @@ config TCP_CONG_BBR - bufferbloat, policers, or AQM schemes that do not provide a delay - signal. It requires the fq ("Fair Queue") pacing packet scheduler. 
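The net/core/sock.c and net/core/filter.c hunks above mirror SO_MARK (and TOS/tclass) writes from the meta-socket onto every subflow, so fwmark-based policy routing keeps working per connection. Userspace needs nothing MPTCP-specific; a sketch (needs CAP_NET_ADMIN):

/* Sketch: put a routing mark on a TCP socket; with the hunks above
 * applied, the patched sock_setsockopt() copies the mark to all
 * MPTCP subflows and resets their cached routes.
 */
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	unsigned int mark = 0x2a;	/* arbitrary example value */
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return 1;
	if (setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)) < 0)
		perror("SO_MARK");
	return 0;
}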
- -+config TCP_CONG_LIA -+ tristate "MPTCP Linked Increase" -+ depends on MPTCP -+ default n -+ ---help--- -+ MultiPath TCP Linked Increase Congestion Control -+ To enable it, just put 'lia' in tcp_congestion_control -+ -+config TCP_CONG_OLIA -+ tristate "MPTCP Opportunistic Linked Increase" -+ depends on MPTCP -+ default n -+ ---help--- -+ MultiPath TCP Opportunistic Linked Increase Congestion Control -+ To enable it, just put 'olia' in tcp_congestion_control -+ -+config TCP_CONG_WVEGAS -+ tristate "MPTCP WVEGAS CONGESTION CONTROL" -+ depends on MPTCP -+ default n -+ ---help--- -+ wVegas congestion control for MPTCP -+ To enable it, just put 'wvegas' in tcp_congestion_control -+ -+config TCP_CONG_BALIA -+ tristate "MPTCP BALIA CONGESTION CONTROL" -+ depends on MPTCP -+ default n -+ ---help--- -+ Multipath TCP Balanced Linked Adaptation Congestion Control -+ To enable it, just put 'balia' in tcp_congestion_control -+ -+config TCP_CONG_MCTCPDESYNC -+ tristate "DESYNCHRONIZED MCTCP CONGESTION CONTROL (EXPERIMENTAL)" -+ depends on MPTCP -+ default n -+ ---help--- -+ Desynchronized MultiChannel TCP Congestion Control. This is experimental -+ code that only supports single path and must have set mptcp_ndiffports -+ larger than one. -+ To enable it, just put 'mctcpdesync' in tcp_congestion_control -+ For further details see: -+ http://ieeexplore.ieee.org/abstract/document/6911722/ -+ https://doi.org/10.1016/j.comcom.2015.07.010 -+ - choice - prompt "Default TCP congestion control" - default DEFAULT_CUBIC -@@ -692,6 +737,21 @@ choice - config DEFAULT_BBR - bool "BBR" if TCP_CONG_BBR=y - -+ config DEFAULT_LIA -+ bool "Lia" if TCP_CONG_LIA=y -+ -+ config DEFAULT_OLIA -+ bool "Olia" if TCP_CONG_OLIA=y -+ -+ config DEFAULT_WVEGAS -+ bool "Wvegas" if TCP_CONG_WVEGAS=y -+ -+ config DEFAULT_BALIA -+ bool "Balia" if TCP_CONG_BALIA=y -+ -+ config DEFAULT_MCTCPDESYNC -+ bool "Mctcpdesync (EXPERIMENTAL)" if TCP_CONG_MCTCPDESYNC=y -+ - config DEFAULT_RENO - bool "Reno" - endchoice -@@ -712,6 +772,10 @@ config DEFAULT_TCP_CONG - default "vegas" if DEFAULT_VEGAS - default "westwood" if DEFAULT_WESTWOOD - default "veno" if DEFAULT_VENO -+ default "lia" if DEFAULT_LIA -+ default "olia" if DEFAULT_OLIA -+ default "wvegas" if DEFAULT_WVEGAS -+ default "balia" if DEFAULT_BALIA - default "reno" if DEFAULT_RENO - default "dctcp" if DEFAULT_DCTCP - default "cdg" if DEFAULT_CDG -diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c -index 70f92aaca411..0f4633257c75 100644 ---- a/net/ipv4/af_inet.c -+++ b/net/ipv4/af_inet.c -@@ -100,6 +100,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -150,6 +151,9 @@ void inet_sock_destruct(struct sock *sk) - return; - } - -+ if (sock_flag(sk, SOCK_MPTCP)) -+ mptcp_disable_static_key(); -+ - WARN_ON(atomic_read(&sk->sk_rmem_alloc)); - WARN_ON(refcount_read(&sk->sk_wmem_alloc)); - WARN_ON(sk->sk_wmem_queued); -@@ -227,6 +231,8 @@ int inet_listen(struct socket *sock, int backlog) - tcp_fastopen_init_key_once(sock_net(sk)); - } - -+ mptcp_init_listen(sk); -+ - err = inet_csk_listen_start(sk, backlog); - if (err) - goto out; -@@ -244,8 +250,7 @@ EXPORT_SYMBOL(inet_listen); - * Create an inet socket. 
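Per the Kconfig help texts above, the coupled congestion controls are selected by name, either globally (echo lia > /proc/sys/net/ipv4/tcp_congestion_control) or per socket through the standard TCP_CONGESTION option. A sketch of the per-socket variant, assuming CONFIG_TCP_CONG_LIA is built and its module is loadable:

/* Sketch: select the LIA coupled congestion control on one socket. */
#include <stdio.h>
#include <string.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return 1;
	if (setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, "lia",
		       strlen("lia")) < 0)
		perror("TCP_CONGESTION");	/* module not available? */
	return 0;
}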
- */ - --static int inet_create(struct net *net, struct socket *sock, int protocol, -- int kern) -+int inet_create(struct net *net, struct socket *sock, int protocol, int kern) - { - struct sock *sk; - struct inet_protosw *answer; -@@ -739,6 +744,24 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags, - lock_sock(sk2); - - sock_rps_record_flow(sk2); -+ -+ if (sk2->sk_protocol == IPPROTO_TCP && mptcp(tcp_sk(sk2))) { -+ struct mptcp_tcp_sock *mptcp; -+ -+ mptcp_for_each_sub(tcp_sk(sk2)->mpcb, mptcp) { -+ sock_rps_record_flow(mptcp_to_sock(mptcp)); -+ } -+ -+ if (tcp_sk(sk2)->mpcb->master_sk) { -+ struct sock *sk_it = tcp_sk(sk2)->mpcb->master_sk; -+ -+ write_lock_bh(&sk_it->sk_callback_lock); -+ rcu_assign_pointer(sk_it->sk_wq, &newsock->wq); -+ sk_it->sk_socket = newsock; -+ write_unlock_bh(&sk_it->sk_callback_lock); -+ } -+ } -+ - WARN_ON(!((1 << sk2->sk_state) & - (TCPF_ESTABLISHED | TCPF_SYN_RECV | - TCPF_CLOSE_WAIT | TCPF_CLOSE))); -@@ -1974,6 +1997,9 @@ static int __init inet_init(void) - - ip_init(); - -+ /* We must initialize MPTCP before TCP. */ -+ mptcp_init(); -+ - /* Setup TCP slab cache for open requests. */ - tcp_init(); - -diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c -index 85a88425edc4..f3de2d6eb1a4 100644 ---- a/net/ipv4/inet_connection_sock.c -+++ b/net/ipv4/inet_connection_sock.c -@@ -19,6 +19,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -730,7 +731,10 @@ static void reqsk_timer_handler(struct timer_list *t) - int max_retries, thresh; - u8 defer_accept; - -- if (inet_sk_state_load(sk_listener) != TCP_LISTEN) -+ if (!is_meta_sk(sk_listener) && inet_sk_state_load(sk_listener) != TCP_LISTEN) -+ goto drop; -+ -+ if (is_meta_sk(sk_listener) && !mptcp_can_new_subflow(sk_listener)) - goto drop; - - max_retries = icsk->icsk_syn_retries ? 
: net->ipv4.sysctl_tcp_synack_retries; -@@ -819,7 +823,9 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, - const struct request_sock *req, - const gfp_t priority) - { -- struct sock *newsk = sk_clone_lock(sk, priority); -+ struct sock *newsk; -+ -+ newsk = sk_clone_lock(sk, priority); - - if (newsk) { - struct inet_connection_sock *newicsk = inet_csk(newsk); -@@ -1019,7 +1025,14 @@ void inet_csk_listen_stop(struct sock *sk) - */ - while ((req = reqsk_queue_remove(queue, sk)) != NULL) { - struct sock *child = req->sk; -+ bool mutex_taken = false; -+ struct mptcp_cb *mpcb = tcp_sk(child)->mpcb; - -+ if (is_meta_sk(child)) { -+ WARN_ON(refcount_inc_not_zero(&mpcb->mpcb_refcnt) == 0); -+ mutex_lock(&mpcb->mpcb_mutex); -+ mutex_taken = true; -+ } - local_bh_disable(); - bh_lock_sock(child); - WARN_ON(sock_owned_by_user(child)); -@@ -1029,6 +1042,10 @@ void inet_csk_listen_stop(struct sock *sk) - reqsk_put(req); - bh_unlock_sock(child); - local_bh_enable(); -+ if (mutex_taken) { -+ mutex_unlock(&mpcb->mpcb_mutex); -+ mptcp_mpcb_put(mpcb); -+ } - sock_put(child); - - cond_resched(); -diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c -index aa3fd61818c4..8b3e955ec165 100644 ---- a/net/ipv4/ip_sockglue.c -+++ b/net/ipv4/ip_sockglue.c -@@ -44,6 +44,8 @@ - #endif - #include - -+#include -+ - #include - #include - -@@ -657,7 +659,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, - break; - old = rcu_dereference_protected(inet->inet_opt, - lockdep_sock_is_held(sk)); -- if (inet->is_icsk) { -+ if (inet->is_icsk && !is_meta_sk(sk)) { - struct inet_connection_sock *icsk = inet_csk(sk); - #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET || -@@ -751,6 +753,20 @@ static int do_ip_setsockopt(struct sock *sk, int level, - inet->tos = val; - sk->sk_priority = rt_tos2priority(val); - sk_dst_reset(sk); -+ /* Update TOS on mptcp subflow */ -+ if (is_meta_sk(sk)) { -+ struct mptcp_tcp_sock *mptcp; -+ -+ mptcp_for_each_sub(tcp_sk(sk)->mpcb, mptcp) { -+ struct sock *sk_it = mptcp_to_sock(mptcp); -+ -+ if (inet_sk(sk_it)->tos != inet_sk(sk)->tos) { -+ inet_sk(sk_it)->tos = inet_sk(sk)->tos; -+ sk_it->sk_priority = sk->sk_priority; -+ sk_dst_reset(sk_it); -+ } -+ } -+ } - } - break; - case IP_TTL: -diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c -index 2b45d1455592..f988be944eda 100644 ---- a/net/ipv4/syncookies.c -+++ b/net/ipv4/syncookies.c -@@ -12,6 +12,8 @@ - #include - #include - #include -+#include -+#include - #include - #include - #include -@@ -175,7 +177,8 @@ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, - } - EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence); - --__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mssp) -+__u32 cookie_v4_init_sequence(struct request_sock *req, const struct sock *sk, -+ const struct sk_buff *skb, __u16 *mssp) - { - const struct iphdr *iph = ip_hdr(skb); - const struct tcphdr *th = tcp_hdr(skb); -@@ -200,14 +203,33 @@ EXPORT_SYMBOL_GPL(__cookie_v4_check); - - struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, -+ const struct mptcp_options_received *mopt, - struct dst_entry *dst, u32 tsoff) - { - struct inet_connection_sock *icsk = inet_csk(sk); - struct sock *child; - bool own_req; -+#ifdef CONFIG_MPTCP -+ int ret; -+#endif - - child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst, - NULL, &own_req); -+ -+#ifdef CONFIG_MPTCP -+ if (!child) -+ goto listen_overflow; -+ -+ ret = mptcp_check_req_master(sk, child, req, skb, mopt, 0, 
tsoff); -+ if (ret < 0) -+ return NULL; -+ -+ if (!ret) -+ return tcp_sk(child)->mpcb->master_sk; -+ -+listen_overflow: -+#endif -+ - if (child) { - refcount_set(&req->rsk_refcnt, 1); - tcp_sk(child)->tsoffset = tsoff; -@@ -284,6 +306,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) - { - struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; - struct tcp_options_received tcp_opt; -+ struct mptcp_options_received mopt; - struct inet_request_sock *ireq; - struct tcp_request_sock *treq; - struct tcp_sock *tp = tcp_sk(sk); -@@ -313,7 +336,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) - - /* check for timestamp cookie support */ - memset(&tcp_opt, 0, sizeof(tcp_opt)); -- tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL); -+ mptcp_init_mp_opt(&mopt); -+ tcp_parse_options(sock_net(sk), skb, &tcp_opt, &mopt, 0, NULL, NULL); - - if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { - tsoff = secure_tcp_ts_off(sock_net(sk), -@@ -326,7 +350,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) - goto out; - - ret = NULL; -- req = inet_reqsk_alloc(&tcp_request_sock_ops, sk, false); /* for safety */ -+#ifdef CONFIG_MPTCP -+ if (mopt.saw_mpc) -+ req = inet_reqsk_alloc(&mptcp_request_sock_ops, sk, false); /* for safety */ -+ else -+#endif -+ req = inet_reqsk_alloc(&tcp_request_sock_ops, sk, false); /* for safety */ - if (!req) - goto out; - -@@ -346,6 +375,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; -+ ireq->mptcp_rqsk = 0; -+ ireq->saw_mpc = 0; - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - treq->snt_synack = 0; - treq->tfo_listener = false; -@@ -354,6 +385,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) - - ireq->ir_iif = inet_request_bound_dev_if(sk, skb); - -+ if (mopt.saw_mpc) -+ mptcp_cookies_reqsk_init(req, &mopt, skb); -+ - /* We throwed the options of the initial SYN away, so we hope - * the ACK carries the same options again (see RFC1122 4.2.3.8) - */ -@@ -392,15 +426,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) - (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) - req->rsk_window_clamp = full_space; - -- tcp_select_initial_window(sk, full_space, req->mss, -- &req->rsk_rcv_wnd, &req->rsk_window_clamp, -- ireq->wscale_ok, &rcv_wscale, -- dst_metric(&rt->dst, RTAX_INITRWND)); -+ tp->ops->select_initial_window(sk, full_space, req->mss, -+ &req->rsk_rcv_wnd, &req->rsk_window_clamp, -+ ireq->wscale_ok, &rcv_wscale, -+ dst_metric(&rt->dst, RTAX_INITRWND)); - - ireq->rcv_wscale = rcv_wscale; - ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst); - -- ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst, tsoff); -+ ret = tcp_get_cookie_sock(sk, skb, req, &mopt, &rt->dst, tsoff); - /* ip_queue_xmit() depends on our flow being setup - * Normal sockets get it right from inet_csk_route_child_sock() - */ -diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c -index 5c8d0fb49825..19fbc2cebb07 100644 ---- a/net/ipv4/tcp.c -+++ b/net/ipv4/tcp.c -@@ -270,6 +270,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -400,6 +401,23 @@ static u64 tcp_compute_delivery_rate(const struct tcp_sock *tp) - return rate64; - } - -+const struct tcp_sock_ops tcp_specific = { -+ .__select_window = __tcp_select_window, -+ .select_window = tcp_select_window, -+ .select_initial_window = tcp_select_initial_window, 
-+ .init_buffer_space = tcp_init_buffer_space, -+ .set_rto = tcp_set_rto, -+ .should_expand_sndbuf = tcp_should_expand_sndbuf, -+ .send_fin = tcp_send_fin, -+ .write_xmit = tcp_write_xmit, -+ .send_active_reset = tcp_send_active_reset, -+ .write_wakeup = tcp_write_wakeup, -+ .retransmit_timer = tcp_retransmit_timer, -+ .time_wait = tcp_time_wait, -+ .cleanup_rbuf = tcp_cleanup_rbuf, -+ .set_cong_ctrl = __tcp_set_congestion_control, -+}; -+ - /* Address-family independent initialization for a tcp_sock. - * - * NOTE: A lot of things set to zero explicitly by call to -@@ -453,6 +471,11 @@ void tcp_init_sock(struct sock *sk) - WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]); - WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]); - -+ tp->ops = &tcp_specific; -+ -+ /* Initialize MPTCP-specific stuff and function-pointers */ -+ mptcp_init_tcp_sock(sk); -+ - sk_sockets_allocated_inc(sk); - sk->sk_route_forced_caps = NETIF_F_GSO; - } -@@ -484,7 +507,7 @@ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp, - return true; - if (tcp_rmem_pressure(sk)) - return true; -- if (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss) -+ if (tcp_receive_window_now(tp) <= inet_csk(sk)->icsk_ack.rcv_mss) - return true; - } - if (sk->sk_prot->stream_memory_read) -@@ -787,6 +810,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, - int ret; - - sock_rps_record_flow(sk); -+ - /* - * We can't seek on a socket input - */ -@@ -797,6 +821,16 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, - - lock_sock(sk); - -+#ifdef CONFIG_MPTCP -+ if (mptcp(tcp_sk(sk))) { -+ struct mptcp_tcp_sock *mptcp; -+ -+ mptcp_for_each_sub(tcp_sk(sk)->mpcb, mptcp) { -+ sock_rps_record_flow(mptcp_to_sock(mptcp)); -+ } -+ } -+#endif -+ - timeo = sock_rcvtimeo(sk, sock->file->f_flags & O_NONBLOCK); - while (tss.len) { - ret = __tcp_splice_read(sk, &tss); -@@ -912,8 +946,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, - return NULL; - } - --static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, -- int large_allowed) -+unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, int large_allowed) - { - struct tcp_sock *tp = tcp_sk(sk); - u32 new_size_goal, size_goal; -@@ -941,8 +974,13 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) - { - int mss_now; - -- mss_now = tcp_current_mss(sk); -- *size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB)); -+ if (mptcp(tcp_sk(sk))) { -+ mss_now = mptcp_current_mss(sk); -+ *size_goal = mptcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB)); -+ } else { -+ mss_now = tcp_current_mss(sk); -+ *size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB)); -+ } - - return mss_now; - } -@@ -982,12 +1020,34 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, - * is fully established. - */ - if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && -- !tcp_passive_fastopen(sk)) { -+ !tcp_passive_fastopen(mptcp(tp) && tp->mpcb->master_sk ? -+ tp->mpcb->master_sk : sk)) { - err = sk_stream_wait_connect(sk, &timeo); - if (err != 0) - goto out_err; - } - -+ if (mptcp(tp)) { -+ struct mptcp_tcp_sock *mptcp; -+ -+ /* We must check this with socket-lock hold because we iterate -+ * over the subflows. 
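The struct tcp_sock_ops table above turns previously hard-wired calls (tcp_cleanup_rbuf(), tcp_send_fin(), ...) into tp->ops-> indirections so that the meta-socket can substitute MPTCP variants while plain TCP keeps the defaults. A self-contained sketch of the pattern; the table names and the MPTCP-side behaviour here are illustrative mock-ups, not code from the patch:

#include <stdio.h>

struct fake_sock;

struct tcp_sock_ops {
	void (*cleanup_rbuf)(struct fake_sock *sk, int copied);
	void (*send_fin)(struct fake_sock *sk);
};

struct fake_sock {
	const struct tcp_sock_ops *ops;
};

static void tcp_cleanup_rbuf_dflt(struct fake_sock *sk, int copied)
{
	printf("plain TCP: maybe send a window update after %d bytes\n", copied);
}

static void tcp_send_fin_dflt(struct fake_sock *sk)
{
	printf("plain TCP: queue a FIN\n");
}

static void meta_cleanup_rbuf(struct fake_sock *sk, int copied)
{
	printf("meta: recompute the coupled receive window (%d bytes)\n", copied);
}

static void meta_send_fin(struct fake_sock *sk)
{
	printf("meta: emit a DATA_FIN on an available subflow\n");
}

static const struct tcp_sock_ops tcp_default_ops = {
	.cleanup_rbuf = tcp_cleanup_rbuf_dflt,
	.send_fin = tcp_send_fin_dflt,
};

static const struct tcp_sock_ops mptcp_meta_ops = {
	.cleanup_rbuf = meta_cleanup_rbuf,
	.send_fin = meta_send_fin,
};

int main(void)
{
	struct fake_sock plain = { .ops = &tcp_default_ops };
	struct fake_sock meta = { .ops = &mptcp_meta_ops };

	plain.ops->cleanup_rbuf(&plain, 4096);	/* tp->ops->cleanup_rbuf(...) */
	meta.ops->send_fin(&meta);		/* tcp_sk(sk)->ops->send_fin(sk) */
	return 0;
}

The design choice is the usual kernel one: one indirect call per operation instead of mptcp() checks scattered through every fast path.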
-+ */ -+ if (!mptcp_can_sendpage(sk)) { -+ ssize_t ret; -+ -+ release_sock(sk); -+ ret = sock_no_sendpage(sk->sk_socket, page, offset, -+ size, flags); -+ lock_sock(sk); -+ return ret; -+ } -+ -+ mptcp_for_each_sub(tp->mpcb, mptcp) { -+ sock_rps_record_flow(mptcp_to_sock(mptcp)); -+ } -+ } -+ - sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); - - mss_now = tcp_send_mss(sk, &size_goal, flags); -@@ -1109,7 +1169,8 @@ EXPORT_SYMBOL_GPL(do_tcp_sendpages); - int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, - size_t size, int flags) - { -- if (!(sk->sk_route_caps & NETIF_F_SG)) -+ /* If MPTCP is enabled, we check it later after establishment */ -+ if (!mptcp(tcp_sk(sk)) && !(sk->sk_route_caps & NETIF_F_SG)) - return sock_no_sendpage_locked(sk, page, offset, size, flags); - - tcp_rate_check_app_limited(sk); /* is sending application-limited? */ -@@ -1231,12 +1292,21 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) - * is fully established. - */ - if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && -- !tcp_passive_fastopen(sk)) { -+ !tcp_passive_fastopen(mptcp(tp) && tp->mpcb->master_sk ? -+ tp->mpcb->master_sk : sk)) { - err = sk_stream_wait_connect(sk, &timeo); - if (err != 0) - goto do_error; - } - -+ if (mptcp(tp)) { -+ struct mptcp_tcp_sock *mptcp; -+ -+ mptcp_for_each_sub(tp->mpcb, mptcp) { -+ sock_rps_record_flow(mptcp_to_sock(mptcp)); -+ } -+ } -+ - if (unlikely(tp->repair)) { - if (tp->repair_queue == TCP_RECV_QUEUE) { - copied = tcp_send_rcvq(sk, msg, size); -@@ -1529,7 +1599,7 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len) - * calculation of whether or not we must ACK for the sake of - * a window update. - */ --static void tcp_cleanup_rbuf(struct sock *sk, int copied) -+void tcp_cleanup_rbuf(struct sock *sk, int copied) - { - struct tcp_sock *tp = tcp_sk(sk); - bool time_to_ack = false; -@@ -1568,11 +1638,11 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied) - * in states, where we will not receive more. It is useless. - */ - if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) { -- __u32 rcv_window_now = tcp_receive_window(tp); -+ __u32 rcv_window_now = tcp_receive_window_now(tp); - - /* Optimize, __tcp_select_window() is not cheap. */ - if (2*rcv_window_now <= tp->window_clamp) { -- __u32 new_window = __tcp_select_window(sk); -+ __u32 new_window = tp->ops->__select_window(sk); - - /* Send ACK now, if this read freed lots of space - * in our buffer. Certainly, new_window is new window. -@@ -1688,7 +1758,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, - /* Clean up data we have read: This will do ACK frames. */ - if (copied > 0) { - tcp_recv_skb(sk, seq, &offset); -- tcp_cleanup_rbuf(sk, copied); -+ tp->ops->cleanup_rbuf(sk, copied); - } - return copied; - } -@@ -1979,6 +2049,16 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, - - lock_sock(sk); - -+#ifdef CONFIG_MPTCP -+ if (mptcp(tp)) { -+ struct mptcp_tcp_sock *mptcp; -+ -+ mptcp_for_each_sub(tp->mpcb, mptcp) { -+ sock_rps_record_flow(mptcp_to_sock(mptcp)); -+ } -+ } -+#endif -+ - err = -ENOTCONN; - if (sk->sk_state == TCP_LISTEN) - goto out; -@@ -2097,7 +2177,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, - } - } - -- tcp_cleanup_rbuf(sk, copied); -+ tp->ops->cleanup_rbuf(sk, copied); - - if (copied >= target) { - /* Do not sleep, just process backlog. 
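The do_tcp_sendpages() hunk above releases the socket and falls back to sock_no_sendpage() when a subflow cannot take the zero-copy path, so sendfile(2) on a meta-socket keeps working unchanged. A sketch of the caller side, with no MPTCP-specific handling:

/* Sketch: stream a whole file over a connected (possibly MPTCP)
 * TCP socket. The kernel hunk above quietly falls back to a copying
 * path when a subflow lacks SG support.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/sendfile.h>
#include <sys/stat.h>

int send_whole_file(int sock_fd, const char *path)
{
	struct stat st;
	off_t off = 0;
	int fd = open(path, O_RDONLY);

	if (fd < 0 || fstat(fd, &st) < 0)
		return -1;

	while (off < st.st_size) {
		ssize_t n = sendfile(sock_fd, fd, &off, st.st_size - off);

		if (n <= 0) {
			perror("sendfile");
			close(fd);
			return -1;
		}
	}
	close(fd);
	return 0;
}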
*/ -@@ -2189,7 +2269,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, - */ - - /* Clean up data we have read: This will do ACK frames. */ -- tcp_cleanup_rbuf(sk, copied); -+ tp->ops->cleanup_rbuf(sk, copied); - - release_sock(sk); - -@@ -2248,8 +2328,11 @@ void tcp_set_state(struct sock *sk, int state) - - switch (state) { - case TCP_ESTABLISHED: -- if (oldstate != TCP_ESTABLISHED) -+ if (oldstate != TCP_ESTABLISHED) { - TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); -+ if (is_meta_sk(sk)) -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB); -+ } - break; - - case TCP_CLOSE: -@@ -2262,8 +2345,11 @@ void tcp_set_state(struct sock *sk, int state) - inet_put_port(sk); - /* fall through */ - default: -- if (oldstate == TCP_ESTABLISHED) -+ if (oldstate == TCP_ESTABLISHED) { - TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); -+ if (is_meta_sk(sk)) -+ MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB); -+ } - } - - /* Change state AFTER socket is unhashed to avoid closed -@@ -2297,7 +2383,7 @@ static const unsigned char new_state[16] = { - [TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! */ - }; - --static int tcp_close_state(struct sock *sk) -+int tcp_close_state(struct sock *sk) - { - int next = (int)new_state[sk->sk_state]; - int ns = next & TCP_STATE_MASK; -@@ -2327,7 +2413,7 @@ void tcp_shutdown(struct sock *sk, int how) - TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) { - /* Clear out any half completed packets. FIN if needed. */ - if (tcp_close_state(sk)) -- tcp_send_fin(sk); -+ tcp_sk(sk)->ops->send_fin(sk); - } - } - EXPORT_SYMBOL(tcp_shutdown); -@@ -2352,6 +2438,17 @@ void tcp_close(struct sock *sk, long timeout) - int data_was_unread = 0; - int state; - -+ if (is_meta_sk(sk)) { -+ /* TODO: Currently forcing timeout to 0 because -+ * sk_stream_wait_close will complain during lockdep because -+ * of the mpcb_mutex (circular lock dependency through -+ * inet_csk_listen_stop()). -+ * We should find a way to get rid of the mpcb_mutex. -+ */ -+ mptcp_close(sk, 0); -+ return; -+ } -+ - lock_sock(sk); - sk->sk_shutdown = SHUTDOWN_MASK; - -@@ -2396,7 +2493,7 @@ void tcp_close(struct sock *sk, long timeout) - /* Unread data was tossed, zap the connection. */ - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); - tcp_set_state(sk, TCP_CLOSE); -- tcp_send_active_reset(sk, sk->sk_allocation); -+ tcp_sk(sk)->ops->send_active_reset(sk, sk->sk_allocation); - } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { - /* Check zero linger _after_ checking for unread data. 
*/ - sk->sk_prot->disconnect(sk, 0); -@@ -2470,7 +2567,7 @@ void tcp_close(struct sock *sk, long timeout) - struct tcp_sock *tp = tcp_sk(sk); - if (tp->linger2 < 0) { - tcp_set_state(sk, TCP_CLOSE); -- tcp_send_active_reset(sk, GFP_ATOMIC); -+ tp->ops->send_active_reset(sk, GFP_ATOMIC); - __NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPABORTONLINGER); - } else { -@@ -2480,7 +2577,8 @@ void tcp_close(struct sock *sk, long timeout) - inet_csk_reset_keepalive_timer(sk, - tmo - TCP_TIMEWAIT_LEN); - } else { -- tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); -+ tcp_sk(sk)->ops->time_wait(sk, TCP_FIN_WAIT2, -+ tmo); - goto out; - } - } -@@ -2489,7 +2587,7 @@ void tcp_close(struct sock *sk, long timeout) - sk_mem_reclaim(sk); - if (tcp_check_oom(sk, 0)) { - tcp_set_state(sk, TCP_CLOSE); -- tcp_send_active_reset(sk, GFP_ATOMIC); -+ tcp_sk(sk)->ops->send_active_reset(sk, GFP_ATOMIC); - __NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPABORTONMEMORY); - } else if (!check_net(sock_net(sk))) { -@@ -2521,15 +2619,6 @@ void tcp_close(struct sock *sk, long timeout) - } - EXPORT_SYMBOL(tcp_close); - --/* These states need RST on ABORT according to RFC793 */ -- --static inline bool tcp_need_reset(int state) --{ -- return (1 << state) & -- (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 | -- TCPF_FIN_WAIT2 | TCPF_SYN_RECV); --} -- - static void tcp_rtx_queue_purge(struct sock *sk) - { - struct rb_node *p = rb_first(&sk->tcp_rtx_queue); -@@ -2551,6 +2640,10 @@ void tcp_write_queue_purge(struct sock *sk) - { - struct sk_buff *skb; - -+ if (mptcp(tcp_sk(sk)) && !is_meta_sk(sk) && -+ !tcp_rtx_and_write_queues_empty(sk)) -+ mptcp_reinject_data(sk, 0); -+ - tcp_chrono_stop(sk, TCP_CHRONO_BUSY); - while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { - tcp_skb_tsorted_anchor_cleanup(skb); -@@ -2569,6 +2662,36 @@ void tcp_write_queue_purge(struct sock *sk) - inet_csk(sk)->icsk_backoff = 0; - } - -+void tcp_reset_vars(struct sock *sk) -+{ -+ struct inet_connection_sock *icsk = inet_csk(sk); -+ struct tcp_sock *tp = tcp_sk(sk); -+ -+ tp->srtt_us = 0; -+ tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); -+ tp->rcv_rtt_last_tsecr = 0; -+ icsk->icsk_probes_tstamp = 0; -+ icsk->icsk_rto = TCP_TIMEOUT_INIT; -+ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; -+ tp->snd_cwnd = TCP_INIT_CWND; -+ tp->snd_cwnd_cnt = 0; -+ tp->delivered = 0; -+ tp->delivered_ce = 0; -+ tp->is_sack_reneg = 0; -+ tcp_clear_retrans(tp); -+ tp->segs_in = 0; -+ tp->segs_out = 0; -+ tp->bytes_sent = 0; -+ tp->bytes_acked = 0; -+ tp->bytes_received = 0; -+ tp->bytes_retrans = 0; -+ tp->total_retrans = 0; -+ tp->data_segs_in = 0; -+ tp->data_segs_out = 0; -+ /* There's a bubble in the pipe until at least the first ACK. */ -+ tp->app_limited = ~0U; -+} -+ - int tcp_disconnect(struct sock *sk, int flags) - { - struct inet_sock *inet = inet_sk(sk); -@@ -2591,7 +2714,7 @@ int tcp_disconnect(struct sock *sk, int flags) - /* The last check adjusts for discrepancy of Linux wrt. 
RFC - * states - */ -- tcp_send_active_reset(sk, gfp_any()); -+ tp->ops->send_active_reset(sk, gfp_any()); - sk->sk_err = ECONNRESET; - } else if (old_state == TCP_SYN_SENT) - sk->sk_err = ECONNRESET; -@@ -2613,11 +2736,15 @@ int tcp_disconnect(struct sock *sk, int flags) - if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) - inet_reset_saddr(sk); - -+ if (is_meta_sk(sk)) { -+ mptcp_disconnect(sk); -+ } else { -+ if (tp->inside_tk_table) -+ mptcp_hash_remove_bh(tp); -+ } -+ - sk->sk_shutdown = 0; - sock_reset_flag(sk, SOCK_DONE); -- tp->srtt_us = 0; -- tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); -- tp->rcv_rtt_last_tsecr = 0; - - seq = tp->write_seq + tp->max_window + 2; - if (!seq) -@@ -2627,21 +2754,14 @@ int tcp_disconnect(struct sock *sk, int flags) - icsk->icsk_backoff = 0; - tp->snd_cwnd = 2; - icsk->icsk_probes_out = 0; -- icsk->icsk_probes_tstamp = 0; -- icsk->icsk_rto = TCP_TIMEOUT_INIT; -- tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; -- tp->snd_cwnd = TCP_INIT_CWND; -- tp->snd_cwnd_cnt = 0; - tp->window_clamp = 0; -- tp->delivered = 0; -- tp->delivered_ce = 0; -+ -+ tcp_reset_vars(sk); -+ - if (icsk->icsk_ca_ops->release) - icsk->icsk_ca_ops->release(sk); - memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); - tcp_set_ca_state(sk, TCP_CA_Open); -- tp->is_sack_reneg = 0; -- tcp_clear_retrans(tp); -- tp->total_retrans = 0; - inet_csk_delack_init(sk); - /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 - * issue in __tcp_select_window() -@@ -2653,14 +2773,6 @@ int tcp_disconnect(struct sock *sk, int flags) - sk->sk_rx_dst = NULL; - tcp_saved_syn_free(tp); - tp->compressed_ack = 0; -- tp->segs_in = 0; -- tp->segs_out = 0; -- tp->bytes_sent = 0; -- tp->bytes_acked = 0; -- tp->bytes_received = 0; -- tp->bytes_retrans = 0; -- tp->data_segs_in = 0; -- tp->data_segs_out = 0; - tp->duplicate_sack[0].start_seq = 0; - tp->duplicate_sack[0].end_seq = 0; - tp->dsack_dups = 0; -@@ -2669,8 +2781,6 @@ int tcp_disconnect(struct sock *sk, int flags) - tp->sacked_out = 0; - tp->tlp_high_seq = 0; - tp->last_oow_ack_time = 0; -- /* There's a bubble in the pipe until at least the first ACK. */ -- tp->app_limited = ~0U; - tp->rack.mstamp = 0; - tp->rack.advanced = 0; - tp->rack.reo_wnd_steps = 1; -@@ -2704,7 +2814,7 @@ EXPORT_SYMBOL(tcp_disconnect); - static inline bool tcp_can_repair_sock(const struct sock *sk) - { - return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) && -- (sk->sk_state != TCP_LISTEN); -+ (sk->sk_state != TCP_LISTEN) && !sock_flag(sk, SOCK_MPTCP); - } - - static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len) -@@ -2735,6 +2845,7 @@ static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int l - - tp->rcv_wnd = opt.rcv_wnd; - tp->rcv_wup = opt.rcv_wup; -+ tp->rcv_right_edge = tp->rcv_wup + tp->rcv_wnd; - - return 0; - } -@@ -2873,6 +2984,61 @@ static int do_tcp_setsockopt(struct sock *sk, int level, - - return tcp_fastopen_reset_cipher(net, sk, key, backup_key); - } -+#ifdef CONFIG_MPTCP -+ case MPTCP_SCHEDULER: { -+ char name[MPTCP_SCHED_NAME_MAX]; -+ -+ if (optlen < 1) -+ return -EINVAL; -+ -+ /* Cannot be used if MPTCP is not used or we already have -+ * established an MPTCP-connection. 
-+ */ -+ if (mptcp_init_failed || !sysctl_mptcp_enabled || -+ sk->sk_state != TCP_CLOSE) -+ return -EPERM; -+ -+ val = strncpy_from_user(name, optval, -+ min_t(long, MPTCP_SCHED_NAME_MAX - 1, -+ optlen)); -+ -+ if (val < 0) -+ return -EFAULT; -+ name[val] = 0; -+ -+ lock_sock(sk); -+ err = mptcp_set_scheduler(sk, name); -+ release_sock(sk); -+ return err; -+ } -+ -+ case MPTCP_PATH_MANAGER: { -+ char name[MPTCP_PM_NAME_MAX]; -+ -+ if (optlen < 1) -+ return -EINVAL; -+ -+ /* Cannot be used if MPTCP is not used or we already have -+ * established an MPTCP-connection. -+ */ -+ if (mptcp_init_failed || !sysctl_mptcp_enabled || -+ sk->sk_state != TCP_CLOSE) -+ return -EPERM; -+ -+ val = strncpy_from_user(name, optval, -+ min_t(long, MPTCP_PM_NAME_MAX - 1, -+ optlen)); -+ -+ if (val < 0) -+ return -EFAULT; -+ name[val] = 0; -+ -+ lock_sock(sk); -+ err = mptcp_set_path_manager(sk, name); -+ release_sock(sk); -+ return err; -+ } -+#endif - default: - /* fallthru */ - break; -@@ -3062,6 +3228,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, - break; - - case TCP_DEFER_ACCEPT: -+ /* An established MPTCP-connection (mptcp(tp) only returns true -+ * if the socket is established) should not use DEFER on new -+ * subflows. -+ */ -+ if (mptcp(tp)) -+ break; - /* Translate value in seconds to number of retransmits */ - icsk->icsk_accept_queue.rskq_defer_accept = - secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, -@@ -3089,7 +3261,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, - (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && - inet_csk_ack_scheduled(sk)) { - icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; -- tcp_cleanup_rbuf(sk, 1); -+ tp->ops->cleanup_rbuf(sk, 1); - if (!(val & 1)) - inet_csk_enter_pingpong_mode(sk); - } -@@ -3099,7 +3271,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level, - #ifdef CONFIG_TCP_MD5SIG - case TCP_MD5SIG: - case TCP_MD5SIG_EXT: -- err = tp->af_specific->md5_parse(sk, optname, optval, optlen); -+ if (!sock_flag(sk, SOCK_MPTCP)) -+ err = tp->af_specific->md5_parse(sk, optname, optval, optlen); -+ else -+ err = -EINVAL; - break; - #endif - case TCP_USER_TIMEOUT: -@@ -3155,6 +3330,32 @@ static int do_tcp_setsockopt(struct sock *sk, int level, - tp->notsent_lowat = val; - sk->sk_write_space(sk); - break; -+#ifdef CONFIG_MPTCP -+ case MPTCP_ENABLED: -+ if (mptcp_init_failed || !sysctl_mptcp_enabled || -+ sk->sk_state != TCP_CLOSE -+#ifdef CONFIG_TCP_MD5SIG -+ || rcu_access_pointer(tp->md5sig_info) -+#endif -+ ) { -+ err = -EPERM; -+ break; -+ } -+ -+ if (val) -+ mptcp_enable_sock(sk); -+ else -+ mptcp_disable_sock(sk); -+ break; -+ case MPTCP_INFO: -+ if (mptcp_init_failed || !sysctl_mptcp_enabled) { -+ err = -EPERM; -+ break; -+ } -+ -+ tp->record_master_info = !!(val & MPTCP_INFO_FLAG_SAVE_MASTER); -+ break; -+#endif - case TCP_INQ: - if (val > 1 || val < 0) - err = -EINVAL; -@@ -3219,7 +3420,7 @@ static void tcp_get_info_chrono_stats(const struct tcp_sock *tp, - } - - /* Return information about state of tcp endpoint in API format. 
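The MPTCP-specific setsockopt cases above (MPTCP_SCHEDULER, MPTCP_PATH_MANAGER, MPTCP_ENABLED) are all rejected with -EPERM once the socket has left TCP_CLOSE, so they must be issued before connect() or listen(); the matching getsockopt cases follow below. A sketch using the option values 42/43/44 from the include/uapi/linux/tcp.h hunk earlier in this patch; the "fullmesh" path manager and "redundant" scheduler names assume those modules are built:

/* Sketch: per-socket MPTCP control before connect(). The option
 * values come from the patched include/uapi/linux/tcp.h; they are
 * not in glibc headers.
 */
#include <stdio.h>
#include <string.h>
#include <netinet/in.h>
#include <sys/socket.h>

#define MPTCP_ENABLED      42
#define MPTCP_SCHEDULER    43
#define MPTCP_PATH_MANAGER 44

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int one = 1;

	if (fd < 0)
		return 1;

	/* all three must be set while the socket is still TCP_CLOSE,
	 * otherwise the kernel code above returns -EPERM */
	if (setsockopt(fd, IPPROTO_TCP, MPTCP_ENABLED, &one, sizeof(one)) < 0)
		perror("MPTCP_ENABLED");
	if (setsockopt(fd, IPPROTO_TCP, MPTCP_SCHEDULER, "redundant",
		       strlen("redundant")) < 0)
		perror("MPTCP_SCHEDULER");
	if (setsockopt(fd, IPPROTO_TCP, MPTCP_PATH_MANAGER, "fullmesh",
		       strlen("fullmesh")) < 0)
		perror("MPTCP_PATH_MANAGER");

	/* ... connect() as usual ... */
	return 0;
}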
*/ --void tcp_get_info(struct sock *sk, struct tcp_info *info) -+void tcp_get_info(struct sock *sk, struct tcp_info *info, bool no_lock) - { - const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ - const struct inet_connection_sock *icsk = inet_csk(sk); -@@ -3256,7 +3457,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) - return; - } - -- slow = lock_sock_fast(sk); -+ if (!no_lock) -+ slow = lock_sock_fast(sk); - - info->tcpi_ca_state = icsk->icsk_ca_state; - info->tcpi_retransmits = icsk->icsk_retransmits; -@@ -3332,7 +3534,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) - info->tcpi_reord_seen = tp->reord_seen; - info->tcpi_rcv_ooopack = tp->rcv_ooopack; - info->tcpi_snd_wnd = tp->snd_wnd; -- unlock_sock_fast(sk, slow); -+ -+ if (!no_lock) -+ unlock_sock_fast(sk, slow); - } - EXPORT_SYMBOL_GPL(tcp_get_info); - -@@ -3479,7 +3683,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, - if (get_user(len, optlen)) - return -EFAULT; - -- tcp_get_info(sk, &info); -+ tcp_get_info(sk, &info, false); - - len = min_t(unsigned int, len, sizeof(info)); - if (put_user(len, optlen)) -@@ -3668,6 +3872,87 @@ static int do_tcp_getsockopt(struct sock *sk, int level, - } - return 0; - } -+#ifdef CONFIG_MPTCP -+ case MPTCP_SCHEDULER: -+ if (get_user(len, optlen)) -+ return -EFAULT; -+ len = min_t(unsigned int, len, MPTCP_SCHED_NAME_MAX); -+ if (put_user(len, optlen)) -+ return -EFAULT; -+ -+ lock_sock(sk); -+ if (mptcp(tcp_sk(sk))) { -+ struct mptcp_cb *mpcb = tcp_sk(mptcp_meta_sk(sk))->mpcb; -+ -+ if (copy_to_user(optval, mpcb->sched_ops->name, len)) { -+ release_sock(sk); -+ return -EFAULT; -+ } -+ } else { -+ if (copy_to_user(optval, tcp_sk(sk)->mptcp_sched_name, -+ len)) { -+ release_sock(sk); -+ return -EFAULT; -+ } -+ } -+ release_sock(sk); -+ return 0; -+ -+ case MPTCP_PATH_MANAGER: -+ if (get_user(len, optlen)) -+ return -EFAULT; -+ len = min_t(unsigned int, len, MPTCP_PM_NAME_MAX); -+ if (put_user(len, optlen)) -+ return -EFAULT; -+ -+ lock_sock(sk); -+ if (mptcp(tcp_sk(sk))) { -+ struct mptcp_cb *mpcb = tcp_sk(mptcp_meta_sk(sk))->mpcb; -+ -+ if (copy_to_user(optval, mpcb->pm_ops->name, len)) { -+ release_sock(sk); -+ return -EFAULT; -+ } -+ } else { -+ if (copy_to_user(optval, tcp_sk(sk)->mptcp_pm_name, -+ len)) { -+ release_sock(sk); -+ return -EFAULT; -+ } -+ } -+ release_sock(sk); -+ return 0; -+ -+ case MPTCP_ENABLED: -+ if (sk->sk_state != TCP_SYN_SENT) -+ val = mptcp(tp) ? 1 : 0; -+ else -+ val = sock_flag(sk, SOCK_MPTCP) ? 1 : 0; -+ break; -+ case MPTCP_INFO: -+ { -+ int ret; -+ -+ if (!mptcp(tp)) -+ return -EINVAL; -+ -+ if (get_user(len, optlen)) -+ return -EFAULT; -+ -+ len = min_t(unsigned int, len, sizeof(struct mptcp_info)); -+ -+ lock_sock(sk); -+ ret = mptcp_get_info(sk, optval, len); -+ release_sock(sk); -+ -+ if (ret) -+ return ret; -+ -+ if (put_user(len, optlen)) -+ return -EFAULT; -+ return 0; -+ } -+#endif - #ifdef CONFIG_MMU - case TCP_ZEROCOPY_RECEIVE: { - struct tcp_zerocopy_receive zc; -@@ -3873,7 +4158,9 @@ void tcp_done(struct sock *sk) - if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) - TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); - -+ WARN_ON(sk->sk_state == TCP_CLOSE); - tcp_set_state(sk, TCP_CLOSE); -+ - tcp_clear_xmit_timers(sk); - if (req) - reqsk_fastopen_remove(sk, req, false); -@@ -3889,6 +4176,8 @@ EXPORT_SYMBOL_GPL(tcp_done); - - int tcp_abort(struct sock *sk, int err) - { -+ struct sock *meta_sk = mptcp(tcp_sk(sk)) ? 
mptcp_meta_sk(sk) : sk; -+ - if (!sk_fullsock(sk)) { - if (sk->sk_state == TCP_NEW_SYN_RECV) { - struct request_sock *req = inet_reqsk(sk); -@@ -3902,7 +4191,7 @@ int tcp_abort(struct sock *sk, int err) - } - - /* Don't race with userspace socket closes such as tcp_close. */ -- lock_sock(sk); -+ lock_sock(meta_sk); - - if (sk->sk_state == TCP_LISTEN) { - tcp_set_state(sk, TCP_CLOSE); -@@ -3911,7 +4200,7 @@ int tcp_abort(struct sock *sk, int err) - - /* Don't race with BH socket closes such as inet_csk_listen_stop. */ - local_bh_disable(); -- bh_lock_sock(sk); -+ bh_lock_sock(meta_sk); - - if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_err = err; -@@ -3919,14 +4208,14 @@ int tcp_abort(struct sock *sk, int err) - smp_wmb(); - sk->sk_error_report(sk); - if (tcp_need_reset(sk->sk_state)) -- tcp_send_active_reset(sk, GFP_ATOMIC); -+ tcp_sk(sk)->ops->send_active_reset(sk, GFP_ATOMIC); - tcp_done(sk); - } - -- bh_unlock_sock(sk); -+ bh_unlock_sock(meta_sk); - local_bh_enable(); - tcp_write_queue_purge(sk); -- release_sock(sk); -+ release_sock(meta_sk); - return 0; - } - EXPORT_SYMBOL_GPL(tcp_abort); -diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c -index 6d5600889dcf..247c1168b6a5 100644 ---- a/net/ipv4/tcp_cong.c -+++ b/net/ipv4/tcp_cong.c -@@ -337,13 +337,19 @@ int tcp_set_allowed_congestion_control(char *val) - return ret; - } - -+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, -+ bool reinit, bool cap_net_admin) -+{ -+ return tcp_sk(sk)->ops->set_cong_ctrl(sk, name, load, reinit, cap_net_admin); -+} -+ - /* Change congestion control for socket. If load is false, then it is the - * responsibility of the caller to call tcp_init_congestion_control or - * tcp_reinit_congestion_control (if the current congestion control was - * already initialized. 
- */ --int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, -- bool reinit, bool cap_net_admin) -+int __tcp_set_congestion_control(struct sock *sk, const char *name, bool load, -+ bool reinit, bool cap_net_admin) - { - struct inet_connection_sock *icsk = inet_csk(sk); - const struct tcp_congestion_ops *ca; -diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c -index 549506162dde..e5a530e0b1c5 100644 ---- a/net/ipv4/tcp_diag.c -+++ b/net/ipv4/tcp_diag.c -@@ -31,7 +31,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, - r->idiag_wqueue = READ_ONCE(tp->write_seq) - tp->snd_una; - } - if (info) -- tcp_get_info(sk, info); -+ tcp_get_info(sk, info, false); - } - - #ifdef CONFIG_TCP_MD5SIG -diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c -index a5ec77a5ad6f..f9fb4a268b9b 100644 ---- a/net/ipv4/tcp_fastopen.c -+++ b/net/ipv4/tcp_fastopen.c -@@ -9,6 +9,7 @@ - #include - #include - #include -+#include - - void tcp_fastopen_init_key_once(struct net *net) - { -@@ -136,8 +137,6 @@ static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req, - const siphash_key_t *key, - struct tcp_fastopen_cookie *foc) - { -- BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u64)); -- - if (req->rsk_ops->family == AF_INET) { - const struct iphdr *iph = ip_hdr(syn); - -@@ -258,8 +257,9 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, - { - struct tcp_sock *tp; - struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; -- struct sock *child; -+ struct sock *child, *meta_sk; - bool own_req; -+ int ret; - - child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, - NULL, &own_req); -@@ -294,15 +294,27 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, - - refcount_set(&req->rsk_refcnt, 2); - -- /* Now finish processing the fastopen child socket. */ -- tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); -- - tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; - - tcp_fastopen_add_skb(child, skb); - - tcp_rsk(req)->rcv_nxt = tp->rcv_nxt; - tp->rcv_wup = tp->rcv_nxt; -+ tp->rcv_right_edge = tp->rcv_wup + tp->rcv_wnd; -+ -+ meta_sk = child; -+ ret = mptcp_check_req_fastopen(meta_sk, req); -+ if (ret < 0) -+ return NULL; -+ -+ if (ret == 0) { -+ child = tcp_sk(meta_sk)->mpcb->master_sk; -+ tp = tcp_sk(child); -+ } -+ -+ /* Now finish processing the fastopen child socket. */ -+ tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); -+ - /* tcp_conn_request() is sending the SYNACK, - * and queues the child into listener accept queue. - */ -diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c -index c0fcfa296468..dae2ce9656b8 100644 ---- a/net/ipv4/tcp_input.c -+++ b/net/ipv4/tcp_input.c -@@ -76,35 +76,15 @@ - #include - #include - #include -+#include -+#include -+#include - #include - #include - #include - - int sysctl_tcp_max_orphans __read_mostly = NR_FILE; - --#define FLAG_DATA 0x01 /* Incoming frame contained data. */ --#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ --#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */ --#define FLAG_RETRANS_DATA_ACKED 0x08 /* "" "" some of which was retransmitted. */ --#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ --#define FLAG_DATA_SACKED 0x20 /* New SACK. 
*/ --#define FLAG_ECE 0x40 /* ECE in this ACK */ --#define FLAG_LOST_RETRANS 0x80 /* This ACK marks some retransmission lost */ --#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ --#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ --#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ --#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ --#define FLAG_SET_XMIT_TIMER 0x1000 /* Set TLP or RTO timer */ --#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ --#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ --#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ --#define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */ -- --#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) --#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) --#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK) --#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) -- - #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) - #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) - -@@ -349,8 +329,12 @@ static void tcp_sndbuf_expand(struct sock *sk) - per_mss = roundup_pow_of_two(per_mss) + - SKB_DATA_ALIGN(sizeof(struct sk_buff)); - -- nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd); -- nr_segs = max_t(u32, nr_segs, tp->reordering + 1); -+ if (mptcp(tp)) { -+ nr_segs = mptcp_check_snd_buf(tp); -+ } else { -+ nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd); -+ nr_segs = max_t(u32, nr_segs, tp->reordering + 1); -+ } - - /* Fast Recovery (RFC 5681 3.2) : - * Cubic needs 1.7 factor, rounded to 2 to include -@@ -359,9 +343,17 @@ static void tcp_sndbuf_expand(struct sock *sk) - sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2; - sndmem *= nr_segs * per_mss; - -- if (sk->sk_sndbuf < sndmem) -+ /* MPTCP: after this sndmem is the new contribution of the -+ * current subflow to the aggregated sndbuf */ -+ if (sk->sk_sndbuf < sndmem) { -+ int old_sndbuf = sk->sk_sndbuf; - WRITE_ONCE(sk->sk_sndbuf, - min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2])); -+ /* MPTCP: ok, the subflow sndbuf has grown, reflect -+ * this in the aggregate buffer.*/ -+ if (mptcp(tp) && old_sndbuf != sk->sk_sndbuf) -+ mptcp_update_sndbuf(tp); -+ } - } - - /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) -@@ -410,9 +402,14 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) - static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) - { - struct tcp_sock *tp = tcp_sk(sk); -+ struct sock *meta_sk = mptcp(tp) ? mptcp_meta_sk(sk) : sk; -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); - int room; - -- room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh; -+ if (is_meta_sk(sk)) -+ return; -+ -+ room = min_t(int, meta_tp->window_clamp, tcp_space(meta_sk)) - meta_tp->rcv_ssthresh; - - /* Check #1 */ - if (room > 0 && !tcp_under_memory_pressure(sk)) { -@@ -422,13 +419,13 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) - * will fit to rcvbuf in future. 
 */
- if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
-- incr = 2 * tp->advmss;
-+ incr = 2 * meta_tp->advmss;
- else
-- incr = __tcp_grow_window(sk, skb);
-+ incr = __tcp_grow_window(meta_sk, skb);
- 
- if (incr) {
- incr = max_t(int, incr, 2 * skb->len);
-- tp->rcv_ssthresh += min(room, incr);
-+ meta_tp->rcv_ssthresh += min(room, incr);
- inet_csk(sk)->icsk_ack.quick |= 1;
- }
- }
-@@ -612,7 +609,10 @@ void tcp_rcv_space_adjust(struct sock *sk)
- 
- tcp_mstamp_refresh(tp);
- time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
-- if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
-+ if (mptcp(tp)) {
-+ if (mptcp_check_rtt(tp, time))
-+ return;
-+ } else if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
- return;
- 
- /* Number of bytes copied to user in last RTT */
-@@ -835,7 +835,7 @@ static void tcp_update_pacing_rate(struct sock *sk)
- /* Calculate rto without backoff. This is the second half of Van Jacobson's
- * routine referred to above.
- */
--static void tcp_set_rto(struct sock *sk)
-+void tcp_set_rto(struct sock *sk)
- {
- const struct tcp_sock *tp = tcp_sk(sk);
- /* Old crap is replaced with new one. 8)
-@@ -1407,6 +1407,13 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
- int len;
- int in_sack;
- 
-+ /* For MPTCP we cannot shift skb-data and remove one skb from the
-+ * send-queue, because this will make us lose the DSS-option (which
-+ * is stored in TCP_SKB_CB(skb)->dss) of the skb we are removing.
-+ */
-+ if (mptcp(tp))
-+ goto fallback;
-+
- /* Normally R but no L won't result in plain S */
- if (!dup_sack &&
- (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)
-@@ -2962,7 +2969,7 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
- */
- tcp_update_rtt_min(sk, ca_rtt_us, flag);
- tcp_rtt_estimator(sk, seq_rtt_us);
-- tcp_set_rto(sk);
-+ tp->ops->set_rto(sk);
- 
- /* RFC6298: only reset backoff on valid RTT measurement. */
- inet_csk(sk)->icsk_backoff = 0;
-@@ -3030,7 +3037,7 @@ static void tcp_set_xmit_timer(struct sock *sk)
- }
- 
- /* If we get here, the whole TSO packet has not been acked.
*/ --static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) -+u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) - { - struct tcp_sock *tp = tcp_sk(sk); - u32 packets_acked; -@@ -3050,8 +3057,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) - return packets_acked; - } - --static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, -- u32 prior_snd_una) -+void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, u32 prior_snd_una) - { - const struct skb_shared_info *shinfo; - -@@ -3156,6 +3162,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, - */ - if (likely(!(scb->tcp_flags & TCPHDR_SYN))) { - flag |= FLAG_DATA_ACKED; -+ if (mptcp(tp) && mptcp_is_data_seq(skb)) -+ flag |= MPTCP_FLAG_DATA_ACKED; - } else { - flag |= FLAG_SYN_ACKED; - tp->retrans_stamp = 0; -@@ -3276,7 +3284,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, - return flag; - } - --static void tcp_ack_probe(struct sock *sk) -+void tcp_ack_probe(struct sock *sk) - { - struct inet_connection_sock *icsk = inet_csk(sk); - struct sk_buff *head = tcp_send_head(sk); -@@ -3350,9 +3358,8 @@ static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked, - /* Check that window update is acceptable. - * The function assumes that snd_una<=ack<=snd_next. - */ --static inline bool tcp_may_update_window(const struct tcp_sock *tp, -- const u32 ack, const u32 ack_seq, -- const u32 nwin) -+bool tcp_may_update_window(const struct tcp_sock *tp, const u32 ack, -+ const u32 ack_seq, const u32 nwin) - { - return after(ack, tp->snd_una) || - after(ack_seq, tp->snd_wl1) || -@@ -3590,7 +3597,7 @@ static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag) - } - - /* This routine deals with incoming acks, but not outgoing ones. */ --static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) -+static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) - { - struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); -@@ -3713,6 +3720,16 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) - - tcp_rack_update_reo_wnd(sk, &rs); - -+ if (mptcp(tp)) { -+ if (mptcp_fallback_infinite(sk, flag)) { -+ pr_debug("%s resetting flow\n", __func__); -+ mptcp_send_reset(sk); -+ return -1; -+ } -+ -+ mptcp_clean_rtx_infinite(skb, sk); -+ } -+ - if (tp->tlp_high_seq) - tcp_process_tlp_ack(sk, ack, flag); - -@@ -3856,8 +3873,10 @@ static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) - */ - void tcp_parse_options(const struct net *net, - const struct sk_buff *skb, -- struct tcp_options_received *opt_rx, int estab, -- struct tcp_fastopen_cookie *foc) -+ struct tcp_options_received *opt_rx, -+ struct mptcp_options_received *mopt, -+ int estab, struct tcp_fastopen_cookie *foc, -+ struct tcp_sock *tp) - { - const unsigned char *ptr; - const struct tcphdr *th = tcp_hdr(skb); -@@ -3943,6 +3962,10 @@ void tcp_parse_options(const struct net *net, - */ - break; - #endif -+ case TCPOPT_MPTCP: -+ mptcp_parse_options(ptr - 2, opsize, mopt, skb, tp); -+ break; -+ - case TCPOPT_FASTOPEN: - tcp_parse_fastopen_option( - opsize - TCPOLEN_FASTOPEN_BASE, -@@ -4010,7 +4033,9 @@ static bool tcp_fast_parse_options(const struct net *net, - return true; - } - -- tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL); -+ tcp_parse_options(net, skb, &tp->rx_opt, -+ mptcp(tp) ? 
&tp->mptcp->rx_opt : NULL, 1, NULL, tp); -+ - if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) - tp->rx_opt.rcv_tsecr -= tp->tsoffset; - -@@ -4120,7 +4145,7 @@ static inline bool tcp_paws_discard(const struct sock *sk, - static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) - { - return !before(end_seq, tp->rcv_wup) && -- !after(seq, tp->rcv_nxt + tcp_receive_window(tp)); -+ !after(seq, tp->rcv_nxt + tcp_receive_window_no_shrink(tp)); - } - - /* When we get a reset we do this. */ -@@ -4169,6 +4194,11 @@ void tcp_fin(struct sock *sk) - { - struct tcp_sock *tp = tcp_sk(sk); - -+ if (is_meta_sk(sk)) { -+ mptcp_fin(sk); -+ return; -+ } -+ - inet_csk_schedule_ack(sk); - - sk->sk_shutdown |= RCV_SHUTDOWN; -@@ -4179,6 +4209,10 @@ void tcp_fin(struct sock *sk) - case TCP_ESTABLISHED: - /* Move to CLOSE_WAIT */ - tcp_set_state(sk, TCP_CLOSE_WAIT); -+ -+ if (mptcp(tp)) -+ mptcp_sub_close_passive(sk); -+ - inet_csk_enter_pingpong_mode(sk); - break; - -@@ -4201,9 +4235,16 @@ void tcp_fin(struct sock *sk) - tcp_set_state(sk, TCP_CLOSING); - break; - case TCP_FIN_WAIT2: -+ if (mptcp(tp)) { -+ /* The socket will get closed by mptcp_data_ready. -+ * We first have to process all data-sequences. -+ */ -+ tp->close_it = 1; -+ break; -+ } - /* Received a FIN -- send ACK and enter TIME_WAIT. */ - tcp_send_ack(sk); -- tcp_time_wait(sk, TCP_TIME_WAIT, 0); -+ tp->ops->time_wait(sk, TCP_TIME_WAIT, 0); - break; - default: - /* Only TCP_LISTEN and TCP_CLOSE are left, in these -@@ -4225,6 +4266,10 @@ void tcp_fin(struct sock *sk) - if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_state_change(sk); - -+ /* Don't wake up MPTCP-subflows */ -+ if (mptcp(tp)) -+ return; -+ - /* Do not send POLL_HUP for half duplex close. */ - if (sk->sk_shutdown == SHUTDOWN_MASK || - sk->sk_state == TCP_CLOSE) -@@ -4439,6 +4484,9 @@ static bool tcp_try_coalesce(struct sock *sk, - - *fragstolen = false; - -+ if (mptcp(tcp_sk(sk)) && !is_meta_sk(sk)) -+ return false; -+ - /* Its possible this segment overlaps with prior segment in queue */ - if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq) - return false; -@@ -4493,7 +4541,7 @@ static void tcp_drop(struct sock *sk, struct sk_buff *skb) - /* This one checks to see if we can put data from the - * out_of_order queue into the receive_queue. - */ --static void tcp_ofo_queue(struct sock *sk) -+void tcp_ofo_queue(struct sock *sk) - { - struct tcp_sock *tp = tcp_sk(sk); - __u32 dsack_high = tp->rcv_nxt; -@@ -4516,7 +4564,14 @@ static void tcp_ofo_queue(struct sock *sk) - p = rb_next(p); - rb_erase(&skb->rbnode, &tp->out_of_order_queue); - -- if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { -+ /* In case of MPTCP, the segment may be empty if it's a -+ * non-data DATA_FIN. (see beginning of tcp_data_queue) -+ * -+ * But this only holds true for subflows, not for the -+ * meta-socket. 
-+ */ -+ if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt) && -+ (is_meta_sk(sk) || !mptcp(tp) || TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq))) { - tcp_drop(sk, skb); - continue; - } -@@ -4546,6 +4601,9 @@ static int tcp_prune_queue(struct sock *sk); - static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, - unsigned int size) - { -+ if (mptcp(tcp_sk(sk))) -+ sk = mptcp_meta_sk(sk); -+ - if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - !sk_rmem_schedule(sk, skb, size)) { - -@@ -4560,7 +4618,7 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, - return 0; - } - --static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) -+void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) - { - struct tcp_sock *tp = tcp_sk(sk); - struct rb_node **p, *parent; -@@ -4632,7 +4690,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) - continue; - } - if (before(seq, TCP_SKB_CB(skb1)->end_seq)) { -- if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { -+ if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq) && -+ (is_meta_sk(sk) || !mptcp(tp) || end_seq != seq)) { - /* All the bits are present. Drop. */ - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPOFOMERGE); -@@ -4679,6 +4738,11 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) - end_seq); - break; - } -+ /* MPTCP allows non-data data-fin to be in the ofo-queue */ -+ if (mptcp(tp) && !is_meta_sk(sk) && TCP_SKB_CB(skb1)->seq == TCP_SKB_CB(skb1)->end_seq) { -+ skb = skb1; -+ continue; -+ } - rb_erase(&skb1->rbnode, &tp->out_of_order_queue); - tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, - TCP_SKB_CB(skb1)->end_seq); -@@ -4690,7 +4754,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) - tp->ooo_last_skb = skb; - - add_sack: -- if (tcp_is_sack(tp)) -+ if (tcp_is_sack(tp) && seq != end_seq) - tcp_sack_new_ofo_skb(sk, seq, end_seq); - end: - if (skb) { -@@ -4704,8 +4768,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) - } - } - --static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, -- bool *fragstolen) -+int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, -+ bool *fragstolen) - { - int eaten; - struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue); -@@ -4780,7 +4844,8 @@ void tcp_data_ready(struct sock *sk) - - if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) && - !sock_flag(sk, SOCK_DONE) && -- tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss) -+ tcp_receive_window_now(tp) > inet_csk(sk)->icsk_ack.rcv_mss && -+ !mptcp(tp)) - return; - - sk->sk_data_ready(sk); -@@ -4792,10 +4857,14 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) - bool fragstolen; - int eaten; - -- if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { -+ /* If no data is present, but a data_fin is in the options, we still -+ * have to call mptcp_queue_skb later on. */ -+ if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq && -+ !(mptcp(tp) && mptcp_is_data_fin(skb))) { - __kfree_skb(skb); - return; - } -+ - skb_dst_drop(skb); - __skb_pull(skb, tcp_hdr(skb)->doff * 4); - -@@ -4806,7 +4875,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) - * Out of sequence packets to the out_of_order_queue. 
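All the seq == end_seq guards in the queueing hunks above implement one rule: an MPTCP DATA_FIN may arrive on a segment that carries no subflow payload, and such an option-only segment must not be freed or merged away before mptcp_queue_skb() has seen it. A minimal sketch of the drop decision (plain flags stand in for mptcp_is_data_fin() and friends):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct seg {
	uint32_t seq, end_seq;
	bool data_fin;		/* DATA_FIN signalled in the options */
};

/* Mirrors the reworked test in tcp_data_queue(): only free an empty
 * segment when it cannot be an option-only DATA_FIN. */
static bool should_free_empty(const struct seg *s, bool mptcp)
{
	return s->seq == s->end_seq && !(mptcp && s->data_fin);
}

int main(void)
{
	struct seg fin = { .seq = 5, .end_seq = 5, .data_fin = true };

	printf("plain TCP frees it: %d\n", should_free_empty(&fin, false));
	printf("MPTCP keeps it:     %d\n", !should_free_empty(&fin, true));
	return 0;
}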
- */ - if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { -- if (tcp_receive_window(tp) == 0) { -+ if (tcp_receive_window_no_shrink(tp) == 0) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP); - goto out_of_window; - } -@@ -4822,7 +4891,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) - } - - eaten = tcp_queue_rcv(sk, skb, &fragstolen); -- if (skb->len) -+ if (skb->len || mptcp_is_data_fin(skb)) - tcp_event_data_recv(sk, skb); - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) - tcp_fin(sk); -@@ -4844,7 +4913,11 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) - - if (eaten > 0) - kfree_skb_partial(skb, fragstolen); -- if (!sock_flag(sk, SOCK_DEAD)) -+ if (!sock_flag(sk, SOCK_DEAD) || mptcp(tp)) -+ /* MPTCP: we always have to call data_ready, because -+ * we may be about to receive a data-fin, which still -+ * must get queued. -+ */ - tcp_data_ready(sk); - return; - } -@@ -4864,7 +4937,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) - } - - /* Out of window. F.e. zero window probe. */ -- if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp))) -+ if (!before(TCP_SKB_CB(skb)->seq, -+ tp->rcv_nxt + tcp_receive_window_no_shrink(tp))) - goto out_of_window; - - if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { -@@ -4874,7 +4948,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) - /* If window is closed, drop tail of packet. But after - * remembering D-SACK for its head made in previous line. - */ -- if (!tcp_receive_window(tp)) { -+ if (!tcp_receive_window_no_shrink(tp)) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP); - goto out_of_window; - } -@@ -5187,7 +5261,7 @@ static int tcp_prune_queue(struct sock *sk) - return -1; - } - --static bool tcp_should_expand_sndbuf(const struct sock *sk) -+bool tcp_should_expand_sndbuf(const struct sock *sk) - { - const struct tcp_sock *tp = tcp_sk(sk); - -@@ -5222,7 +5296,7 @@ static void tcp_new_space(struct sock *sk) - { - struct tcp_sock *tp = tcp_sk(sk); - -- if (tcp_should_expand_sndbuf(sk)) { -+ if (tp->ops->should_expand_sndbuf(sk)) { - tcp_sndbuf_expand(sk); - tp->snd_cwnd_stamp = tcp_jiffies32; - } -@@ -5236,10 +5310,11 @@ static void tcp_check_space(struct sock *sk) - sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); - /* pairs with tcp_poll() */ - smp_mb(); -- if (sk->sk_socket && -- test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { -+ if (mptcp(tcp_sk(sk)) || -+ (sk->sk_socket && -+ test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))) { - tcp_new_space(sk); -- if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) -+ if (sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) - tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED); - } - } -@@ -5258,6 +5333,8 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) - { - struct tcp_sock *tp = tcp_sk(sk); - unsigned long rtt, delay; -+ struct sock *meta_sk = mptcp(tp) ? mptcp_meta_sk(sk) : sk; -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); - - /* More than one full frame received... */ - if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && -@@ -5266,8 +5343,8 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) - * If application uses SO_RCVLOWAT, we want send ack now if - * we have not received enough bytes to satisfy the condition. 
- */ -- (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || -- __tcp_select_window(sk) >= tp->rcv_wnd)) || -+ (meta_tp->rcv_nxt - meta_tp->copied_seq < meta_sk->sk_rcvlowat || -+ tp->ops->__select_window(sk) >= tp->rcv_wnd)) || - /* We ACK each frame or... */ - tcp_in_quickack_mode(sk) || - /* Protocol state mandates a one-time immediate ACK */ -@@ -5402,6 +5479,10 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t - { - struct tcp_sock *tp = tcp_sk(sk); - -+ /* MPTCP urgent data is not yet supported */ -+ if (mptcp(tp)) -+ return; -+ - /* Check if we get a new urgent pointer - normally not. */ - if (th->urg) - tcp_check_urg(sk, th); -@@ -5544,9 +5625,15 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, - goto discard; - } - -+ /* If valid: post process the received MPTCP options. */ -+ if (mptcp(tp) && mptcp_handle_options(sk, th, skb)) -+ goto discard; -+ - return true; - - discard: -+ if (mptcp(tp)) -+ mptcp_reset_mopt(tp); - tcp_drop(sk, skb); - return false; - } -@@ -5603,6 +5690,10 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) - - tp->rx_opt.saw_tstamp = 0; - -+ /* MPTCP: force slowpath. */ -+ if (mptcp(tp)) -+ goto slow_path; -+ - /* pred_flags is 0xS?10 << 16 + snd_wnd - * if header_prediction is to be made - * 'S' will always be tp->tcp_header_len >> 2 -@@ -5777,7 +5868,7 @@ void tcp_init_transfer(struct sock *sk, int bpf_op) - - tcp_call_bpf(sk, bpf_op, 0, NULL); - tcp_init_congestion_control(sk); -- tcp_init_buffer_space(sk); -+ tcp_sk(sk)->ops->init_buffer_space(sk); - } - - void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) -@@ -5814,17 +5905,24 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, - struct tcp_fastopen_cookie *cookie) - { - struct tcp_sock *tp = tcp_sk(sk); -- struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL; -+ struct sk_buff *data = NULL; - u16 mss = tp->rx_opt.mss_clamp, try_exp = 0; - bool syn_drop = false; - -+ if (tp->syn_data) { -+ if (mptcp(tp)) -+ data = tcp_write_queue_head(mptcp_meta_sk(sk)); -+ else -+ data = tcp_rtx_queue_head(sk); -+ } -+ - if (mss == tp->rx_opt.user_mss) { - struct tcp_options_received opt; - - /* Get original SYNACK MSS value if user MSS sets mss_clamp */ - tcp_clear_options(&opt); - opt.user_mss = opt.mss_clamp = 0; -- tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL); -+ tcp_parse_options(sock_net(sk), synack, &opt, NULL, 0, NULL, NULL); - mss = opt.mss_clamp; - } - -@@ -5848,7 +5946,11 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, - - tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp); - -- if (data) { /* Retransmit unacked data in SYN */ -+ /* In mptcp case, we do not rely on "retransmit", but instead on -+ * "transmit", because if fastopen data is not acked, the retransmission -+ * becomes the first MPTCP data (see mptcp_rcv_synsent_fastopen). -+ */ -+ if (data && !mptcp(tp)) { /* Retransmit unacked data in SYN */ - skb_rbtree_walk_from(data) { - if (__tcp_retransmit_skb(sk, data, 1)) - break; -@@ -5903,9 +6005,13 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, - struct tcp_sock *tp = tcp_sk(sk); - struct tcp_fastopen_cookie foc = { .len = -1 }; - int saved_clamp = tp->rx_opt.mss_clamp; -+ struct mptcp_options_received mopt; - bool fastopen_fail; - -- tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc); -+ mptcp_init_mp_opt(&mopt); -+ -+ tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, -+ mptcp(tp) ? 
&tp->mptcp->rx_opt : &mopt, 0, &foc, tp); - if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) - tp->rx_opt.rcv_tsecr -= tp->tsoffset; - -@@ -5966,11 +6072,41 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, - tcp_try_undo_spurious_syn(sk); - tcp_ack(sk, skb, FLAG_SLOWPATH); - -+ if (tp->request_mptcp || mptcp(tp)) { -+ int ret; -+ -+ rcu_read_lock(); -+ local_bh_disable(); -+ ret = mptcp_rcv_synsent_state_process(sk, &sk, -+ skb, &mopt); -+ local_bh_enable(); -+ rcu_read_unlock(); -+ -+ /* May have changed if we support MPTCP */ -+ tp = tcp_sk(sk); -+ icsk = inet_csk(sk); -+ -+ if (ret == 1) -+ goto reset_and_undo; -+ if (ret == 2) -+ goto discard; -+ } -+ -+ if (mptcp(tp) && !is_master_tp(tp)) { -+ /* Timer for repeating the ACK until an answer -+ * arrives. Used only when establishing an additional -+ * subflow inside of an MPTCP connection. -+ */ -+ sk_reset_timer(sk, &tp->mptcp->mptcp_ack_timer, -+ jiffies + icsk->icsk_rto); -+ } -+ - /* Ok.. it's good. Set up sequence numbers and - * move to established. - */ - WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1); - tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; -+ tcp_update_rcv_right_edge(tp); - - /* RFC1323: The window in SYN & SYN/ACK segments is - * never scaled. -@@ -5992,6 +6128,11 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, - tp->tcp_header_len = sizeof(struct tcphdr); - } - -+ if (mptcp(tp)) { -+ tp->tcp_header_len += MPTCP_SUB_LEN_DSM_ALIGN; -+ tp->advmss -= MPTCP_SUB_LEN_DSM_ALIGN; -+ } -+ - tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); - tcp_initialize_rcv_mss(sk); - -@@ -6015,9 +6156,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, - } - if (fastopen_fail) - return -1; -- if (sk->sk_write_pending || -+ /* With MPTCP we cannot send data on the third ack due to the -+ * lack of option-space to combine with an MP_CAPABLE. -+ */ -+ if (!mptcp(tp) && (sk->sk_write_pending || - icsk->icsk_accept_queue.rskq_defer_accept || -- inet_csk_in_pingpong_mode(sk)) { -+ inet_csk_in_pingpong_mode(sk))) { - /* Save one ACK. Data will be ready after - * several ticks, if write_pending is set. - * -@@ -6056,6 +6200,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, - tcp_paws_reject(&tp->rx_opt, 0)) - goto discard_and_undo; - -+ /* TODO - check this here for MPTCP */ - if (th->syn) { - /* We see SYN without ACK. It is attempt of - * simultaneous connect with crossed SYNs. -@@ -6072,9 +6217,15 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, - tp->tcp_header_len = sizeof(struct tcphdr); - } - -+ if (mptcp(tp)) { -+ tp->tcp_header_len += MPTCP_SUB_LEN_DSM_ALIGN; -+ tp->advmss -= MPTCP_SUB_LEN_DSM_ALIGN; -+ } -+ - WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1); - WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); - tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; -+ tcp_update_rcv_right_edge(tp); - - /* RFC1323: The window in SYN & SYN/ACK segments is - * never scaled. 
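The paired tcp_header_len/advmss adjustments above account for the DSS mapping that every data segment on an MPTCP connection has to carry, in the same way the timestamp option shrinks the usable MSS by TCPOLEN_TSTAMP_ALIGNED. The arithmetic, as a standalone sketch (the 20-byte DSS size is an assumed stand-in for MPTCP_SUB_LEN_DSM_ALIGN, whose value is not shown in this hunk):

#include <stdio.h>

/* Per-segment payload once fixed option overhead is reserved. */
#define TCPOLEN_TSTAMP_ALIGNED	12
#define DSS_ALIGNED		20	/* assumed MPTCP_SUB_LEN_DSM_ALIGN */

static int effective_payload(int mss, int tstamps, int mptcp)
{
	int room = mss;

	if (tstamps)
		room -= TCPOLEN_TSTAMP_ALIGNED;
	if (mptcp)
		room -= DSS_ALIGNED;
	return room;
}

int main(void)
{
	printf("1460-byte MSS, timestamps + MPTCP: %d bytes of data\n",
	       effective_payload(1460, 1, 1));
	return 0;
}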
-@@ -6162,6 +6313,7 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) - */ - - int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) -+ __releases(&sk->sk_lock.slock) - { - struct tcp_sock *tp = tcp_sk(sk); - struct inet_connection_sock *icsk = inet_csk(sk); -@@ -6204,6 +6356,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) - tp->rx_opt.saw_tstamp = 0; - tcp_mstamp_refresh(tp); - queued = tcp_rcv_synsent_state_process(sk, skb, th); -+ if (is_meta_sk(sk)) { -+ sk = tcp_sk(sk)->mpcb->master_sk; -+ tp = tcp_sk(sk); -+ -+ /* Need to call it here, because it will announce new -+ * addresses, which can only be done after the third ack -+ * of the 3-way handshake. -+ */ -+ mptcp_update_metasocket(tp->meta_sk); -+ } - if (queued >= 0) - return queued; - -@@ -6276,6 +6438,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) - - if (tp->rx_opt.tstamp_ok) - tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; -+ if (mptcp(tp)) -+ tp->advmss -= MPTCP_SUB_LEN_DSM_ALIGN; - - if (!inet_csk(sk)->icsk_ca_ops->cong_control) - tcp_update_pacing_rate(sk); -@@ -6285,6 +6449,30 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) - - tcp_initialize_rcv_mss(sk); - tcp_fast_path_on(tp); -+ -+ /* Send an ACK when establishing a new MPTCP subflow, i.e. -+ * using an MP_JOIN subtype. -+ */ -+ if (mptcp(tp)) { -+ if (is_master_tp(tp)) { -+ mptcp_update_metasocket(mptcp_meta_sk(sk)); -+ } else { -+ struct sock *meta_sk = mptcp_meta_sk(sk); -+ -+ tcp_send_ack(sk); -+ -+ /* Update RTO as it might be worse/better */ -+ mptcp_set_rto(sk); -+ -+ /* If the new RTO would fire earlier, pull it in! */ -+ if (tcp_sk(meta_sk)->packets_out && -+ icsk->icsk_timeout > inet_csk(meta_sk)->icsk_rto + jiffies) { -+ tcp_rearm_rto(meta_sk); -+ } -+ -+ mptcp_push_pending_frames(mptcp_meta_sk(sk)); -+ } -+ } - break; - - case TCP_FIN_WAIT1: { -@@ -6325,7 +6513,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) - tmo = tcp_fin_time(sk); - if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); -- } else if (th->fin || sock_owned_by_user(sk)) { -+ } else if (th->fin || mptcp_is_data_fin(skb) || -+ sock_owned_by_user(sk)) { - /* Bad case. We could lose such FIN otherwise. - * It is not a big problem, but it looks confusing - * and not so rare event. 
We still can lose it now, -@@ -6334,7 +6523,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) - */ - inet_csk_reset_keepalive_timer(sk, tmo); - } else { -- tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); -+ tp->ops->time_wait(sk, TCP_FIN_WAIT2, tmo); - goto discard; - } - break; -@@ -6342,7 +6531,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) - - case TCP_CLOSING: - if (tp->snd_una == tp->write_seq) { -- tcp_time_wait(sk, TCP_TIME_WAIT, 0); -+ tp->ops->time_wait(sk, TCP_TIME_WAIT, 0); - goto discard; - } - break; -@@ -6354,6 +6543,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) - goto discard; - } - break; -+ case TCP_CLOSE: -+ if (tp->mp_killed) -+ goto discard; - } - - /* step 6: check the URG bit */ -@@ -6375,7 +6567,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) - */ - if (sk->sk_shutdown & RCV_SHUTDOWN) { - if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && -- after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) { -+ after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt) && -+ !mptcp(tp)) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); - tcp_reset(sk); - return 1; -@@ -6477,6 +6670,8 @@ static void tcp_openreq_init(struct request_sock *req, - ireq->wscale_ok = rx_opt->wscale_ok; - ireq->acked = 0; - ireq->ecn_ok = 0; -+ ireq->mptcp_rqsk = 0; -+ ireq->saw_mpc = 0; - ireq->ir_rmt_port = tcp_hdr(skb)->source; - ireq->ir_num = ntohs(tcp_hdr(skb)->dest); - ireq->ir_mark = inet_request_mark(sk, skb); -@@ -6602,12 +6797,17 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, - /* TW buckets are converted to open requests without - * limitations, they conserve resources and peer is - * evidently real one. -+ * -+ * MPTCP: new subflows cannot be established in a stateless manner. - */ -- if ((net->ipv4.sysctl_tcp_syncookies == 2 || -+ if (((!is_meta_sk(sk) && net->ipv4.sysctl_tcp_syncookies == 2) || - inet_csk_reqsk_queue_is_full(sk)) && !isn) { - want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name); - if (!want_cookie) - goto drop; -+ -+ if (is_meta_sk(sk)) -+ goto drop; - } - - if (sk_acceptq_is_full(sk)) { -@@ -6625,8 +6825,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, - tcp_clear_options(&tmp_opt); - tmp_opt.mss_clamp = af_ops->mss_clamp; - tmp_opt.user_mss = tp->rx_opt.user_mss; -- tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, -- want_cookie ? NULL : &foc); -+ tcp_parse_options(sock_net(sk), skb, &tmp_opt, NULL, 0, -+ want_cookie ? 
NULL : &foc, NULL); - - if (want_cookie && !tmp_opt.saw_tstamp) - tcp_clear_options(&tmp_opt); -@@ -6641,7 +6841,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, - /* Note: tcp_v6_init_req() might override ir_iif for link locals */ - inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb); - -- af_ops->init_req(req, sk, skb); -+ if (af_ops->init_req(req, sk, skb, want_cookie)) -+ goto drop_and_free; - - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; -@@ -6677,7 +6878,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, - tcp_ecn_create_request(req, skb, sk, dst); - - if (want_cookie) { -- isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); -+ isn = cookie_init_sequence(af_ops, req, sk, skb, &req->mss); - req->cookie_ts = tmp_opt.tstamp_ok; - if (!tmp_opt.tstamp_ok) - inet_rsk(req)->ecn_ok = 0; -@@ -6692,17 +6893,25 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, - fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst); - } - if (fastopen_sk) { -+ struct sock *meta_sk = fastopen_sk; -+ -+ if (mptcp(tcp_sk(fastopen_sk))) -+ meta_sk = mptcp_meta_sk(fastopen_sk); - af_ops->send_synack(fastopen_sk, dst, &fl, req, - &foc, TCP_SYNACK_FASTOPEN); - /* Add the child socket directly into the accept queue */ -- if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) { -+ if (!inet_csk_reqsk_queue_add(sk, req, meta_sk)) { - reqsk_fastopen_remove(fastopen_sk, req, false); - bh_unlock_sock(fastopen_sk); -+ if (meta_sk != fastopen_sk) -+ bh_unlock_sock(meta_sk); - sock_put(fastopen_sk); - goto drop_and_free; - } - sk->sk_data_ready(sk); - bh_unlock_sock(fastopen_sk); -+ if (meta_sk != fastopen_sk) -+ bh_unlock_sock(meta_sk); - sock_put(fastopen_sk); - } else { - tcp_rsk(req)->tfo_listener = false; -diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c -index 2ce85e52aea7..2e76c006ad16 100644 ---- a/net/ipv4/tcp_ipv4.c -+++ b/net/ipv4/tcp_ipv4.c -@@ -62,6 +62,8 @@ - #include - #include - #include -+#include -+#include - #include - #include - #include -@@ -209,6 +211,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) - struct ip_options_rcu *inet_opt; - struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; - -+ mptcp_init_connect(sk); -+ - if (addr_len < sizeof(struct sockaddr_in)) - return -EINVAL; - -@@ -430,7 +434,7 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) - struct inet_sock *inet; - const int type = icmp_hdr(icmp_skb)->type; - const int code = icmp_hdr(icmp_skb)->code; -- struct sock *sk; -+ struct sock *sk, *meta_sk; - struct sk_buff *skb; - struct request_sock *fastopen; - u32 seq, snd_una; -@@ -460,13 +464,19 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) - return 0; - } - -- bh_lock_sock(sk); -+ tp = tcp_sk(sk); -+ if (mptcp(tp)) -+ meta_sk = mptcp_meta_sk(sk); -+ else -+ meta_sk = sk; -+ -+ bh_lock_sock(meta_sk); - /* If too many ICMPs get dropped on busy - * servers this needs to be solved differently. - * We do take care of PMTU discovery (RFC1191) special case : - * we can receive locally generated ICMP messages while socket is held. 
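The tcp_v4_err() changes above follow a pattern used throughout the patch: whatever subflow the ICMP error hits, the lock and the owned-by-user test are taken on the meta socket, since that is where the shared connection state lives. A toy model of the redirection (pthread mutexes standing in for bh_lock_sock()):

#include <pthread.h>
#include <stdio.h>

/* Toy socket: subflows point at a shared meta socket; the meta stands
 * for itself.  All locking goes through the meta, as in tcp_v4_err(). */
struct tsk {
	struct tsk *meta;
	pthread_mutex_t lock;
};

static struct tsk *meta_of(struct tsk *sk)
{
	return sk->meta ? sk->meta : sk;
}

int main(void)
{
	struct tsk meta = { .meta = NULL, .lock = PTHREAD_MUTEX_INITIALIZER };
	struct tsk sub  = { .meta = &meta, .lock = PTHREAD_MUTEX_INITIALIZER };

	pthread_mutex_lock(&meta_of(&sub)->lock);	/* bh_lock_sock(meta_sk) */
	/* ... handle the ICMP error against the subflow ... */
	pthread_mutex_unlock(&meta_of(&sub)->lock);
	printf("locked the meta, not the subflow\n");
	return 0;
}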
- */ -- if (sock_owned_by_user(sk)) { -+ if (sock_owned_by_user(meta_sk)) { - if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)) - __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); - } -@@ -479,7 +489,6 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) - } - - icsk = inet_csk(sk); -- tp = tcp_sk(sk); - /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ - fastopen = rcu_dereference(tp->fastopen_rsk); - snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; -@@ -513,11 +522,13 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) - goto out; - - WRITE_ONCE(tp->mtu_info, info); -- if (!sock_owned_by_user(sk)) { -+ if (!sock_owned_by_user(meta_sk)) { - tcp_v4_mtu_reduced(sk); - } else { - if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags)) - sock_hold(sk); -+ if (mptcp(tp)) -+ mptcp_tsq_flags(sk); - } - goto out; - } -@@ -531,7 +542,7 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) - !icsk->icsk_backoff || fastopen) - break; - -- if (sock_owned_by_user(sk)) -+ if (sock_owned_by_user(meta_sk)) - break; - - skb = tcp_rtx_queue_head(sk); -@@ -555,7 +566,7 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) - } else { - /* RTO revert clocked out retransmission. - * Will retransmit now */ -- tcp_retransmit_timer(sk); -+ tcp_sk(sk)->ops->retransmit_timer(sk); - } - - break; -@@ -575,7 +586,7 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) - if (fastopen && !fastopen->sk) - break; - -- if (!sock_owned_by_user(sk)) { -+ if (!sock_owned_by_user(meta_sk)) { - sk->sk_err = err; - - sk->sk_error_report(sk); -@@ -604,7 +615,7 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) - */ - - inet = inet_sk(sk); -- if (!sock_owned_by_user(sk) && inet->recverr) { -+ if (!sock_owned_by_user(meta_sk) && inet->recverr) { - sk->sk_err = err; - sk->sk_error_report(sk); - } else { /* Only an error on timeout */ -@@ -612,7 +623,7 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) - } - - out: -- bh_unlock_sock(sk); -+ bh_unlock_sock(meta_sk); - sock_put(sk); - return 0; - } -@@ -648,7 +659,7 @@ EXPORT_SYMBOL(tcp_v4_send_check); - * Exception: precedence violation. We do not implement it in any case. - */ - --static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) -+void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) - { - const struct tcphdr *th = tcp_hdr(skb); - struct { -@@ -800,10 +811,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) - */ - - static void tcp_v4_send_ack(const struct sock *sk, -- struct sk_buff *skb, u32 seq, u32 ack, -+ struct sk_buff *skb, u32 seq, u32 ack, u32 data_ack, - u32 win, u32 tsval, u32 tsecr, int oif, - struct tcp_md5sig_key *key, -- int reply_flags, u8 tos) -+ int reply_flags, u8 tos, int mptcp) - { - const struct tcphdr *th = tcp_hdr(skb); - struct { -@@ -811,6 +822,10 @@ static void tcp_v4_send_ack(const struct sock *sk, - __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) - #ifdef CONFIG_TCP_MD5SIG - + (TCPOLEN_MD5SIG_ALIGNED >> 2) -+#endif -+#ifdef CONFIG_MPTCP -+ + ((MPTCP_SUB_LEN_DSS >> 2) + -+ (MPTCP_SUB_LEN_ACK >> 2)) - #endif - ]; - } rep; -@@ -858,6 +873,21 @@ static void tcp_v4_send_ack(const struct sock *sk, - ip_hdr(skb)->daddr, &rep.th); - } - #endif -+#ifdef CONFIG_MPTCP -+ if (mptcp) { -+ int offset = (tsecr) ? 
3 : 0; -+ /* Construction of 32-bit data_ack */ -+ rep.opt[offset++] = htonl((TCPOPT_MPTCP << 24) | -+ ((MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK) << 16) | -+ (0x20 << 8) | -+ (0x01)); -+ rep.opt[offset] = htonl(data_ack); -+ -+ arg.iov[0].iov_len += MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK; -+ rep.th.doff = arg.iov[0].iov_len / 4; -+ } -+#endif /* CONFIG_MPTCP */ -+ - arg.flags = reply_flags; - arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, /* XXX */ -@@ -889,28 +919,36 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) - { - struct inet_timewait_sock *tw = inet_twsk(sk); - struct tcp_timewait_sock *tcptw = tcp_twsk(sk); -+ u32 data_ack = 0; -+ int mptcp = 0; -+ -+ if (tcptw->mptcp_tw) { -+ data_ack = (u32)tcptw->mptcp_tw->rcv_nxt; -+ mptcp = 1; -+ } - - tcp_v4_send_ack(sk, skb, -- tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, -+ tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, data_ack, - tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcp_time_stamp_raw() + tcptw->tw_ts_offset, - tcptw->tw_ts_recent, - tw->tw_bound_dev_if, - tcp_twsk_md5_key(tcptw), - tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, -- tw->tw_tos -+ tw->tw_tos, mptcp - ); - - inet_twsk_put(tw); - } - --static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, -- struct request_sock *req) -+void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, -+ struct request_sock *req) - { - /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV - * sk->sk_state == TCP_SYN_RECV -> for Fast Open. - */ -- u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : -+ u32 seq = (sk->sk_state == TCP_LISTEN || is_meta_sk(sk)) ? -+ tcp_rsk(req)->snt_isn + 1 : - tcp_sk(sk)->snd_nxt; - - /* RFC 7323 2.3 -@@ -919,7 +957,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, - * Rcv.Wind.Shift bits: - */ - tcp_v4_send_ack(sk, skb, seq, -- tcp_rsk(req)->rcv_nxt, -+ tcp_rsk(req)->rcv_nxt, 0, - req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, - tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, - req->ts_recent, -@@ -927,7 +965,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, - tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr, - AF_INET), - inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, -- ip_hdr(skb)->tos); -+ ip_hdr(skb)->tos, 0); - } - - /* -@@ -935,11 +973,11 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, - * This still operates on a request_sock only, not on a big - * socket. - */ --static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, -- struct flowi *fl, -- struct request_sock *req, -- struct tcp_fastopen_cookie *foc, -- enum tcp_synack_type synack_type) -+int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, -+ struct flowi *fl, -+ struct request_sock *req, -+ struct tcp_fastopen_cookie *foc, -+ enum tcp_synack_type synack_type) - { - const struct inet_request_sock *ireq = inet_rsk(req); - struct flowi4 fl4; -@@ -969,7 +1007,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, - /* - * IPv4 request_sock destructor. 
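The two htonl() stores in tcp_v4_send_ack() above pack an entire DSS option: kind, total length, the subtype in the high nibble of the third byte, and a flag byte announcing a 32-bit data_ack, followed by the data_ack itself in the next word. Decoding the first word makes the layout explicit (kind 30 and an 8-byte length are assumed values for TCPOPT_MPTCP and MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK):

#include <stdio.h>
#include <stdint.h>

#define TCPOPT_MPTCP	30	/* assumed option kind */
#define DSS_LEN		8	/* assumed total option length */

int main(void)
{
	/* Same construction as the patch, minus the htonl() byte swap. */
	uint32_t w = (TCPOPT_MPTCP << 24) | (DSS_LEN << 16) |
		     (0x20 << 8) |	/* subtype DSS in the high nibble */
		     0x01;		/* flag: 32-bit data_ack present */

	printf("kind=%u len=%u subtype=%u flags=0x%02x\n",
	       w >> 24, (w >> 16) & 0xff, (w >> 12) & 0xf, w & 0xff);
	return 0;
}

Run, this prints kind=30 len=8 subtype=2 flags=0x01, i.e. a DSS option whose only content is the data-level ACK.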
- */ --static void tcp_v4_reqsk_destructor(struct request_sock *req) -+void tcp_v4_reqsk_destructor(struct request_sock *req) - { - kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1)); - } -@@ -1354,9 +1392,10 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk, - return false; - } - --static void tcp_v4_init_req(struct request_sock *req, -- const struct sock *sk_listener, -- struct sk_buff *skb) -+static int tcp_v4_init_req(struct request_sock *req, -+ const struct sock *sk_listener, -+ struct sk_buff *skb, -+ bool want_cookie) - { - struct inet_request_sock *ireq = inet_rsk(req); - struct net *net = sock_net(sk_listener); -@@ -1364,6 +1403,8 @@ static void tcp_v4_init_req(struct request_sock *req, - sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); - sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); - RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb)); -+ -+ return 0; - } - - static struct dst_entry *tcp_v4_route_req(const struct sock *sk, -@@ -1383,7 +1424,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { - .syn_ack_timeout = tcp_syn_ack_timeout, - }; - --static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { -+const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { - .mss_clamp = TCP_MSS_DEFAULT, - #ifdef CONFIG_TCP_MD5SIG - .req_md5_lookup = tcp_v4_md5_lookup, -@@ -1520,7 +1561,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, - } - EXPORT_SYMBOL(tcp_v4_syn_recv_sock); - --static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) -+struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) - { - #ifdef CONFIG_SYN_COOKIES - const struct tcphdr *th = tcp_hdr(skb); -@@ -1558,6 +1599,9 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) - { - struct sock *rsk; - -+ if (is_meta_sk(sk)) -+ return mptcp_v4_do_rcv(sk, skb); -+ - if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - struct dst_entry *dst = sk->sk_rx_dst; - -@@ -1803,6 +1847,10 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, - TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + - skb->len - th->doff * 4); - TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); -+#ifdef CONFIG_MPTCP -+ TCP_SKB_CB(skb)->mptcp_flags = 0; -+ TCP_SKB_CB(skb)->dss_off = 0; -+#endif - TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); - TCP_SKB_CB(skb)->tcp_tw_isn = 0; - TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); -@@ -1822,8 +1870,8 @@ int tcp_v4_rcv(struct sk_buff *skb) - int sdif = inet_sdif(skb); - const struct iphdr *iph; - const struct tcphdr *th; -+ struct sock *sk, *meta_sk = NULL; - bool refcounted; -- struct sock *sk; - int ret; - - if (skb->pkt_type != PACKET_HOST) -@@ -1877,7 +1925,11 @@ int tcp_v4_rcv(struct sk_buff *skb) - reqsk_put(req); - goto csum_error; - } -- if (unlikely(sk->sk_state != TCP_LISTEN)) { -+ if (unlikely(sk->sk_state != TCP_LISTEN && !is_meta_sk(sk))) { -+ inet_csk_reqsk_queue_drop_and_put(sk, req); -+ goto lookup; -+ } -+ if (unlikely(is_meta_sk(sk) && !mptcp_can_new_subflow(sk))) { - inet_csk_reqsk_queue_drop_and_put(sk, req); - goto lookup; - } -@@ -1886,6 +1938,7 @@ int tcp_v4_rcv(struct sk_buff *skb) - */ - sock_hold(sk); - refcounted = true; -+ - nsk = NULL; - if (!tcp_filter(sk, skb)) { - th = (const struct tcphdr *)skb->data; -@@ -1946,19 +1999,28 @@ int tcp_v4_rcv(struct sk_buff *skb) - - sk_incoming_cpu_update(sk); - -- bh_lock_sock_nested(sk); -+ if (mptcp(tcp_sk(sk))) { -+ meta_sk = mptcp_meta_sk(sk); -+ -+ 
bh_lock_sock_nested(meta_sk); -+ if (sock_owned_by_user(meta_sk)) -+ mptcp_prepare_for_backlog(sk, skb); -+ } else { -+ meta_sk = sk; -+ bh_lock_sock_nested(sk); -+ } - tcp_segs_in(tcp_sk(sk), skb); - ret = 0; -- if (!sock_owned_by_user(sk)) { -+ if (!sock_owned_by_user(meta_sk)) { - skb_to_free = sk->sk_rx_skb_cache; - sk->sk_rx_skb_cache = NULL; - ret = tcp_v4_do_rcv(sk, skb); - } else { -- if (tcp_add_backlog(sk, skb)) -+ if (tcp_add_backlog(meta_sk, skb)) - goto discard_and_relse; - skb_to_free = NULL; - } -- bh_unlock_sock(sk); -+ bh_unlock_sock(meta_sk); - if (skb_to_free) - __kfree_skb(skb_to_free); - -@@ -1974,6 +2036,19 @@ int tcp_v4_rcv(struct sk_buff *skb) - - tcp_v4_fill_cb(skb, iph, th); - -+#ifdef CONFIG_MPTCP -+ if (!sk && th->syn && !th->ack) { -+ int ret = mptcp_lookup_join(skb, NULL); -+ -+ if (ret < 0) { -+ tcp_v4_send_reset(NULL, skb); -+ goto discard_it; -+ } else if (ret > 0) { -+ return 0; -+ } -+ } -+#endif -+ - if (tcp_checksum_complete(skb)) { - csum_error: - __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); -@@ -2022,6 +2097,18 @@ int tcp_v4_rcv(struct sk_buff *skb) - refcounted = false; - goto process; - } -+#ifdef CONFIG_MPTCP -+ if (th->syn && !th->ack) { -+ int ret = mptcp_lookup_join(skb, inet_twsk(sk)); -+ -+ if (ret < 0) { -+ tcp_v4_send_reset(NULL, skb); -+ goto discard_it; -+ } else if (ret > 0) { -+ return 0; -+ } -+ } -+#endif - } - /* to ACK */ - /* fall through */ -@@ -2091,7 +2178,12 @@ static int tcp_v4_init_sock(struct sock *sk) - - tcp_init_sock(sk); - -- icsk->icsk_af_ops = &ipv4_specific; -+#ifdef CONFIG_MPTCP -+ if (sock_flag(sk, SOCK_MPTCP)) -+ icsk->icsk_af_ops = &mptcp_v4_specific; -+ else -+#endif -+ icsk->icsk_af_ops = &ipv4_specific; - - #ifdef CONFIG_TCP_MD5SIG - tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific; -@@ -2110,6 +2202,11 @@ void tcp_v4_destroy_sock(struct sock *sk) - - tcp_cleanup_congestion_control(sk); - -+ if (mptcp(tp)) -+ mptcp_destroy_sock(sk); -+ if (tp->inside_tk_table) -+ mptcp_hash_remove_bh(tp); -+ - tcp_cleanup_ulp(sk); - - /* Cleanup up the write buffer. 
*/ -@@ -2615,6 +2712,11 @@ struct proto tcp_prot = { - .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), - .max_header = MAX_TCP_HEADER, - .obj_size = sizeof(struct tcp_sock), -+#ifdef CONFIG_MPTCP -+ .useroffset = offsetof(struct tcp_sock, mptcp_sched_name), -+ .usersize = sizeof_field(struct tcp_sock, mptcp_sched_name) + -+ sizeof_field(struct tcp_sock, mptcp_pm_name), -+#endif - .slab_flags = SLAB_TYPESAFE_BY_RCU, - .twsk_prot = &tcp_timewait_sock_ops, - .rsk_prot = &tcp_request_sock_ops, -@@ -2625,6 +2727,9 @@ struct proto tcp_prot = { - .compat_getsockopt = compat_tcp_getsockopt, - #endif - .diag_destroy = tcp_abort, -+#ifdef CONFIG_MPTCP -+ .clear_sk = mptcp_clear_sk, -+#endif - }; - EXPORT_SYMBOL(tcp_prot); - -diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c -index 194743bd3fc1..b35942faf7df 100644 ---- a/net/ipv4/tcp_minisocks.c -+++ b/net/ipv4/tcp_minisocks.c -@@ -19,11 +19,13 @@ - * Jorge Cwik, - */ - -+#include - #include - #include - #include - #include - #include -+#include - #include - #include - #include -@@ -95,10 +97,14 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, - struct tcp_options_received tmp_opt; - struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); - bool paws_reject = false; -+ struct mptcp_options_received mopt; - - tmp_opt.saw_tstamp = 0; -- if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { -- tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL); -+ if (th->doff > (sizeof(*th) >> 2) && -+ (tcptw->tw_ts_recent_stamp || tcptw->mptcp_tw)) { -+ mptcp_init_mp_opt(&mopt); -+ -+ tcp_parse_options(twsk_net(tw), skb, &tmp_opt, &mopt, 0, NULL, NULL); - - if (tmp_opt.saw_tstamp) { - if (tmp_opt.rcv_tsecr) -@@ -107,6 +113,11 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, - tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; - paws_reject = tcp_paws_reject(&tmp_opt, th->rst); - } -+ -+ if (unlikely(mopt.mp_fclose) && tcptw->mptcp_tw) { -+ if (mopt.mptcp_sender_key == tcptw->mptcp_tw->loc_key) -+ return TCP_TW_RST; -+ } - } - - if (tw->tw_substate == TCP_FIN_WAIT2) { -@@ -130,6 +141,16 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, - if (!th->ack || - !after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) || - TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) { -+ /* If mptcp_is_data_fin() returns true, we are sure that -+ * mopt has been initialized - otherwise it would not -+ * be a DATA_FIN. -+ */ -+ if (tcptw->mptcp_tw && tcptw->mptcp_tw->meta_tw && -+ mptcp_is_data_fin(skb) && -+ TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt && -+ mopt.data_seq + 1 == (u32)tcptw->mptcp_tw->rcv_nxt) -+ return TCP_TW_ACK; -+ - inet_twsk_put(tw); - return TCP_TW_SUCCESS; - } -@@ -270,11 +291,25 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) - tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; - tcptw->tw_rcv_nxt = tp->rcv_nxt; - tcptw->tw_snd_nxt = tp->snd_nxt; -- tcptw->tw_rcv_wnd = tcp_receive_window(tp); -+ /* no need to keep track of the right-most right edge -+ * when in time wait, can directly use the currently -+ * advertised window. 
-+ */ -+ tcptw->tw_rcv_wnd = tcp_receive_window_now(tp); - tcptw->tw_ts_recent = tp->rx_opt.ts_recent; - tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; - tcptw->tw_ts_offset = tp->tsoffset; - tcptw->tw_last_oow_ack_time = 0; -+ -+ if (mptcp(tp)) { -+ if (mptcp_init_tw_sock(sk, tcptw)) { -+ inet_twsk_free(tw); -+ goto exit; -+ } -+ } else { -+ tcptw->mptcp_tw = NULL; -+ } -+ - tcptw->tw_tx_delay = tp->tcp_tx_delay; - #if IS_ENABLED(CONFIG_IPV6) - if (tw->tw_family == PF_INET6) { -@@ -336,6 +371,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW); - } - -+exit: - tcp_update_metrics(sk); - tcp_done(sk); - } -@@ -343,6 +379,10 @@ EXPORT_SYMBOL(tcp_time_wait); - - void tcp_twsk_destructor(struct sock *sk) - { -+ struct tcp_timewait_sock *twsk = tcp_twsk(sk); -+ -+ if (twsk->mptcp_tw) -+ mptcp_twsk_destructor(twsk); - #ifdef CONFIG_TCP_MD5SIG - if (static_branch_unlikely(&tcp_md5_needed)) { - struct tcp_timewait_sock *twsk = tcp_twsk(sk); -@@ -386,8 +426,9 @@ void tcp_openreq_init_rwin(struct request_sock *req, - full_space = rcv_wnd * mss; - - /* tcp_full_space because it is guaranteed to be the first packet */ -- tcp_select_initial_window(sk_listener, full_space, -- mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), -+ tp->ops->select_initial_window(sk_listener, full_space, -+ mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) - -+ (ireq->saw_mpc ? MPTCP_SUB_LEN_DSM_ALIGN : 0), - &req->rsk_rcv_wnd, - &req->rsk_window_clamp, - ireq->wscale_ok, -@@ -487,6 +528,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, - WRITE_ONCE(newtp->snd_nxt, seq); - newtp->snd_up = seq; - -+ newtp->out_of_order_queue = RB_ROOT; -+ newsk->tcp_rtx_queue = RB_ROOT; - INIT_LIST_HEAD(&newtp->tsq_node); - INIT_LIST_HEAD(&newtp->tsorted_sent_queue); - -@@ -511,6 +554,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, - newtp->window_clamp = req->rsk_window_clamp; - newtp->rcv_ssthresh = req->rsk_rcv_wnd; - newtp->rcv_wnd = req->rsk_rcv_wnd; -+ newtp->rcv_right_edge = newtp->rcv_wnd + newtp->rcv_wup; - newtp->rx_opt.wscale_ok = ireq->wscale_ok; - if (newtp->rx_opt.wscale_ok) { - newtp->rx_opt.snd_wscale = ireq->snd_wscale; -@@ -530,6 +574,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, - newtp->rx_opt.ts_recent_stamp = 0; - newtp->tcp_header_len = sizeof(struct tcphdr); - } -+ if (ireq->saw_mpc) -+ newtp->tcp_header_len += MPTCP_SUB_LEN_DSM_ALIGN; - if (req->num_timeout) { - newtp->undo_marker = treq->snt_isn; - newtp->retrans_stamp = div_u64(treq->snt_synack, -@@ -547,6 +593,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, - tcp_ecn_openreq_child(newtp, req); - newtp->fastopen_req = NULL; - RCU_INIT_POINTER(newtp->fastopen_rsk, NULL); -+ newtp->inside_tk_table = 0; - - __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); - -@@ -570,15 +617,20 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, - bool fastopen, bool *req_stolen) - { - struct tcp_options_received tmp_opt; -+ struct mptcp_options_received mopt; - struct sock *child; - const struct tcphdr *th = tcp_hdr(skb); - __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); - bool paws_reject = false; - bool own_req; -+ bool meta_locked = false; - - tmp_opt.saw_tstamp = 0; -+ -+ mptcp_init_mp_opt(&mopt); -+ - if (th->doff > (sizeof(struct tcphdr)>>2)) { -- tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL); -+ tcp_parse_options(sock_net(sk), skb, &tmp_opt, &mopt, 0, NULL, NULL); - 
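tcp_receive_window_now() and rcv_right_edge, both introduced above, split the old tcp_receive_window() into two views: the window as currently advertised, and the right-most edge ever advertised, which MPTCP needs because the meta-level window must never appear to shrink. The bookkeeping is small enough to sketch in full (the helper bodies are not part of this hunk, so this is an assumed reconstruction from rcv_right_edge = rcv_wnd + rcv_wup):

#include <stdio.h>
#include <stdint.h>

struct tp {
	uint32_t rcv_wup, rcv_nxt, rcv_wnd, rcv_right_edge;
};

/* tcp_update_rcv_right_edge(): the edge only ever moves forward. */
static void update_right_edge(struct tp *t)
{
	uint32_t edge = t->rcv_wup + t->rcv_wnd;

	if ((int32_t)(edge - t->rcv_right_edge) > 0)
		t->rcv_right_edge = edge;
}

/* Window as currently advertised... */
static uint32_t window_now(const struct tp *t)
{
	return t->rcv_wup + t->rcv_wnd - t->rcv_nxt;
}

/* ...versus window measured against the sticky right edge. */
static uint32_t window_no_shrink(const struct tp *t)
{
	return t->rcv_right_edge - t->rcv_nxt;
}

int main(void)
{
	struct tp t = { .rcv_wup = 0, .rcv_nxt = 0, .rcv_wnd = 1000 };

	update_right_edge(&t);
	t.rcv_wnd = 500;	/* a smaller announcement would shrink... */
	printf("now=%u no_shrink=%u\n", window_now(&t), window_no_shrink(&t));
	return 0;
}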
- if (tmp_opt.saw_tstamp) { - tmp_opt.ts_recent = req->ts_recent; -@@ -619,7 +671,14 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, - * - * Reset timer after retransmitting SYNACK, similar to - * the idea of fast retransmit in recovery. -+ * -+ * Fall back to TCP if MP_CAPABLE is not set. - */ -+ -+ if (inet_rsk(req)->saw_mpc && !mopt.saw_mpc) -+ inet_rsk(req)->saw_mpc = false; -+ -+ - if (!tcp_oow_rate_limited(sock_net(sk), skb, - LINUX_MIB_TCPACKSKIPPEDSYNRECV, - &tcp_rsk(req)->last_oow_ack_time) && -@@ -767,17 +826,40 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, - * ESTABLISHED STATE. If it will be dropped after - * socket is created, wait for troubles. - */ -+ if (is_meta_sk(sk)) { -+ bh_lock_sock_nested(sk); -+ meta_locked = true; -+ } - child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, - req, &own_req); - if (!child) - goto listen_overflow; - -+ if (own_req && !is_meta_sk(sk)) { -+ int ret = mptcp_check_req_master(sk, child, req, skb, &mopt, 1, 0); -+ if (ret < 0) -+ goto listen_overflow; -+ -+ /* MPTCP-supported */ -+ if (!ret) -+ return tcp_sk(child)->mpcb->master_sk; -+ } else if (own_req) { -+ return mptcp_check_req_child(sk, child, req, skb, &mopt); -+ } -+ -+ if (meta_locked) -+ bh_unlock_sock(sk); -+ - sock_rps_save_rxhash(child, skb); - tcp_synack_rtt_meas(child, req); - *req_stolen = !own_req; -+ - return inet_csk_complete_hashdance(sk, child, req, own_req); - - listen_overflow: -+ if (meta_locked) -+ bh_unlock_sock(sk); -+ - if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) { - inet_rsk(req)->acked = 1; - return NULL; -@@ -823,12 +905,13 @@ int tcp_child_process(struct sock *parent, struct sock *child, - { - int ret = 0; - int state = child->sk_state; -+ struct sock *meta_sk = mptcp(tcp_sk(child)) ? mptcp_meta_sk(child) : child; - - /* record NAPI ID of child */ - sk_mark_napi_id(child, skb); - - tcp_segs_in(tcp_sk(child), skb); -- if (!sock_owned_by_user(child)) { -+ if (!sock_owned_by_user(meta_sk)) { - ret = tcp_rcv_state_process(child, skb); - /* Wakeup parent, send SIGIO */ - if (state == TCP_SYN_RECV && child->sk_state != state) -@@ -838,10 +921,14 @@ int tcp_child_process(struct sock *parent, struct sock *child, - * in main socket hash table and lock on listening - * socket does not protect us more. - */ -- __sk_add_backlog(child, skb); -+ if (mptcp(tcp_sk(child))) -+ mptcp_prepare_for_backlog(child, skb); -+ __sk_add_backlog(meta_sk, skb); - } - - bh_unlock_sock(child); -+ if (mptcp(tcp_sk(child))) -+ bh_unlock_sock(meta_sk); - sock_put(child); - return ret; - } -diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c -index 638d7b49ad71..d246e537e686 100644 ---- a/net/ipv4/tcp_output.c -+++ b/net/ipv4/tcp_output.c -@@ -37,6 +37,12 @@ - - #define pr_fmt(fmt) "TCP: " fmt - -+#include -+#include -+#if IS_ENABLED(CONFIG_IPV6) -+#include -+#endif -+#include - #include - - #include -@@ -57,11 +63,8 @@ void tcp_mstamp_refresh(struct tcp_sock *tp) - tp->tcp_mstamp = div_u64(val, NSEC_PER_USEC); - } - --static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, -- int push_one, gfp_t gfp); -- - /* Account for new data that has been sent to the network. 
*/ --static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) -+void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) - { - struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); -@@ -255,12 +258,16 @@ EXPORT_SYMBOL(tcp_select_initial_window); - * value can be stuffed directly into th->window for an outgoing - * frame. - */ --static u16 tcp_select_window(struct sock *sk) -+u16 tcp_select_window(struct sock *sk) - { - struct tcp_sock *tp = tcp_sk(sk); - u32 old_win = tp->rcv_wnd; -- u32 cur_win = tcp_receive_window(tp); -- u32 new_win = __tcp_select_window(sk); -+ /* The window must never shrink at the meta-level. At the subflow we -+ * have to allow this. Otherwise we may announce a window too large -+ * for the current meta-level sk_rcvbuf. -+ */ -+ u32 cur_win = tcp_receive_window_now(mptcp(tp) ? tcp_sk(mptcp_meta_sk(sk)) : tp); -+ u32 new_win = tp->ops->__select_window(sk); - - /* Never shrink the offered window */ - if (new_win < cur_win) { -@@ -276,8 +283,10 @@ static u16 tcp_select_window(struct sock *sk) - LINUX_MIB_TCPWANTZEROWINDOWADV); - new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale); - } -+ - tp->rcv_wnd = new_win; - tp->rcv_wup = tp->rcv_nxt; -+ tcp_update_rcv_right_edge(tp); - - /* Make sure we do not exceed the maximum possible - * scaled window. -@@ -388,7 +397,7 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, - /* Constructs common control bits of non-data skb. If SYN/FIN is present, - * auto increment end seqno. - */ --static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) -+void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) - { - skb->ip_summed = CHECKSUM_PARTIAL; - -@@ -403,7 +412,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) - TCP_SKB_CB(skb)->end_seq = seq; - } - --static inline bool tcp_urg_mode(const struct tcp_sock *tp) -+bool tcp_urg_mode(const struct tcp_sock *tp) - { - return tp->snd_una != tp->snd_up; - } -@@ -414,6 +423,7 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) - #define OPTION_WSCALE (1 << 3) - #define OPTION_FAST_OPEN_COOKIE (1 << 8) - #define OPTION_SMC (1 << 9) -+/* Before adding here - take a look at OPTION_MPTCP in include/net/mptcp.h */ - - static void smc_options_write(__be32 *ptr, u16 *options) - { -@@ -430,17 +440,6 @@ static void smc_options_write(__be32 *ptr, u16 *options) - #endif - } - --struct tcp_out_options { -- u16 options; /* bit field of OPTION_* */ -- u16 mss; /* 0 to disable */ -- u8 ws; /* window scale, 0 to disable */ -- u8 num_sack_blocks; /* number of SACK blocks to include */ -- u8 hash_size; /* bytes in hash_location */ -- __u8 *hash_location; /* temporary pointer, overloaded */ -- __u32 tsval, tsecr; /* need to include OPTION_TS */ -- struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ --}; -- - /* Write previously computed TCP options to the packet. - * - * Beware: Something in the Internet is very sensitive to the ordering of -@@ -455,7 +454,7 @@ struct tcp_out_options { - * (but it may well be that other scenarios fail similarly). 
- */ - static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, -- struct tcp_out_options *opts) -+ struct tcp_out_options *opts, struct sk_buff *skb) - { - u16 options = opts->options; /* mungable copy */ - -@@ -549,6 +548,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, - } - - smc_options_write(ptr, &options); -+ -+ if (unlikely(OPTION_MPTCP & opts->options)) -+ mptcp_options_write(ptr, tp, opts, skb); - } - - static void smc_set_option(const struct tcp_sock *tp, -@@ -635,6 +637,8 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, - if (unlikely(!(OPTION_TS & opts->options))) - remaining -= TCPOLEN_SACKPERM_ALIGNED; - } -+ if (tp->request_mptcp || mptcp(tp)) -+ mptcp_syn_options(sk, opts, &remaining); - - if (fastopen && fastopen->cookie.len >= 0) { - u32 need = fastopen->cookie.len; -@@ -718,6 +722,9 @@ static unsigned int tcp_synack_options(const struct sock *sk, - - smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining); - -+ if (ireq->saw_mpc) -+ mptcp_synack_options(req, opts, &remaining); -+ - return MAX_TCP_OPTION_SPACE - remaining; - } - -@@ -752,14 +759,19 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb - opts->tsecr = tp->rx_opt.ts_recent; - size += TCPOLEN_TSTAMP_ALIGNED; - } -+ if (mptcp(tp)) -+ mptcp_established_options(sk, skb, opts, &size); - - eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; - if (unlikely(eff_sacks)) { -- const unsigned int remaining = MAX_TCP_OPTION_SPACE - size; -- opts->num_sack_blocks = -- min_t(unsigned int, eff_sacks, -- (remaining - TCPOLEN_SACK_BASE_ALIGNED) / -- TCPOLEN_SACK_PERBLOCK); -+ const unsigned remaining = MAX_TCP_OPTION_SPACE - size; -+ if (remaining < TCPOLEN_SACK_BASE_ALIGNED) -+ opts->num_sack_blocks = 0; -+ else -+ opts->num_sack_blocks = -+ min_t(unsigned int, eff_sacks, -+ (remaining - TCPOLEN_SACK_BASE_ALIGNED) / -+ TCPOLEN_SACK_PERBLOCK); - if (likely(opts->num_sack_blocks)) - size += TCPOLEN_SACK_BASE_ALIGNED + - opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; -@@ -802,19 +814,31 @@ static void tcp_tsq_write(struct sock *sk) - tcp_xmit_retransmit_queue(sk); - } - -- tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle, -- 0, GFP_ATOMIC); -+ tcp_sk(sk)->ops->write_xmit(sk, tcp_current_mss(sk), -+ tcp_sk(sk)->nonagle, 0, GFP_ATOMIC); - } - } - - static void tcp_tsq_handler(struct sock *sk) - { -- bh_lock_sock(sk); -- if (!sock_owned_by_user(sk)) -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct sock *meta_sk = mptcp(tp) ? mptcp_meta_sk(sk) : sk; -+ -+ bh_lock_sock(meta_sk); -+ if (!sock_owned_by_user(meta_sk)) { - tcp_tsq_write(sk); -- else if (!test_and_set_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) -- sock_hold(sk); -- bh_unlock_sock(sk); -+ -+ if (mptcp(tp)) -+ tcp_tsq_write(meta_sk); -+ } else { -+ if (!test_and_set_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) -+ sock_hold(sk); -+ -+ if ((mptcp(tp)) && (sk->sk_state != TCP_CLOSE)) -+ mptcp_tsq_flags(sk); -+ } -+ -+ bh_unlock_sock(meta_sk); - } - /* - * One tasklet per cpu tries to send more skbs. 
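tcp_tsq_handler() above defers its work with a flag bit whenever the meta socket is owned by the user; tcp_release_cb() below then drains the accumulated mask, which the patch widens with TCPF_PATH_MANAGER_DEFERRED and TCPF_SUB_DEFERRED. The drain pattern in isolation (illustrative flag values; the kernel claims the bits atomically, not with a plain read-modify-write):

#include <stdio.h>

#define F_TSQ		(1UL << 0)
#define F_WRITE_TIMER	(1UL << 1)
#define F_PATH_MANAGER	(1UL << 2)	/* MPTCP addition */
#define F_SUB		(1UL << 3)	/* MPTCP addition */
#define F_ALL		(F_TSQ | F_WRITE_TIMER | F_PATH_MANAGER | F_SUB)

static void release_cb(unsigned long *tsq_flags)
{
	unsigned long flags = *tsq_flags;

	if (!(flags & F_ALL))
		return;
	*tsq_flags = flags & ~F_ALL;	/* claim the deferred work */

	if (flags & F_TSQ)
		printf("write out queued data\n");
	if (flags & F_PATH_MANAGER)
		printf("run the MPTCP path manager\n");
	if (flags & F_SUB)
		printf("service deferred subflows\n");
}

int main(void)
{
	unsigned long tsq = F_TSQ | F_SUB;

	release_cb(&tsq);
	return 0;
}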
-@@ -851,7 +875,9 @@ static void tcp_tasklet_func(unsigned long data) - #define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \ - TCPF_WRITE_TIMER_DEFERRED | \ - TCPF_DELACK_TIMER_DEFERRED | \ -- TCPF_MTU_REDUCED_DEFERRED) -+ TCPF_MTU_REDUCED_DEFERRED | \ -+ TCPF_PATH_MANAGER_DEFERRED |\ -+ TCPF_SUB_DEFERRED) - /** - * tcp_release_cb - tcp release_sock() callback - * @sk: socket -@@ -874,6 +900,9 @@ void tcp_release_cb(struct sock *sk) - if (flags & TCPF_TSQ_DEFERRED) { - tcp_tsq_write(sk); - __sock_put(sk); -+ -+ if (mptcp(tcp_sk(sk))) -+ tcp_tsq_write(mptcp_meta_sk(sk)); - } - /* Here begins the tricky part : - * We are called from release_sock() with : -@@ -898,6 +927,13 @@ void tcp_release_cb(struct sock *sk) - inet_csk(sk)->icsk_af_ops->mtu_reduced(sk); - __sock_put(sk); - } -+ if (flags & TCPF_PATH_MANAGER_DEFERRED) { -+ if (tcp_sk(sk)->mpcb->pm_ops->release_sock) -+ tcp_sk(sk)->mpcb->pm_ops->release_sock(sk); -+ __sock_put(sk); -+ } -+ if (flags & TCPF_SUB_DEFERRED) -+ mptcp_tsq_sub_deferred(sk); - } - EXPORT_SYMBOL(tcp_release_cb); - -@@ -981,8 +1017,8 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer) - return HRTIMER_NORESTART; - } - --static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb, -- u64 prior_wstamp) -+void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb, -+ u64 prior_wstamp) - { - struct tcp_sock *tp = tcp_sk(sk); - -@@ -1128,10 +1164,10 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, - } - } - -- tcp_options_write((__be32 *)(th + 1), tp, &opts); -+ tcp_options_write((__be32 *)(th + 1), tp, &opts, skb); - skb_shinfo(skb)->gso_type = sk->sk_gso_type; - if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) { -- th->window = htons(tcp_select_window(sk)); -+ th->window = htons(tp->ops->select_window(sk)); - tcp_ecn_send(sk, skb, th, tcp_header_size); - } else { - /* RFC1323: The window in SYN & SYN/ACK segments -@@ -1189,8 +1225,8 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, - return err; - } - --static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, -- gfp_t gfp_mask) -+int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, -+ gfp_t gfp_mask) - { - return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask, - tcp_sk(sk)->rcv_nxt); -@@ -1201,7 +1237,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, - * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, - * otherwise socket can stall. - */ --static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) -+void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) - { - struct tcp_sock *tp = tcp_sk(sk); - -@@ -1214,7 +1250,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) - } - - /* Initialize TSO segments for a packet. 
*/ --static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now) -+void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now) - { - if (skb->len <= mss_now) { - /* Avoid the costly divide in the normal -@@ -1231,7 +1267,7 @@ static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now) - /* Pcount in the middle of the write queue got changed, we need to do various - * tweaks to fix counters - */ --static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr) -+void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr) - { - struct tcp_sock *tp = tcp_sk(sk); - -@@ -1400,7 +1436,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, - /* This is similar to __pskb_pull_tail(). The difference is that pulled - * data is not copied, but immediately discarded. - */ --static int __pskb_trim_head(struct sk_buff *skb, int len) -+int __pskb_trim_head(struct sk_buff *skb, int len) - { - struct skb_shared_info *shinfo; - int i, k, eat; -@@ -1623,6 +1659,7 @@ unsigned int tcp_current_mss(struct sock *sk) - - return mss_now; - } -+EXPORT_SYMBOL(tcp_current_mss); - - /* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. - * As additional protections, we do not touch cwnd in retransmission phases, -@@ -1682,8 +1719,11 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) - * 2) not cwnd limited (this else condition) - * 3) no more data to send (tcp_write_queue_empty()) - * 4) application is hitting buffer limit (SOCK_NOSPACE) -+ * 5) For MPTCP subflows, the scheduler determines -+ * sndbuf limited. - */ - if (tcp_write_queue_empty(sk) && sk->sk_socket && -+ !(mptcp(tcp_sk(sk)) && !is_meta_sk(sk)) && - test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) && - (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) - tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED); -@@ -1705,8 +1745,8 @@ static bool tcp_minshall_check(const struct tcp_sock *tp) - * But we can avoid doing the divide again given we already have - * skb_pcount = skb->len / mss_now - */ --static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now, -- const struct sk_buff *skb) -+void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now, -+ const struct sk_buff *skb) - { - if (skb->len < tcp_skb_pcount(skb) * mss_now) - tp->snd_sml = TCP_SKB_CB(skb)->end_seq; -@@ -1752,7 +1792,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, - /* Return the number of segments we want in the skb we are transmitting. - * See if congestion control module wants to decide; otherwise, autosize. 
- */ --static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) -+u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) - { - const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; - u32 min_tso, tso_segs; -@@ -1766,11 +1806,11 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) - } - - /* Returns the portion of skb which can be sent right away */ --static unsigned int tcp_mss_split_point(const struct sock *sk, -- const struct sk_buff *skb, -- unsigned int mss_now, -- unsigned int max_segs, -- int nonagle) -+unsigned int tcp_mss_split_point(const struct sock *sk, -+ const struct sk_buff *skb, -+ unsigned int mss_now, -+ unsigned int max_segs, -+ int nonagle) - { - const struct tcp_sock *tp = tcp_sk(sk); - u32 partial, needed, window, max_len; -@@ -1800,13 +1840,14 @@ static unsigned int tcp_mss_split_point(const struct sock *sk, - /* Can at least one segment of SKB be sent right now, according to the - * congestion window rules? If so, return how many segments are allowed. - */ --static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp, -- const struct sk_buff *skb) -+unsigned int tcp_cwnd_test(const struct tcp_sock *tp, -+ const struct sk_buff *skb) - { - u32 in_flight, cwnd, halfcwnd; - - /* Don't be strict about the congestion window for the final FIN. */ -- if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) && -+ if (skb && -+ (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) && - tcp_skb_pcount(skb) == 1) - return 1; - -@@ -1821,12 +1862,13 @@ static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp, - halfcwnd = max(cwnd >> 1, 1U); - return min(halfcwnd, cwnd - in_flight); - } -+EXPORT_SYMBOL(tcp_cwnd_test); - - /* Initialize TSO state of a skb. - * This must be invoked the first time we consider transmitting - * SKB onto the wire. - */ --static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now) -+int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now) - { - int tso_segs = tcp_skb_pcount(skb); - -@@ -1841,8 +1883,8 @@ static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now) - /* Return true if the Nagle test allows this packet to be - * sent now. - */ --static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb, -- unsigned int cur_mss, int nonagle) -+bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb, -+ unsigned int cur_mss, int nonagle) - { - /* Nagle rule does not apply to frames, which sit in the middle of the - * write_queue (they have no chances to get new data). -@@ -1854,7 +1896,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf - return true; - - /* Don't use the nagle rule for urgent data (or for the final FIN). */ -- if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) -+ if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) || -+ mptcp_is_data_fin(skb)) - return true; - - if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle)) -@@ -1864,9 +1907,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf - } - - /* Does at least the first segment of SKB fit into the send window? 
*/ --static bool tcp_snd_wnd_test(const struct tcp_sock *tp, -- const struct sk_buff *skb, -- unsigned int cur_mss) -+bool tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb, -+ unsigned int cur_mss) - { - u32 end_seq = TCP_SKB_CB(skb)->end_seq; - -@@ -1875,6 +1917,7 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp, - - return !after(end_seq, tcp_wnd_end(tp)); - } -+EXPORT_SYMBOL(tcp_snd_wnd_test); - - /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet - * which is put after SKB on the list. It is very much like -@@ -2033,7 +2076,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, - - /* If this packet won't get more data, do not wait. */ - if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) || -- TCP_SKB_CB(skb)->eor) -+ TCP_SKB_CB(skb)->eor || -+ mptcp_is_data_fin(skb)) - goto send_now; - - return true; -@@ -2366,7 +2410,7 @@ void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type) - * Returns true, if no segments are in flight and we have queued segments, - * but cannot send anything now because of SWS or another problem. - */ --static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, -+bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, - int push_one, gfp_t gfp) - { - struct tcp_sock *tp = tcp_sk(sk); -@@ -2380,7 +2424,12 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, - sent_pkts = 0; - - tcp_mstamp_refresh(tp); -- if (!push_one) { -+ -+ /* pmtu not yet supported with MPTCP. Should be possible, by early -+ * exiting the loop inside tcp_mtu_probe, making sure that only one -+ * single DSS-mapping gets probed. -+ */ -+ if (!push_one && !mptcp(tp)) { - /* Do MTU probing. */ - result = tcp_mtu_probe(sk); - if (!result) { -@@ -2576,7 +2625,7 @@ void tcp_send_loss_probe(struct sock *sk) - skb = tcp_send_head(sk); - if (skb && tcp_snd_wnd_test(tp, skb, mss)) { - pcount = tp->packets_out; -- tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); -+ tp->ops->write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); - if (tp->packets_out > pcount) - goto probe_sent; - goto rearm_timer; -@@ -2638,8 +2687,8 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, - if (unlikely(sk->sk_state == TCP_CLOSE)) - return; - -- if (tcp_write_xmit(sk, cur_mss, nonagle, 0, -- sk_gfp_mask(sk, GFP_ATOMIC))) -+ if (tcp_sk(sk)->ops->write_xmit(sk, cur_mss, nonagle, 0, -+ sk_gfp_mask(sk, GFP_ATOMIC))) - tcp_check_probe_timer(sk); - } - -@@ -2652,7 +2701,8 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) - - BUG_ON(!skb || skb->len < mss_now); - -- tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation); -+ tcp_sk(sk)->ops->write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, -+ sk->sk_allocation); - } - - /* This function returns the amount that we can raise the -@@ -2874,6 +2924,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) - return; - -+ /* Currently not supported for MPTCP - but it should be possible */ -+ if (mptcp(tp)) -+ return; -+ - skb_rbtree_walk_from_safe(skb, tmp) { - if (!tcp_can_collapse(sk, skb)) - break; -@@ -3355,7 +3409,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, - - /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. 
*/ - th->window = htons(min(req->rsk_rcv_wnd, 65535U)); -- tcp_options_write((__be32 *)(th + 1), NULL, &opts); -+ tcp_options_write((__be32 *)(th + 1), NULL, &opts, skb); - th->doff = (tcp_header_size >> 2); - __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); - -@@ -3437,13 +3491,13 @@ static void tcp_connect_init(struct sock *sk) - if (rcv_wnd == 0) - rcv_wnd = dst_metric(dst, RTAX_INITRWND); - -- tcp_select_initial_window(sk, tcp_full_space(sk), -- tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), -- &tp->rcv_wnd, -- &tp->window_clamp, -- sock_net(sk)->ipv4.sysctl_tcp_window_scaling, -- &rcv_wscale, -- rcv_wnd); -+ tp->ops->select_initial_window(sk, tcp_full_space(sk), -+ tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), -+ &tp->rcv_wnd, -+ &tp->window_clamp, -+ sock_net(sk)->ipv4.sysctl_tcp_window_scaling, -+ &rcv_wscale, -+ rcv_wnd); - - tp->rx_opt.rcv_wscale = rcv_wscale; - tp->rcv_ssthresh = tp->rcv_wnd; -@@ -3463,11 +3517,43 @@ static void tcp_connect_init(struct sock *sk) - else - tp->rcv_tstamp = tcp_jiffies32; - tp->rcv_wup = tp->rcv_nxt; -+ /* force set rcv_right_edge here at start of connection */ -+ tp->rcv_right_edge = tp->rcv_wup + tp->rcv_wnd; - WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); - - inet_csk(sk)->icsk_rto = tcp_timeout_init(sk); - inet_csk(sk)->icsk_retransmits = 0; - tcp_clear_retrans(tp); -+ -+#ifdef CONFIG_MPTCP -+ if (sock_flag(sk, SOCK_MPTCP) && mptcp_doit(sk)) { -+ if (is_master_tp(tp)) { -+ tp->request_mptcp = 1; -+ mptcp_connect_init(sk); -+ } else if (tp->mptcp) { -+ struct inet_sock *inet = inet_sk(sk); -+ -+ tp->mptcp->snt_isn = tp->write_seq; -+ tp->mptcp->init_rcv_wnd = tp->rcv_wnd; -+ -+ /* Set nonce for new subflows */ -+ if (sk->sk_family == AF_INET) -+ tp->mptcp->mptcp_loc_nonce = mptcp_v4_get_nonce( -+ inet->inet_saddr, -+ inet->inet_daddr, -+ inet->inet_sport, -+ inet->inet_dport); -+#if IS_ENABLED(CONFIG_IPV6) -+ else -+ tp->mptcp->mptcp_loc_nonce = mptcp_v6_get_nonce( -+ inet6_sk(sk)->saddr.s6_addr32, -+ sk->sk_v6_daddr.s6_addr32, -+ inet->inet_sport, -+ inet->inet_dport); -+#endif -+ } -+ } -+#endif - } - - static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) -@@ -3731,6 +3817,7 @@ void tcp_send_ack(struct sock *sk) - { - __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt); - } -+EXPORT_SYMBOL_GPL(tcp_send_ack); - - /* This routine sends a packet with an out of date sequence - * number. It assumes the other end will try to ack it. -@@ -3743,7 +3830,7 @@ void tcp_send_ack(struct sock *sk) - * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is - * out-of-date with SND.UNA-1 to probe window. - */ --static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) -+int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) - { - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; -@@ -3830,7 +3917,7 @@ void tcp_send_probe0(struct sock *sk) - unsigned long timeout; - int err; - -- err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE); -+ err = tp->ops->write_wakeup(sk, LINUX_MIB_TCPWINPROBE); - - if (tp->packets_out || tcp_write_queue_empty(sk)) { - /* Cancel probe timer, if it is not required. 
*/ -diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c -index fa2ae96ecdc4..36199efe2837 100644 ---- a/net/ipv4/tcp_timer.c -+++ b/net/ipv4/tcp_timer.c -@@ -21,6 +21,7 @@ - - #include - #include -+#include - #include - - static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) -@@ -65,7 +66,7 @@ u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) - * Returns: Nothing (void) - */ - --static void tcp_write_err(struct sock *sk) -+void tcp_write_err(struct sock *sk) - { - sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; - sk->sk_error_report(sk); -@@ -121,7 +122,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) - (!tp->snd_wnd && !tp->packets_out)) - do_reset = true; - if (do_reset) -- tcp_send_active_reset(sk, GFP_ATOMIC); -+ tp->ops->send_active_reset(sk, GFP_ATOMIC); - tcp_done(sk); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); - return 1; -@@ -206,9 +207,9 @@ static unsigned int tcp_model_timeout(struct sock *sk, - * after "boundary" unsuccessful, exponentially backed-off - * retransmissions with an initial RTO of TCP_RTO_MIN. - */ --static bool retransmits_timed_out(struct sock *sk, -- unsigned int boundary, -- unsigned int timeout) -+bool retransmits_timed_out(struct sock *sk, -+ unsigned int boundary, -+ unsigned int timeout) - { - unsigned int start_ts; - -@@ -228,7 +229,7 @@ static bool retransmits_timed_out(struct sock *sk, - } - - /* A write timeout has occurred. Process the after effects. */ --static int tcp_write_timeout(struct sock *sk) -+int tcp_write_timeout(struct sock *sk) - { - struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); -@@ -243,6 +244,17 @@ static int tcp_write_timeout(struct sock *sk) - sk_rethink_txhash(sk); - } - retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; -+ -+#ifdef CONFIG_MPTCP -+ /* Stop retransmitting MP_CAPABLE options in SYN if timed out. */ -+ if (tcp_sk(sk)->request_mptcp && -+ icsk->icsk_retransmits >= sysctl_mptcp_syn_retries) { -+ tcp_sk(sk)->request_mptcp = 0; -+ -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLERETRANSFALLBACK); -+ } -+#endif /* CONFIG_MPTCP */ -+ - expired = icsk->icsk_retransmits >= retry_until; - } else { - if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) { -@@ -338,18 +350,22 @@ static void tcp_delack_timer(struct timer_list *t) - struct inet_connection_sock *icsk = - from_timer(icsk, t, icsk_delack_timer); - struct sock *sk = &icsk->icsk_inet.sk; -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct sock *meta_sk = mptcp(tp) ? mptcp_meta_sk(sk) : sk; - -- bh_lock_sock(sk); -- if (!sock_owned_by_user(sk)) { -+ bh_lock_sock(meta_sk); -+ if (!sock_owned_by_user(meta_sk)) { - tcp_delack_timer_handler(sk); - } else { - icsk->icsk_ack.blocked = 1; -- __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); -+ __NET_INC_STATS(sock_net(meta_sk), LINUX_MIB_DELAYEDACKLOCKED); - /* deleguate our work to tcp_release_cb() */ - if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags)) - sock_hold(sk); -+ if (mptcp(tp)) -+ mptcp_tsq_flags(sk); - } -- bh_unlock_sock(sk); -+ bh_unlock_sock(meta_sk); - sock_put(sk); - } - -@@ -393,7 +409,12 @@ static void tcp_probe_timer(struct sock *sk) - } - - if (icsk->icsk_probes_out >= max_probes) { --abort: tcp_write_err(sk); -+abort: -+ tcp_write_err(sk); -+ if (is_meta_sk(sk) && -+ mptcp_in_infinite_mapping_weak(tp->mpcb)) { -+ mptcp_sub_force_close_all(tp->mpcb, NULL); -+ } - } else { - /* Only send another probe if we didn't close things up. 
*/ - tcp_send_probe0(sk); -@@ -614,7 +635,7 @@ void tcp_write_timer_handler(struct sock *sk) - break; - case ICSK_TIME_RETRANS: - icsk->icsk_pending = 0; -- tcp_retransmit_timer(sk); -+ tcp_sk(sk)->ops->retransmit_timer(sk); - break; - case ICSK_TIME_PROBE0: - icsk->icsk_pending = 0; -@@ -631,16 +652,19 @@ static void tcp_write_timer(struct timer_list *t) - struct inet_connection_sock *icsk = - from_timer(icsk, t, icsk_retransmit_timer); - struct sock *sk = &icsk->icsk_inet.sk; -+ struct sock *meta_sk = mptcp(tcp_sk(sk)) ? mptcp_meta_sk(sk) : sk; - -- bh_lock_sock(sk); -- if (!sock_owned_by_user(sk)) { -+ bh_lock_sock(meta_sk); -+ if (!sock_owned_by_user(meta_sk)) { - tcp_write_timer_handler(sk); - } else { - /* delegate our work to tcp_release_cb() */ - if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags)) - sock_hold(sk); -+ if (mptcp(tcp_sk(sk))) -+ mptcp_tsq_flags(sk); - } -- bh_unlock_sock(sk); -+ bh_unlock_sock(meta_sk); - sock_put(sk); - } - -@@ -670,11 +694,12 @@ static void tcp_keepalive_timer (struct timer_list *t) - struct sock *sk = from_timer(sk, t, sk_timer); - struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); -+ struct sock *meta_sk = mptcp(tp) ? mptcp_meta_sk(sk) : sk; - u32 elapsed; - - /* Only process if socket is not in use. */ -- bh_lock_sock(sk); -- if (sock_owned_by_user(sk)) { -+ bh_lock_sock(meta_sk); -+ if (sock_owned_by_user(meta_sk)) { - /* Try again later. */ - inet_csk_reset_keepalive_timer (sk, HZ/20); - goto out; -@@ -686,16 +711,31 @@ static void tcp_keepalive_timer (struct timer_list *t) - } - - tcp_mstamp_refresh(tp); -+ -+ if (tp->send_mp_fclose) { -+ if (icsk->icsk_retransmits >= MPTCP_FASTCLOSE_RETRIES) { -+ tcp_write_err(sk); -+ goto out; -+ } -+ -+ tcp_send_ack(sk); -+ icsk->icsk_retransmits++; -+ -+ icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); -+ elapsed = icsk->icsk_rto; -+ goto resched; -+ } -+ - if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) { - if (tp->linger2 >= 0) { - const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN; - - if (tmo > 0) { -- tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); -+ tp->ops->time_wait(sk, TCP_FIN_WAIT2, tmo); - goto out; - } - } -- tcp_send_active_reset(sk, GFP_ATOMIC); -+ tp->ops->send_active_reset(sk, GFP_ATOMIC); - goto death; - } - -@@ -720,11 +760,11 @@ static void tcp_keepalive_timer (struct timer_list *t) - icsk->icsk_probes_out > 0) || - (icsk->icsk_user_timeout == 0 && - icsk->icsk_probes_out >= keepalive_probes(tp))) { -- tcp_send_active_reset(sk, GFP_ATOMIC); -+ tp->ops->send_active_reset(sk, GFP_ATOMIC); - tcp_write_err(sk); - goto out; - } -- if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) { -+ if (tp->ops->write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) { - icsk->icsk_probes_out++; - elapsed = keepalive_intvl_when(tp); - } else { -@@ -748,7 +788,7 @@ static void tcp_keepalive_timer (struct timer_list *t) - tcp_done(sk); - - out: -- bh_unlock_sock(sk); -+ bh_unlock_sock(meta_sk); - sock_put(sk); - } - -diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c -index 366c3792b860..edf439019e37 100644 ---- a/net/ipv6/addrconf.c -+++ b/net/ipv6/addrconf.c -@@ -967,6 +967,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) - - kfree_rcu(ifp, rcu); - } -+EXPORT_SYMBOL(inet6_ifa_finish_destroy); - - static void - ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) -diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c -index 14ac1d911287..a3c93ec02c96 100644 ---- a/net/ipv6/af_inet6.c -+++ b/net/ipv6/af_inet6.c -@@ 
-104,8 +104,7 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) - return (struct ipv6_pinfo *)(((u8 *)sk) + offset); - } - --static int inet6_create(struct net *net, struct socket *sock, int protocol, -- int kern) -+int inet6_create(struct net *net, struct socket *sock, int protocol, int kern) - { - struct inet_sock *inet; - struct ipv6_pinfo *np; -diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c -index 5352c7e68c42..534a9d2e4858 100644 ---- a/net/ipv6/ipv6_sockglue.c -+++ b/net/ipv6/ipv6_sockglue.c -@@ -44,6 +44,8 @@ - #include - #include - #include -+#include -+#include - #include - #include - #include -@@ -221,7 +223,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, - sock_prot_inuse_add(net, &tcp_prot, 1); - local_bh_enable(); - sk->sk_prot = &tcp_prot; -- icsk->icsk_af_ops = &ipv4_specific; -+#ifdef CONFIG_MPTCP -+ if (sock_flag(sk, SOCK_MPTCP)) -+ icsk->icsk_af_ops = &mptcp_v4_specific; -+ else -+#endif -+ icsk->icsk_af_ops = &ipv4_specific; - sk->sk_socket->ops = &inet_stream_ops; - sk->sk_family = PF_INET; - tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); -@@ -345,6 +352,17 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, - if (val == -1) - val = 0; - np->tclass = val; -+ -+ if (is_meta_sk(sk)) { -+ struct mptcp_tcp_sock *mptcp; -+ -+ mptcp_for_each_sub(tcp_sk(sk)->mpcb, mptcp) { -+ struct sock *sk_it = mptcp_to_sock(mptcp); -+ -+ if (sk_it->sk_family == AF_INET6) -+ inet6_sk(sk_it)->tclass = val; -+ } -+ } - retv = 0; - break; - -diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c -index ec155844012b..225c015b60a8 100644 ---- a/net/ipv6/syncookies.c -+++ b/net/ipv6/syncookies.c -@@ -15,6 +15,8 @@ - #include - #include - #include -+#include -+#include - #include - - #define COOKIEBITS 24 /* Upper bits store count */ -@@ -106,7 +108,8 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, - } - EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence); - --__u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mssp) -+__u32 cookie_v6_init_sequence(struct request_sock *req, const struct sock *sk, -+ const struct sk_buff *skb, __u16 *mssp) - { - const struct ipv6hdr *iph = ipv6_hdr(skb); - const struct tcphdr *th = tcp_hdr(skb); -@@ -128,6 +131,7 @@ EXPORT_SYMBOL_GPL(__cookie_v6_check); - struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) - { - struct tcp_options_received tcp_opt; -+ struct mptcp_options_received mopt; - struct inet_request_sock *ireq; - struct tcp_request_sock *treq; - struct ipv6_pinfo *np = inet6_sk(sk); -@@ -157,7 +161,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) - - /* check for timestamp cookie support */ - memset(&tcp_opt, 0, sizeof(tcp_opt)); -- tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL); -+ mptcp_init_mp_opt(&mopt); -+ tcp_parse_options(sock_net(sk), skb, &tcp_opt, &mopt, 0, NULL, NULL); - - if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { - tsoff = secure_tcpv6_ts_off(sock_net(sk), -@@ -170,14 +175,27 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) - goto out; - - ret = NULL; -- req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false); -+#ifdef CONFIG_MPTCP -+ if (mopt.saw_mpc) -+ req = inet_reqsk_alloc(&mptcp6_request_sock_ops, sk, false); -+ else -+#endif -+ req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false); - if (!req) - goto out; - - ireq = inet_rsk(req); -+ ireq->mptcp_rqsk = 0; -+ ireq->saw_mpc = 0; - treq = tcp_rsk(req); - treq->tfo_listener = false; - -+ /* Must be done 
before anything else, as it initializes -+ * hash_entry of the MPTCP request-sock. -+ */ -+ if (mopt.saw_mpc) -+ mptcp_cookies_reqsk_init(req, &mopt, skb); -+ - if (security_inet_conn_request(sk, skb, req)) - goto out_free; - -@@ -247,15 +265,15 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) - (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) - req->rsk_window_clamp = full_space; - -- tcp_select_initial_window(sk, full_space, req->mss, -- &req->rsk_rcv_wnd, &req->rsk_window_clamp, -- ireq->wscale_ok, &rcv_wscale, -- dst_metric(dst, RTAX_INITRWND)); -+ tp->ops->select_initial_window(sk, full_space, req->mss, -+ &req->rsk_rcv_wnd, &req->rsk_window_clamp, -+ ireq->wscale_ok, &rcv_wscale, -+ dst_metric(dst, RTAX_INITRWND)); - - ireq->rcv_wscale = rcv_wscale; - ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst); - -- ret = tcp_get_cookie_sock(sk, skb, req, dst, tsoff); -+ ret = tcp_get_cookie_sock(sk, skb, req, &mopt, dst, tsoff); - out: - return ret; - out_free: -diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c -index 3903cc0ab188..2f91fddabceb 100644 ---- a/net/ipv6/tcp_ipv6.c -+++ b/net/ipv6/tcp_ipv6.c -@@ -58,6 +58,8 @@ - #include - #include - #include -+#include -+#include - #include - - #include -@@ -67,15 +69,6 @@ - #include - - #include -- --static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); --static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, -- struct request_sock *req); -- --static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); -- --static const struct inet_connection_sock_af_ops ipv6_mapped; --static const struct inet_connection_sock_af_ops ipv6_specific; - #ifdef CONFIG_TCP_MD5SIG - static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; - static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; -@@ -99,7 +92,7 @@ static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk) - return (struct ipv6_pinfo *)(((u8 *)sk) + offset); - } - --static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) -+void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) - { - struct dst_entry *dst = skb_dst(skb); - -@@ -141,7 +134,7 @@ static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, - return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr); - } - --static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, -+int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, - int addr_len) - { - struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; -@@ -157,6 +150,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, - int err; - struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; - -+ mptcp_init_connect(sk); -+ - if (addr_len < SIN6_LEN_RFC2133) - return -EINVAL; - -@@ -236,7 +231,12 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, - sin.sin_port = usin->sin6_port; - sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; - -- icsk->icsk_af_ops = &ipv6_mapped; -+#ifdef CONFIG_MPTCP -+ if (sock_flag(sk, SOCK_MPTCP)) -+ icsk->icsk_af_ops = &mptcp_v6_mapped; -+ else -+#endif -+ icsk->icsk_af_ops = &ipv6_mapped; - sk->sk_backlog_rcv = tcp_v4_do_rcv; - #ifdef CONFIG_TCP_MD5SIG - tp->af_specific = &tcp_sock_ipv6_mapped_specific; -@@ -246,7 +246,12 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, - - if (err) { - icsk->icsk_ext_hdr_len = exthdrlen; -- icsk->icsk_af_ops = &ipv6_specific; -+#ifdef CONFIG_MPTCP -+ if (sock_flag(sk, SOCK_MPTCP)) 
-+ icsk->icsk_af_ops = &mptcp_v6_specific; -+ else -+#endif -+ icsk->icsk_af_ops = &ipv6_specific; - sk->sk_backlog_rcv = tcp_v6_do_rcv; - #ifdef CONFIG_TCP_MD5SIG - tp->af_specific = &tcp_sock_ipv6_specific; -@@ -340,7 +345,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, - return err; - } - --static void tcp_v6_mtu_reduced(struct sock *sk) -+void tcp_v6_mtu_reduced(struct sock *sk) - { - struct dst_entry *dst; - u32 mtu; -@@ -376,7 +381,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - struct ipv6_pinfo *np; - struct tcp_sock *tp; - __u32 seq, snd_una; -- struct sock *sk; -+ struct sock *sk, *meta_sk; - bool fatal; - int err; - -@@ -402,8 +407,14 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - return 0; - } - -- bh_lock_sock(sk); -- if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) -+ tp = tcp_sk(sk); -+ if (mptcp(tp)) -+ meta_sk = mptcp_meta_sk(sk); -+ else -+ meta_sk = sk; -+ -+ bh_lock_sock(meta_sk); -+ if (sock_owned_by_user(meta_sk) && type != ICMPV6_PKT_TOOBIG) - __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); - - if (sk->sk_state == TCP_CLOSE) -@@ -414,7 +425,6 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - goto out; - } - -- tp = tcp_sk(sk); - /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ - fastopen = rcu_dereference(tp->fastopen_rsk); - snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; -@@ -454,11 +464,15 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - - WRITE_ONCE(tp->mtu_info, mtu); - -- if (!sock_owned_by_user(sk)) -+ if (!sock_owned_by_user(meta_sk)) { - tcp_v6_mtu_reduced(sk); -- else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, -- &sk->sk_tsq_flags)) -- sock_hold(sk); -+ } else { -+ if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, -+ &sk->sk_tsq_flags)) -+ sock_hold(sk); -+ if (mptcp(tp)) -+ mptcp_tsq_flags(sk); -+ } - goto out; - } - -@@ -473,7 +487,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - if (fastopen && !fastopen->sk) - break; - -- if (!sock_owned_by_user(sk)) { -+ if (!sock_owned_by_user(meta_sk)) { - sk->sk_err = err; - sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ - -@@ -483,14 +497,14 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - goto out; - } - -- if (!sock_owned_by_user(sk) && np->recverr) { -+ if (!sock_owned_by_user(meta_sk) && np->recverr) { - sk->sk_err = err; - sk->sk_error_report(sk); - } else - sk->sk_err_soft = err; - - out: -- bh_unlock_sock(sk); -+ bh_unlock_sock(meta_sk); - sock_put(sk); - return 0; - } -@@ -538,8 +552,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, - return err; - } - -- --static void tcp_v6_reqsk_destructor(struct request_sock *req) -+void tcp_v6_reqsk_destructor(struct request_sock *req) - { - kfree(inet_rsk(req)->ipv6_opt); - kfree_skb(inet_rsk(req)->pktopts); -@@ -757,9 +770,10 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk, - return false; - } - --static void tcp_v6_init_req(struct request_sock *req, -- const struct sock *sk_listener, -- struct sk_buff *skb) -+static int tcp_v6_init_req(struct request_sock *req, -+ const struct sock *sk_listener, -+ struct sk_buff *skb, -+ bool want_cookie) - { - bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); - struct inet_request_sock *ireq = inet_rsk(req); -@@ -781,6 +795,8 @@ static void tcp_v6_init_req(struct request_sock *req, - 
refcount_inc(&skb->users); - ireq->pktopts = skb; - } -+ -+ return 0; - } - - static struct dst_entry *tcp_v6_route_req(const struct sock *sk, -@@ -800,7 +816,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = { - .syn_ack_timeout = tcp_syn_ack_timeout, - }; - --static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { -+const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { - .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - - sizeof(struct ipv6hdr), - #ifdef CONFIG_TCP_MD5SIG -@@ -818,9 +834,9 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { - }; - - static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, -- u32 ack, u32 win, u32 tsval, u32 tsecr, -+ u32 ack, u32 data_ack, u32 win, u32 tsval, u32 tsecr, - int oif, struct tcp_md5sig_key *key, int rst, -- u8 tclass, __be32 label, u32 priority) -+ u8 tclass, __be32 label, u32 priority, int mptcp) - { - const struct tcphdr *th = tcp_hdr(skb); - struct tcphdr *t1; -@@ -839,7 +855,10 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 - if (key) - tot_len += TCPOLEN_MD5SIG_ALIGNED; - #endif -- -+#ifdef CONFIG_MPTCP -+ if (mptcp) -+ tot_len += MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK; -+#endif - buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len, - GFP_ATOMIC); - if (!buff) -@@ -877,6 +896,17 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 - tcp_v6_md5_hash_hdr((__u8 *)topt, key, - &ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, t1); -+ topt += 4; -+ } -+#endif -+#ifdef CONFIG_MPTCP -+ if (mptcp) { -+ /* Construction of 32-bit data_ack */ -+ *topt++ = htonl((TCPOPT_MPTCP << 24) | -+ ((MPTCP_SUB_LEN_DSS + MPTCP_SUB_LEN_ACK) << 16) | -+ (0x20 << 8) | -+ (0x01)); -+ *topt++ = htonl(data_ack); - } - #endif - -@@ -935,7 +965,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 - kfree_skb(buff); - } - --static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) -+void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) - { - const struct tcphdr *th = tcp_hdr(skb); - struct ipv6hdr *ipv6h = ipv6_hdr(skb); -@@ -1020,8 +1050,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) - label = ip6_flowlabel(ipv6h); - } - -- tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, -- label, priority); -+ tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, 0, oif, key, 1, 0, -+ label, priority, 0); - - #ifdef CONFIG_TCP_MD5SIG - out: -@@ -1030,30 +1060,37 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) - } - - static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, -- u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, -+ u32 ack, u32 data_ack, u32 win, u32 tsval, u32 tsecr, int oif, - struct tcp_md5sig_key *key, u8 tclass, -- __be32 label, u32 priority) -+ __be32 label, u32 priority, int mptcp) - { -- tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, -- tclass, label, priority); -+ tcp_v6_send_response(sk, skb, seq, ack, data_ack, win, tsval, tsecr, oif, -+ key, 0, tclass, label, priority, mptcp); - } - - static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) - { - struct inet_timewait_sock *tw = inet_twsk(sk); - struct tcp_timewait_sock *tcptw = tcp_twsk(sk); -+ u32 data_ack = 0; -+ int mptcp = 0; - -+ if (tcptw->mptcp_tw) { -+ data_ack = (u32)tcptw->mptcp_tw->rcv_nxt; -+ mptcp = 1; -+ } - tcp_v6_send_ack(sk, 
skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, -+ data_ack, - tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcp_time_stamp_raw() + tcptw->tw_ts_offset, - tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), -- tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority); -+ tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority, mptcp); - - inet_twsk_put(tw); - } - --static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, -- struct request_sock *req) -+void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, -+ struct request_sock *req) - { - /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV - * sk->sk_state == TCP_SYN_RECV -> for Fast Open. -@@ -1063,18 +1100,18 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, - * exception of segments, MUST be right-shifted by - * Rcv.Wind.Shift bits: - */ -- tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? -+ tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN || is_meta_sk(sk)) ? - tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, -- tcp_rsk(req)->rcv_nxt, -+ tcp_rsk(req)->rcv_nxt, 0, - req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, - tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, - req->ts_recent, sk->sk_bound_dev_if, - tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr), -- 0, 0, sk->sk_priority); -+ 0, 0, sk->sk_priority, 0); - } - - --static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) -+struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) - { - #ifdef CONFIG_SYN_COOKIES - const struct tcphdr *th = tcp_hdr(skb); -@@ -1100,7 +1137,7 @@ u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, - return mss; - } - --static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) -+int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) - { - if (skb->protocol == htons(ETH_P_IP)) - return tcp_v4_conn_request(sk, skb); -@@ -1131,11 +1168,11 @@ static void tcp_v6_restore_cb(struct sk_buff *skb) - sizeof(struct inet6_skb_parm)); - } - --static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, -- struct request_sock *req, -- struct dst_entry *dst, -- struct request_sock *req_unhash, -- bool *own_req) -+struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, -+ struct request_sock *req, -+ struct dst_entry *dst, -+ struct request_sock *req_unhash, -+ bool *own_req) - { - struct inet_request_sock *ireq; - struct ipv6_pinfo *newnp; -@@ -1170,7 +1207,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * - - newnp->saddr = newsk->sk_v6_rcv_saddr; - -- inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; -+#ifdef CONFIG_MPTCP -+ /* We must check on the request-socket because the listener -+ * socket's flag may have been changed halfway through. -+ */ -+ if (!inet_rsk(req)->saw_mpc) -+ inet_csk(newsk)->icsk_af_ops = &mptcp_v6_mapped; -+ else -+#endif -+ inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; - newsk->sk_backlog_rcv = tcp_v4_do_rcv; - #ifdef CONFIG_TCP_MD5SIG - newtp->af_specific = &tcp_sock_ipv6_mapped_specific; -@@ -1217,6 +1262,14 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * - if (!newsk) - goto out_nonewsk; - -+#ifdef CONFIG_MPTCP -+ /* If the meta_sk is v6-mapped we can end up here with the wrong af_ops. -+ * Just make sure that this subflow is v6. 
-+ */ -+ if (is_meta_sk(sk)) -+ inet_csk(newsk)->icsk_af_ops = &mptcp_v6_specific; -+#endif -+ - /* - * No need to charge this sock to the relevant IPv6 refcnt debug socks - * count here, tcp_create_openreq_child now does this for us, see the -@@ -1344,7 +1397,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * - * This is because we cannot sleep with the original spinlock - * held. - */ --static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) -+int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) - { - struct ipv6_pinfo *np = tcp_inet6_sk(sk); - struct sk_buff *opt_skb = NULL; -@@ -1361,6 +1414,9 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) - if (skb->protocol == htons(ETH_P_IP)) - return tcp_v4_do_rcv(sk, skb); - -+ if (is_meta_sk(sk)) -+ return mptcp_v6_do_rcv(sk, skb); -+ - /* - * socket locking is here for SMP purposes as backlog rcv - * is currently called with bh processing disabled. -@@ -1488,6 +1544,10 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, - TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + - skb->len - th->doff*4); - TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); -+#ifdef CONFIG_MPTCP -+ TCP_SKB_CB(skb)->mptcp_flags = 0; -+ TCP_SKB_CB(skb)->dss_off = 0; -+#endif - TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); - TCP_SKB_CB(skb)->tcp_tw_isn = 0; - TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); -@@ -1502,8 +1562,8 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) - int sdif = inet6_sdif(skb); - const struct tcphdr *th; - const struct ipv6hdr *hdr; -+ struct sock *sk, *meta_sk = NULL; - bool refcounted; -- struct sock *sk; - int ret; - struct net *net = dev_net(skb->dev); - -@@ -1557,12 +1617,17 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) - reqsk_put(req); - goto csum_error; - } -- if (unlikely(sk->sk_state != TCP_LISTEN)) { -+ if (unlikely(sk->sk_state != TCP_LISTEN && !is_meta_sk(sk))) { -+ inet_csk_reqsk_queue_drop_and_put(sk, req); -+ goto lookup; -+ } -+ if (unlikely(is_meta_sk(sk) && !mptcp_can_new_subflow(sk))) { - inet_csk_reqsk_queue_drop_and_put(sk, req); - goto lookup; - } - sock_hold(sk); - refcounted = true; -+ - nsk = NULL; - if (!tcp_filter(sk, skb)) { - th = (const struct tcphdr *)skb->data; -@@ -1621,19 +1686,28 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) - - sk_incoming_cpu_update(sk); - -- bh_lock_sock_nested(sk); -+ if (mptcp(tcp_sk(sk))) { -+ meta_sk = mptcp_meta_sk(sk); -+ -+ bh_lock_sock_nested(meta_sk); -+ if (sock_owned_by_user(meta_sk)) -+ mptcp_prepare_for_backlog(sk, skb); -+ } else { -+ meta_sk = sk; -+ bh_lock_sock_nested(sk); -+ } - tcp_segs_in(tcp_sk(sk), skb); - ret = 0; -- if (!sock_owned_by_user(sk)) { -+ if (!sock_owned_by_user(meta_sk)) { - skb_to_free = sk->sk_rx_skb_cache; - sk->sk_rx_skb_cache = NULL; - ret = tcp_v6_do_rcv(sk, skb); - } else { -- if (tcp_add_backlog(sk, skb)) -+ if (tcp_add_backlog(meta_sk, skb)) - goto discard_and_relse; - skb_to_free = NULL; - } -- bh_unlock_sock(sk); -+ bh_unlock_sock(meta_sk); - if (skb_to_free) - __kfree_skb(skb_to_free); - put_and_return: -@@ -1647,6 +1721,19 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) - - tcp_v6_fill_cb(skb, hdr, th); - -+#ifdef CONFIG_MPTCP -+ if (!sk && th->syn && !th->ack) { -+ int ret = mptcp_lookup_join(skb, NULL); -+ -+ if (ret < 0) { -+ tcp_v6_send_reset(NULL, skb); -+ goto discard_it; -+ } else if (ret > 0) { -+ return 0; -+ } -+ } -+#endif -+ - if (tcp_checksum_complete(skb)) { 
- csum_error: - __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); -@@ -1699,6 +1786,18 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) - refcounted = false; - goto process; - } -+#ifdef CONFIG_MPTCP -+ if (th->syn && !th->ack) { -+ int ret = mptcp_lookup_join(skb, inet_twsk(sk)); -+ -+ if (ret < 0) { -+ tcp_v6_send_reset(NULL, skb); -+ goto discard_it; -+ } else if (ret > 0) { -+ return 0; -+ } -+ } -+#endif - } - /* to ACK */ - /* fall through */ -@@ -1753,13 +1852,13 @@ INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) - } - } - --static struct timewait_sock_ops tcp6_timewait_sock_ops = { -+struct timewait_sock_ops tcp6_timewait_sock_ops = { - .twsk_obj_size = sizeof(struct tcp6_timewait_sock), - .twsk_unique = tcp_twsk_unique, - .twsk_destructor = tcp_twsk_destructor, - }; - --static const struct inet_connection_sock_af_ops ipv6_specific = { -+const struct inet_connection_sock_af_ops ipv6_specific = { - .queue_xmit = inet6_csk_xmit, - .send_check = tcp_v6_send_check, - .rebuild_header = inet6_sk_rebuild_header, -@@ -1790,7 +1889,7 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { - /* - * TCP over IPv4 via INET6 API - */ --static const struct inet_connection_sock_af_ops ipv6_mapped = { -+const struct inet_connection_sock_af_ops ipv6_mapped = { - .queue_xmit = ip_queue_xmit, - .send_check = tcp_v4_send_check, - .rebuild_header = inet_sk_rebuild_header, -@@ -1826,7 +1925,12 @@ static int tcp_v6_init_sock(struct sock *sk) - - tcp_init_sock(sk); - -- icsk->icsk_af_ops = &ipv6_specific; -+#ifdef CONFIG_MPTCP -+ if (sock_flag(sk, SOCK_MPTCP)) -+ icsk->icsk_af_ops = &mptcp_v6_specific; -+ else -+#endif -+ icsk->icsk_af_ops = &ipv6_specific; - - #ifdef CONFIG_TCP_MD5SIG - tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; -@@ -1835,7 +1939,7 @@ static int tcp_v6_init_sock(struct sock *sk) - return 0; - } - --static void tcp_v6_destroy_sock(struct sock *sk) -+void tcp_v6_destroy_sock(struct sock *sk) - { - tcp_v4_destroy_sock(sk); - inet6_destroy_sock(sk); -@@ -2058,6 +2162,11 @@ struct proto tcpv6_prot = { - .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), - .max_header = MAX_TCP_HEADER, - .obj_size = sizeof(struct tcp6_sock), -+#ifdef CONFIG_MPTCP -+ .useroffset = offsetof(struct tcp_sock, mptcp_sched_name), -+ .usersize = sizeof_field(struct tcp_sock, mptcp_sched_name) + -+ sizeof_field(struct tcp_sock, mptcp_pm_name), -+#endif - .slab_flags = SLAB_TYPESAFE_BY_RCU, - .twsk_prot = &tcp6_timewait_sock_ops, - .rsk_prot = &tcp6_request_sock_ops, -@@ -2068,6 +2177,9 @@ struct proto tcpv6_prot = { - .compat_getsockopt = compat_tcp_getsockopt, - #endif - .diag_destroy = tcp_abort, -+#ifdef CONFIG_MPTCP -+ .clear_sk = mptcp_clear_sk, -+#endif - }; - - /* thinking of making this const? Don't. -diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig -new file mode 100644 -index 000000000000..6e05dab4c632 ---- /dev/null -+++ b/net/mptcp/Kconfig -@@ -0,0 +1,154 @@ -+# -+# MPTCP configuration -+# -+config MPTCP -+ bool "MPTCP protocol" -+ depends on (IPV6=y || IPV6=n) -+ select CRYPTO_LIB_SHA256 -+ select CRYPTO -+ ---help--- -+ This replaces the normal TCP stack with a Multipath TCP stack, -+ able to use several paths at once. -+ -+menuconfig MPTCP_PM_ADVANCED -+ bool "MPTCP: advanced path-manager control" -+ depends on MPTCP=y -+ ---help--- -+ Support for selection of different path-managers. You should choose 'Y' here, -+ because otherwise you will not actively create new MPTCP-subflows. 
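One pattern repeats across the tcp_timer.c and tcp_ipv6.c hunks above: every timer and receive path that used to take bh_lock_sock(sk) now locks the meta-socket (meta_sk) when the socket is an MPTCP subflow, because user-ownership is tracked per connection while timers and packets still fire per subflow. The toy program below models only that invariant; struct meta, struct subflow and the pthread mutex are stand-ins invented for this sketch, not kernel or MPTCP API.

#include <pthread.h>
#include <stdio.h>

/* Connection-level state shared by all subflows lives in the meta
 * object, so every per-subflow handler must take the meta lock first,
 * mirroring bh_lock_sock(meta_sk) in the hunks above. */
struct meta {
	pthread_mutex_t lock;
	int packets_out;
};

struct subflow {
	struct meta *meta;
	int weight;
};

static void *timer_handler(void *arg)
{
	struct subflow *sf = arg;

	pthread_mutex_lock(&sf->meta->lock);	/* lock the meta, not sf */
	sf->meta->packets_out += sf->weight;	/* shared state, now safe */
	pthread_mutex_unlock(&sf->meta->lock);
	return NULL;
}

int main(void)
{
	struct meta m = { PTHREAD_MUTEX_INITIALIZER, 0 };
	struct subflow sf1 = { &m, 1 }, sf2 = { &m, 2 };
	pthread_t t1, t2;

	pthread_create(&t1, NULL, timer_handler, &sf1);
	pthread_create(&t2, NULL, timer_handler, &sf2);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	printf("packets_out = %d\n", m.packets_out);	/* always 3 */
	return 0;
}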
-+ -+if MPTCP_PM_ADVANCED -+ -+config MPTCP_FULLMESH -+ tristate "MPTCP Full-Mesh Path-Manager" -+ depends on MPTCP=y -+ ---help--- -+ This path-management module will create a full-mesh among all IP-addresses. -+ -+config MPTCP_NDIFFPORTS -+ tristate "MPTCP ndiff-ports" -+ depends on MPTCP=y -+ ---help--- -+ This path-management module will create multiple subflows between the same -+ pair of IP-addresses, modifying the source-port. You can set the number -+ of subflows via the mptcp_ndiffports-sysctl. -+ -+config MPTCP_BINDER -+ tristate "MPTCP Binder" -+ depends on (MPTCP=y) -+ ---help--- -+ This path-management module works like ndiffports, and adds the sysctl -+ option to set the gateway (and/or path to) for each additional subflow -+ via Loose Source Routing (IPv4 only). -+ -+config MPTCP_NETLINK -+ tristate "MPTCP Netlink Path-Manager" -+ depends on MPTCP=y -+ ---help--- -+ This path-management module is controlled over a Netlink interface. A userspace -+ module can therefore control the establishment of new subflows and the policy -+ to apply over those new subflows for every connection. -+ -+choice -+ prompt "Default MPTCP Path-Manager" -+ default DEFAULT_DUMMY -+ help -+ Select the Path-Manager of your choice -+ -+ config DEFAULT_FULLMESH -+ bool "Full mesh" if MPTCP_FULLMESH=y -+ -+ config DEFAULT_NDIFFPORTS -+ bool "ndiff-ports" if MPTCP_NDIFFPORTS=y -+ -+ config DEFAULT_BINDER -+ bool "binder" if MPTCP_BINDER=y -+ -+ config DEFAULT_NETLINK -+ bool "Netlink" if MPTCP_NETLINK=y -+ -+ config DEFAULT_DUMMY -+ bool "Default" -+ -+endchoice -+ -+endif -+ -+config DEFAULT_MPTCP_PM -+ string -+ default "default" if DEFAULT_DUMMY -+ default "fullmesh" if DEFAULT_FULLMESH -+ default "ndiffports" if DEFAULT_NDIFFPORTS -+ default "binder" if DEFAULT_BINDER -+ default "default" -+ -+menuconfig MPTCP_SCHED_ADVANCED -+ bool "MPTCP: advanced scheduler control" -+ depends on MPTCP=y -+ ---help--- -+ Support for selection of different schedulers. You should choose 'Y' here -+ if you want to choose a different scheduler than the default one. -+ -+if MPTCP_SCHED_ADVANCED -+ -+config MPTCP_BLEST -+ tristate "MPTCP BLEST" -+ depends on MPTCP=y -+ ---help--- -+ This is an experimental BLocking ESTimation-based (BLEST) scheduler. -+ -+config MPTCP_ROUNDROBIN -+ tristate "MPTCP Round-Robin" -+ depends on (MPTCP=y) -+ ---help--- -+ This is a very simple round-robin scheduler. It probably has bad performance -+ but might be interesting for researchers. -+ -+config MPTCP_REDUNDANT -+ tristate "MPTCP Redundant" -+ depends on (MPTCP=y) -+ ---help--- -+ This scheduler sends all packets redundantly over all subflows to decrease -+ latency and jitter at the cost of lower throughput. -+ -+config MPTCP_ECF -+ tristate "MPTCP ECF" -+ depends on (MPTCP=y) -+ ---help--- -+ This is an experimental Earliest Completion First (ECF) scheduler. -+ -+choice -+ prompt "Default MPTCP Scheduler" -+ default DEFAULT_SCHEDULER -+ help -+ Select the Scheduler of your choice -+ -+ config DEFAULT_SCHEDULER -+ bool "Default" -+ ---help--- -+ This is the default scheduler, sending first on the subflow -+ with the lowest RTT. -+ -+ config DEFAULT_ROUNDROBIN -+ bool "Round-Robin" if MPTCP_ROUNDROBIN=y -+ ---help--- -+ This is the round-robin scheduler, sending in a round-robin -+ fashion. -+ -+ config DEFAULT_REDUNDANT -+ bool "Redundant" if MPTCP_REDUNDANT=y -+ ---help--- -+ This is the redundant scheduler, sending packets redundantly over -+ all the subflows. 
-+ -+endchoice -+endif -+ -+config DEFAULT_MPTCP_SCHED -+ string -+ depends on (MPTCP=y) -+ default "default" if DEFAULT_SCHEDULER -+ default "roundrobin" if DEFAULT_ROUNDROBIN -+ default "redundant" if DEFAULT_REDUNDANT -+ default "default" -+ -diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile -new file mode 100644 -index 000000000000..369248a2f68e ---- /dev/null -+++ b/net/mptcp/Makefile -@@ -0,0 +1,25 @@ -+# -+## Makefile for MultiPath TCP support code. -+# -+# -+ -+obj-$(CONFIG_MPTCP) += mptcp.o -+ -+mptcp-y := mptcp_ctrl.o mptcp_ipv4.o mptcp_pm.o \ -+ mptcp_output.o mptcp_input.o mptcp_sched.o -+ -+obj-$(CONFIG_TCP_CONG_LIA) += mptcp_coupled.o -+obj-$(CONFIG_TCP_CONG_OLIA) += mptcp_olia.o -+obj-$(CONFIG_TCP_CONG_WVEGAS) += mptcp_wvegas.o -+obj-$(CONFIG_TCP_CONG_BALIA) += mptcp_balia.o -+obj-$(CONFIG_TCP_CONG_MCTCPDESYNC) += mctcp_desync.o -+obj-$(CONFIG_MPTCP_FULLMESH) += mptcp_fullmesh.o -+obj-$(CONFIG_MPTCP_NDIFFPORTS) += mptcp_ndiffports.o -+obj-$(CONFIG_MPTCP_BINDER) += mptcp_binder.o -+obj-$(CONFIG_MPTCP_NETLINK) += mptcp_netlink.o -+obj-$(CONFIG_MPTCP_ROUNDROBIN) += mptcp_rr.o -+obj-$(CONFIG_MPTCP_REDUNDANT) += mptcp_redundant.o -+obj-$(CONFIG_MPTCP_BLEST) += mptcp_blest.o -+obj-$(CONFIG_MPTCP_ECF) += mptcp_ecf.o -+ -+mptcp-$(subst m,y,$(CONFIG_IPV6)) += mptcp_ipv6.o -diff --git a/net/mptcp/mctcp_desync.c b/net/mptcp/mctcp_desync.c -new file mode 100644 -index 000000000000..f6bf9251d59b ---- /dev/null -+++ b/net/mptcp/mctcp_desync.c -@@ -0,0 +1,193 @@ -+/* -+ * Desynchronized Multi-Channel TCP Congestion Control Algorithm -+ * -+ * Implementation based on the publications "DMCTCP: Desynchronized Multi-Channel -+ * TCP for high speed access networks with tiny buffers" at the 23rd International -+ * Conference on Computer Communications and Networks (ICCCN), 2014, and -+ * "Exploring parallelism and desynchronization of TCP over high speed networks -+ * with tiny buffers" in Computer Communications (Elsevier), 2015. -+ * -+ * http://ieeexplore.ieee.org/abstract/document/6911722/ -+ * https://doi.org/10.1016/j.comcom.2015.07.010 -+ * -+ * This prototype is for research purposes and is currently experimental code -+ * that only supports a single path. Future support of multi-channel over -+ * multi-path requires channel grouping. -+ * -+ * Initial Design and Implementation: -+ * Cheng Cui -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License as published by the Free -+ * Software Foundation; either version 2 of the License, or (at your option) -+ * any later version. 
-+ */ -+#include -+#include -+#include -+ -+enum { -+ MASTER_CHANNEL = 1, -+ INI_MIN_CWND = 2, -+}; -+ -+/* private congestion control structure: -+ * off_tstamp: the last backoff timestamp for loss synchronization event -+ * off_subfid: the subflow which was backoff on off_tstamp -+ */ -+struct mctcp_desync { -+ u64 off_tstamp; -+ u8 off_subfid; -+}; -+ -+static inline int mctcp_cc_sk_can_send(const struct sock *sk) -+{ -+ return mptcp_sk_can_send(sk) && tcp_sk(sk)->srtt_us; -+} -+ -+static void mctcp_desync_init(struct sock *sk) -+{ -+ if (mptcp(tcp_sk(sk))) { -+ struct mctcp_desync *ca = inet_csk_ca(mptcp_meta_sk(sk)); -+ ca->off_tstamp = 0; -+ ca->off_subfid = 0; -+ } -+ /* If we do not mptcp, behave like reno: return */ -+} -+ -+static void mctcp_desync_cong_avoid(struct sock *sk, u32 ack, u32 acked) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ -+ if (!mptcp(tp)) { -+ tcp_reno_cong_avoid(sk, ack, acked); -+ return; -+ } else if (!tcp_is_cwnd_limited(sk)) { -+ return; -+ } else { -+ const struct mctcp_desync *ca = inet_csk_ca(mptcp_meta_sk(sk)); -+ const u8 subfid = tp->mptcp->path_index; -+ -+ /* current aggregated cwnd */ -+ u32 agg_cwnd = 0; -+ u32 min_cwnd = 0xffffffff; -+ u8 min_cwnd_subfid = 0; -+ -+ /* In "safe" area, increase */ -+ if (tcp_in_slow_start(tp)) { -+ if (ca->off_subfid) { -+ /* passed initial phase, allow slow start */ -+ tcp_slow_start(tp, acked); -+ } else if (MASTER_CHANNEL == tp->mptcp->path_index) { -+ /* master channel is normal slow start in -+ * initial phase */ -+ tcp_slow_start(tp, acked); -+ } else { -+ /* secondary channels increase slowly until -+ * the initial phase passed -+ */ -+ tp->snd_ssthresh = tp->snd_cwnd = INI_MIN_CWND; -+ } -+ return; -+ } else { -+ /* In dangerous area, increase slowly and linearly. */ -+ const struct mptcp_tcp_sock *mptcp; -+ -+ /* get total cwnd and the subflow that has min cwnd */ -+ mptcp_for_each_sub(tp->mpcb, mptcp) { -+ const struct sock *sub_sk = mptcp_to_sock(mptcp); -+ -+ if (mctcp_cc_sk_can_send(sub_sk)) { -+ const struct tcp_sock *sub_tp = -+ tcp_sk(sub_sk); -+ agg_cwnd += sub_tp->snd_cwnd; -+ if(min_cwnd > sub_tp->snd_cwnd) { -+ min_cwnd = sub_tp->snd_cwnd; -+ min_cwnd_subfid = -+ sub_tp->mptcp->path_index; -+ } -+ } -+ } -+ /* the smallest subflow grows faster than others */ -+ if (subfid == min_cwnd_subfid) { -+ tcp_cong_avoid_ai(tp, min_cwnd, acked); -+ } else { -+ tcp_cong_avoid_ai(tp, agg_cwnd - min_cwnd, -+ acked); -+ } -+ } -+ } -+} -+ -+static u32 mctcp_desync_ssthresh(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ -+ if (!mptcp(tp)) { -+ return max(tp->snd_cwnd >> 1U, 2U); -+ } else { -+ struct mctcp_desync *ca = inet_csk_ca(mptcp_meta_sk(sk)); -+ const u8 subfid = tp->mptcp->path_index; -+ const struct mptcp_tcp_sock *mptcp; -+ u32 max_cwnd = 0; -+ u8 max_cwnd_subfid = 0; -+ -+ /* Find the subflow that has the max cwnd. */ -+ mptcp_for_each_sub(tp->mpcb, mptcp) { -+ const struct sock *sub_sk = mptcp_to_sock(mptcp); -+ -+ if (mctcp_cc_sk_can_send(sub_sk)) { -+ const struct tcp_sock *sub_tp = tcp_sk(sub_sk); -+ if (max_cwnd < sub_tp->snd_cwnd) { -+ max_cwnd = sub_tp->snd_cwnd; -+ max_cwnd_subfid = -+ sub_tp->mptcp->path_index; -+ } -+ } -+ } -+ /* Use high resolution clock. 
*/ -+ if (subfid == max_cwnd_subfid) { -+ u64 now = tcp_clock_us(); -+ u32 delta = tcp_stamp_us_delta(now, ca->off_tstamp); -+ -+ if (delta < (tp->srtt_us >> 3)) { -+ /* desynchronize */ -+ return tp->snd_cwnd; -+ } else { -+ ca->off_tstamp = now; -+ ca->off_subfid = subfid; -+ return max(max_cwnd >> 1U, 2U); -+ } -+ } else { -+ return tp->snd_cwnd; -+ } -+ } -+} -+ -+static struct tcp_congestion_ops mctcp_desync = { -+ .init = mctcp_desync_init, -+ .ssthresh = mctcp_desync_ssthresh, -+ .undo_cwnd = tcp_reno_undo_cwnd, -+ .cong_avoid = mctcp_desync_cong_avoid, -+ .owner = THIS_MODULE, -+ .name = "mctcpdesync", -+}; -+ -+static int __init mctcp_desync_register(void) -+{ -+ BUILD_BUG_ON(sizeof(struct mctcp_desync) > ICSK_CA_PRIV_SIZE); -+ return tcp_register_congestion_control(&mctcp_desync); -+} -+ -+static void __exit mctcp_desync_unregister(void) -+{ -+ tcp_unregister_congestion_control(&mctcp_desync); -+} -+ -+module_init(mctcp_desync_register); -+module_exit(mctcp_desync_unregister); -+ -+MODULE_AUTHOR("Cheng Cui"); -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("MCTCP: DESYNCHRONIZED MULTICHANNEL TCP CONGESTION CONTROL"); -+MODULE_VERSION("1.0"); -diff --git a/net/mptcp/mptcp_balia.c b/net/mptcp/mptcp_balia.c -new file mode 100644 -index 000000000000..179b53dea020 ---- /dev/null -+++ b/net/mptcp/mptcp_balia.c -@@ -0,0 +1,261 @@ -+/* -+ * MPTCP implementation - Balia Congestion Control -+ * (Balanced Linked Adaptation Algorithm) -+ * -+ * Analysis, Design and Implementation: -+ * Qiuyu Peng -+ * Anwar Walid -+ * Jaehyun Hwang -+ * Steven H. Low -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. 
-+ */ -+ -+#include -+#include -+ -+#include -+ -+/* The variable 'rate' (i.e., x_r) will be scaled -+ * e.g., from B/s to KB/s, MB/s, or GB/s -+ * if max_rate > 2^rate_scale_limit -+ */ -+ -+static int rate_scale_limit = 25; -+static int alpha_scale = 10; -+static int scale_num = 5; -+ -+struct mptcp_balia { -+ u64 ai; -+ u64 md; -+ bool forced_update; -+}; -+ -+static inline int mptcp_balia_sk_can_send(const struct sock *sk) -+{ -+ return mptcp_sk_can_send(sk) && tcp_sk(sk)->srtt_us; -+} -+ -+static inline u64 mptcp_get_ai(const struct sock *meta_sk) -+{ -+ return ((struct mptcp_balia *)inet_csk_ca(meta_sk))->ai; -+} -+ -+static inline void mptcp_set_ai(const struct sock *meta_sk, u64 ai) -+{ -+ ((struct mptcp_balia *)inet_csk_ca(meta_sk))->ai = ai; -+} -+ -+static inline u64 mptcp_get_md(const struct sock *meta_sk) -+{ -+ return ((struct mptcp_balia *)inet_csk_ca(meta_sk))->md; -+} -+ -+static inline void mptcp_set_md(const struct sock *meta_sk, u64 md) -+{ -+ ((struct mptcp_balia *)inet_csk_ca(meta_sk))->md = md; -+} -+ -+static inline u64 mptcp_balia_scale(u64 val, int scale) -+{ -+ return (u64) val << scale; -+} -+ -+static inline bool mptcp_get_forced(const struct sock *meta_sk) -+{ -+ return ((struct mptcp_balia *)inet_csk_ca(meta_sk))->forced_update; -+} -+ -+static inline void mptcp_set_forced(const struct sock *meta_sk, bool force) -+{ -+ ((struct mptcp_balia *)inet_csk_ca(meta_sk))->forced_update = force; -+} -+ -+static void mptcp_balia_recalc_ai(const struct sock *sk) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ const struct mptcp_cb *mpcb = tp->mpcb; -+ struct mptcp_tcp_sock *mptcp; -+ u64 max_rate = 0, rate = 0, sum_rate = 0; -+ u64 alpha, ai = tp->snd_cwnd, md = (tp->snd_cwnd >> 1); -+ int num_scale_down = 0; -+ -+ if (!mpcb) -+ return; -+ -+ /* Find max_rate first */ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ const struct sock *sub_sk = mptcp_to_sock(mptcp); -+ struct tcp_sock *sub_tp = tcp_sk(sub_sk); -+ u64 tmp; -+ -+ if (!mptcp_balia_sk_can_send(sub_sk)) -+ continue; -+ -+ tmp = div_u64((u64)tp->mss_cache * sub_tp->snd_cwnd -+ * (USEC_PER_SEC << 3), sub_tp->srtt_us); -+ sum_rate += tmp; -+ -+ if (tp == sub_tp) -+ rate = tmp; -+ -+ if (tmp >= max_rate) -+ max_rate = tmp; -+ } -+ -+ /* At least, the current subflow should be able to send */ -+ if (unlikely(!rate)) -+ goto exit; -+ -+ alpha = div64_u64(max_rate, rate); -+ -+ /* Scale down max_rate if it is too high (e.g., >2^25) */ -+ while (max_rate > mptcp_balia_scale(1, rate_scale_limit)) { -+ max_rate >>= scale_num; -+ num_scale_down++; -+ } -+ -+ if (num_scale_down) { -+ sum_rate = 0; -+ mptcp_for_each_sub(mpcb, mptcp) { -+ const struct sock *sub_sk = mptcp_to_sock(mptcp); -+ struct tcp_sock *sub_tp = tcp_sk(sub_sk); -+ u64 tmp; -+ -+ if (!mptcp_balia_sk_can_send(sub_sk)) -+ continue; -+ -+ tmp = div_u64((u64)tp->mss_cache * sub_tp->snd_cwnd -+ * (USEC_PER_SEC << 3), sub_tp->srtt_us); -+ tmp >>= (scale_num * num_scale_down); -+ -+ sum_rate += tmp; -+ } -+ rate >>= (scale_num * num_scale_down); -+ } -+ -+ /* (sum_rate)^2 * 10 * w_r -+ * ai = ------------------------------------ -+ * (x_r + max_rate) * (4x_r + max_rate) -+ */ -+ sum_rate *= sum_rate; -+ -+ ai = div64_u64(sum_rate * 10, rate + max_rate); -+ ai = div64_u64(ai * tp->snd_cwnd, (rate << 2) + max_rate); -+ -+ if (unlikely(!ai)) -+ ai = tp->snd_cwnd; -+ -+ md = ((tp->snd_cwnd >> 1) * min(mptcp_balia_scale(alpha, alpha_scale), -+ mptcp_balia_scale(3, alpha_scale) >> 1)) -+ >> alpha_scale; -+ -+exit: -+ mptcp_set_ai(sk, ai); -+ mptcp_set_md(sk, md); -+} -+ 
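The scaled 64-bit arithmetic in mptcp_balia_recalc_ai() above is easier to check against the formula in its comment when spelled out in floating point. The sketch below computes the same additive-increase interval ai (a larger ai means slower window growth; it reduces to ai = cwnd, i.e. Reno behaviour, on a single path) and the decrease term md. It is a plain userspace illustration, and the rates in main() are arbitrary example values, not measurements.

#include <stdio.h>

/* ai = (sum_rate^2 * 10 * w_r) / ((x_r + max_rate) * (4*x_r + max_rate))
 * md = (w_r / 2) * min(max_rate / x_r, 1.5)
 * where x_r is this subflow's rate estimate mss * cwnd / srtt. */
static double balia_ai(double sum_rate, double x_r, double max_rate,
		       double cwnd)
{
	return (sum_rate * sum_rate * 10.0 * cwnd) /
	       ((x_r + max_rate) * (4.0 * x_r + max_rate));
}

static double balia_md(double x_r, double max_rate, double cwnd)
{
	double alpha = max_rate / x_r;

	if (alpha > 1.5)
		alpha = 1.5;
	return (cwnd / 2.0) * alpha;
}

int main(void)
{
	double x_r = 1e6, max_rate = 2e6, cwnd = 10.0;
	double sum_rate = x_r + max_rate;	/* two subflows */

	printf("ai = %.1f acked segments per cwnd increment\n",
	       balia_ai(sum_rate, x_r, max_rate, cwnd));
	printf("md = %.1f segments\n", balia_md(x_r, max_rate, cwnd));
	return 0;
}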
-+static void mptcp_balia_init(struct sock *sk) -+{ -+ if (mptcp(tcp_sk(sk))) { -+ mptcp_set_forced(sk, 0); -+ mptcp_set_ai(sk, 0); -+ mptcp_set_md(sk, 0); -+ } -+} -+ -+static void mptcp_balia_cwnd_event(struct sock *sk, enum tcp_ca_event event) -+{ -+ if (event == CA_EVENT_COMPLETE_CWR || event == CA_EVENT_LOSS) -+ mptcp_balia_recalc_ai(sk); -+} -+ -+static void mptcp_balia_set_state(struct sock *sk, u8 ca_state) -+{ -+ if (!mptcp(tcp_sk(sk))) -+ return; -+ -+ mptcp_set_forced(sk, 1); -+} -+ -+static void mptcp_balia_cong_avoid(struct sock *sk, u32 ack, u32 acked) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ int snd_cwnd; -+ -+ if (!mptcp(tp)) { -+ tcp_reno_cong_avoid(sk, ack, acked); -+ return; -+ } -+ -+ if (!tcp_is_cwnd_limited(sk)) -+ return; -+ -+ if (tcp_in_slow_start(tp)) { -+ /* In "safe" area, increase. */ -+ tcp_slow_start(tp, acked); -+ mptcp_balia_recalc_ai(sk); -+ return; -+ } -+ -+ if (mptcp_get_forced(mptcp_meta_sk(sk))) { -+ mptcp_balia_recalc_ai(sk); -+ mptcp_set_forced(sk, 0); -+ } -+ -+ snd_cwnd = (int)mptcp_get_ai(sk); -+ -+ if (tp->snd_cwnd_cnt >= snd_cwnd) { -+ if (tp->snd_cwnd < tp->snd_cwnd_clamp) { -+ tp->snd_cwnd++; -+ mptcp_balia_recalc_ai(sk); -+ } -+ -+ tp->snd_cwnd_cnt = 0; -+ } else { -+ tp->snd_cwnd_cnt++; -+ } -+} -+ -+static u32 mptcp_balia_ssthresh(struct sock *sk) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ -+ if (unlikely(!mptcp(tp))) -+ return tcp_reno_ssthresh(sk); -+ else -+ return max((u32)(tp->snd_cwnd - mptcp_get_md(sk)), 1U); -+} -+ -+static struct tcp_congestion_ops mptcp_balia = { -+ .init = mptcp_balia_init, -+ .ssthresh = mptcp_balia_ssthresh, -+ .cong_avoid = mptcp_balia_cong_avoid, -+ .undo_cwnd = tcp_reno_undo_cwnd, -+ .cwnd_event = mptcp_balia_cwnd_event, -+ .set_state = mptcp_balia_set_state, -+ .owner = THIS_MODULE, -+ .name = "balia", -+}; -+ -+static int __init mptcp_balia_register(void) -+{ -+ BUILD_BUG_ON(sizeof(struct mptcp_balia) > ICSK_CA_PRIV_SIZE); -+ return tcp_register_congestion_control(&mptcp_balia); -+} -+ -+static void __exit mptcp_balia_unregister(void) -+{ -+ tcp_unregister_congestion_control(&mptcp_balia); -+} -+ -+module_init(mptcp_balia_register); -+module_exit(mptcp_balia_unregister); -+ -+MODULE_AUTHOR("Jaehyun Hwang, Anwar Walid, Qiuyu Peng, Steven H. 
Low"); -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("MPTCP BALIA CONGESTION CONTROL ALGORITHM"); -+MODULE_VERSION("0.1"); -diff --git a/net/mptcp/mptcp_binder.c b/net/mptcp/mptcp_binder.c -new file mode 100644 -index 000000000000..7f34a8d00274 ---- /dev/null -+++ b/net/mptcp/mptcp_binder.c -@@ -0,0 +1,494 @@ -+#include -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define MPTCP_GW_MAX_LISTS 10 -+#define MPTCP_GW_LIST_MAX_LEN 6 -+#define MPTCP_GW_SYSCTL_MAX_LEN (15 * MPTCP_GW_LIST_MAX_LEN * \ -+ MPTCP_GW_MAX_LISTS) -+ -+struct mptcp_gw_list { -+ struct in_addr list[MPTCP_GW_MAX_LISTS][MPTCP_GW_LIST_MAX_LEN]; -+ u8 len[MPTCP_GW_MAX_LISTS]; -+}; -+ -+struct binder_priv { -+ /* Worker struct for subflow establishment */ -+ struct work_struct subflow_work; -+ -+ struct mptcp_cb *mpcb; -+ -+ /* Prevent multiple sub-sockets concurrently iterating over sockets */ -+ spinlock_t *flow_lock; -+}; -+ -+static struct mptcp_gw_list *mptcp_gws; -+static rwlock_t mptcp_gws_lock; -+ -+static int mptcp_binder_ndiffports __read_mostly = 1; -+ -+static char sysctl_mptcp_binder_gateways[MPTCP_GW_SYSCTL_MAX_LEN] __read_mostly; -+ -+static int mptcp_get_avail_list_ipv4(struct sock *sk) -+{ -+ int i, j, list_taken, opt_ret, opt_len; -+ unsigned char *opt_ptr, *opt_end_ptr, opt[MAX_IPOPTLEN]; -+ -+ for (i = 0; i < MPTCP_GW_MAX_LISTS; ++i) { -+ struct mptcp_tcp_sock *mptcp; -+ -+ if (mptcp_gws->len[i] == 0) -+ goto error; -+ -+ mptcp_debug("mptcp_get_avail_list_ipv4: List %i\n", i); -+ list_taken = 0; -+ -+ /* Loop through all sub-sockets in this connection */ -+ mptcp_for_each_sub(tcp_sk(sk)->mpcb, mptcp) { -+ sk = mptcp_to_sock(mptcp); -+ -+ mptcp_debug("mptcp_get_avail_list_ipv4: Next sock\n"); -+ -+ /* Reset length and options buffer, then retrieve -+ * from socket -+ */ -+ opt_len = MAX_IPOPTLEN; -+ memset(opt, 0, MAX_IPOPTLEN); -+ opt_ret = ip_getsockopt(sk, IPPROTO_IP, -+ IP_OPTIONS, (char __user *)opt, (int __user *)&opt_len); -+ if (opt_ret < 0) { -+ mptcp_debug("%s: MPTCP subsocket getsockopt() IP_OPTIONS failed, error %d\n", -+ __func__, opt_ret); -+ goto error; -+ } -+ -+ /* If socket has no options, it has no stake in this list */ -+ if (opt_len <= 0) -+ continue; -+ -+ /* Iterate options buffer */ -+ for (opt_ptr = &opt[0]; opt_ptr < &opt[opt_len]; opt_ptr++) { -+ if (*opt_ptr == IPOPT_LSRR) { -+ mptcp_debug("mptcp_get_avail_list_ipv4: LSRR options found\n"); -+ goto sock_lsrr; -+ } -+ } -+ continue; -+ -+sock_lsrr: -+ /* Pointer to the 2nd to last address */ -+ opt_end_ptr = opt_ptr+(*(opt_ptr+1))-4; -+ -+ /* Addresses start 3 bytes after type offset */ -+ opt_ptr += 3; -+ j = 0; -+ -+ /* Different length lists cannot be the same */ -+ if ((opt_end_ptr-opt_ptr)/4 != mptcp_gws->len[i]) -+ continue; -+ -+ /* Iterate if we are still inside options list -+ * and sysctl list -+ */ -+ while (opt_ptr < opt_end_ptr && j < mptcp_gws->len[i]) { -+ /* If there is a different address, this list must -+ * not be set on this socket -+ */ -+ if (memcmp(&mptcp_gws->list[i][j], opt_ptr, 4)) -+ break; -+ -+ /* Jump 4 bytes to next address */ -+ opt_ptr += 4; -+ j++; -+ } -+ -+ /* Reached the end without a differing address, lists -+ * are therefore identical. 
-+ */ -+ if (j == mptcp_gws->len[i]) { -+ mptcp_debug("mptcp_get_avail_list_ipv4: List already used\n"); -+ list_taken = 1; -+ break; -+ } -+ } -+ -+ /* Free list found if not taken by a socket */ -+ if (!list_taken) { -+ mptcp_debug("mptcp_get_avail_list_ipv4: List free\n"); -+ break; -+ } -+ } -+ -+ if (i >= MPTCP_GW_MAX_LISTS) -+ goto error; -+ -+ return i; -+error: -+ return -1; -+} -+ -+/* The list of addresses is parsed each time a new connection is opened, -+ * to make sure it's up to date. In case of error, all the lists are -+ * marked as unavailable and the subflow's fingerprint is set to 0. -+ */ -+static void mptcp_v4_add_lsrr(struct sock *sk, struct in_addr addr) -+{ -+ int i, j, ret; -+ unsigned char opt[MAX_IPOPTLEN] = {0}; -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct binder_priv *fmp = (struct binder_priv *)&tp->mpcb->mptcp_pm[0]; -+ -+ /* Read lock: multiple sockets can read LSRR addresses at the same -+ * time, but writes are done in mutual exclusion. -+ * Spin lock: must search for free list for one socket at a time, or -+ * multiple sockets could take the same list. -+ */ -+ read_lock(&mptcp_gws_lock); -+ spin_lock(fmp->flow_lock); -+ -+ i = mptcp_get_avail_list_ipv4(sk); -+ -+ /* Execution enters here only if a free path is found. -+ */ -+ if (i >= 0) { -+ opt[0] = IPOPT_NOP; -+ opt[1] = IPOPT_LSRR; -+ opt[2] = sizeof(mptcp_gws->list[i][0].s_addr) * -+ (mptcp_gws->len[i] + 1) + 3; -+ opt[3] = IPOPT_MINOFF; -+ for (j = 0; j < mptcp_gws->len[i]; ++j) -+ memcpy(opt + 4 + -+ (j * sizeof(mptcp_gws->list[i][0].s_addr)), -+ &mptcp_gws->list[i][j].s_addr, -+ sizeof(mptcp_gws->list[i][0].s_addr)); -+ /* Final destination must be part of IP_OPTIONS parameter. */ -+ memcpy(opt + 4 + (j * sizeof(addr.s_addr)), &addr.s_addr, -+ sizeof(addr.s_addr)); -+ -+ /* setsockopt must be inside the lock, otherwise another -+ * subflow could fail to see that we have taken a list. -+ */ -+ ret = ip_setsockopt(sk, IPPROTO_IP, IP_OPTIONS, (char __user *)opt, -+ 4 + sizeof(mptcp_gws->list[i][0].s_addr) * (mptcp_gws->len[i] + 1)); -+ -+ if (ret < 0) { -+ mptcp_debug("%s: MPTCP subsock setsockopt() IP_OPTIONS failed, error %d\n", -+ __func__, ret); -+ } -+ } -+ -+ spin_unlock(fmp->flow_lock); -+ read_unlock(&mptcp_gws_lock); -+ -+ return; -+} -+ -+/* Parses gateways string for a list of paths to different -+ * gateways, and stores them for use with the Loose Source Routing (LSRR) -+ * socket option. Each list must have "," separated addresses, and the lists -+ * themselves must be separated by "-". Returns -1 in case one or more of the -+ * addresses is not a valid ipv4/6 address. -+ */ -+static int mptcp_parse_gateway_ipv4(char *gateways) -+{ -+ int i, j, k, ret; -+ char *tmp_string = NULL; -+ struct in_addr tmp_addr; -+ -+ tmp_string = kzalloc(16, GFP_KERNEL); -+ if (tmp_string == NULL) -+ return -ENOMEM; -+ -+ write_lock(&mptcp_gws_lock); -+ -+ memset(mptcp_gws, 0, sizeof(struct mptcp_gw_list)); -+ -+ /* A TMP string is used since inet_pton needs a null terminated string -+ * but we do not want to modify the sysctl for obvious reasons. -+ * i will iterate over the SYSCTL string, j will iterate over the -+ * temporary string where each IP is copied into, k will iterate over -+ * the IPs in each list. -+ */ -+ for (i = j = k = 0; -+ i < MPTCP_GW_SYSCTL_MAX_LEN && k < MPTCP_GW_MAX_LISTS; -+ ++i) { -+ if (gateways[i] == '-' || gateways[i] == ',' || gateways[i] == '\0') { -+ /* If the temp IP is empty and the current list is -+ * empty, we are done. 
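-+ * (An empty sysctl string or a trailing separator hits this case
-+ * and cleanly terminates the parse loop.)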
-+ */ -+ if (j == 0 && mptcp_gws->len[k] == 0) -+ break; -+ -+ /* Terminate the temp IP string, then if it is -+ * non-empty parse the IP and copy it. -+ */ -+ tmp_string[j] = '\0'; -+ if (j > 0) { -+ mptcp_debug("mptcp_parse_gateway_list tmp: %s i: %d\n", tmp_string, i); -+ -+ ret = in4_pton(tmp_string, strlen(tmp_string), -+ (u8 *)&tmp_addr.s_addr, '\0', -+ NULL); -+ -+ if (ret) { -+ mptcp_debug("mptcp_parse_gateway_list ret: %d s_addr: %pI4\n", -+ ret, -+ &tmp_addr.s_addr); -+ memcpy(&mptcp_gws->list[k][mptcp_gws->len[k]].s_addr, -+ &tmp_addr.s_addr, -+ sizeof(tmp_addr.s_addr)); -+ mptcp_gws->len[k]++; -+ j = 0; -+ tmp_string[j] = '\0'; -+ /* Since we can't impose a limit to -+ * what the user can input, make sure -+ * there are not too many IPs in the -+ * SYSCTL string. -+ */ -+ if (mptcp_gws->len[k] > MPTCP_GW_LIST_MAX_LEN) { -+ mptcp_debug("mptcp_parse_gateway_list too many members in list %i: max %i\n", -+ k, -+ MPTCP_GW_LIST_MAX_LEN); -+ goto error; -+ } -+ } else { -+ goto error; -+ } -+ } -+ -+ if (gateways[i] == '-' || gateways[i] == '\0') -+ ++k; -+ } else { -+ tmp_string[j] = gateways[i]; -+ ++j; -+ } -+ } -+ -+ /* Number of flows is number of gateway lists plus master flow */ -+ mptcp_binder_ndiffports = k+1; -+ -+ write_unlock(&mptcp_gws_lock); -+ kfree(tmp_string); -+ -+ return 0; -+ -+error: -+ memset(mptcp_gws, 0, sizeof(struct mptcp_gw_list)); -+ memset(gateways, 0, sizeof(char) * MPTCP_GW_SYSCTL_MAX_LEN); -+ write_unlock(&mptcp_gws_lock); -+ kfree(tmp_string); -+ return -1; -+} -+ -+/** -+ * Create all new subflows, by doing calls to mptcp_initX_subsockets -+ * -+ * This function uses a goto next_subflow, to allow releasing the lock between -+ * new subflows and giving other processes a chance to do some work on the -+ * socket and potentially finishing the communication. 
-+ **/ -+static void create_subflow_worker(struct work_struct *work) -+{ -+ const struct binder_priv *pm_priv = container_of(work, -+ struct binder_priv, -+ subflow_work); -+ struct mptcp_cb *mpcb = pm_priv->mpcb; -+ struct sock *meta_sk = mpcb->meta_sk; -+ int iter = 0; -+ -+next_subflow: -+ if (iter) { -+ release_sock(meta_sk); -+ mutex_unlock(&mpcb->mpcb_mutex); -+ -+ cond_resched(); -+ } -+ mutex_lock(&mpcb->mpcb_mutex); -+ lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING); -+ -+ if (!mptcp(tcp_sk(meta_sk))) -+ goto exit; -+ -+ iter++; -+ -+ if (sock_flag(meta_sk, SOCK_DEAD)) -+ goto exit; -+ -+ if (mpcb->master_sk && -+ !tcp_sk(mpcb->master_sk)->mptcp->fully_established) -+ goto exit; -+ -+ if (mptcp_binder_ndiffports > iter && -+ mptcp_binder_ndiffports > mptcp_subflow_count(mpcb)) { -+ struct mptcp_loc4 loc; -+ struct mptcp_rem4 rem; -+ -+ loc.addr.s_addr = inet_sk(meta_sk)->inet_saddr; -+ loc.loc4_id = 0; -+ loc.low_prio = 0; -+ -+ rem.addr.s_addr = inet_sk(meta_sk)->inet_daddr; -+ rem.port = inet_sk(meta_sk)->inet_dport; -+ rem.rem4_id = 0; /* Default 0 */ -+ -+ mptcp_init4_subsockets(meta_sk, &loc, &rem); -+ -+ goto next_subflow; -+ } -+ -+exit: -+ release_sock(meta_sk); -+ mutex_unlock(&mpcb->mpcb_mutex); -+ mptcp_mpcb_put(mpcb); -+ sock_put(meta_sk); -+} -+ -+static void binder_new_session(const struct sock *meta_sk) -+{ -+ struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct binder_priv *fmp = (struct binder_priv *)&mpcb->mptcp_pm[0]; -+ static DEFINE_SPINLOCK(flow_lock); -+ -+#if IS_ENABLED(CONFIG_IPV6) -+ if (meta_sk->sk_family == AF_INET6 && -+ !mptcp_v6_is_v4_mapped(meta_sk)) { -+ mptcp_fallback_default(mpcb); -+ return; -+ } -+#endif -+ -+ /* Initialize workqueue-struct */ -+ INIT_WORK(&fmp->subflow_work, create_subflow_worker); -+ fmp->mpcb = mpcb; -+ -+ fmp->flow_lock = &flow_lock; -+} -+ -+static void binder_create_subflows(struct sock *meta_sk) -+{ -+ struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct binder_priv *pm_priv = (struct binder_priv *)&mpcb->mptcp_pm[0]; -+ -+ if (mptcp_in_infinite_mapping_weak(mpcb) || -+ mpcb->server_side || sock_flag(meta_sk, SOCK_DEAD)) -+ return; -+ -+ if (!work_pending(&pm_priv->subflow_work)) { -+ sock_hold(meta_sk); -+ refcount_inc(&mpcb->mpcb_refcnt); -+ queue_work(mptcp_wq, &pm_priv->subflow_work); -+ } -+} -+ -+static int binder_get_local_id(const struct sock *meta_sk, sa_family_t family, -+ union inet_addr *addr, bool *low_prio) -+{ -+ return 0; -+} -+ -+/* Callback functions, executed when sysctl mptcp.mptcp_gateways is updated. -+ * Inspired by proc_tcp_congestion_control().
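-+ * E.g. writing "10.0.0.1,10.0.0.2-192.168.1.1" to
-+ * /proc/sys/net/mptcp/mptcp_binder_gateways (as registered below)
-+ * yields two LSRR lists and mptcp_binder_ndiffports = 3: the master
-+ * flow plus one subflow per list.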
-+ */ -+static int proc_mptcp_gateways(struct ctl_table *ctl, int write, -+ void __user *buffer, size_t *lenp, -+ loff_t *ppos) -+{ -+ int ret; -+ struct ctl_table tbl = { -+ .maxlen = MPTCP_GW_SYSCTL_MAX_LEN, -+ }; -+ -+ if (write) { -+ tbl.data = kzalloc(MPTCP_GW_SYSCTL_MAX_LEN, GFP_KERNEL); -+ if (tbl.data == NULL) -+ return -ENOMEM; -+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos); -+ if (ret == 0) { -+ ret = mptcp_parse_gateway_ipv4(tbl.data); -+ memcpy(ctl->data, tbl.data, MPTCP_GW_SYSCTL_MAX_LEN); -+ } -+ kfree(tbl.data); -+ } else { -+ ret = proc_dostring(ctl, write, buffer, lenp, ppos); -+ } -+ -+ -+ return ret; -+} -+ -+static struct mptcp_pm_ops binder __read_mostly = { -+ .new_session = binder_new_session, -+ .fully_established = binder_create_subflows, -+ .get_local_id = binder_get_local_id, -+ .init_subsocket_v4 = mptcp_v4_add_lsrr, -+ .name = "binder", -+ .owner = THIS_MODULE, -+}; -+ -+static struct ctl_table binder_table[] = { -+ { -+ .procname = "mptcp_binder_gateways", -+ .data = &sysctl_mptcp_binder_gateways, -+ .maxlen = sizeof(char) * MPTCP_GW_SYSCTL_MAX_LEN, -+ .mode = 0644, -+ .proc_handler = &proc_mptcp_gateways -+ }, -+ { } -+}; -+ -+static struct ctl_table_header *mptcp_sysctl_binder; -+ -+/* General initialization of MPTCP_PM */ -+static int __init binder_register(void) -+{ -+ mptcp_gws = kzalloc(sizeof(*mptcp_gws), GFP_KERNEL); -+ if (!mptcp_gws) -+ return -ENOMEM; -+ -+ rwlock_init(&mptcp_gws_lock); -+ -+ BUILD_BUG_ON(sizeof(struct binder_priv) > MPTCP_PM_SIZE); -+ -+ mptcp_sysctl_binder = register_net_sysctl(&init_net, "net/mptcp", -+ binder_table); -+ if (!mptcp_sysctl_binder) -+ goto sysctl_fail; -+ -+ if (mptcp_register_path_manager(&binder)) -+ goto pm_failed; -+ -+ return 0; -+ -+pm_failed: -+ unregister_net_sysctl_table(mptcp_sysctl_binder); -+sysctl_fail: -+ kfree(mptcp_gws); -+ -+ return -1; -+} -+ -+static void binder_unregister(void) -+{ -+ mptcp_unregister_path_manager(&binder); -+ unregister_net_sysctl_table(mptcp_sysctl_binder); -+ kfree(mptcp_gws); -+} -+ -+module_init(binder_register); -+module_exit(binder_unregister); -+ -+MODULE_AUTHOR("Luca Boccassi, Duncan Eastoe, Christoph Paasch (ndiffports)"); -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("BINDER MPTCP"); -+MODULE_VERSION("0.1"); -diff --git a/net/mptcp/mptcp_blest.c b/net/mptcp/mptcp_blest.c -new file mode 100644 -index 000000000000..22e25dd0d44e ---- /dev/null -+++ b/net/mptcp/mptcp_blest.c -@@ -0,0 +1,285 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* MPTCP Scheduler to reduce HoL-blocking and spurious retransmissions. -+ * -+ * Algorithm Design: -+ * Simone Ferlin -+ * Ozgu Alay -+ * Olivier Mehani -+ * Roksana Boreli -+ * -+ * Initial Implementation: -+ * Simone Ferlin -+ * -+ * Additional Authors: -+ * Daniel Weber -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. 
-+ */ -+ -+#include -+#include -+ -+static unsigned char lambda __read_mostly = 12; -+module_param(lambda, byte, 0644); -+MODULE_PARM_DESC(lambda, "Divided by 10 for scaling factor of fast flow rate estimation"); -+ -+static unsigned char max_lambda __read_mostly = 13; -+module_param(max_lambda, byte, 0644); -+MODULE_PARM_DESC(max_lambda, "Divided by 10 for maximum scaling factor of fast flow rate estimation"); -+ -+static unsigned char min_lambda __read_mostly = 10; -+module_param(min_lambda, byte, 0644); -+MODULE_PARM_DESC(min_lambda, "Divided by 10 for minimum scaling factor of fast flow rate estimation"); -+ -+static unsigned char dyn_lambda_good = 10; /* 1% */ -+module_param(dyn_lambda_good, byte, 0644); -+MODULE_PARM_DESC(dyn_lambda_good, "Decrease of lambda in positive case."); -+ -+static unsigned char dyn_lambda_bad = 40; /* 4% */ -+module_param(dyn_lambda_bad, byte, 0644); -+MODULE_PARM_DESC(dyn_lambda_bad, "Increase of lambda in negative case."); -+ -+struct blestsched_priv { -+ u32 last_rbuf_opti; -+ u32 min_srtt_us; -+ u32 max_srtt_us; -+}; -+ -+struct blestsched_cb { -+ s16 lambda_1000; /* values range from min_lambda * 100 to max_lambda * 100 */ -+ u32 last_lambda_update; -+}; -+ -+static struct blestsched_priv *blestsched_get_priv(const struct tcp_sock *tp) -+{ -+ return (struct blestsched_priv *)&tp->mptcp->mptcp_sched[0]; -+} -+ -+static struct blestsched_cb *blestsched_get_cb(const struct tcp_sock *tp) -+{ -+ return (struct blestsched_cb *)&tp->mpcb->mptcp_sched[0]; -+} -+ -+static void blestsched_update_lambda(struct sock *meta_sk, struct sock *sk) -+{ -+ struct blestsched_cb *blest_cb = blestsched_get_cb(tcp_sk(meta_sk)); -+ struct blestsched_priv *blest_p = blestsched_get_priv(tcp_sk(sk)); -+ -+ if (tcp_jiffies32 - blest_cb->last_lambda_update < usecs_to_jiffies(blest_p->min_srtt_us >> 3)) -+ return; -+ -+ /* if there have been retransmissions of packets of the slow flow -+ * during the slow flows last RTT => increase lambda -+ * otherwise decrease -+ */ -+ if (tcp_sk(meta_sk)->retrans_stamp) { -+ /* need to slow down on the slow flow */ -+ blest_cb->lambda_1000 += dyn_lambda_bad; -+ } else { -+ /* use the slow flow more */ -+ blest_cb->lambda_1000 -= dyn_lambda_good; -+ } -+ -+ /* cap lambda_1000 to its value range */ -+ blest_cb->lambda_1000 = min_t(s16, blest_cb->lambda_1000, max_lambda * 100); -+ blest_cb->lambda_1000 = max_t(s16, blest_cb->lambda_1000, min_lambda * 100); -+ -+ blest_cb->last_lambda_update = tcp_jiffies32; -+} -+ -+/* how many bytes will sk send during the rtt of another, slower flow? */ -+static u32 blestsched_estimate_bytes(struct sock *sk, u32 time_8) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct blestsched_priv *blest_p = blestsched_get_priv(tp); -+ struct blestsched_cb *blest_cb = blestsched_get_cb(mptcp_meta_tp(tp)); -+ u32 avg_rtt, num_rtts, ca_cwnd, packets; -+ -+ avg_rtt = (blest_p->min_srtt_us + blest_p->max_srtt_us) / 2; -+ if (avg_rtt == 0) -+ num_rtts = 1; /* sanity */ -+ else -+ num_rtts = (time_8 / avg_rtt) + 1; /* round up */ -+ -+ /* during num_rtts, how many bytes will be sent on the flow? 
-+ * assumes for simplification that Reno is applied as congestion-control -+ */ -+ if (tp->snd_ssthresh == TCP_INFINITE_SSTHRESH) { -+ /* we are in initial slow start */ -+ if (num_rtts > 16) -+ num_rtts = 16; /* cap for sanity */ -+ packets = tp->snd_cwnd * ((1 << num_rtts) - 1); /* cwnd + 2*cwnd + 4*cwnd */ -+ } else { -+ ca_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh + 1); /* assume we jump to CA already */ -+ packets = (ca_cwnd + (num_rtts - 1) / 2) * num_rtts; -+ } -+ -+ return div_u64(((u64)packets) * tp->mss_cache * blest_cb->lambda_1000, 1000); -+} -+ -+static u32 blestsched_estimate_linger_time(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct blestsched_priv *blest_p = blestsched_get_priv(tp); -+ u32 estimate, slope, inflight, cwnd; -+ -+ inflight = tcp_packets_in_flight(tp) + 1; /* take into account the new one */ -+ cwnd = tp->snd_cwnd; -+ -+ if (inflight >= cwnd) { -+ estimate = blest_p->max_srtt_us; -+ } else { -+ slope = blest_p->max_srtt_us - blest_p->min_srtt_us; -+ if (cwnd == 0) -+ cwnd = 1; /* sanity */ -+ estimate = blest_p->min_srtt_us + (slope * inflight) / cwnd; -+ } -+ -+ return (tp->srtt_us > estimate) ? tp->srtt_us : estimate; -+} -+ -+/* This is the BLEST scheduler. This function decides on which flow to send -+ * a given MSS. If all subflows are found to be busy or the currently best -+ * subflow is estimated to possibly cause HoL-blocking, NULL is returned. -+ */ -+struct sock *blest_get_available_subflow(struct sock *meta_sk, struct sk_buff *skb, -+ bool zero_wnd_test) -+{ -+ struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct sock *bestsk, *minsk = NULL; -+ struct tcp_sock *meta_tp, *besttp; -+ struct mptcp_tcp_sock *mptcp; -+ struct blestsched_priv *blest_p; -+ u32 min_srtt = U32_MAX; -+ -+ /* Answer data_fin on same subflow!!! */ -+ if (meta_sk->sk_shutdown & RCV_SHUTDOWN && -+ skb && mptcp_is_data_fin(skb)) { -+ mptcp_for_each_sub(mpcb, mptcp) { -+ bestsk = mptcp_to_sock(mptcp); -+ -+ if (tcp_sk(bestsk)->mptcp->path_index == mpcb->dfin_path_index && -+ mptcp_is_available(bestsk, skb, zero_wnd_test)) -+ return bestsk; -+ } -+ } -+ -+ /* First, find the overall best subflow */ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ bestsk = mptcp_to_sock(mptcp); -+ besttp = tcp_sk(bestsk); -+ blest_p = blestsched_get_priv(besttp); -+ -+ /* Set of states for which we are allowed to send data */ -+ if (!mptcp_sk_can_send(bestsk)) -+ continue; -+ -+ /* We do not send data on this subflow unless it is -+ * fully established, i.e. the 4th ack has been received. -+ */ -+ if (besttp->mptcp->pre_established) -+ continue; -+ -+ blest_p->min_srtt_us = min(blest_p->min_srtt_us, besttp->srtt_us); -+ blest_p->max_srtt_us = max(blest_p->max_srtt_us, besttp->srtt_us); -+ -+ /* record minimal rtt */ -+ if (besttp->srtt_us < min_srtt) { -+ min_srtt = besttp->srtt_us; -+ minsk = bestsk; -+ } -+ } -+ -+ /* find the current best subflow according to the default scheduler */ -+ bestsk = get_available_subflow(meta_sk, skb, zero_wnd_test); -+ -+ /* if we decided to use a slower flow, we have the option of not using it at all */ -+ if (bestsk && minsk && bestsk != minsk) { -+ u32 slow_linger_time, fast_bytes, slow_inflight_bytes, slow_bytes, avail_space; -+ u32 buffered_bytes = 0; -+ -+ meta_tp = tcp_sk(meta_sk); -+ besttp = tcp_sk(bestsk); -+ -+ blestsched_update_lambda(meta_sk, bestsk); -+ -+ /* if we send this SKB now, it will be acked in besttp->srtt seconds -+ * during this time: how many bytes will we send on the fast flow? 
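-+ * For instance (illustrative numbers): a cwnd-limited slow flow with
-+ * max_srtt of 300 ms lingers for 300 ms; a fast flow with avg_rtt
-+ * 50 ms, cwnd 20 (in congestion avoidance), mss 1400 and
-+ * lambda_1000 = 1200 then sends (20 + 3) * 7 = 161 packets in that
-+ * time, roughly 270 KB, which must still fit into the meta send window.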
-+ */ -+ slow_linger_time = blestsched_estimate_linger_time(bestsk); -+ fast_bytes = blestsched_estimate_bytes(minsk, slow_linger_time); -+ -+ if (skb) -+ buffered_bytes = skb->len; -+ -+ /* is the required space available in the mptcp meta send window? -+ * we assume that all bytes inflight on the slow path will be acked in besttp->srtt seconds -+ * (just like the SKB if it was sent now) -> that means that those inflight bytes will -+ * keep occupying space in the meta window until then -+ */ -+ slow_inflight_bytes = besttp->write_seq - besttp->snd_una; -+ slow_bytes = buffered_bytes + slow_inflight_bytes; // bytes of this SKB plus those in flight already -+ -+ avail_space = (slow_bytes < meta_tp->snd_wnd) ? (meta_tp->snd_wnd - slow_bytes) : 0; -+ -+ if (fast_bytes > avail_space) { -+ /* sending this SKB on the slow flow means -+ * we wouldn't be able to send all the data we'd like to send on the fast flow -+ * so don't do that -+ */ -+ return NULL; -+ } -+ } -+ -+ return bestsk; -+} -+ -+static void blestsched_init(struct sock *sk) -+{ -+ struct blestsched_priv *blest_p = blestsched_get_priv(tcp_sk(sk)); -+ struct blestsched_cb *blest_cb = blestsched_get_cb(tcp_sk(mptcp_meta_sk(sk))); -+ -+ blest_p->last_rbuf_opti = tcp_jiffies32; -+ blest_p->min_srtt_us = U32_MAX; -+ blest_p->max_srtt_us = 0; -+ -+ if (!blest_cb->lambda_1000) { -+ blest_cb->lambda_1000 = lambda * 100; -+ blest_cb->last_lambda_update = tcp_jiffies32; -+ } -+} -+ -+static struct mptcp_sched_ops mptcp_sched_blest = { -+ .get_subflow = blest_get_available_subflow, -+ .next_segment = mptcp_next_segment, -+ .init = blestsched_init, -+ .name = "blest", -+ .owner = THIS_MODULE, -+}; -+ -+static int __init blest_register(void) -+{ -+ BUILD_BUG_ON(sizeof(struct blestsched_priv) > MPTCP_SCHED_SIZE); -+ BUILD_BUG_ON(sizeof(struct blestsched_cb) > MPTCP_SCHED_DATA_SIZE); -+ -+ if (mptcp_register_scheduler(&mptcp_sched_blest)) -+ return -1; -+ -+ return 0; -+} -+ -+static void blest_unregister(void) -+{ -+ mptcp_unregister_scheduler(&mptcp_sched_blest); -+} -+ -+module_init(blest_register); -+module_exit(blest_unregister); -+ -+MODULE_AUTHOR("Simone Ferlin, Daniel Weber"); -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("BLEST scheduler for MPTCP, based on default minimum RTT scheduler"); -+MODULE_VERSION("0.95"); -diff --git a/net/mptcp/mptcp_coupled.c b/net/mptcp/mptcp_coupled.c -new file mode 100644 -index 000000000000..9eb7628053f6 ---- /dev/null -+++ b/net/mptcp/mptcp_coupled.c -@@ -0,0 +1,262 @@ -+/* -+ * MPTCP implementation - Linked Increase congestion control Algorithm (LIA) -+ * -+ * Initial Design & Implementation: -+ * Sébastien Barré -+ * -+ * Current Maintainer & Author: -+ * Christoph Paasch -+ * -+ * Additional authors: -+ * Jaakko Korkeaniemi -+ * Gregory Detal -+ * Fabien Duchêne -+ * Andreas Seelinger -+ * Lavkesh Lahngir -+ * Andreas Ripke -+ * Vlad Dogaru -+ * Octavian Purdila -+ * John Ronan -+ * Catalin Nicutar -+ * Brandon Heller -+ * -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ */ -+#include -+#include -+ -+#include -+ -+/* Scaling is done in the numerator with alpha_scale_num and in the denominator -+ * with alpha_scale_den. -+ * -+ * To downscale, we just need to use alpha_scale. 
-+ * -+ * We have: alpha_scale = alpha_scale_num / (alpha_scale_den ^ 2) -+ */ -+static int alpha_scale_den = 10; -+static int alpha_scale_num = 32; -+static int alpha_scale = 12; -+ -+struct mptcp_ccc { -+ u64 alpha; -+ bool forced_update; -+}; -+ -+static inline int mptcp_ccc_sk_can_send(const struct sock *sk) -+{ -+ return mptcp_sk_can_send(sk) && tcp_sk(sk)->srtt_us; -+} -+ -+static inline u64 mptcp_get_alpha(const struct sock *meta_sk) -+{ -+ return ((struct mptcp_ccc *)inet_csk_ca(meta_sk))->alpha; -+} -+ -+static inline void mptcp_set_alpha(const struct sock *meta_sk, u64 alpha) -+{ -+ ((struct mptcp_ccc *)inet_csk_ca(meta_sk))->alpha = alpha; -+} -+ -+static inline u64 mptcp_ccc_scale(u32 val, int scale) -+{ -+ return (u64) val << scale; -+} -+ -+static inline bool mptcp_get_forced(const struct sock *meta_sk) -+{ -+ return ((struct mptcp_ccc *)inet_csk_ca(meta_sk))->forced_update; -+} -+ -+static inline void mptcp_set_forced(const struct sock *meta_sk, bool force) -+{ -+ ((struct mptcp_ccc *)inet_csk_ca(meta_sk))->forced_update = force; -+} -+ -+static void mptcp_ccc_recalc_alpha(const struct sock *sk) -+{ -+ const struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb; -+ const struct mptcp_tcp_sock *mptcp; -+ int best_cwnd = 0, best_rtt = 0, can_send = 0; -+ u64 max_numerator = 0, sum_denominator = 0, alpha = 1; -+ -+ if (!mpcb) -+ return; -+ -+ /* Do regular alpha-calculation for multiple subflows */ -+ -+ /* Find the max numerator of the alpha-calculation */ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ const struct sock *sub_sk = mptcp_to_sock(mptcp); -+ struct tcp_sock *sub_tp = tcp_sk(sub_sk); -+ u64 tmp; -+ -+ if (!mptcp_ccc_sk_can_send(sub_sk)) -+ continue; -+ -+ can_send++; -+ -+ /* We need to look for the path that provides the max value. -+ * Integer overflow is not possible here, because -+ * tmp is a u64.
-+ */ -+ tmp = div64_u64(mptcp_ccc_scale(sub_tp->snd_cwnd, -+ alpha_scale_num), (u64)sub_tp->srtt_us * sub_tp->srtt_us); -+ -+ if (tmp >= max_numerator) { -+ max_numerator = tmp; -+ best_cwnd = sub_tp->snd_cwnd; -+ best_rtt = sub_tp->srtt_us; -+ } -+ } -+ -+ /* No subflow is able to send - we don't care anymore */ -+ if (unlikely(!can_send)) -+ goto exit; -+ -+ /* Calculate the denominator */ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ const struct sock *sub_sk = mptcp_to_sock(mptcp); -+ struct tcp_sock *sub_tp = tcp_sk(sub_sk); -+ -+ if (!mptcp_ccc_sk_can_send(sub_sk)) -+ continue; -+ -+ sum_denominator += div_u64( -+ mptcp_ccc_scale(sub_tp->snd_cwnd, -+ alpha_scale_den) * best_rtt, -+ sub_tp->srtt_us); -+ } -+ sum_denominator *= sum_denominator; -+ if (unlikely(!sum_denominator)) { -+ pr_err("%s: sum_denominator == 0\n", __func__); -+ mptcp_for_each_sub(mpcb, mptcp) { -+ const struct sock *sub_sk = mptcp_to_sock(mptcp); -+ struct tcp_sock *sub_tp = tcp_sk(sub_sk); -+ pr_err("%s: pi:%d, state:%d, rtt:%u, cwnd: %u\n", -+ __func__, sub_tp->mptcp->path_index, -+ sub_sk->sk_state, sub_tp->srtt_us, -+ sub_tp->snd_cwnd); -+ } -+ } -+ -+ alpha = div64_u64(mptcp_ccc_scale(best_cwnd, alpha_scale_num), sum_denominator); -+ -+ if (unlikely(!alpha)) -+ alpha = 1; -+ -+exit: -+ mptcp_set_alpha(mptcp_meta_sk(sk), alpha); -+} -+ -+static void mptcp_ccc_init(struct sock *sk) -+{ -+ if (mptcp(tcp_sk(sk))) { -+ mptcp_set_forced(mptcp_meta_sk(sk), 0); -+ mptcp_set_alpha(mptcp_meta_sk(sk), 1); -+ } -+ /* If this is not an MPTCP socket, behave like Reno: just return */ -+} -+ -+static void mptcp_ccc_cwnd_event(struct sock *sk, enum tcp_ca_event event) -+{ -+ if (event == CA_EVENT_LOSS) -+ mptcp_ccc_recalc_alpha(sk); -+} -+ -+static void mptcp_ccc_set_state(struct sock *sk, u8 ca_state) -+{ -+ if (!mptcp(tcp_sk(sk))) -+ return; -+ -+ mptcp_set_forced(mptcp_meta_sk(sk), 1); -+} -+ -+static void mptcp_ccc_cong_avoid(struct sock *sk, u32 ack, u32 acked) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ int snd_cwnd; -+ u64 alpha; -+ -+ if (!mptcp(tp)) { -+ tcp_reno_cong_avoid(sk, ack, acked); -+ return; -+ } -+ -+ if (!tcp_is_cwnd_limited(sk)) -+ return; -+ -+ if (tcp_in_slow_start(tp)) { -+ /* In "safe" area, increase. */ -+ tcp_slow_start(tp, acked); -+ mptcp_ccc_recalc_alpha(sk); -+ return; -+ } -+ -+ if (mptcp_get_forced(mptcp_meta_sk(sk))) { -+ mptcp_ccc_recalc_alpha(sk); -+ mptcp_set_forced(mptcp_meta_sk(sk), 0); -+ } -+ -+ alpha = mptcp_get_alpha(mptcp_meta_sk(sk)); -+ -+ /* This may happen if, at initialization, the mpcb -+ * was not yet attached to the sock, and thus -+ * initializing alpha failed. -+ */ -+ if (unlikely(!alpha)) -+ alpha = 1; -+ -+ snd_cwnd = (int)div_u64((u64)mptcp_ccc_scale(1, alpha_scale), alpha); -+ -+ /* snd_cwnd_cnt >= max(scale * tot_cwnd / alpha, cwnd) -+ * Thus, we select here the max value.
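-+ * For instance, with n identical subflows (equal cwnd and RTT) the
-+ * resulting threshold works out to n^2 * cwnd per subflow, so the
-+ * aggregate window grows by roughly one segment per RTT, which is the
-+ * coupling goal of RFC 6356; a single subflow degenerates to plain Reno.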
-+ */ -+ if (snd_cwnd < tp->snd_cwnd) -+ snd_cwnd = tp->snd_cwnd; -+ -+ if (tp->snd_cwnd_cnt >= snd_cwnd) { -+ if (tp->snd_cwnd < tp->snd_cwnd_clamp) { -+ tp->snd_cwnd++; -+ mptcp_ccc_recalc_alpha(sk); -+ } -+ -+ tp->snd_cwnd_cnt = 0; -+ } else { -+ tp->snd_cwnd_cnt++; -+ } -+} -+ -+static struct tcp_congestion_ops mptcp_ccc = { -+ .init = mptcp_ccc_init, -+ .ssthresh = tcp_reno_ssthresh, -+ .cong_avoid = mptcp_ccc_cong_avoid, -+ .undo_cwnd = tcp_reno_undo_cwnd, -+ .cwnd_event = mptcp_ccc_cwnd_event, -+ .set_state = mptcp_ccc_set_state, -+ .owner = THIS_MODULE, -+ .name = "lia", -+}; -+ -+static int __init mptcp_ccc_register(void) -+{ -+ BUILD_BUG_ON(sizeof(struct mptcp_ccc) > ICSK_CA_PRIV_SIZE); -+ return tcp_register_congestion_control(&mptcp_ccc); -+} -+ -+static void __exit mptcp_ccc_unregister(void) -+{ -+ tcp_unregister_congestion_control(&mptcp_ccc); -+} -+ -+module_init(mptcp_ccc_register); -+module_exit(mptcp_ccc_unregister); -+ -+MODULE_AUTHOR("Christoph Paasch, Sébastien Barré"); -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("MPTCP LINKED INCREASE CONGESTION CONTROL ALGORITHM"); -+MODULE_VERSION("0.1"); -diff --git a/net/mptcp/mptcp_ctrl.c b/net/mptcp/mptcp_ctrl.c -new file mode 100644 -index 000000000000..db01ec142111 ---- /dev/null -+++ b/net/mptcp/mptcp_ctrl.c -@@ -0,0 +1,3313 @@ -+/* -+ * MPTCP implementation - MPTCP-control -+ * -+ * Initial Design & Implementation: -+ * Sébastien Barré -+ * -+ * Current Maintainer & Author: -+ * Christoph Paasch -+ * -+ * Additional authors: -+ * Jaakko Korkeaniemi -+ * Gregory Detal -+ * Fabien Duchêne -+ * Andreas Seelinger -+ * Lavkesh Lahngir -+ * Andreas Ripke -+ * Vlad Dogaru -+ * Octavian Purdila -+ * John Ronan -+ * Catalin Nicutar -+ * Brandon Heller -+ * -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. 
-+ */ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#if IS_ENABLED(CONFIG_IPV6) -+#include -+#include -+#endif -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static struct kmem_cache *mptcp_sock_cache __read_mostly; -+static struct kmem_cache *mptcp_cb_cache __read_mostly; -+static struct kmem_cache *mptcp_tw_cache __read_mostly; -+ -+int sysctl_mptcp_enabled __read_mostly = 1; -+int sysctl_mptcp_version __read_mostly = 0; -+static int min_mptcp_version; -+static int max_mptcp_version = 1; -+int sysctl_mptcp_checksum __read_mostly = 1; -+int sysctl_mptcp_debug __read_mostly; -+EXPORT_SYMBOL(sysctl_mptcp_debug); -+int sysctl_mptcp_syn_retries __read_mostly = 3; -+ -+bool mptcp_init_failed __read_mostly; -+ -+struct static_key mptcp_static_key = STATIC_KEY_INIT_FALSE; -+EXPORT_SYMBOL(mptcp_static_key); -+ -+static void mptcp_key_hash(u8 version, u64 key, u32 *token, u64 *idsn); -+ -+static int proc_mptcp_path_manager(struct ctl_table *ctl, int write, -+ void __user *buffer, size_t *lenp, -+ loff_t *ppos) -+{ -+ char val[MPTCP_PM_NAME_MAX]; -+ struct ctl_table tbl = { -+ .data = val, -+ .maxlen = MPTCP_PM_NAME_MAX, -+ }; -+ int ret; -+ -+ mptcp_get_default_path_manager(val); -+ -+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos); -+ if (write && ret == 0) -+ ret = mptcp_set_default_path_manager(val); -+ return ret; -+} -+ -+static int proc_mptcp_scheduler(struct ctl_table *ctl, int write, -+ void __user *buffer, size_t *lenp, -+ loff_t *ppos) -+{ -+ char val[MPTCP_SCHED_NAME_MAX]; -+ struct ctl_table tbl = { -+ .data = val, -+ .maxlen = MPTCP_SCHED_NAME_MAX, -+ }; -+ int ret; -+ -+ mptcp_get_default_scheduler(val); -+ -+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos); -+ if (write && ret == 0) -+ ret = mptcp_set_default_scheduler(val); -+ return ret; -+} -+ -+static struct ctl_table mptcp_table[] = { -+ { -+ .procname = "mptcp_enabled", -+ .data = &sysctl_mptcp_enabled, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec -+ }, -+ { -+ .procname = "mptcp_version", -+ .data = &sysctl_mptcp_version, -+ .mode = 0644, -+ .maxlen = sizeof(int), -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = &min_mptcp_version, -+ .extra2 = &max_mptcp_version, -+ }, -+ { -+ .procname = "mptcp_checksum", -+ .data = &sysctl_mptcp_checksum, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec -+ }, -+ { -+ .procname = "mptcp_debug", -+ .data = &sysctl_mptcp_debug, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec -+ }, -+ { -+ .procname = "mptcp_syn_retries", -+ .data = &sysctl_mptcp_syn_retries, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec -+ }, -+ { -+ .procname = "mptcp_path_manager", -+ .mode = 0644, -+ .maxlen = MPTCP_PM_NAME_MAX, -+ .proc_handler = proc_mptcp_path_manager, -+ }, -+ { -+ .procname = "mptcp_scheduler", -+ .mode = 0644, -+ .maxlen = MPTCP_SCHED_NAME_MAX, -+ .proc_handler = proc_mptcp_scheduler, -+ }, -+ { } -+}; -+ -+static inline u32 mptcp_hash_tk(u32 token, struct mptcp_hashtable *htable) -+{ -+ return token & htable->mask; -+} -+ -+struct mptcp_hashtable mptcp_tk_htable; -+EXPORT_SYMBOL(mptcp_tk_htable); -+ -+/* The following hash table is used to avoid collision of token */ -+static struct mptcp_hashtable mptcp_reqsk_tk_htb; -+ -+/* Lock, 
protecting the two hash-tables that hold the token. Namely, -+ * mptcp_reqsk_tk_htb and tk_hashtable -+ */ -+static spinlock_t mptcp_tk_hashlock; -+ -+static bool mptcp_reqsk_find_tk(const u32 token) -+{ -+ const u32 hash = mptcp_hash_tk(token, &mptcp_reqsk_tk_htb); -+ const struct mptcp_request_sock *mtreqsk; -+ const struct hlist_nulls_node *node; -+ -+begin: -+ hlist_nulls_for_each_entry_rcu(mtreqsk, node, -+ &mptcp_reqsk_tk_htb.hashtable[hash], -+ hash_entry) { -+ if (token == mtreqsk->mptcp_loc_token) -+ return true; -+ } -+ /* A request-socket is destroyed by RCU. So, it might have been recycled -+ * and put into another hash-table list. So, after the lookup we may -+ * end up in a different list. So, we may need to restart. -+ * -+ * See also the comment in __inet_lookup_established. -+ */ -+ if (get_nulls_value(node) != hash) -+ goto begin; -+ return false; -+} -+ -+static void mptcp_reqsk_insert_tk(struct request_sock *reqsk, const u32 token) -+{ -+ u32 hash = mptcp_hash_tk(token, &mptcp_reqsk_tk_htb); -+ -+ hlist_nulls_add_head_rcu(&mptcp_rsk(reqsk)->hash_entry, -+ &mptcp_reqsk_tk_htb.hashtable[hash]); -+} -+ -+static void mptcp_reqsk_remove_tk(const struct request_sock *reqsk) -+{ -+ rcu_read_lock(); -+ local_bh_disable(); -+ spin_lock(&mptcp_tk_hashlock); -+ hlist_nulls_del_init_rcu(&mptcp_rsk(reqsk)->hash_entry); -+ spin_unlock(&mptcp_tk_hashlock); -+ local_bh_enable(); -+ rcu_read_unlock(); -+} -+ -+void mptcp_reqsk_destructor(struct request_sock *req) -+{ -+ if (!mptcp_rsk(req)->is_sub) -+ mptcp_reqsk_remove_tk(req); -+} -+ -+static void __mptcp_hash_insert(struct tcp_sock *meta_tp, const u32 token) -+{ -+ u32 hash = mptcp_hash_tk(token, &mptcp_tk_htable); -+ -+ hlist_nulls_add_head_rcu(&meta_tp->tk_table, -+ &mptcp_tk_htable.hashtable[hash]); -+ meta_tp->inside_tk_table = 1; -+} -+ -+static bool mptcp_find_token(u32 token) -+{ -+ const u32 hash = mptcp_hash_tk(token, &mptcp_tk_htable); -+ const struct tcp_sock *meta_tp; -+ const struct hlist_nulls_node *node; -+ -+begin: -+ hlist_nulls_for_each_entry_rcu(meta_tp, node, -+ &mptcp_tk_htable.hashtable[hash], -+ tk_table) { -+ if (token == meta_tp->mptcp_loc_token) -+ return true; -+ } -+ /* A TCP-socket is destroyed by RCU. So, it might have been recycled -+ * and put into another hash-table list. So, after the lookup we may -+ * end up in a different list. So, we may need to restart. -+ * -+ * See also the comment in __inet_lookup_established. -+ */ -+ if (get_nulls_value(node) != hash) -+ goto begin; -+ return false; -+} -+ -+static void mptcp_set_key_reqsk(struct request_sock *req, -+ const struct sk_buff *skb, -+ u32 seed) -+{ -+ const struct inet_request_sock *ireq = inet_rsk(req); -+ struct mptcp_request_sock *mtreq = mptcp_rsk(req); -+ -+ if (skb->protocol == htons(ETH_P_IP)) { -+ mtreq->mptcp_loc_key = mptcp_v4_get_key(ip_hdr(skb)->saddr, -+ ip_hdr(skb)->daddr, -+ htons(ireq->ir_num), -+ ireq->ir_rmt_port, -+ seed); -+#if IS_ENABLED(CONFIG_IPV6) -+ } else { -+ mtreq->mptcp_loc_key = mptcp_v6_get_key(ipv6_hdr(skb)->saddr.s6_addr32, -+ ipv6_hdr(skb)->daddr.s6_addr32, -+ htons(ireq->ir_num), -+ ireq->ir_rmt_port, -+ seed); -+#endif -+ } -+ -+ mptcp_key_hash(mtreq->mptcp_ver, mtreq->mptcp_loc_key, &mtreq->mptcp_loc_token, NULL); -+} -+ -+/* New MPTCP-connection request, prepare a new token for the meta-socket that -+ * will be created in mptcp_check_req_master(), and store the received token. 
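-+ * The local key is regenerated until the derived token collides with
-+ * neither a pending request-socket nor an established meta-socket;
-+ * this is why both lookups run under mptcp_tk_hashlock.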
-+ */ -+static void mptcp_reqsk_new_mptcp(struct request_sock *req, -+ const struct sock *sk, -+ const struct mptcp_options_received *mopt, -+ const struct sk_buff *skb) -+{ -+ struct mptcp_request_sock *mtreq = mptcp_rsk(req); -+ -+ inet_rsk(req)->saw_mpc = 1; -+ mtreq->mptcp_ver = mopt->mptcp_ver; -+ -+ rcu_read_lock(); -+ local_bh_disable(); -+ spin_lock(&mptcp_tk_hashlock); -+ do { -+ mptcp_set_key_reqsk(req, skb, mptcp_seed++); -+ } while (mptcp_reqsk_find_tk(mtreq->mptcp_loc_token) || -+ mptcp_find_token(mtreq->mptcp_loc_token)); -+ mptcp_reqsk_insert_tk(req, mtreq->mptcp_loc_token); -+ spin_unlock(&mptcp_tk_hashlock); -+ local_bh_enable(); -+ rcu_read_unlock(); -+ -+ if (mtreq->mptcp_ver == MPTCP_VERSION_0) { -+ mtreq->mptcp_rem_key = mopt->mptcp_sender_key; -+ mtreq->rem_key_set = 1; -+ } -+} -+ -+static int mptcp_reqsk_new_cookie(struct request_sock *req, -+ const struct sock *sk, -+ const struct mptcp_options_received *mopt, -+ const struct sk_buff *skb) -+{ -+ struct mptcp_request_sock *mtreq = mptcp_rsk(req); -+ -+ /* Must happen before mptcp_set_key_reqsk to generate the token with -+ * the proper hash algo. -+ */ -+ mtreq->mptcp_ver = mopt->mptcp_ver; -+ -+ rcu_read_lock(); -+ local_bh_disable(); -+ spin_lock(&mptcp_tk_hashlock); -+ -+ mptcp_set_key_reqsk(req, skb, tcp_rsk(req)->snt_isn); -+ -+ if (mptcp_reqsk_find_tk(mtreq->mptcp_loc_token) || -+ mptcp_find_token(mtreq->mptcp_loc_token)) { -+ spin_unlock(&mptcp_tk_hashlock); -+ local_bh_enable(); -+ rcu_read_unlock(); -+ return false; -+ } -+ -+ inet_rsk(req)->saw_mpc = 1; -+ -+ spin_unlock(&mptcp_tk_hashlock); -+ local_bh_enable(); -+ rcu_read_unlock(); -+ -+ if (mtreq->mptcp_ver == MPTCP_VERSION_0) { -+ mtreq->mptcp_rem_key = mopt->mptcp_sender_key; -+ mtreq->rem_key_set = 1; -+ } -+ -+ return true; -+} -+ -+static void mptcp_set_key_sk(const struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ const struct inet_sock *isk = inet_sk(sk); -+ -+ if (sk->sk_family == AF_INET) -+ tp->mptcp_loc_key = mptcp_v4_get_key(isk->inet_saddr, -+ isk->inet_daddr, -+ isk->inet_sport, -+ isk->inet_dport, -+ mptcp_seed++); -+#if IS_ENABLED(CONFIG_IPV6) -+ else -+ tp->mptcp_loc_key = mptcp_v6_get_key(inet6_sk(sk)->saddr.s6_addr32, -+ sk->sk_v6_daddr.s6_addr32, -+ isk->inet_sport, -+ isk->inet_dport, -+ mptcp_seed++); -+#endif -+ -+ mptcp_key_hash(tp->mptcp_ver, tp->mptcp_loc_key, &tp->mptcp_loc_token, NULL); -+} -+ -+#ifdef CONFIG_JUMP_LABEL -+static atomic_t mptcp_needed_deferred; -+static atomic_t mptcp_wanted; -+ -+static void mptcp_clear(struct work_struct *work) -+{ -+ int deferred = atomic_xchg(&mptcp_needed_deferred, 0); -+ int wanted; -+ -+ wanted = atomic_add_return(deferred, &mptcp_wanted); -+ if (wanted > 0) -+ static_key_enable(&mptcp_static_key); -+ else -+ static_key_disable(&mptcp_static_key); -+} -+ -+static DECLARE_WORK(mptcp_work, mptcp_clear); -+#endif -+ -+static void mptcp_enable_static_key_bh(void) -+{ -+#ifdef CONFIG_JUMP_LABEL -+ int wanted; -+ -+ while (1) { -+ wanted = atomic_read(&mptcp_wanted); -+ if (wanted <= 0) -+ break; -+ if (atomic_cmpxchg(&mptcp_wanted, wanted, wanted + 1) == wanted) -+ return; -+ } -+ atomic_inc(&mptcp_needed_deferred); -+ schedule_work(&mptcp_work); -+#else -+ static_key_slow_inc(&mptcp_static_key); -+#endif -+} -+ -+static void mptcp_enable_static_key(void) -+{ -+#ifdef CONFIG_JUMP_LABEL -+ atomic_inc(&mptcp_wanted); -+ static_key_enable(&mptcp_static_key); -+#else -+ static_key_slow_inc(&mptcp_static_key); -+#endif -+} -+ -+void mptcp_disable_static_key(void) -+{ -+#ifdef 
CONFIG_JUMP_LABEL -+ int wanted; -+ -+ while (1) { -+ wanted = atomic_read(&mptcp_wanted); -+ if (wanted <= 1) -+ break; -+ if (atomic_cmpxchg(&mptcp_wanted, wanted, wanted - 1) == wanted) -+ return; -+ } -+ atomic_dec(&mptcp_needed_deferred); -+ schedule_work(&mptcp_work); -+#else -+ static_key_slow_dec(&mptcp_static_key); -+#endif -+} -+ -+void mptcp_enable_sock(struct sock *sk) -+{ -+ if (!sock_flag(sk, SOCK_MPTCP)) { -+ sock_set_flag(sk, SOCK_MPTCP); -+ tcp_sk(sk)->mptcp_ver = sysctl_mptcp_version; -+ -+ /* Necessary here, because MPTCP can be enabled/disabled through -+ * a setsockopt. -+ */ -+ if (sk->sk_family == AF_INET) -+ inet_csk(sk)->icsk_af_ops = &mptcp_v4_specific; -+#if IS_ENABLED(CONFIG_IPV6) -+ else if (mptcp_v6_is_v4_mapped(sk)) -+ inet_csk(sk)->icsk_af_ops = &mptcp_v6_mapped; -+ else -+ inet_csk(sk)->icsk_af_ops = &mptcp_v6_specific; -+#endif -+ -+ mptcp_enable_static_key(); -+ } -+} -+ -+void mptcp_disable_sock(struct sock *sk) -+{ -+ if (sock_flag(sk, SOCK_MPTCP)) { -+ sock_reset_flag(sk, SOCK_MPTCP); -+ -+ /* Necessary here, because MPTCP can be enabled/disabled through -+ * a setsockopt. -+ */ -+ if (sk->sk_family == AF_INET) -+ inet_csk(sk)->icsk_af_ops = &ipv4_specific; -+#if IS_ENABLED(CONFIG_IPV6) -+ else if (mptcp_v6_is_v4_mapped(sk)) -+ inet_csk(sk)->icsk_af_ops = &ipv6_mapped; -+ else -+ inet_csk(sk)->icsk_af_ops = &ipv6_specific; -+#endif -+ -+ mptcp_disable_static_key(); -+ } -+} -+ -+void mptcp_connect_init(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ -+ rcu_read_lock(); -+ local_bh_disable(); -+ spin_lock(&mptcp_tk_hashlock); -+ do { -+ mptcp_set_key_sk(sk); -+ } while (mptcp_reqsk_find_tk(tp->mptcp_loc_token) || -+ mptcp_find_token(tp->mptcp_loc_token)); -+ -+ __mptcp_hash_insert(tp, tp->mptcp_loc_token); -+ spin_unlock(&mptcp_tk_hashlock); -+ local_bh_enable(); -+ rcu_read_unlock(); -+ -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE); -+} -+ -+/** -+ * This function increments the refcount of the mpcb struct. -+ * It is the responsibility of the caller to decrement when releasing -+ * the structure. -+ */ -+struct sock *mptcp_hash_find(const struct net *net, const u32 token) -+{ -+ const u32 hash = mptcp_hash_tk(token, &mptcp_tk_htable); -+ const struct tcp_sock *meta_tp; -+ struct sock *meta_sk = NULL; -+ const struct hlist_nulls_node *node; -+ -+ rcu_read_lock(); -+ local_bh_disable(); -+begin: -+ hlist_nulls_for_each_entry_rcu(meta_tp, node, -+ &mptcp_tk_htable.hashtable[hash], -+ tk_table) { -+ meta_sk = (struct sock *)meta_tp; -+ if (token == meta_tp->mptcp_loc_token && -+ net_eq(net, sock_net(meta_sk))) { -+ if (unlikely(!refcount_inc_not_zero(&meta_sk->sk_refcnt))) -+ goto out; -+ if (unlikely(token != meta_tp->mptcp_loc_token || -+ !net_eq(net, sock_net(meta_sk)))) { -+ sock_gen_put(meta_sk); -+ goto begin; -+ } -+ goto found; -+ } -+ } -+ /* A TCP-socket is destroyed by RCU. So, it might have been recycled -+ * and put into another hash-table list. So, after the lookup we may -+ * end up in a different list. So, we may need to restart. -+ * -+ * See also the comment in __inet_lookup_established. 
-+ */ -+ if (get_nulls_value(node) != hash) -+ goto begin; -+out: -+ meta_sk = NULL; -+found: -+ local_bh_enable(); -+ rcu_read_unlock(); -+ return meta_sk; -+} -+EXPORT_SYMBOL_GPL(mptcp_hash_find); -+ -+void mptcp_hash_remove_bh(struct tcp_sock *meta_tp) -+{ -+ /* remove from the token hashtable */ -+ rcu_read_lock(); -+ local_bh_disable(); -+ spin_lock(&mptcp_tk_hashlock); -+ hlist_nulls_del_init_rcu(&meta_tp->tk_table); -+ meta_tp->inside_tk_table = 0; -+ spin_unlock(&mptcp_tk_hashlock); -+ local_bh_enable(); -+ rcu_read_unlock(); -+} -+ -+struct sock *mptcp_select_ack_sock(const struct sock *meta_sk) -+{ -+ const struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ struct sock *rttsk = NULL, *lastsk = NULL; -+ u32 min_time = 0, last_active = 0; -+ struct mptcp_tcp_sock *mptcp; -+ -+ mptcp_for_each_sub(meta_tp->mpcb, mptcp) { -+ struct sock *sk = mptcp_to_sock(mptcp); -+ struct tcp_sock *tp = tcp_sk(sk); -+ u32 elapsed; -+ -+ if (!mptcp_sk_can_send_ack(sk) || tp->pf) -+ continue; -+ -+ elapsed = keepalive_time_elapsed(tp); -+ -+ /* We take the one with the lowest RTT within a reasonable -+ * (meta-RTO)-timeframe -+ */ -+ if (elapsed < inet_csk(meta_sk)->icsk_rto) { -+ if (!min_time || tp->srtt_us < min_time) { -+ min_time = tp->srtt_us; -+ rttsk = sk; -+ } -+ continue; -+ } -+ -+ /* Otherwise, we just take the most recent active */ -+ if (!rttsk && (!last_active || elapsed < last_active)) { -+ last_active = elapsed; -+ lastsk = sk; -+ } -+ } -+ -+ if (rttsk) -+ return rttsk; -+ -+ return lastsk; -+} -+EXPORT_SYMBOL(mptcp_select_ack_sock); -+ -+static void mptcp_sock_def_error_report(struct sock *sk) -+{ -+ const struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb; -+ struct tcp_sock *tp = tcp_sk(sk); -+ -+ if (!sock_flag(sk, SOCK_DEAD)) { -+ if (tp->send_mp_fclose && sk->sk_err == ETIMEDOUT) { -+ /* Called by the keep alive timer (tcp_write_timeout), -+ * when the limit of fastclose retransmissions has been -+ * reached. Send a TCP RST to clear the status of any -+ * stateful firewall (typically conntrack) which are -+ * not aware of mptcp and cannot understand the -+ * fastclose option. -+ */ -+ tp->ops->send_active_reset(sk, GFP_ATOMIC); -+ } -+ } -+ -+ /* record this info that can be used by PM after the sf close */ -+ tp->mptcp->sk_err = sk->sk_err; -+ -+ if (!tp->tcp_disconnect && mptcp_in_infinite_mapping_weak(mpcb)) { -+ struct sock *meta_sk = mptcp_meta_sk(sk); -+ -+ meta_sk->sk_err = sk->sk_err; -+ meta_sk->sk_err_soft = sk->sk_err_soft; -+ -+ if (!sock_flag(meta_sk, SOCK_DEAD)) -+ meta_sk->sk_error_report(meta_sk); -+ -+ WARN(meta_sk->sk_state == TCP_CLOSE, -+ "Meta already closed i_rcv %u i_snd %u send_i %u flags %#lx\n", -+ mpcb->infinite_mapping_rcv, mpcb->infinite_mapping_snd, -+ mpcb->send_infinite_mapping, meta_sk->sk_flags); -+ -+ if (meta_sk->sk_state != TCP_CLOSE) -+ tcp_done(meta_sk); -+ } -+ -+ sk->sk_err = 0; -+ return; -+} -+ -+void mptcp_mpcb_put(struct mptcp_cb *mpcb) -+{ -+ if (refcount_dec_and_test(&mpcb->mpcb_refcnt)) { -+ mptcp_cleanup_path_manager(mpcb); -+ mptcp_cleanup_scheduler(mpcb); -+ kfree(mpcb->master_info); -+ kmem_cache_free(mptcp_cb_cache, mpcb); -+ } -+} -+EXPORT_SYMBOL(mptcp_mpcb_put); -+ -+static void mptcp_mpcb_cleanup(struct mptcp_cb *mpcb) -+{ -+ struct mptcp_tw *mptw; -+ -+ /* The mpcb is disappearing - we can make the final -+ * update to the rcv_nxt of the time-wait-sock and remove -+ * its reference to the mpcb. 
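-+ * Each entry on tw_list holds its own reference on the mpcb, hence
-+ * the mptcp_mpcb_put() for every time-wait sock unlinked here.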
-+ */ -+ spin_lock_bh(&mpcb->mpcb_list_lock); -+ list_for_each_entry_rcu(mptw, &mpcb->tw_list, list) { -+ list_del_rcu(&mptw->list); -+ mptw->in_list = 0; -+ mptcp_mpcb_put(mpcb); -+ rcu_assign_pointer(mptw->mpcb, NULL); -+ } -+ spin_unlock_bh(&mpcb->mpcb_list_lock); -+ -+ mptcp_mpcb_put(mpcb); -+} -+ -+static void mptcp_sock_destruct(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ -+ if (!is_meta_sk(sk)) { -+ BUG_ON(!hlist_unhashed(&tp->mptcp->cb_list)); -+ -+ kmem_cache_free(mptcp_sock_cache, tp->mptcp); -+ tp->mptcp = NULL; -+ -+ /* Taken when mpcb pointer was set */ -+ sock_put(mptcp_meta_sk(sk)); -+ mptcp_mpcb_put(tp->mpcb); -+ } else { -+ mptcp_debug("%s destroying meta-sk token %#x\n", __func__, -+ tcp_sk(sk)->mpcb->mptcp_loc_token); -+ -+ mptcp_mpcb_cleanup(tp->mpcb); -+ } -+ -+ WARN_ON(!static_key_false(&mptcp_static_key)); -+ -+ /* Must be called here, because this will decrement the jump-label. */ -+ inet_sock_destruct(sk); -+} -+ -+void mptcp_destroy_sock(struct sock *sk) -+{ -+ if (is_meta_sk(sk)) { -+ struct mptcp_tcp_sock *mptcp; -+ struct hlist_node *tmp; -+ -+ __skb_queue_purge(&tcp_sk(sk)->mpcb->reinject_queue); -+ -+ /* We have to close all remaining subflows. Normally, they -+ * should all be about to get closed. But, if the kernel is -+ * forcing a closure (e.g., tcp_write_err), the subflows might -+ * not have been closed properly (as we are waiting for the -+ * DATA_ACK of the DATA_FIN). -+ */ -+ mptcp_for_each_sub_safe(tcp_sk(sk)->mpcb, mptcp, tmp) { -+ struct sock *sk_it = mptcp_to_sock(mptcp); -+ -+ /* Already did call tcp_close - waiting for graceful -+ * closure, or if we are retransmitting fast-close on -+ * the subflow. The reset (or timeout) will kill the -+ * subflow.. -+ */ -+ if (tcp_sk(sk_it)->closing || -+ tcp_sk(sk_it)->send_mp_fclose) -+ continue; -+ -+ /* Allow the delayed work first to prevent time-wait state */ -+ if (delayed_work_pending(&tcp_sk(sk_it)->mptcp->work)) -+ continue; -+ -+ mptcp_sub_close(sk_it, 0); -+ } -+ } else { -+ mptcp_del_sock(sk); -+ } -+} -+ -+static void mptcp_set_state(struct sock *sk) -+{ -+ struct sock *meta_sk = mptcp_meta_sk(sk); -+ -+ /* Meta is not yet established - wake up the application */ -+ if ((1 << meta_sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV) && -+ sk->sk_state == TCP_ESTABLISHED) { -+ tcp_set_state(meta_sk, TCP_ESTABLISHED); -+ -+ if (!sock_flag(meta_sk, SOCK_DEAD)) { -+ meta_sk->sk_state_change(meta_sk); -+ sk_wake_async(meta_sk, SOCK_WAKE_IO, POLL_OUT); -+ } -+ -+ tcp_sk(meta_sk)->lsndtime = tcp_jiffies32; -+ } -+ -+ if (sk->sk_state == TCP_CLOSE) { -+ if (!sock_flag(sk, SOCK_DEAD)) -+ mptcp_sub_close(sk, 0); -+ } -+} -+ -+static int mptcp_set_congestion_control(struct sock *meta_sk, const char *name, -+ bool load, bool reinit, bool cap_net_admin) -+{ -+ struct mptcp_tcp_sock *mptcp; -+ int err, result = 0; -+ -+ result = __tcp_set_congestion_control(meta_sk, name, load, reinit, cap_net_admin); -+ -+ tcp_sk(meta_sk)->mpcb->tcp_ca_explicit_set = true; -+ -+ mptcp_for_each_sub(tcp_sk(meta_sk)->mpcb, mptcp) { -+ struct sock *sk_it = mptcp_to_sock(mptcp); -+ -+ err = __tcp_set_congestion_control(sk_it, name, load, reinit, cap_net_admin); -+ if (err) -+ result = err; -+ } -+ return result; -+} -+ -+static void mptcp_assign_congestion_control(struct sock *sk) -+{ -+ struct inet_connection_sock *icsk = inet_csk(sk); -+ struct inet_connection_sock *meta_icsk = inet_csk(mptcp_meta_sk(sk)); -+ const struct tcp_congestion_ops *ca = meta_icsk->icsk_ca_ops; -+ -+ /* Congestion control is the same as 
meta. Thus, it has been -+ * try_module_get'd by tcp_assign_congestion_control. -+ * Congestion control on meta was not explicitly configured by -+ * application, leave default or route based. -+ */ -+ if (icsk->icsk_ca_ops == ca || -+ !tcp_sk(mptcp_meta_sk(sk))->mpcb->tcp_ca_explicit_set) -+ return; -+ -+ /* Use the same congestion control as set on the meta-sk */ -+ if (!try_module_get(ca->owner)) { -+ /* This should never happen. The congestion control is linked -+ * to the meta-socket (through tcp_assign_congestion_control) -+ * who "holds" the refcnt on the module. -+ */ -+ WARN(1, "Could not get the congestion control!"); -+ return; -+ } -+ module_put(icsk->icsk_ca_ops->owner); -+ icsk->icsk_ca_ops = ca; -+ -+ /* Clear out private data before diag gets it and -+ * the ca has not been initialized. -+ */ -+ if (ca->get_info) -+ memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); -+ -+ return; -+} -+ -+siphash_key_t mptcp_secret __read_mostly; -+u32 mptcp_seed = 0; -+ -+#define SHA256_DIGEST_WORDS (SHA256_DIGEST_SIZE / 4) -+ -+static void mptcp_key_sha256(const u64 key, u32 *token, u64 *idsn) -+{ -+ u32 mptcp_hashed_key[SHA256_DIGEST_WORDS]; -+ struct sha256_state state; -+ -+ sha256_init(&state); -+ sha256_update(&state, (const u8 *)&key, sizeof(key)); -+ sha256_final(&state, (u8 *)mptcp_hashed_key); -+ -+ if (token) -+ *token = mptcp_hashed_key[0]; -+ if (idsn) -+ *idsn = ntohll(*((__be64 *)&mptcp_hashed_key[6])); -+} -+ -+static void mptcp_hmac_sha256(const u8 *key_1, const u8 *key_2, u8 *hash_out, -+ int arg_num, va_list list) -+{ -+ u8 input[SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE]; -+ struct sha256_state state; -+ int index, msg_length; -+ int length = 0; -+ u8 *msg; -+ int i; -+ -+ /* Generate key xored with ipad */ -+ memset(input, 0x36, SHA256_BLOCK_SIZE); -+ for (i = 0; i < 8; i++) -+ input[i] ^= key_1[i]; -+ for (i = 0; i < 8; i++) -+ input[i + 8] ^= key_2[i]; -+ -+ index = SHA256_BLOCK_SIZE; -+ msg_length = 0; -+ for (i = 0; i < arg_num; i++) { -+ length = va_arg(list, int); -+ msg = va_arg(list, u8 *); -+ BUG_ON(index + length >= sizeof(input)); /* Message is too long */ -+ memcpy(&input[index], msg, length); -+ index += length; -+ msg_length += length; -+ } -+ -+ sha256_init(&state); -+ sha256_update(&state, input, SHA256_BLOCK_SIZE + msg_length); -+ sha256_final(&state, &input[SHA256_BLOCK_SIZE]); -+ -+ /* Prepare second part of hmac */ -+ memset(input, 0x5C, SHA256_BLOCK_SIZE); -+ for (i = 0; i < 8; i++) -+ input[i] ^= key_1[i]; -+ for (i = 0; i < 8; i++) -+ input[i + 8] ^= key_2[i]; -+ -+ sha256_init(&state); -+ sha256_update(&state, input, sizeof(input)); -+ sha256_final(&state, hash_out); -+} -+ -+static void mptcp_key_sha1(u64 key, u32 *token, u64 *idsn) -+{ -+ u32 workspace[SHA_WORKSPACE_WORDS]; -+ u32 mptcp_hashed_key[SHA_DIGEST_WORDS]; -+ u8 input[64]; -+ int i; -+ -+ memset(workspace, 0, sizeof(workspace)); -+ -+ /* Initialize input with appropriate padding */ -+ memset(&input[9], 0, sizeof(input) - 10); /* -10, because the last byte -+ * is explicitly set too -+ */ -+ memcpy(input, &key, sizeof(key)); /* Copy key to the msg beginning */ -+ input[8] = 0x80; /* Padding: First bit after message = 1 */ -+ input[63] = 0x40; /* Padding: Length of the message = 64 bits */ -+ -+ sha_init(mptcp_hashed_key); -+ sha_transform(mptcp_hashed_key, input, workspace); -+ -+ for (i = 0; i < 5; i++) -+ mptcp_hashed_key[i] = (__force u32)cpu_to_be32(mptcp_hashed_key[i]); -+ -+ if (token) -+ *token = mptcp_hashed_key[0]; -+ if (idsn) -+ *idsn = ntohll(*((__be64 
*)&mptcp_hashed_key[3])); -+} -+ -+static void mptcp_key_hash(u8 version, u64 key, u32 *token, u64 *idsn) -+{ -+ if (version == MPTCP_VERSION_0) -+ mptcp_key_sha1(key, token, idsn); -+ else if (version >= MPTCP_VERSION_1) -+ mptcp_key_sha256(key, token, idsn); -+} -+ -+static void mptcp_hmac_sha1(const u8 *key_1, const u8 *key_2, u32 *hash_out, -+ int arg_num, va_list list) -+{ -+ u32 workspace[SHA_WORKSPACE_WORDS]; -+ u8 input[128]; /* 2 512-bit blocks */ -+ int i; -+ int index; -+ int length; -+ u8 *msg; -+ -+ memset(workspace, 0, sizeof(workspace)); -+ -+ /* Generate key xored with ipad */ -+ memset(input, 0x36, 64); -+ for (i = 0; i < 8; i++) -+ input[i] ^= key_1[i]; -+ for (i = 0; i < 8; i++) -+ input[i + 8] ^= key_2[i]; -+ -+ index = 64; -+ for (i = 0; i < arg_num; i++) { -+ length = va_arg(list, int); -+ msg = va_arg(list, u8 *); -+ BUG_ON(index + length > 125); /* Message is too long */ -+ memcpy(&input[index], msg, length); -+ index += length; -+ } -+ -+ input[index] = 0x80; /* Padding: First bit after message = 1 */ -+ memset(&input[index + 1], 0, (126 - index)); -+ -+ /* Padding: Length of the message = 512 + message length (bits) */ -+ input[126] = 0x02; -+ input[127] = ((index - 64) * 8); /* Message length (bits) */ -+ -+ sha_init(hash_out); -+ sha_transform(hash_out, input, workspace); -+ memset(workspace, 0, sizeof(workspace)); -+ -+ sha_transform(hash_out, &input[64], workspace); -+ memset(workspace, 0, sizeof(workspace)); -+ -+ for (i = 0; i < 5; i++) -+ hash_out[i] = (__force u32)cpu_to_be32(hash_out[i]); -+ -+ /* Prepare second part of hmac */ -+ memset(input, 0x5C, 64); -+ for (i = 0; i < 8; i++) -+ input[i] ^= key_1[i]; -+ for (i = 0; i < 8; i++) -+ input[i + 8] ^= key_2[i]; -+ -+ memcpy(&input[64], hash_out, 20); -+ input[84] = 0x80; -+ memset(&input[85], 0, 41); -+ -+ /* Padding: Length of the message = 512 + 160 bits */ -+ input[126] = 0x02; -+ input[127] = 0xA0; -+ -+ sha_init(hash_out); -+ sha_transform(hash_out, input, workspace); -+ memset(workspace, 0, sizeof(workspace)); -+ -+ sha_transform(hash_out, &input[64], workspace); -+ -+ for (i = 0; i < 5; i++) -+ hash_out[i] = (__force u32)cpu_to_be32(hash_out[i]); -+} -+ -+void mptcp_hmac(u8 ver, const u8 *key_1, const u8 *key_2, u8 *hash_out, -+ int arg_num, ...) -+{ -+ va_list args; -+ -+ va_start(args, arg_num); -+ if (ver == MPTCP_VERSION_0) -+ mptcp_hmac_sha1(key_1, key_2, (u32 *)hash_out, arg_num, args); -+ else if (ver >= MPTCP_VERSION_1) -+ mptcp_hmac_sha256(key_1, key_2, hash_out, arg_num, args); -+ va_end(args); -+} -+EXPORT_SYMBOL(mptcp_hmac); -+ -+static void mptcp_mpcb_inherit_sockopts(struct sock *meta_sk, struct sock *master_sk) -+{ -+ /* Socket-options handled by sk_clone_lock while creating the meta-sk. -+ * ====== -+ * SO_SNDBUF, SO_SNDBUFFORCE, SO_RCVBUF, SO_RCVBUFFORCE, SO_RCVLOWAT, -+ * SO_RCVTIMEO, SO_SNDTIMEO, SO_ATTACH_FILTER, SO_DETACH_FILTER, -+ * TCP_NODELAY, TCP_CORK -+ * -+ * Socket-options handled in this function here -+ * ====== -+ * TCP_DEFER_ACCEPT -+ * SO_KEEPALIVE -+ * -+ * Socket-options on the todo-list -+ * ====== -+ * SO_BINDTODEVICE - should probably prevent creation of new subsocks -+ * across other devices. - what about the api-draft? 
-+	 * SO_DEBUG
-+	 * SO_REUSEADDR - probably we don't care about this
-+	 * SO_DONTROUTE, SO_BROADCAST
-+	 * SO_OOBINLINE
-+	 * SO_LINGER
-+	 * SO_TIMESTAMP* - I don't think this is of concern for a SOCK_STREAM
-+	 * SO_PASSSEC - I don't think this is of concern for a SOCK_STREAM
-+	 * SO_RXQ_OVFL
-+	 * TCP_COOKIE_TRANSACTIONS
-+	 * TCP_MAXSEG
-+	 * TCP_THIN_* - Handled by sk_clone_lock, but we need to support this
-+	 *		in mptcp_meta_retransmit_timer. AND we need to check
-+	 *		what should happen with the subsockets.
-+	 * TCP_LINGER2
-+	 * TCP_WINDOW_CLAMP
-+	 * TCP_USER_TIMEOUT
-+	 * TCP_MD5SIG
-+	 *
-+	 * Socket-options of no concern for the meta-socket (but for the subsocket)
-+	 * ======
-+	 * SO_PRIORITY
-+	 * SO_MARK
-+	 * TCP_CONGESTION
-+	 * TCP_SYNCNT
-+	 * TCP_QUICKACK
-+	 */
-+
-+	/* DEFER_ACCEPT should not be set on the meta, as we want to accept new subflows directly */
-+	inet_csk(meta_sk)->icsk_accept_queue.rskq_defer_accept = 0;
-+
-+	/* Keepalives are handled entirely at the MPTCP-layer */
-+	if (sock_flag(meta_sk, SOCK_KEEPOPEN)) {
-+		inet_csk_reset_keepalive_timer(meta_sk,
-+					       keepalive_time_when(tcp_sk(meta_sk)));
-+		sock_reset_flag(master_sk, SOCK_KEEPOPEN);
-+		inet_csk_delete_keepalive_timer(master_sk);
-+	}
-+
-+	/* Do not propagate subflow-errors up to the MPTCP-layer */
-+	inet_sk(master_sk)->recverr = 0;
-+}
-+
-+/* Called without holding lock on meta_sk */
-+static void mptcp_sub_inherit_sockopts(const struct sock *meta_sk, struct sock *sub_sk)
-+{
-+	__u8 meta_tos;
-+
-+	/* IP_TOS also goes to the subflow. */
-+	meta_tos = READ_ONCE(inet_sk(meta_sk)->tos);
-+	if (inet_sk(sub_sk)->tos != meta_tos) {
-+		inet_sk(sub_sk)->tos = meta_tos;
-+		sub_sk->sk_priority = meta_sk->sk_priority;
-+		sk_dst_reset(sub_sk);
-+	}
-+
-+	/* IPV6_TCLASS */
-+	if (sub_sk->sk_family == AF_INET6 && meta_sk->sk_family == AF_INET6)
-+		inet6_sk(sub_sk)->tclass = inet6_sk(meta_sk)->tclass;
-+
-+	/* Inherit SO_REUSEADDR */
-+	sub_sk->sk_reuse = meta_sk->sk_reuse;
-+
-+	/* Inherit SO_MARK: can be used for routing or filtering */
-+	sub_sk->sk_mark = meta_sk->sk_mark;
-+
-+	/* Inherit snd/rcv-buffer locks */
-+	sub_sk->sk_userlocks = meta_sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
-+
-+	/* Nagle/Cork is forced off on the subflows. It is handled at the meta-layer */
-+	tcp_sk(sub_sk)->nonagle = TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
-+
-+	/* Keepalives are handled entirely at the MPTCP-layer */
-+	if (sock_flag(sub_sk, SOCK_KEEPOPEN)) {
-+		sock_reset_flag(sub_sk, SOCK_KEEPOPEN);
-+		inet_csk_delete_keepalive_timer(sub_sk);
-+	}
-+
-+	/* Do not propagate subflow-errors up to the MPTCP-layer */
-+	inet_sk(sub_sk)->recverr = 0;
-+}
-+
-+void mptcp_prepare_for_backlog(struct sock *sk, struct sk_buff *skb)
-+{
-+	/* In case of success (in mptcp_backlog_rcv) and error (in kfree_skb) of
-+	 * sk_add_backlog, we will decrement the sk refcount.
-+	 */
-+	sock_hold(sk);
-+	skb->sk = sk;
-+	skb->destructor = sock_efree;
-+}
-+
-+int mptcp_backlog_rcv(struct sock *meta_sk, struct sk_buff *skb)
-+{
-+	/* skb->sk may be NULL if we receive a packet immediately after the
-+	 * SYN/ACK + MP_CAPABLE.
-+	 */
-+	struct sock *sk = skb->sk ? skb->sk : meta_sk;
-+	int ret = 0;
-+
-+	if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) {
-+		kfree_skb(skb);
-+		return 0;
-+	}
-+
-+	/* Decrement sk refcnt when calling the skb destructor.
-+	 * Refcnt is incremented and skb destructor is set in tcp_v{4,6}_rcv via
-+	 * mptcp_prepare_for_backlog() above.
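A note on the key-hash helpers earlier in this file (mptcp_key_sha1()/mptcp_key_sha256()): per RFC 6824, the token is the most-significant 32 bits of the key's digest and the initial data sequence number (IDSN) the least-significant 64 bits. A minimal userspace sketch of the v0 (SHA-1) derivation, assuming OpenSSL for the digest and writing the byte-order handling out explicitly instead of using cpu_to_be32()/ntohll():

#include <stdint.h>
#include <stdio.h>
#include <openssl/sha.h>

/* Token = first 32 bits of SHA-1(key), IDSN = last 64 bits, mirroring
 * mptcp_key_sha1(). The 64-bit key is hashed as an 8-byte blob, exactly
 * as it sits in memory. */
static void key_sha1(uint64_t key, uint32_t *token, uint64_t *idsn)
{
	unsigned char d[SHA_DIGEST_LENGTH];	/* 20 bytes */
	int i;

	SHA1((const unsigned char *)&key, sizeof(key), d);

	*token = ((uint32_t)d[0] << 24) | (d[1] << 16) | (d[2] << 8) | d[3];
	*idsn = 0;
	for (i = 12; i < 20; i++)
		*idsn = (*idsn << 8) | d[i];
}

int main(void)
{
	uint32_t token;
	uint64_t idsn;

	key_sha1(0x0102030405060708ULL, &token, &idsn);
	printf("token %#x idsn %#llx\n", token, (unsigned long long)idsn);
	return 0;
}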
-+ */ -+ skb_orphan(skb); -+ -+ if (sk->sk_family == AF_INET) -+ ret = tcp_v4_do_rcv(sk, skb); -+#if IS_ENABLED(CONFIG_IPV6) -+ else -+ ret = tcp_v6_do_rcv(sk, skb); -+#endif -+ -+ sock_put(sk); -+ return ret; -+} -+ -+static void mptcp_init_buffer_space(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct sock *meta_sk = mptcp_meta_sk(sk); -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ int space; -+ -+ tcp_init_buffer_space(sk); -+ -+ if (is_master_tp(tp)) { -+ meta_tp->rcvq_space.space = meta_tp->rcv_wnd; -+ tcp_mstamp_refresh(meta_tp); -+ meta_tp->rcvq_space.time = meta_tp->tcp_mstamp; -+ meta_tp->rcvq_space.seq = meta_tp->copied_seq; -+ -+ /* If there is only one subflow, we just use regular TCP -+ * autotuning. User-locks are handled already by -+ * tcp_init_buffer_space -+ */ -+ meta_tp->window_clamp = tp->window_clamp; -+ meta_tp->rcv_ssthresh = tp->rcv_ssthresh; -+ meta_sk->sk_rcvbuf = sk->sk_rcvbuf; -+ meta_sk->sk_sndbuf = sk->sk_sndbuf; -+ -+ return; -+ } -+ -+ if (meta_sk->sk_userlocks & SOCK_RCVBUF_LOCK) -+ goto snd_buf; -+ -+ /* Adding a new subflow to the rcv-buffer space. We make a simple -+ * addition, to give some space to allow traffic on the new subflow. -+ * Autotuning will increase it further later on. -+ */ -+ space = min(meta_sk->sk_rcvbuf + sk->sk_rcvbuf, -+ sock_net(meta_sk)->ipv4.sysctl_tcp_rmem[2]); -+ if (space > meta_sk->sk_rcvbuf) { -+ meta_tp->window_clamp += tp->window_clamp; -+ meta_tp->rcv_ssthresh += tp->rcv_ssthresh; -+ meta_sk->sk_rcvbuf = space; -+ } -+ -+snd_buf: -+ if (meta_sk->sk_userlocks & SOCK_SNDBUF_LOCK) -+ return; -+ -+ /* Adding a new subflow to the send-buffer space. We make a simple -+ * addition, to give some space to allow traffic on the new subflow. -+ * Autotuning will increase it further later on. 
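The buffer-space growth around this point (rcv-buffer above, snd-buffer just below) is deliberately simple: add the new subflow's buffer to the meta buffer, cap at the sysctl limit, and never shrink; autotuning takes over afterwards. The policy in isolation, a sketch where rmem_max stands in for sysctl_tcp_rmem[2]:

/* Clamped additive growth applied when a subflow joins. */
static int grow_bufsize(int meta_buf, int sub_buf, int rmem_max)
{
	int space = meta_buf + sub_buf;

	if (space > rmem_max)
		space = rmem_max;

	/* Only ever grow; a smaller result leaves the meta buffer as-is. */
	return space > meta_buf ? space : meta_buf;
}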
-+ */ -+ space = min(meta_sk->sk_sndbuf + sk->sk_sndbuf, -+ sock_net(meta_sk)->ipv4.sysctl_tcp_wmem[2]); -+ if (space > meta_sk->sk_sndbuf) { -+ meta_sk->sk_sndbuf = space; -+ meta_sk->sk_write_space(meta_sk); -+ } -+} -+ -+struct lock_class_key meta_key; -+char *meta_key_name = "sk_lock-AF_INET-MPTCP"; -+struct lock_class_key meta_slock_key; -+char *meta_slock_key_name = "slock-AF_INET-MPTCP"; -+ -+static const struct tcp_sock_ops mptcp_meta_specific = { -+ .__select_window = __mptcp_select_window, -+ .select_window = mptcp_select_window, -+ .select_initial_window = mptcp_select_initial_window, -+ .init_buffer_space = mptcp_init_buffer_space, -+ .set_rto = mptcp_tcp_set_rto, -+ .should_expand_sndbuf = mptcp_should_expand_sndbuf, -+ .send_fin = mptcp_send_fin, -+ .write_xmit = mptcp_write_xmit, -+ .send_active_reset = mptcp_send_active_reset, -+ .write_wakeup = mptcp_write_wakeup, -+ .retransmit_timer = mptcp_meta_retransmit_timer, -+ .time_wait = mptcp_time_wait, -+ .cleanup_rbuf = mptcp_cleanup_rbuf, -+ .set_cong_ctrl = mptcp_set_congestion_control, -+}; -+ -+static const struct tcp_sock_ops mptcp_sub_specific = { -+ .__select_window = __mptcp_select_window, -+ .select_window = mptcp_select_window, -+ .select_initial_window = mptcp_select_initial_window, -+ .init_buffer_space = mptcp_init_buffer_space, -+ .set_rto = mptcp_tcp_set_rto, -+ .should_expand_sndbuf = mptcp_should_expand_sndbuf, -+ .send_fin = tcp_send_fin, -+ .write_xmit = tcp_write_xmit, -+ .send_active_reset = tcp_send_active_reset, -+ .write_wakeup = tcp_write_wakeup, -+ .retransmit_timer = mptcp_sub_retransmit_timer, -+ .time_wait = tcp_time_wait, -+ .cleanup_rbuf = tcp_cleanup_rbuf, -+ .set_cong_ctrl = __tcp_set_congestion_control, -+}; -+ -+void mptcp_initialize_recv_vars(struct tcp_sock *meta_tp, struct mptcp_cb *mpcb, -+ __u64 remote_key) -+{ -+ u64 idsn; -+ -+ mpcb->mptcp_rem_key = remote_key; -+ mpcb->rem_key_set = 1; -+ mptcp_key_hash(mpcb->mptcp_ver, mpcb->mptcp_rem_key, &mpcb->mptcp_rem_token, &idsn); -+ -+ idsn++; -+ mpcb->rcv_high_order[0] = idsn >> 32; -+ mpcb->rcv_high_order[1] = mpcb->rcv_high_order[0] + 1; -+ meta_tp->copied_seq = (u32)idsn; -+ meta_tp->rcv_nxt = (u32)idsn; -+ meta_tp->rcv_wup = (u32)idsn; -+ meta_tp->rcv_right_edge = meta_tp->rcv_wup + meta_tp->rcv_wnd; -+ -+ meta_tp->snd_wl1 = meta_tp->rcv_nxt - 1; -+} -+ -+static int mptcp_alloc_mpcb(struct sock *meta_sk, __u64 remote_key, -+ int rem_key_set, __u8 mptcp_ver, u32 window) -+{ -+ struct mptcp_cb *mpcb; -+ struct sock *master_sk; -+ struct inet_connection_sock *meta_icsk = inet_csk(meta_sk); -+ struct tcp_sock *master_tp, *meta_tp = tcp_sk(meta_sk); -+ u64 snd_idsn; -+ -+ dst_release(meta_sk->sk_rx_dst); -+ meta_sk->sk_rx_dst = NULL; -+ /* This flag is set to announce sock_lock_init to -+ * reclassify the lock-class of the master socket. 
-+ */ -+ meta_tp->is_master_sk = 1; -+ master_sk = sk_clone_lock(meta_sk, GFP_ATOMIC | __GFP_ZERO); -+ meta_tp->is_master_sk = 0; -+ if (!master_sk) -+ goto err_alloc_master; -+ -+ /* Same as in inet_csk_clone_lock - need to init to 0 */ -+ memset(&inet_csk(master_sk)->icsk_accept_queue, 0, -+ sizeof(inet_csk(master_sk)->icsk_accept_queue)); -+ -+ master_tp = tcp_sk(master_sk); -+ master_tp->inside_tk_table = 0; -+ -+ mpcb = kmem_cache_zalloc(mptcp_cb_cache, GFP_ATOMIC); -+ if (!mpcb) -+ goto err_alloc_mpcb; -+ -+ /* Store the mptcp version agreed on initial handshake */ -+ mpcb->mptcp_ver = mptcp_ver; -+ -+ /* Store the keys and generate the peer's token */ -+ mpcb->mptcp_loc_key = meta_tp->mptcp_loc_key; -+ mpcb->mptcp_loc_token = meta_tp->mptcp_loc_token; -+ -+ /* Generate Initial data-sequence-numbers */ -+ mptcp_key_hash(mpcb->mptcp_ver, mpcb->mptcp_loc_key, NULL, &snd_idsn); -+ snd_idsn++; -+ mpcb->snd_high_order[0] = snd_idsn >> 32; -+ mpcb->snd_high_order[1] = mpcb->snd_high_order[0] - 1; -+ -+ mpcb->meta_sk = meta_sk; -+ mpcb->master_sk = master_sk; -+ -+ skb_queue_head_init(&mpcb->reinject_queue); -+ mutex_init(&mpcb->mpcb_mutex); -+ -+ /* Init time-wait stuff */ -+ INIT_LIST_HEAD(&mpcb->tw_list); -+ -+ INIT_HLIST_HEAD(&mpcb->callback_list); -+ INIT_HLIST_HEAD(&mpcb->conn_list); -+ spin_lock_init(&mpcb->mpcb_list_lock); -+ -+ mpcb->orig_sk_rcvbuf = meta_sk->sk_rcvbuf; -+ mpcb->orig_sk_sndbuf = meta_sk->sk_sndbuf; -+ mpcb->orig_window_clamp = meta_tp->window_clamp; -+ -+ /* The meta is directly linked - set refcnt to 1 */ -+ refcount_set(&mpcb->mpcb_refcnt, 1); -+ -+ if (!meta_tp->inside_tk_table) { -+ /* Adding the meta_tp in the token hashtable - coming from server-side */ -+ rcu_read_lock(); -+ local_bh_disable(); -+ spin_lock(&mptcp_tk_hashlock); -+ -+ /* With lockless listeners, we might process two ACKs at the -+ * same time. With TCP, inet_csk_complete_hashdance takes care -+ * of this. But, for MPTCP this would be too late if we add -+ * this MPTCP-socket in the token table (new subflows might -+ * come in and match on this socket here. -+ * So, we need to check if someone else already added the token -+ * and revert in that case. The other guy won the race... 
-+ */ -+ if (mptcp_find_token(mpcb->mptcp_loc_token)) { -+ spin_unlock(&mptcp_tk_hashlock); -+ local_bh_enable(); -+ rcu_read_unlock(); -+ -+ goto err_insert_token; -+ } -+ __mptcp_hash_insert(meta_tp, mpcb->mptcp_loc_token); -+ -+ spin_unlock(&mptcp_tk_hashlock); -+ local_bh_enable(); -+ rcu_read_unlock(); -+ } -+ -+#if IS_ENABLED(CONFIG_IPV6) -+ if (meta_icsk->icsk_af_ops == &mptcp_v6_mapped) { -+ struct tcp6_sock *master_tp6 = (struct tcp6_sock *)master_sk; -+ struct ipv6_pinfo *newnp, *np = inet6_sk(meta_sk); -+ -+ inet_sk(master_sk)->pinet6 = &master_tp6->inet6; -+ -+ newnp = inet6_sk(master_sk); -+ memcpy(newnp, np, sizeof(struct ipv6_pinfo)); -+ -+ newnp->ipv6_mc_list = NULL; -+ newnp->ipv6_ac_list = NULL; -+ newnp->ipv6_fl_list = NULL; -+ newnp->pktoptions = NULL; -+ newnp->opt = NULL; -+ -+ newnp->rxopt.all = 0; -+ newnp->repflow = 0; -+ np->rxopt.all = 0; -+ np->repflow = 0; -+ } else if (meta_sk->sk_family == AF_INET6) { -+ struct tcp6_sock *master_tp6 = (struct tcp6_sock *)master_sk; -+ struct ipv6_pinfo *newnp, *np = inet6_sk(meta_sk); -+ struct ipv6_txoptions *opt; -+ -+ inet_sk(master_sk)->pinet6 = &master_tp6->inet6; -+ -+ /* The following heavily inspired from tcp_v6_syn_recv_sock() */ -+ newnp = inet6_sk(master_sk); -+ memcpy(newnp, np, sizeof(struct ipv6_pinfo)); -+ -+ newnp->ipv6_mc_list = NULL; -+ newnp->ipv6_ac_list = NULL; -+ newnp->ipv6_fl_list = NULL; -+ newnp->pktoptions = NULL; -+ newnp->opt = NULL; -+ -+ newnp->rxopt.all = 0; -+ newnp->repflow = 0; -+ np->rxopt.all = 0; -+ np->repflow = 0; -+ -+ opt = rcu_dereference(np->opt); -+ if (opt) { -+ opt = ipv6_dup_options(master_sk, opt); -+ RCU_INIT_POINTER(newnp->opt, opt); -+ } -+ inet_csk(master_sk)->icsk_ext_hdr_len = 0; -+ if (opt) -+ inet_csk(master_sk)->icsk_ext_hdr_len = opt->opt_nflen + -+ opt->opt_flen; -+ } -+#endif -+ -+ meta_tp->mptcp = NULL; -+ -+ meta_tp->write_seq = (u32)snd_idsn; -+ meta_tp->snd_sml = meta_tp->write_seq; -+ meta_tp->snd_una = meta_tp->write_seq; -+ meta_tp->snd_nxt = meta_tp->write_seq; -+ meta_tp->pushed_seq = meta_tp->write_seq; -+ meta_tp->snd_up = meta_tp->write_seq; -+ -+ if (rem_key_set) -+ mptcp_initialize_recv_vars(meta_tp, mpcb, remote_key); -+ -+ meta_tp->snd_wnd = window; -+ meta_tp->retrans_stamp = 0; /* Set in tcp_connect() */ -+ -+ meta_tp->packets_out = 0; -+ meta_icsk->icsk_probes_out = 0; -+ -+ rcu_assign_pointer(inet_sk(meta_sk)->inet_opt, NULL); -+ -+ /* Set mptcp-pointers */ -+ master_tp->mpcb = mpcb; -+ master_tp->meta_sk = meta_sk; -+ meta_tp->mpcb = mpcb; -+ meta_tp->meta_sk = meta_sk; -+ -+ /* Initialize the queues */ -+ master_tp->out_of_order_queue = RB_ROOT; -+ master_sk->tcp_rtx_queue = RB_ROOT; -+ INIT_LIST_HEAD(&master_tp->tsq_node); -+ INIT_LIST_HEAD(&master_tp->tsorted_sent_queue); -+ -+ master_tp->fastopen_req = NULL; -+ -+ master_sk->sk_tsq_flags = 0; -+ /* icsk_bind_hash inherited from the meta, but it will be properly set in -+ * mptcp_create_master_sk. Same operation is done in inet_csk_clone_lock. -+ */ -+ inet_csk(master_sk)->icsk_bind_hash = NULL; -+ -+ /* Init the accept_queue structure, we support a queue of 32 pending -+ * connections, it does not need to be huge, since we only store here -+ * pending subflow creations. 
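The token-insertion sequence above is a check-then-insert performed entirely under mptcp_tk_hashlock: with lockless listeners, two ACKs can race to instantiate the same meta-socket, and the loser must find the already-inserted token and unwind (the err_insert_token path). A userspace sketch of the same pattern, with a flat array standing in for the hash table:

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

#define TK_SLOTS 64
static uint32_t tk_table[TK_SLOTS];
static unsigned int tk_count;
static pthread_mutex_t tk_lock = PTHREAD_MUTEX_INITIALIZER;

static bool tk_contains(uint32_t token)
{
	unsigned int i;

	for (i = 0; i < tk_count; i++)
		if (tk_table[i] == token)
			return true;
	return false;
}

/* Insert a token only if nobody beat us to it; returns false when the
 * caller lost the race and must revert its own setup. */
static bool tk_insert_once(uint32_t token)
{
	bool won = false;

	pthread_mutex_lock(&tk_lock);
	if (!tk_contains(token) && tk_count < TK_SLOTS) {
		tk_table[tk_count++] = token;
		won = true;
	}
	pthread_mutex_unlock(&tk_lock);
	return won;
}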
-+ */ -+ reqsk_queue_alloc(&meta_icsk->icsk_accept_queue); -+ meta_sk->sk_max_ack_backlog = 32; -+ meta_sk->sk_ack_backlog = 0; -+ -+ if (!sock_flag(meta_sk, SOCK_MPTCP)) { -+ mptcp_enable_static_key_bh(); -+ sock_set_flag(meta_sk, SOCK_MPTCP); -+ } -+ -+ /* Redefine function-pointers as the meta-sk is now fully ready */ -+ meta_tp->mpc = 1; -+ meta_tp->ops = &mptcp_meta_specific; -+ -+ meta_sk->sk_backlog_rcv = mptcp_backlog_rcv; -+ meta_sk->sk_destruct = mptcp_sock_destruct; -+ -+ /* Meta-level retransmit timer */ -+ meta_icsk->icsk_rto *= 2; /* Double of initial - rto */ -+ -+ tcp_init_xmit_timers(master_sk); -+ /* Has been set for sending out the SYN */ -+ inet_csk_clear_xmit_timer(meta_sk, ICSK_TIME_RETRANS); -+ -+ mptcp_mpcb_inherit_sockopts(meta_sk, master_sk); -+ -+ mptcp_init_path_manager(mpcb); -+ mptcp_init_scheduler(mpcb); -+ -+ if (!try_module_get(inet_csk(master_sk)->icsk_ca_ops->owner)) -+ tcp_assign_congestion_control(master_sk); -+ -+ master_tp->saved_syn = NULL; -+ -+ mptcp_debug("%s: created mpcb with token %#x\n", -+ __func__, mpcb->mptcp_loc_token); -+ -+ return 0; -+ -+err_insert_token: -+ kmem_cache_free(mptcp_cb_cache, mpcb); -+ -+err_alloc_mpcb: -+ inet_sk(master_sk)->inet_opt = NULL; -+ master_sk->sk_state = TCP_CLOSE; -+ sock_orphan(master_sk); -+ bh_unlock_sock(master_sk); -+ sk_free(master_sk); -+ -+err_alloc_master: -+ return -ENOBUFS; -+} -+ -+/* Called without holding lock on mpcb */ -+static u8 mptcp_set_new_pathindex(struct mptcp_cb *mpcb) -+{ -+ int i; -+ -+ /* Start at 1, because 0 is reserved for the meta-sk */ -+ for (i = 1; i < sizeof(mpcb->path_index_bits) * 8; i++) { -+ if (!test_and_set_bit(i, &mpcb->path_index_bits)) -+ break; -+ } -+ -+ if (i == sizeof(mpcb->path_index_bits) * 8) -+ return 0; -+ return i; -+} -+ -+/* May be called without holding the meta-level lock */ -+int mptcp_add_sock(struct sock *meta_sk, struct sock *sk, u8 loc_id, u8 rem_id, -+ gfp_t flags) -+{ -+ struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct tcp_sock *tp = tcp_sk(sk); -+ -+ tp->mptcp = kmem_cache_zalloc(mptcp_sock_cache, flags); -+ if (!tp->mptcp) -+ return -ENOMEM; -+ -+ tp->mptcp->path_index = mptcp_set_new_pathindex(mpcb); -+ /* No more space for more subflows? */ -+ if (!tp->mptcp->path_index) { -+ kmem_cache_free(mptcp_sock_cache, tp->mptcp); -+ return -EPERM; -+ } -+ -+ INIT_HLIST_NODE(&tp->mptcp->cb_list); -+ -+ tp->mptcp->tp = tp; -+ tp->mpcb = mpcb; -+ tp->meta_sk = meta_sk; -+ -+ if (!sock_flag(sk, SOCK_MPTCP)) { -+ mptcp_enable_static_key_bh(); -+ sock_set_flag(sk, SOCK_MPTCP); -+ } -+ -+ tp->mpc = 1; -+ tp->ops = &mptcp_sub_specific; -+ -+ tp->mptcp->loc_id = loc_id; -+ tp->mptcp->rem_id = rem_id; -+ if (mpcb->sched_ops->init) -+ mpcb->sched_ops->init(sk); -+ -+ /* The corresponding sock_put is in mptcp_sock_destruct(). It cannot be -+ * included in mptcp_del_sock(), because the mpcb must remain alive -+ * until the last subsocket is completely destroyed. 
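mptcp_set_new_pathindex() above hands out path indices from a single unsigned long used as a bitmap: index 0 is reserved for the meta-socket, and a return value of 0 (bitmap full) caps the connection at BITS_PER_LONG - 1 subflows; mptcp_del_sock() later returns an index with a plain mask. A userspace sketch using the GCC/Clang __atomic builtins in place of test_and_set_bit():

#include <limits.h>

/* Claim the lowest free path index >= 1, or 0 when the bitmap is full.
 * __atomic_fetch_or() has test_and_set_bit() semantics: the returned old
 * value tells us whether the bit was already taken. */
static unsigned int pathindex_alloc(unsigned long *bits)
{
	unsigned int i;

	for (i = 1; i < sizeof(*bits) * CHAR_BIT; i++) {
		unsigned long mask = 1UL << i;

		if (!(__atomic_fetch_or(bits, mask, __ATOMIC_SEQ_CST) & mask))
			return i;	/* bit was clear - the index is ours */
	}
	return 0;
}

/* Release an index again, as mptcp_del_sock() does with its mask. */
static void pathindex_free(unsigned long *bits, unsigned int i)
{
	__atomic_fetch_and(bits, ~(1UL << i), __ATOMIC_SEQ_CST);
}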
-+ */ -+ sock_hold(meta_sk); -+ refcount_inc(&mpcb->mpcb_refcnt); -+ -+ spin_lock_bh(&mpcb->mpcb_list_lock); -+ hlist_add_head_rcu(&tp->mptcp->node, &mpcb->conn_list); -+ spin_unlock_bh(&mpcb->mpcb_list_lock); -+ -+ tp->mptcp->attached = 1; -+ -+ mptcp_sub_inherit_sockopts(meta_sk, sk); -+ INIT_DELAYED_WORK(&tp->mptcp->work, mptcp_sub_close_wq); -+ -+ /* Properly inherit CC from the meta-socket */ -+ mptcp_assign_congestion_control(sk); -+ -+ /* As we successfully allocated the mptcp_tcp_sock, we have to -+ * change the function-pointers here (for sk_destruct to work correctly) -+ */ -+ sk->sk_error_report = mptcp_sock_def_error_report; -+ sk->sk_data_ready = mptcp_data_ready; -+ sk->sk_write_space = mptcp_write_space; -+ sk->sk_state_change = mptcp_set_state; -+ sk->sk_destruct = mptcp_sock_destruct; -+ -+ if (sk->sk_family == AF_INET) -+ mptcp_debug("%s: token %#x pi %d, src_addr:%pI4:%d dst_addr:%pI4:%d\n", -+ __func__ , mpcb->mptcp_loc_token, -+ tp->mptcp->path_index, -+ &((struct inet_sock *)tp)->inet_saddr, -+ ntohs(((struct inet_sock *)tp)->inet_sport), -+ &((struct inet_sock *)tp)->inet_daddr, -+ ntohs(((struct inet_sock *)tp)->inet_dport)); -+#if IS_ENABLED(CONFIG_IPV6) -+ else -+ mptcp_debug("%s: token %#x pi %d, src_addr:%pI6:%d dst_addr:%pI6:%d\n", -+ __func__ , mpcb->mptcp_loc_token, -+ tp->mptcp->path_index, &inet6_sk(sk)->saddr, -+ ntohs(((struct inet_sock *)tp)->inet_sport), -+ &sk->sk_v6_daddr, -+ ntohs(((struct inet_sock *)tp)->inet_dport)); -+#endif -+ -+ return 0; -+} -+ -+void mptcp_del_sock(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct mptcp_cb *mpcb; -+ -+ if (!tp->mptcp || !tp->mptcp->attached) -+ return; -+ -+ mpcb = tp->mpcb; -+ -+ if (mpcb->sched_ops->release) -+ mpcb->sched_ops->release(sk); -+ -+ if (mpcb->pm_ops->delete_subflow) -+ mpcb->pm_ops->delete_subflow(sk); -+ -+ mptcp_debug("%s: Removing subsock tok %#x pi:%d state %d is_meta? %d\n", -+ __func__, mpcb->mptcp_loc_token, tp->mptcp->path_index, -+ sk->sk_state, is_meta_sk(sk)); -+ -+ spin_lock_bh(&mpcb->mpcb_list_lock); -+ hlist_del_init_rcu(&tp->mptcp->node); -+ spin_unlock_bh(&mpcb->mpcb_list_lock); -+ -+ tp->mptcp->attached = 0; -+ mpcb->path_index_bits &= ~(1 << tp->mptcp->path_index); -+ -+ if (!tcp_write_queue_empty(sk) || !tcp_rtx_queue_empty(sk)) -+ mptcp_reinject_data(sk, 0); -+ -+ if (is_master_tp(tp)) { -+ struct sock *meta_sk = mptcp_meta_sk(sk); -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ -+ if (meta_tp->record_master_info && -+ !sock_flag(meta_sk, SOCK_DEAD)) { -+ mpcb->master_info = kmalloc(sizeof(*mpcb->master_info), -+ GFP_ATOMIC); -+ -+ if (mpcb->master_info) -+ tcp_get_info(sk, mpcb->master_info, true); -+ } -+ -+ mpcb->master_sk = NULL; -+ } else if (tp->mptcp->pre_established) { -+ sk_stop_timer(sk, &tp->mptcp->mptcp_ack_timer); -+ } -+} -+ -+/* Updates the MPTCP-session based on path-manager information (e.g., addresses, -+ * low-prio flows,...). -+ */ -+void mptcp_update_metasocket(const struct sock *meta_sk) -+{ -+ if (tcp_sk(meta_sk)->mpcb->pm_ops->new_session) -+ tcp_sk(meta_sk)->mpcb->pm_ops->new_session(meta_sk); -+} -+ -+/* Clean up the receive buffer for full frames taken by the user, -+ * then send an ACK if necessary. COPIED is the number of bytes -+ * tcp_recvmsg has given to the user so far, it speeds up the -+ * calculation of whether or not we must ACK for the sake of -+ * a window update. 
-+ * (inspired from tcp_cleanup_rbuf()) -+ */ -+void mptcp_cleanup_rbuf(struct sock *meta_sk, int copied) -+{ -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ bool recheck_rcv_window = false; -+ struct mptcp_tcp_sock *mptcp; -+ __u32 rcv_window_now = 0; -+ -+ if (copied > 0 && !(meta_sk->sk_shutdown & RCV_SHUTDOWN)) { -+ rcv_window_now = tcp_receive_window_now(meta_tp); -+ -+ /* Optimize, __mptcp_select_window() is not cheap. */ -+ if (2 * rcv_window_now <= meta_tp->window_clamp) -+ recheck_rcv_window = true; -+ } -+ -+ mptcp_for_each_sub(meta_tp->mpcb, mptcp) { -+ struct sock *sk = mptcp_to_sock(mptcp); -+ struct tcp_sock *tp = tcp_sk(sk); -+ const struct inet_connection_sock *icsk = inet_csk(sk); -+ -+ if (!mptcp_sk_can_send_ack(sk)) -+ continue; -+ -+ if (!inet_csk_ack_scheduled(sk)) -+ goto second_part; -+ /* Delayed ACKs frequently hit locked sockets during bulk -+ * receive. -+ */ -+ if (icsk->icsk_ack.blocked || -+ /* Once-per-two-segments ACK was not sent by tcp_input.c */ -+ tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss || -+ /* If this read emptied read buffer, we send ACK, if -+ * connection is not bidirectional, user drained -+ * receive buffer and there was a small segment -+ * in queue. -+ */ -+ (copied > 0 && -+ ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || -+ ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && -+ !icsk->icsk_ack.pingpong)) && -+ !atomic_read(&meta_sk->sk_rmem_alloc))) { -+ tcp_send_ack(sk); -+ continue; -+ } -+ -+second_part: -+ /* This here is the second part of tcp_cleanup_rbuf */ -+ if (recheck_rcv_window) { -+ __u32 new_window = tp->ops->__select_window(sk); -+ -+ /* Send ACK now, if this read freed lots of space -+ * in our buffer. Certainly, new_window is new window. -+ * We can advertise it now, if it is not less than -+ * current one. -+ * "Lots" means "at least twice" here. -+ */ -+ if (new_window && new_window >= 2 * rcv_window_now) -+ tcp_send_ack(sk); -+ } -+ } -+} -+ -+static int mptcp_sub_send_fin(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct sk_buff *skb = tcp_write_queue_tail(sk); -+ int mss_now; -+ -+ /* Optimization, tack on the FIN if we have a queue of -+ * unsent frames. But be careful about outgoing SACKS -+ * and IP options. -+ */ -+ mss_now = tcp_current_mss(sk); -+ -+ if (tcp_send_head(sk) != NULL) { -+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; -+ TCP_SKB_CB(skb)->end_seq++; -+ tp->write_seq++; -+ } else { -+ skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_ATOMIC); -+ if (!skb) -+ return 1; -+ -+ INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); -+ skb_reserve(skb, MAX_TCP_HEADER); -+ /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). 
*/
-+		tcp_init_nondata_skb(skb, tp->write_seq,
-+				     TCPHDR_ACK | TCPHDR_FIN);
-+		tcp_queue_skb(sk, skb);
-+	}
-+	__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
-+
-+	return 0;
-+}
-+
-+static void mptcp_sub_close_doit(struct sock *sk)
-+{
-+	struct sock *meta_sk = mptcp_meta_sk(sk);
-+	struct tcp_sock *tp = tcp_sk(sk);
-+
-+	if (sock_flag(sk, SOCK_DEAD))
-+		return;
-+
-+	if (meta_sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE) {
-+		tp->closing = 1;
-+		tcp_close(sk, 0);
-+	} else if (tcp_close_state(sk)) {
-+		sk->sk_shutdown |= SEND_SHUTDOWN;
-+		tcp_send_fin(sk);
-+	}
-+}
-+
-+void mptcp_sub_close_wq(struct work_struct *work)
-+{
-+	struct tcp_sock *tp = container_of(work, struct mptcp_tcp_sock, work.work)->tp;
-+	struct sock *sk = (struct sock *)tp;
-+	struct mptcp_cb *mpcb = tp->mpcb;
-+	struct sock *meta_sk = mptcp_meta_sk(sk);
-+
-+	mutex_lock(&mpcb->mpcb_mutex);
-+	lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING);
-+
-+	mptcp_sub_close_doit(sk);
-+
-+	release_sock(meta_sk);
-+	mutex_unlock(&mpcb->mpcb_mutex);
-+	mptcp_mpcb_put(mpcb);
-+	sock_put(sk);
-+}
-+
-+void mptcp_sub_close(struct sock *sk, unsigned long delay)
-+{
-+	struct tcp_sock *tp = tcp_sk(sk);
-+	struct delayed_work *work = &tcp_sk(sk)->mptcp->work;
-+
-+	/* We are already closing - e.g., call from sock_def_error_report upon
-+	 * tcp_disconnect in tcp_close.
-+	 */
-+	if (tp->closing)
-+		return;
-+
-+	/* Work already scheduled? */
-+	if (work_pending(&work->work)) {
-+		/* Work present - who will be first? */
-+		if (jiffies + delay > work->timer.expires)
-+			return;
-+
-+		/* Try canceling - if it fails, work will be executed soon */
-+		if (!cancel_delayed_work(work))
-+			return;
-+		sock_put(sk);
-+		mptcp_mpcb_put(tp->mpcb);
-+	}
-+
-+	if (!delay) {
-+		unsigned char old_state = sk->sk_state;
-+
-+		/* We send the FIN directly, because it may take a long time
-+		 * until the work-queue gets scheduled...
-+		 *
-+		 * If mptcp_sub_send_fin returns 1, it failed and thus we reset
-+		 * the old state so that tcp_close will finally send the FIN
-+		 * in user-context.
-+		 */
-+		if (!sk->sk_err && old_state != TCP_CLOSE &&
-+		    tcp_close_state(sk) && mptcp_sub_send_fin(sk)) {
-+			if (old_state == TCP_ESTABLISHED)
-+				TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
-+			sk->sk_state = old_state;
-+		}
-+	}
-+
-+	sock_hold(sk);
-+	refcount_inc(&tp->mpcb->mpcb_refcnt);
-+	queue_delayed_work(mptcp_wq, work, delay);
-+}
-+
-+void mptcp_sub_force_close(struct sock *sk)
-+{
-+	/* The below tcp_done may have freed the socket, if it is already dead.
-+	 * Thus, we are not allowed to access it afterwards. That's why
-+	 * we have to store the dead-state in this local variable.
-+ */ -+ int sock_is_dead = sock_flag(sk, SOCK_DEAD); -+ -+ tcp_sk(sk)->mp_killed = 1; -+ -+ if (sk->sk_state != TCP_CLOSE) -+ tcp_done(sk); -+ -+ if (!sock_is_dead) -+ mptcp_sub_close(sk, 0); -+} -+EXPORT_SYMBOL(mptcp_sub_force_close); -+ -+/* Update the mpcb send window, based on the contributions -+ * of each subflow -+ */ -+void mptcp_update_sndbuf(const struct tcp_sock *tp) -+{ -+ struct sock *meta_sk = tp->meta_sk; -+ int new_sndbuf = 0, old_sndbuf = meta_sk->sk_sndbuf; -+ struct mptcp_tcp_sock *mptcp; -+ -+ mptcp_for_each_sub(tp->mpcb, mptcp) { -+ struct sock *sk = mptcp_to_sock(mptcp); -+ -+ if (!mptcp_sk_can_send(sk)) -+ continue; -+ -+ new_sndbuf += sk->sk_sndbuf; -+ -+ if (new_sndbuf > sock_net(meta_sk)->ipv4.sysctl_tcp_wmem[2] || -+ new_sndbuf < 0) { -+ new_sndbuf = sock_net(meta_sk)->ipv4.sysctl_tcp_wmem[2]; -+ break; -+ } -+ } -+ meta_sk->sk_sndbuf = max(min(new_sndbuf, -+ sock_net(meta_sk)->ipv4.sysctl_tcp_wmem[2]), -+ meta_sk->sk_sndbuf); -+ -+ /* The subflow's call to sk_write_space in tcp_new_space ends up in -+ * mptcp_write_space. -+ * It has nothing to do with waking up the application. -+ * So, we do it here. -+ */ -+ if (old_sndbuf != meta_sk->sk_sndbuf) -+ meta_sk->sk_write_space(meta_sk); -+} -+ -+/* Similar to: tcp_close */ -+void mptcp_close(struct sock *meta_sk, long timeout) -+{ -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ struct mptcp_cb *mpcb = meta_tp->mpcb; -+ struct mptcp_tcp_sock *mptcp; -+ struct sk_buff *skb; -+ int data_was_unread = 0; -+ int state; -+ -+ mptcp_debug("%s: Close of meta_sk with tok %#x\n", -+ __func__, mpcb->mptcp_loc_token); -+ -+ WARN_ON(refcount_inc_not_zero(&mpcb->mpcb_refcnt) == 0); -+ mutex_lock(&mpcb->mpcb_mutex); -+ lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING); -+ -+ if (meta_tp->inside_tk_table) -+ /* Detach the mpcb from the token hashtable */ -+ mptcp_hash_remove_bh(meta_tp); -+ -+ meta_sk->sk_shutdown = SHUTDOWN_MASK; -+ /* We need to flush the recv. buffs. We do this only on the -+ * descriptor close, not protocol-sourced closes, because the -+ * reader process may not have drained the data yet! -+ */ -+ while ((skb = __skb_dequeue(&meta_sk->sk_receive_queue)) != NULL) { -+ u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq; -+ -+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) -+ len--; -+ data_was_unread += len; -+ __kfree_skb(skb); -+ } -+ -+ sk_mem_reclaim(meta_sk); -+ -+ /* If socket has been already reset (e.g. in tcp_reset()) - kill it. */ -+ if (meta_sk->sk_state == TCP_CLOSE) { -+ struct mptcp_tcp_sock *mptcp; -+ struct hlist_node *tmp; -+ -+ mptcp_for_each_sub_safe(mpcb, mptcp, tmp) { -+ struct sock *sk_it = mptcp_to_sock(mptcp); -+ -+ if (tcp_sk(sk_it)->send_mp_fclose) -+ continue; -+ mptcp_sub_close(sk_it, 0); -+ } -+ goto adjudge_to_death; -+ } -+ -+ if (data_was_unread) { -+ /* Unread data was tossed, zap the connection. */ -+ NET_INC_STATS(sock_net(meta_sk), LINUX_MIB_TCPABORTONCLOSE); -+ tcp_set_state(meta_sk, TCP_CLOSE); -+ tcp_sk(meta_sk)->ops->send_active_reset(meta_sk, -+ meta_sk->sk_allocation); -+ } else if (sock_flag(meta_sk, SOCK_LINGER) && !meta_sk->sk_lingertime) { -+ /* Check zero linger _after_ checking for unread data. 
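mptcp_update_sndbuf() above sizes the meta send buffer as the sum of all sendable subflows, clamped to the sysctl cap, with the "new_sndbuf < 0" test catching signed overflow of the running sum. The accumulation in isolation, a sketch where wmem_max stands in for sysctl_tcp_wmem[2]:

/* Sum the subflow send buffers; clamp at wmem_max, which also bounds
 * the result when the running total wraps negative (mirroring the
 * kernel code's overflow check). */
static int aggregate_sndbuf(const int *sub_sndbuf, int n, int wmem_max,
			    int old_sndbuf)
{
	int total = 0;
	int i;

	for (i = 0; i < n; i++) {
		total += sub_sndbuf[i];
		if (total > wmem_max || total < 0) {
			total = wmem_max;
			break;
		}
	}

	/* max(min(total, wmem_max), old): the meta buffer never shrinks */
	return total > old_sndbuf ? total : old_sndbuf;
}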
*/ -+ meta_sk->sk_prot->disconnect(meta_sk, 0); -+ NET_INC_STATS(sock_net(meta_sk), LINUX_MIB_TCPABORTONDATA); -+ } else if (tcp_close_state(meta_sk)) { -+ mptcp_send_fin(meta_sk); -+ } else if (meta_tp->snd_una == meta_tp->write_seq) { -+ struct mptcp_tcp_sock *mptcp; -+ struct hlist_node *tmp; -+ -+ /* The DATA_FIN has been sent and acknowledged -+ * (e.g., by sk_shutdown). Close all the other subflows -+ */ -+ mptcp_for_each_sub_safe(mpcb, mptcp, tmp) { -+ struct sock *sk_it = mptcp_to_sock(mptcp); -+ unsigned long delay = 0; -+ /* If we are the passive closer, don't trigger -+ * subflow-fin until the subflow has been finned -+ * by the peer. - thus we add a delay -+ */ -+ if (mpcb->passive_close && -+ sk_it->sk_state == TCP_ESTABLISHED) -+ delay = inet_csk(sk_it)->icsk_rto << 3; -+ -+ mptcp_sub_close(sk_it, delay); -+ } -+ } -+ -+ sk_stream_wait_close(meta_sk, timeout); -+ -+adjudge_to_death: -+ state = meta_sk->sk_state; -+ sock_hold(meta_sk); -+ sock_orphan(meta_sk); -+ -+ /* socket will be freed after mptcp_close - we have to prevent -+ * access from the subflows. -+ */ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ struct sock *sk_it = mptcp_to_sock(mptcp); -+ -+ /* Similar to sock_orphan, but we don't set it DEAD, because -+ * the callbacks are still set and must be called. -+ */ -+ write_lock_bh(&sk_it->sk_callback_lock); -+ sk_set_socket(sk_it, NULL); -+ sk_it->sk_wq = NULL; -+ write_unlock_bh(&sk_it->sk_callback_lock); -+ } -+ -+ if (mpcb->pm_ops->close_session) -+ mpcb->pm_ops->close_session(meta_sk); -+ -+ /* It is the last release_sock in its life. It will remove backlog. */ -+ release_sock(meta_sk); -+ -+ /* Now socket is owned by kernel and we acquire BH lock -+ * to finish close. No need to check for user refs. -+ */ -+ local_bh_disable(); -+ bh_lock_sock(meta_sk); -+ WARN_ON(sock_owned_by_user(meta_sk)); -+ -+ percpu_counter_inc(meta_sk->sk_prot->orphan_count); -+ -+ /* Have we already been destroyed by a softirq or backlog? */ -+ if (state != TCP_CLOSE && meta_sk->sk_state == TCP_CLOSE) -+ goto out; -+ -+ /* This is a (useful) BSD violating of the RFC. There is a -+ * problem with TCP as specified in that the other end could -+ * keep a socket open forever with no application left this end. -+ * We use a 3 minute timeout (about the same as BSD) then kill -+ * our end. If they send after that then tough - BUT: long enough -+ * that we won't make the old 4*rto = almost no time - whoops -+ * reset mistake. -+ * -+ * Nope, it was not mistake. It is really desired behaviour -+ * f.e. on http servers, when such sockets are useless, but -+ * consume significant resources. Let's do it with special -+ * linger2 option. 
--ANK -+ */ -+ -+ if (meta_sk->sk_state == TCP_FIN_WAIT2) { -+ if (meta_tp->linger2 < 0) { -+ tcp_set_state(meta_sk, TCP_CLOSE); -+ meta_tp->ops->send_active_reset(meta_sk, GFP_ATOMIC); -+ __NET_INC_STATS(sock_net(meta_sk), -+ LINUX_MIB_TCPABORTONLINGER); -+ } else { -+ const int tmo = tcp_fin_time(meta_sk); -+ -+ if (tmo > TCP_TIMEWAIT_LEN) { -+ inet_csk_reset_keepalive_timer(meta_sk, -+ tmo - TCP_TIMEWAIT_LEN); -+ } else { -+ meta_tp->ops->time_wait(meta_sk, TCP_FIN_WAIT2, -+ tmo); -+ goto out; -+ } -+ } -+ } -+ if (meta_sk->sk_state != TCP_CLOSE) { -+ sk_mem_reclaim(meta_sk); -+ if (tcp_check_oom(meta_sk, 0)) { -+ if (net_ratelimit()) -+ pr_info("MPTCP: out of memory: force closing socket\n"); -+ tcp_set_state(meta_sk, TCP_CLOSE); -+ meta_tp->ops->send_active_reset(meta_sk, GFP_ATOMIC); -+ __NET_INC_STATS(sock_net(meta_sk), -+ LINUX_MIB_TCPABORTONMEMORY); -+ } -+ } -+ -+ -+ if (meta_sk->sk_state == TCP_CLOSE) -+ inet_csk_destroy_sock(meta_sk); -+ /* Otherwise, socket is reprieved until protocol close. */ -+ -+out: -+ bh_unlock_sock(meta_sk); -+ local_bh_enable(); -+ mutex_unlock(&mpcb->mpcb_mutex); -+ mptcp_mpcb_put(mpcb); -+ sock_put(meta_sk); /* Taken by sock_hold */ -+} -+ -+void mptcp_disconnect(struct sock *meta_sk) -+{ -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ struct mptcp_tcp_sock *mptcp; -+ struct hlist_node *tmp; -+ -+ __skb_queue_purge(&meta_tp->mpcb->reinject_queue); -+ -+ if (meta_tp->inside_tk_table) -+ mptcp_hash_remove_bh(meta_tp); -+ -+ local_bh_disable(); -+ mptcp_for_each_sub_safe(meta_tp->mpcb, mptcp, tmp) { -+ struct sock *subsk = mptcp_to_sock(mptcp); -+ -+ if (spin_is_locked(&subsk->sk_lock.slock)) -+ bh_unlock_sock(subsk); -+ -+ tcp_sk(subsk)->tcp_disconnect = 1; -+ -+ meta_sk->sk_prot->disconnect(subsk, O_NONBLOCK); -+ -+ sock_orphan(subsk); -+ -+ percpu_counter_inc(meta_sk->sk_prot->orphan_count); -+ -+ inet_csk_destroy_sock(subsk); -+ } -+ local_bh_enable(); -+ -+ mptcp_mpcb_cleanup(meta_tp->mpcb); -+ meta_tp->meta_sk = NULL; -+ -+ meta_tp->send_mp_fclose = 0; -+ meta_tp->mpc = 0; -+ meta_tp->ops = &tcp_specific; -+#if IS_ENABLED(CONFIG_IPV6) -+ if (meta_sk->sk_family == AF_INET6) -+ meta_sk->sk_backlog_rcv = tcp_v6_do_rcv; -+ else -+ meta_sk->sk_backlog_rcv = tcp_v4_do_rcv; -+#else -+ meta_sk->sk_backlog_rcv = tcp_v4_do_rcv; -+#endif -+ meta_sk->sk_destruct = inet_sock_destruct; -+} -+ -+ -+/* Returns True if we should enable MPTCP for that socket. 
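The FIN_WAIT2 handling in mptcp_close() above follows tcp_close(): a negative linger2 resets the connection immediately; otherwise the socket waits out tcp_fin_time(), spending only the final TCP_TIMEWAIT_LEN of that period in a time-wait socket. The decision reduced to a small helper (a sketch; the enum names are illustrative):

enum fw2_action { FW2_RESET, FW2_KEEPALIVE_TIMER, FW2_TIME_WAIT };

/* linger2 < 0: abort with a RST. Otherwise, if the FIN timeout exceeds
 * the time-wait length, arm the keepalive timer for the difference and
 * enter time-wait later; else go to time-wait right away. */
static enum fw2_action fin_wait2_action(int linger2, int fin_time,
					int timewait_len)
{
	if (linger2 < 0)
		return FW2_RESET;
	return fin_time > timewait_len ? FW2_KEEPALIVE_TIMER : FW2_TIME_WAIT;
}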
*/ -+bool mptcp_doit(struct sock *sk) -+{ -+ const struct dst_entry *dst = __sk_dst_get(sk); -+ -+ /* Don't do mptcp over loopback */ -+ if (sk->sk_family == AF_INET && -+ (ipv4_is_loopback(inet_sk(sk)->inet_daddr) || -+ ipv4_is_loopback(inet_sk(sk)->inet_saddr))) -+ return false; -+#if IS_ENABLED(CONFIG_IPV6) -+ if (sk->sk_family == AF_INET6 && -+ (ipv6_addr_loopback(&sk->sk_v6_daddr) || -+ ipv6_addr_loopback(&inet6_sk(sk)->saddr))) -+ return false; -+#endif -+ if (mptcp_v6_is_v4_mapped(sk) && -+ ipv4_is_loopback(inet_sk(sk)->inet_saddr)) -+ return false; -+ -+#ifdef CONFIG_TCP_MD5SIG -+ /* If TCP_MD5SIG is enabled, do not do MPTCP - there is no Option-Space */ -+ if (tcp_sk(sk)->af_specific->md5_lookup(sk, sk)) -+ return false; -+#endif -+ -+ if (dst->dev && (dst->dev->flags & IFF_NOMULTIPATH)) -+ return false; -+ -+ return true; -+} -+ -+int mptcp_create_master_sk(struct sock *meta_sk, __u64 remote_key, -+ int rem_key_set, __u8 mptcp_ver, u32 window) -+{ -+ struct tcp_sock *master_tp; -+ struct sock *master_sk; -+ -+ if (mptcp_alloc_mpcb(meta_sk, remote_key, rem_key_set, mptcp_ver, window)) -+ goto err_alloc_mpcb; -+ -+ master_sk = tcp_sk(meta_sk)->mpcb->master_sk; -+ master_tp = tcp_sk(master_sk); -+ -+ if (mptcp_add_sock(meta_sk, master_sk, 0, 0, GFP_ATOMIC)) -+ goto err_add_sock; -+ -+ if (__inet_inherit_port(meta_sk, master_sk) < 0) -+ goto err_add_sock; -+ -+ meta_sk->sk_prot->unhash(meta_sk); -+ inet_ehash_nolisten(master_sk, NULL); -+ -+ master_tp->mptcp->init_rcv_wnd = master_tp->rcv_wnd; -+ -+ return 0; -+ -+err_add_sock: -+ inet_csk_prepare_forced_close(master_sk); -+ tcp_done(master_sk); -+ -+err_alloc_mpcb: -+ return -ENOBUFS; -+} -+ -+static int __mptcp_check_req_master(struct sock *child, -+ const struct mptcp_options_received *mopt, -+ struct request_sock *req) -+{ -+ struct tcp_sock *child_tp = tcp_sk(child); -+ struct sock *meta_sk = child; -+ struct mptcp_cb *mpcb; -+ struct mptcp_request_sock *mtreq; -+ -+ /* Never contained an MP_CAPABLE */ -+ if (!inet_rsk(req)->mptcp_rqsk) -+ return 1; -+ -+ mtreq = mptcp_rsk(req); -+ -+ if (!inet_rsk(req)->saw_mpc) { -+ /* Fallback to regular TCP, because we saw one SYN without -+ * MP_CAPABLE. In tcp_check_req we continue the regular path. -+ * But, the socket has been added to the reqsk_tk_htb, so we -+ * must still remove it. -+ */ -+ MPTCP_INC_STATS(sock_net(meta_sk), MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); -+ mptcp_reqsk_remove_tk(req); -+ return 1; -+ } -+ -+ /* mopt can be NULL when coming from FAST-OPEN */ -+ if (mopt && mopt->saw_mpc && mtreq->mptcp_ver == MPTCP_VERSION_1) { -+ mtreq->mptcp_rem_key = mopt->mptcp_sender_key; -+ mtreq->rem_key_set = 1; -+ } -+ -+ MPTCP_INC_STATS(sock_net(meta_sk), MPTCP_MIB_MPCAPABLEPASSIVEACK); -+ -+ /* Just set this values to pass them to mptcp_alloc_mpcb */ -+ child_tp->mptcp_loc_key = mtreq->mptcp_loc_key; -+ child_tp->mptcp_loc_token = mtreq->mptcp_loc_token; -+ -+ if (mptcp_create_master_sk(meta_sk, mtreq->mptcp_rem_key, -+ mtreq->rem_key_set, mtreq->mptcp_ver, -+ child_tp->snd_wnd)) { -+ inet_csk_prepare_forced_close(meta_sk); -+ tcp_done(meta_sk); -+ -+ return -ENOBUFS; -+ } -+ -+ child = tcp_sk(child)->mpcb->master_sk; -+ child_tp = tcp_sk(child); -+ mpcb = child_tp->mpcb; -+ -+ child_tp->mptcp->snt_isn = tcp_rsk(req)->snt_isn; -+ child_tp->mptcp->rcv_isn = tcp_rsk(req)->rcv_isn; -+ -+ mpcb->dss_csum = mtreq->dss_csum; -+ mpcb->server_side = 1; -+ -+ /* Needs to be done here additionally, because when accepting a -+ * new connection we pass by __reqsk_free and not reqsk_free. 
-+ */ -+ mptcp_reqsk_remove_tk(req); -+ -+ /* Hold when creating the meta-sk in tcp_vX_syn_recv_sock. */ -+ sock_put(meta_sk); -+ -+ return 0; -+} -+ -+int mptcp_check_req_fastopen(struct sock *child, struct request_sock *req) -+{ -+ struct sock *meta_sk = child, *master_sk; -+ struct sk_buff *skb; -+ u32 new_mapping; -+ int ret; -+ -+ ret = __mptcp_check_req_master(child, NULL, req); -+ if (ret) -+ return ret; -+ -+ master_sk = tcp_sk(meta_sk)->mpcb->master_sk; -+ -+ /* We need to rewind copied_seq as it is set to IDSN + 1 and as we have -+ * pre-MPTCP data in the receive queue. -+ */ -+ tcp_sk(meta_sk)->copied_seq -= tcp_sk(master_sk)->rcv_nxt - -+ tcp_rsk(req)->rcv_isn - 1; -+ -+ /* Map subflow sequence number to data sequence numbers. We need to map -+ * these data to [IDSN - len - 1, IDSN[. -+ */ -+ new_mapping = tcp_sk(meta_sk)->copied_seq - tcp_rsk(req)->rcv_isn - 1; -+ -+ /* There should be only one skb: the SYN + data. */ -+ skb_queue_walk(&meta_sk->sk_receive_queue, skb) { -+ TCP_SKB_CB(skb)->seq += new_mapping; -+ TCP_SKB_CB(skb)->end_seq += new_mapping; -+ } -+ -+ /* With fastopen we change the semantics of the relative subflow -+ * sequence numbers to deal with middleboxes that could add/remove -+ * multiple bytes in the SYN. We chose to start counting at rcv_nxt - 1 -+ * instead of the regular TCP ISN. -+ */ -+ tcp_sk(master_sk)->mptcp->rcv_isn = tcp_sk(master_sk)->rcv_nxt - 1; -+ -+ /* We need to update copied_seq of the master_sk to account for the -+ * already moved data to the meta receive queue. -+ */ -+ tcp_sk(master_sk)->copied_seq = tcp_sk(master_sk)->rcv_nxt; -+ -+ /* Handled by the master_sk */ -+ tcp_sk(meta_sk)->fastopen_rsk = NULL; -+ -+ return 0; -+} -+ -+int mptcp_check_req_master(struct sock *sk, struct sock *child, -+ struct request_sock *req, const struct sk_buff *skb, -+ const struct mptcp_options_received *mopt, -+ int drop, u32 tsoff) -+{ -+ struct sock *meta_sk = child; -+ int ret; -+ -+ ret = __mptcp_check_req_master(child, mopt, req); -+ if (ret) -+ return ret; -+ child = tcp_sk(child)->mpcb->master_sk; -+ -+ sock_rps_save_rxhash(child, skb); -+ -+ /* drop indicates that we come from tcp_check_req and thus need to -+ * handle the request-socket fully. -+ */ -+ if (drop) { -+ tcp_synack_rtt_meas(child, req); -+ -+ inet_csk_reqsk_queue_drop(sk, req); -+ reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); -+ if (!inet_csk_reqsk_queue_add(sk, req, meta_sk)) { -+ bh_unlock_sock(meta_sk); -+ /* No sock_put() of the meta needed. The reference has -+ * already been dropped in __mptcp_check_req_master(). -+ */ -+ sock_put(child); -+ return -1; -+ } -+ } else { -+ /* Thus, we come from syn-cookies */ -+ refcount_set(&req->rsk_refcnt, 1); -+ tcp_sk(meta_sk)->tsoffset = tsoff; -+ if (!inet_csk_reqsk_queue_add(sk, req, meta_sk)) { -+ bh_unlock_sock(meta_sk); -+ /* No sock_put() of the meta needed. The reference has -+ * already been dropped in __mptcp_check_req_master(). 
-+ */ -+ sock_put(child); -+ reqsk_put(req); -+ return -1; -+ } -+ } -+ -+ return 0; -+} -+ -+/* May be called without holding the meta-level lock */ -+struct sock *mptcp_check_req_child(struct sock *meta_sk, -+ struct sock *child, -+ struct request_sock *req, -+ struct sk_buff *skb, -+ const struct mptcp_options_received *mopt) -+{ -+ const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct mptcp_request_sock *mtreq = mptcp_rsk(req); -+ struct tcp_sock *child_tp = tcp_sk(child); -+ u8 hash_mac_check[SHA256_DIGEST_SIZE]; -+ -+ if (!mopt->join_ack) { -+ MPTCP_INC_STATS(sock_net(meta_sk), MPTCP_MIB_JOINACKFAIL); -+ goto teardown; -+ } -+ -+ mptcp_hmac(mpcb->mptcp_ver, (u8 *)&mpcb->mptcp_rem_key, -+ (u8 *)&mpcb->mptcp_loc_key, hash_mac_check, 2, -+ 4, (u8 *)&mtreq->mptcp_rem_nonce, -+ 4, (u8 *)&mtreq->mptcp_loc_nonce); -+ -+ if (memcmp(hash_mac_check, (char *)&mopt->mptcp_recv_mac, 20)) { -+ MPTCP_INC_STATS(sock_net(meta_sk), MPTCP_MIB_JOINACKMAC); -+ goto teardown; -+ } -+ -+ /* Point it to the same struct socket and wq as the meta_sk */ -+ sk_set_socket(child, meta_sk->sk_socket); -+ child->sk_wq = meta_sk->sk_wq; -+ -+ if (mptcp_add_sock(meta_sk, child, mtreq->loc_id, mtreq->rem_id, GFP_ATOMIC)) { -+ /* Has been inherited, but now child_tp->mptcp is NULL */ -+ child_tp->mpc = 0; -+ child_tp->ops = &tcp_specific; -+ -+ /* TODO when we support acking the third ack for new subflows, -+ * we should silently discard this third ack, by returning NULL. -+ * -+ * Maybe, at the retransmission we will have enough memory to -+ * fully add the socket to the meta-sk. -+ */ -+ goto teardown; -+ } -+ -+ /* The child is a clone of the meta socket, we must now reset -+ * some of the fields -+ */ -+ child_tp->mptcp->rcv_low_prio = mtreq->rcv_low_prio; -+ -+ /* We should allow proper increase of the snd/rcv-buffers. Thus, we -+ * use the original values instead of the bloated up ones from the -+ * clone. -+ */ -+ child->sk_sndbuf = mpcb->orig_sk_sndbuf; -+ child->sk_rcvbuf = mpcb->orig_sk_rcvbuf; -+ -+ child_tp->mptcp->slave_sk = 1; -+ child_tp->mptcp->snt_isn = tcp_rsk(req)->snt_isn; -+ child_tp->mptcp->rcv_isn = tcp_rsk(req)->rcv_isn; -+ child_tp->mptcp->init_rcv_wnd = req->rsk_rcv_wnd; -+ -+ child->sk_tsq_flags = 0; -+ -+ child_tp->packets_out = 0; -+ -+ tcp_reset_vars(child); -+ -+ sock_rps_save_rxhash(child, skb); -+ tcp_synack_rtt_meas(child, req); -+ -+ if (mpcb->pm_ops->established_subflow) -+ mpcb->pm_ops->established_subflow(child); -+ -+ /* Subflows do not use the accept queue, as they -+ * are attached immediately to the mpcb. -+ */ -+ inet_csk_reqsk_queue_drop(meta_sk, req); -+ reqsk_queue_removed(&inet_csk(meta_sk)->icsk_accept_queue, req); -+ -+ /* The refcnt is initialized to 2, because regular TCP will put him -+ * in the socket's listener queue. However, we do not have a listener-queue. -+ * So, we need to make sure that this request-sock indeed gets destroyed. -+ */ -+ reqsk_put(req); -+ -+ MPTCP_INC_STATS(sock_net(meta_sk), MPTCP_MIB_JOINACKRX); -+ -+ if (inet_sk(child)->inet_sport != inet_sk(meta_sk)->inet_sport) -+ MPTCP_INC_STATS(sock_net(meta_sk), MPTCP_MIB_JOINALTERNATEPORT); -+ -+ return child; -+ -+teardown: -+ req->rsk_ops->send_reset(meta_sk, skb); -+ -+ /* Drop this request - sock creation failed. 
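The 20-byte memcmp() in mptcp_check_req_child() above is an HMAC comparison: despite the hand-rolled padding, mptcp_hmac_sha1() earlier in this file computes standard HMAC-SHA1 with a 16-byte key (the two 64-bit keys back to back), so the MPTCP v0 MP_JOIN check can be reproduced with a stock library. A userspace sketch assuming OpenSSL; for v1 the same construction would run over HMAC-SHA256:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <openssl/evp.h>
#include <openssl/hmac.h>

/* Recompute the MP_JOIN ACK HMAC as the receiver does above:
 * key = remote key || local key, message = remote nonce || local nonce,
 * compared over the full 20-byte SHA-1 output. */
static bool join_ack_valid(uint64_t rem_key, uint64_t loc_key,
			   uint32_t rem_nonce, uint32_t loc_nonce,
			   const unsigned char *recv_mac)
{
	unsigned char key[16], msg[8], mac[EVP_MAX_MD_SIZE];
	unsigned int mac_len;

	memcpy(key, &rem_key, 8);
	memcpy(key + 8, &loc_key, 8);
	memcpy(msg, &rem_nonce, 4);
	memcpy(msg + 4, &loc_nonce, 4);

	if (!HMAC(EVP_sha1(), key, sizeof(key), msg, sizeof(msg), mac, &mac_len))
		return false;
	return memcmp(mac, recv_mac, 20) == 0;
}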
*/ -+ inet_csk_reqsk_queue_drop(meta_sk, req); -+ reqsk_queue_removed(&inet_csk(meta_sk)->icsk_accept_queue, req); -+ inet_csk_prepare_forced_close(child); -+ tcp_done(child); -+ bh_unlock_sock(meta_sk); -+ return meta_sk; -+} -+ -+int mptcp_init_tw_sock(struct sock *sk, struct tcp_timewait_sock *tw) -+{ -+ struct mptcp_tw *mptw; -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct mptcp_cb *mpcb = tp->mpcb; -+ -+ /* A subsocket in tw can only receive data. So, if we are in -+ * infinite-receive, then we should not reply with a data-ack or act -+ * upon general MPTCP-signaling. We prevent this by simply not creating -+ * the mptcp_tw_sock. -+ */ -+ if (mpcb->infinite_mapping_rcv) { -+ tw->mptcp_tw = NULL; -+ return 0; -+ } -+ -+ /* Alloc MPTCP-tw-sock */ -+ mptw = kmem_cache_alloc(mptcp_tw_cache, GFP_ATOMIC); -+ if (!mptw) { -+ tw->mptcp_tw = NULL; -+ return -ENOBUFS; -+ } -+ -+ refcount_inc(&mpcb->mpcb_refcnt); -+ -+ tw->mptcp_tw = mptw; -+ mptw->loc_key = mpcb->mptcp_loc_key; -+ mptw->meta_tw = mpcb->in_time_wait; -+ mptw->rcv_nxt = mptcp_get_rcv_nxt_64(mptcp_meta_tp(tp)); -+ if (mptw->meta_tw && mpcb->mptw_state != TCP_TIME_WAIT) -+ mptw->rcv_nxt++; -+ rcu_assign_pointer(mptw->mpcb, mpcb); -+ -+ spin_lock_bh(&mpcb->mpcb_list_lock); -+ list_add_rcu(&mptw->list, &tp->mpcb->tw_list); -+ mptw->in_list = 1; -+ spin_unlock_bh(&mpcb->mpcb_list_lock); -+ -+ return 0; -+} -+ -+void mptcp_twsk_destructor(struct tcp_timewait_sock *tw) -+{ -+ struct mptcp_cb *mpcb; -+ -+ rcu_read_lock(); -+ local_bh_disable(); -+ mpcb = rcu_dereference(tw->mptcp_tw->mpcb); -+ -+ /* If we are still holding a ref to the mpcb, we have to remove ourself -+ * from the list and drop the ref properly. -+ */ -+ if (mpcb && refcount_inc_not_zero(&mpcb->mpcb_refcnt)) { -+ spin_lock(&mpcb->mpcb_list_lock); -+ if (tw->mptcp_tw->in_list) { -+ list_del_rcu(&tw->mptcp_tw->list); -+ tw->mptcp_tw->in_list = 0; -+ /* Put, because we added it to the list */ -+ mptcp_mpcb_put(mpcb); -+ } -+ spin_unlock(&mpcb->mpcb_list_lock); -+ -+ /* Second time, because we increased it above */ -+ mptcp_mpcb_put(mpcb); -+ } -+ -+ local_bh_enable(); -+ rcu_read_unlock(); -+ -+ kmem_cache_free(mptcp_tw_cache, tw->mptcp_tw); -+} -+ -+/* Updates the rcv_nxt of the time-wait-socks and allows them to ack a -+ * data-fin. -+ */ -+void mptcp_time_wait(struct sock *meta_sk, int state, int timeo) -+{ -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ struct mptcp_tw *mptw; -+ -+ if (mptcp_in_infinite_mapping_weak(meta_tp->mpcb)) { -+ struct mptcp_tcp_sock *mptcp; -+ struct hlist_node *tmp; -+ -+ mptcp_for_each_sub_safe(meta_tp->mpcb, mptcp, tmp) { -+ struct sock *sk_it = mptcp_to_sock(mptcp); -+ -+ if (sk_it->sk_state == TCP_CLOSE) -+ continue; -+ -+ tcp_sk(sk_it)->ops->time_wait(sk_it, state, timeo); -+ } -+ } -+ -+ /* Used for sockets that go into tw after the meta -+ * (see mptcp_init_tw_sock()) -+ */ -+ meta_tp->mpcb->in_time_wait = 1; -+ meta_tp->mpcb->mptw_state = state; -+ -+ /* Update the time-wait-sock's information */ -+ rcu_read_lock(); -+ local_bh_disable(); -+ list_for_each_entry_rcu(mptw, &meta_tp->mpcb->tw_list, list) { -+ mptw->meta_tw = 1; -+ mptw->rcv_nxt = mptcp_get_rcv_nxt_64(meta_tp); -+ -+ /* We want to ack a DATA_FIN, but are yet in FIN_WAIT_2 - -+ * pretend as if the DATA_FIN has already reached us, that way -+ * the checks in tcp_timewait_state_process will be good as the -+ * DATA_FIN comes in. 
-+ */ -+ if (state != TCP_TIME_WAIT) -+ mptw->rcv_nxt++; -+ } -+ local_bh_enable(); -+ rcu_read_unlock(); -+ -+ if (meta_sk->sk_state != TCP_CLOSE) -+ tcp_done(meta_sk); -+} -+ -+void mptcp_tsq_flags(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct sock *meta_sk = mptcp_meta_sk(sk); -+ -+ /* It will be handled as a regular deferred-call */ -+ if (is_meta_sk(sk)) -+ return; -+ -+ if (hlist_unhashed(&tp->mptcp->cb_list)) { -+ hlist_add_head(&tp->mptcp->cb_list, &tp->mpcb->callback_list); -+ /* We need to hold it here, as the sock_hold is not assured -+ * by the release_sock as it is done in regular TCP. -+ * -+ * The subsocket may get inet_csk_destroy'd while it is inside -+ * the callback_list. -+ */ -+ sock_hold(sk); -+ } -+ -+ if (!test_and_set_bit(MPTCP_SUB_DEFERRED, &meta_sk->sk_tsq_flags)) -+ sock_hold(meta_sk); -+} -+ -+void mptcp_tsq_sub_deferred(struct sock *meta_sk) -+{ -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ struct mptcp_tcp_sock *mptcp; -+ struct hlist_node *tmp; -+ -+ __sock_put(meta_sk); -+ hlist_for_each_entry_safe(mptcp, tmp, &meta_tp->mpcb->callback_list, cb_list) { -+ struct tcp_sock *tp = mptcp->tp; -+ struct sock *sk = (struct sock *)tp; -+ -+ hlist_del_init(&mptcp->cb_list); -+ sk->sk_prot->release_cb(sk); -+ /* Final sock_put (cfr. mptcp_tsq_flags) */ -+ sock_put(sk); -+ } -+} -+ -+/* May be called without holding the meta-level lock */ -+void mptcp_join_reqsk_init(const struct mptcp_cb *mpcb, -+ const struct request_sock *req, -+ struct sk_buff *skb) -+{ -+ struct mptcp_request_sock *mtreq = mptcp_rsk(req); -+ u8 mptcp_hash_mac[SHA256_DIGEST_SIZE]; -+ struct mptcp_options_received mopt; -+ -+ mptcp_init_mp_opt(&mopt); -+ tcp_parse_mptcp_options(skb, &mopt); -+ -+ mtreq->is_sub = 1; -+ inet_rsk(req)->mptcp_rqsk = 1; -+ -+ mtreq->mptcp_rem_nonce = mopt.mptcp_recv_nonce; -+ -+ mptcp_hmac(mpcb->mptcp_ver, (u8 *)&mpcb->mptcp_loc_key, -+ (u8 *)&mpcb->mptcp_rem_key, mptcp_hash_mac, 2, -+ 4, (u8 *)&mtreq->mptcp_loc_nonce, -+ 4, (u8 *)&mtreq->mptcp_rem_nonce); -+ mtreq->mptcp_hash_tmac = *(u64 *)mptcp_hash_mac; -+ -+ mtreq->rem_id = mopt.rem_id; -+ mtreq->rcv_low_prio = mopt.low_prio; -+ inet_rsk(req)->saw_mpc = 1; -+ -+ MPTCP_INC_STATS(sock_net(mpcb->meta_sk), MPTCP_MIB_JOINSYNRX); -+} -+ -+void mptcp_reqsk_init(struct request_sock *req, const struct sock *sk, -+ const struct sk_buff *skb, bool want_cookie) -+{ -+ struct mptcp_options_received mopt; -+ struct mptcp_request_sock *mtreq = mptcp_rsk(req); -+ -+ mptcp_init_mp_opt(&mopt); -+ tcp_parse_mptcp_options(skb, &mopt); -+ -+ mtreq->dss_csum = mopt.dss_csum; -+ -+ if (want_cookie) { -+ if (!mptcp_reqsk_new_cookie(req, sk, &mopt, skb)) -+ /* No key available - back to regular TCP */ -+ inet_rsk(req)->mptcp_rqsk = 0; -+ return; -+ } -+ -+ mptcp_reqsk_new_mptcp(req, sk, &mopt, skb); -+} -+ -+void mptcp_cookies_reqsk_init(struct request_sock *req, -+ struct mptcp_options_received *mopt, -+ struct sk_buff *skb) -+{ -+ struct mptcp_request_sock *mtreq = mptcp_rsk(req); -+ -+ /* Absolutely need to always initialize this. 
*/ -+ mtreq->hash_entry.pprev = NULL; -+ -+ mtreq->mptcp_ver = mopt->mptcp_ver; -+ mtreq->mptcp_rem_key = mopt->mptcp_sender_key; -+ mtreq->mptcp_loc_key = mopt->mptcp_receiver_key; -+ mtreq->rem_key_set = 1; -+ -+ /* Generate the token */ -+ mptcp_key_hash(mtreq->mptcp_ver, mtreq->mptcp_loc_key, &mtreq->mptcp_loc_token, NULL); -+ -+ rcu_read_lock(); -+ local_bh_disable(); -+ spin_lock(&mptcp_tk_hashlock); -+ -+ /* Check, if the key is still free */ -+ if (mptcp_reqsk_find_tk(mtreq->mptcp_loc_token) || -+ mptcp_find_token(mtreq->mptcp_loc_token)) -+ goto out; -+ -+ inet_rsk(req)->saw_mpc = 1; -+ mtreq->is_sub = 0; -+ inet_rsk(req)->mptcp_rqsk = 1; -+ mtreq->dss_csum = mopt->dss_csum; -+ -+out: -+ spin_unlock(&mptcp_tk_hashlock); -+ local_bh_enable(); -+ rcu_read_unlock(); -+} -+ -+int mptcp_conn_request(struct sock *sk, struct sk_buff *skb) -+{ -+ struct mptcp_options_received mopt; -+ -+ mptcp_init_mp_opt(&mopt); -+ tcp_parse_mptcp_options(skb, &mopt); -+ -+ if (mopt.is_mp_join) -+ return mptcp_do_join_short(skb, &mopt, sock_net(sk)); -+ if (mopt.drop_me) -+ goto drop; -+ -+ if (!sock_flag(sk, SOCK_MPTCP)) -+ mopt.saw_mpc = 0; -+ -+ /* If the requested version is higher than what we support, fall back */ -+ if (mopt.saw_mpc && mopt.mptcp_ver > tcp_sk(sk)->mptcp_ver) -+ mopt.saw_mpc = 0; -+ -+ if (skb->protocol == htons(ETH_P_IP)) { -+ if (mopt.saw_mpc) { -+ if (skb_rtable(skb)->rt_flags & -+ (RTCF_BROADCAST | RTCF_MULTICAST)) -+ goto drop; -+ -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVE); -+ return tcp_conn_request(&mptcp_request_sock_ops, -+ &mptcp_request_sock_ipv4_ops, -+ sk, skb); -+ } -+ -+ return tcp_v4_conn_request(sk, skb); -+#if IS_ENABLED(CONFIG_IPV6) -+ } else { -+ if (mopt.saw_mpc) { -+ if (!ipv6_unicast_destination(skb)) -+ goto drop; -+ -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVE); -+ return tcp_conn_request(&mptcp6_request_sock_ops, -+ &mptcp_request_sock_ipv6_ops, -+ sk, skb); -+ } -+ -+ return tcp_v6_conn_request(sk, skb); -+#endif -+ } -+drop: -+ NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); -+ return 0; -+} -+ -+int mptcp_finish_handshake(struct sock *child, struct sk_buff *skb) -+ __releases(&child->sk_lock.slock) -+{ -+ int ret; -+ -+ /* We don't call tcp_child_process here, because we hold -+ * already the meta-sk-lock and are sure that it is not owned -+ * by the user. 
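Version negotiation in mptcp_conn_request() above is one-sided and simple: if the client requests a higher MPTCP version than the listener supports, saw_mpc is cleared and the connection falls back to plain TCP; otherwise the client's requested version is used. As a sketch (illustrative names):

#include <stdbool.h>

/* Returns the negotiated MPTCP version, or -1 for a TCP fallback,
 * matching the saw_mpc clearing above. */
static int negotiate_mptcp_version(bool saw_mpc, int peer_ver, int our_ver)
{
	if (!saw_mpc || peer_ver > our_ver)
		return -1;		/* fall back to regular TCP */
	return peer_ver;		/* peer's version, <= what we support */
}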
-+ */ -+ tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); -+ ret = tcp_rcv_state_process(child, skb); -+ bh_unlock_sock(child); -+ sock_put(child); -+ -+ return ret; -+} -+ -+static void __mptcp_get_info(const struct sock *meta_sk, -+ struct mptcp_meta_info *info) -+{ -+ const struct inet_connection_sock *meta_icsk = inet_csk(meta_sk); -+ const struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ u32 now = tcp_jiffies32; -+ -+ memset(info, 0, sizeof(*info)); -+ -+ info->mptcpi_state = meta_sk->sk_state; -+ info->mptcpi_retransmits = meta_icsk->icsk_retransmits; -+ info->mptcpi_probes = meta_icsk->icsk_probes_out; -+ info->mptcpi_backoff = meta_icsk->icsk_backoff; -+ -+ info->mptcpi_rto = jiffies_to_usecs(meta_icsk->icsk_rto); -+ -+ info->mptcpi_unacked = meta_tp->packets_out; -+ -+ info->mptcpi_last_data_sent = jiffies_to_msecs(now - meta_tp->lsndtime); -+ info->mptcpi_last_data_recv = jiffies_to_msecs(now - meta_icsk->icsk_ack.lrcvtime); -+ info->mptcpi_last_ack_recv = jiffies_to_msecs(now - meta_tp->rcv_tstamp); -+ -+ info->mptcpi_total_retrans = meta_tp->total_retrans; -+ -+ info->mptcpi_bytes_acked = meta_tp->bytes_acked; -+ info->mptcpi_bytes_received = meta_tp->bytes_received; -+} -+ -+static void mptcp_get_sub_info(struct sock *sk, struct mptcp_sub_info *info) -+{ -+ struct inet_sock *inet = inet_sk(sk); -+ -+ memset(info, 0, sizeof(*info)); -+ -+ if (sk->sk_family == AF_INET) { -+ info->src_v4.sin_family = AF_INET; -+ info->src_v4.sin_port = inet->inet_sport; -+ -+ info->src_v4.sin_addr.s_addr = inet->inet_rcv_saddr; -+ if (!info->src_v4.sin_addr.s_addr) -+ info->src_v4.sin_addr.s_addr = inet->inet_saddr; -+ -+ info->dst_v4.sin_family = AF_INET; -+ info->dst_v4.sin_port = inet->inet_dport; -+ info->dst_v4.sin_addr.s_addr = inet->inet_daddr; -+#if IS_ENABLED(CONFIG_IPV6) -+ } else { -+ struct ipv6_pinfo *np = inet6_sk(sk); -+ -+ info->src_v6.sin6_family = AF_INET6; -+ info->src_v6.sin6_port = inet->inet_sport; -+ -+ if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) -+ info->src_v6.sin6_addr = np->saddr; -+ else -+ info->src_v6.sin6_addr = sk->sk_v6_rcv_saddr; -+ -+ info->dst_v6.sin6_family = AF_INET6; -+ info->dst_v6.sin6_port = inet->inet_dport; -+ info->dst_v6.sin6_addr = sk->sk_v6_daddr; -+#endif -+ } -+} -+ -+int mptcp_get_info(const struct sock *meta_sk, char __user *optval, int optlen) -+{ -+ const struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ -+ struct mptcp_meta_info meta_info; -+ struct mptcp_info m_info; -+ -+ unsigned int info_len; -+ -+ /* Check again with the lock held */ -+ if (!mptcp(meta_tp)) -+ return -EINVAL; -+ -+ if (copy_from_user(&m_info, optval, optlen)) -+ return -EFAULT; -+ -+ if (m_info.meta_info) { -+ unsigned int len; -+ -+ __mptcp_get_info(meta_sk, &meta_info); -+ -+ /* Need to set this, if user thinks that tcp_info is bigger than ours */ -+ len = min_t(unsigned int, m_info.meta_len, sizeof(meta_info)); -+ m_info.meta_len = len; -+ -+ if (copy_to_user((void __user *)m_info.meta_info, &meta_info, len)) -+ return -EFAULT; -+ } -+ -+ /* Need to set this, if user thinks that tcp_info is bigger than ours */ -+ info_len = min_t(unsigned int, m_info.tcp_info_len, sizeof(struct tcp_info)); -+ m_info.tcp_info_len = info_len; -+ -+ if (m_info.initial) { -+ struct mptcp_cb *mpcb = meta_tp->mpcb; -+ -+ if (mpcb->master_sk) { -+ struct tcp_info info; -+ -+ tcp_get_info(mpcb->master_sk, &info, true); -+ if (copy_to_user((void __user *)m_info.initial, &info, info_len)) -+ return -EFAULT; -+ } else if (meta_tp->record_master_info && mpcb->master_info) { -+ if 
(copy_to_user((void __user *)m_info.initial, mpcb->master_info, info_len)) -+ return -EFAULT; -+ } else { -+ return meta_tp->record_master_info ? -ENOMEM : -EINVAL; -+ } -+ } -+ -+ if (m_info.subflows) { -+ unsigned int len, sub_len = 0; -+ struct mptcp_tcp_sock *mptcp; -+ char __user *ptr; -+ -+ ptr = (char __user *)m_info.subflows; -+ len = m_info.sub_len; -+ -+ mptcp_for_each_sub(meta_tp->mpcb, mptcp) { -+ struct tcp_info t_info; -+ unsigned int tmp_len; -+ -+ tcp_get_info(mptcp_to_sock(mptcp), &t_info, true); -+ -+ tmp_len = min_t(unsigned int, len, info_len); -+ len -= tmp_len; -+ -+ if (copy_to_user(ptr, &t_info, tmp_len)) -+ return -EFAULT; -+ -+ ptr += tmp_len; -+ sub_len += tmp_len; -+ -+ if (len == 0) -+ break; -+ } -+ -+ m_info.sub_len = sub_len; -+ } -+ -+ if (m_info.subflow_info) { -+ unsigned int len, sub_info_len, total_sub_info_len = 0; -+ struct mptcp_tcp_sock *mptcp; -+ char __user *ptr; -+ -+ ptr = (char __user *)m_info.subflow_info; -+ len = m_info.total_sub_info_len; -+ -+ sub_info_len = min_t(unsigned int, m_info.sub_info_len, -+ sizeof(struct mptcp_sub_info)); -+ m_info.sub_info_len = sub_info_len; -+ -+ mptcp_for_each_sub(meta_tp->mpcb, mptcp) { -+ struct mptcp_sub_info m_sub_info; -+ unsigned int tmp_len; -+ -+ mptcp_get_sub_info(mptcp_to_sock(mptcp), &m_sub_info); -+ -+ tmp_len = min_t(unsigned int, len, sub_info_len); -+ len -= tmp_len; -+ -+ if (copy_to_user(ptr, &m_sub_info, tmp_len)) -+ return -EFAULT; -+ -+ ptr += tmp_len; -+ total_sub_info_len += tmp_len; -+ -+ if (len == 0) -+ break; -+ } -+ -+ m_info.total_sub_info_len = total_sub_info_len; -+ } -+ -+ if (copy_to_user(optval, &m_info, optlen)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+void mptcp_clear_sk(struct sock *sk, int size) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ -+ /* we do not want to clear tk_table field, because of RCU lookups */ -+ sk_prot_clear_nulls(sk, offsetof(struct tcp_sock, tk_table.next)); -+ -+ size -= offsetof(struct tcp_sock, tk_table.pprev); -+ memset((char *)&tp->tk_table.pprev, 0, size); -+} -+ -+static const struct snmp_mib mptcp_snmp_list[] = { -+ SNMP_MIB_ITEM("MPCapableSYNRX", MPTCP_MIB_MPCAPABLEPASSIVE), -+ SNMP_MIB_ITEM("MPCapableSYNTX", MPTCP_MIB_MPCAPABLEACTIVE), -+ SNMP_MIB_ITEM("MPCapableSYNACKRX", MPTCP_MIB_MPCAPABLEACTIVEACK), -+ SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK), -+ SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK), -+ SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK), -+ SNMP_MIB_ITEM("MPCapableRetransFallback", MPTCP_MIB_MPCAPABLERETRANSFALLBACK), -+ SNMP_MIB_ITEM("MPTCPCsumEnabled", MPTCP_MIB_CSUMENABLED), -+ SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS), -+ SNMP_MIB_ITEM("MPFailRX", MPTCP_MIB_MPFAILRX), -+ SNMP_MIB_ITEM("MPCsumFail", MPTCP_MIB_CSUMFAIL), -+ SNMP_MIB_ITEM("MPFastcloseRX", MPTCP_MIB_FASTCLOSERX), -+ SNMP_MIB_ITEM("MPFastcloseTX", MPTCP_MIB_FASTCLOSETX), -+ SNMP_MIB_ITEM("MPFallbackAckSub", MPTCP_MIB_FBACKSUB), -+ SNMP_MIB_ITEM("MPFallbackAckInit", MPTCP_MIB_FBACKINIT), -+ SNMP_MIB_ITEM("MPFallbackDataSub", MPTCP_MIB_FBDATASUB), -+ SNMP_MIB_ITEM("MPFallbackDataInit", MPTCP_MIB_FBDATAINIT), -+ SNMP_MIB_ITEM("MPRemoveAddrSubDelete", MPTCP_MIB_REMADDRSUB), -+ SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN), -+ SNMP_MIB_ITEM("MPJoinAlreadyFallenback", MPTCP_MIB_JOINFALLBACK), -+ SNMP_MIB_ITEM("MPJoinSynTx", MPTCP_MIB_JOINSYNTX), -+ SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX), -+ SNMP_MIB_ITEM("MPJoinSynAckRx", MPTCP_MIB_JOINSYNACKRX), -+ 
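[Editor's sketch] The copy-out paths in mptcp_get_info() above all follow one idiom: clamp the caller-supplied length with min_t() before copy_to_user(), then write the clamped length back so userspace learns how much was actually filled in. A minimal, runnable userspace model of that idiom follows; the struct names are illustrative, not from the patch.

#include <stdio.h>
#include <string.h>

struct info_v2 { int a, b, c; };	/* "kernel" layout              */
struct info_v1 { int a, b; };		/* older "userspace" layout     */

static size_t copy_info(void *dst, size_t dst_len)
{
	struct info_v2 src = { 1, 2, 3 };
	size_t len = dst_len < sizeof(src) ? dst_len : sizeof(src);

	memcpy(dst, &src, len);		/* the kernel uses copy_to_user() */
	return len;			/* caller learns the copied size  */
}

int main(void)
{
	struct info_v1 old_abi = { 0 };
	size_t written = copy_info(&old_abi, sizeof(old_abi));

	printf("wrote %zu bytes: a=%d b=%d\n", written, old_abi.a, old_abi.b);
	return 0;
}

Because only min(caller_len, kernel_len) bytes move in either direction, an old binary with a shorter struct and a newer kernel with a longer one keep interoperating.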
SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC), -+ SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX), -+ SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), -+ SNMP_MIB_ITEM("MPJoinAckMissing", MPTCP_MIB_JOINACKFAIL), -+ SNMP_MIB_ITEM("MPJoinAckRTO", MPTCP_MIB_JOINACKRTO), -+ SNMP_MIB_ITEM("MPJoinAckRexmit", MPTCP_MIB_JOINACKRXMIT), -+ SNMP_MIB_ITEM("NoDSSInWindow", MPTCP_MIB_NODSSWINDOW), -+ SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH), -+ SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX), -+ SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH), -+ SNMP_MIB_ITEM("DSSTrimHead", MPTCP_MIB_DSSTRIMHEAD), -+ SNMP_MIB_ITEM("DSSSplitTail", MPTCP_MIB_DSSSPLITTAIL), -+ SNMP_MIB_ITEM("DSSPurgeOldSubSegs", MPTCP_MIB_PURGEOLD), -+ SNMP_MIB_ITEM("AddAddrRx", MPTCP_MIB_ADDADDRRX), -+ SNMP_MIB_ITEM("AddAddrTx", MPTCP_MIB_ADDADDRTX), -+ SNMP_MIB_ITEM("RemAddrRx", MPTCP_MIB_REMADDRRX), -+ SNMP_MIB_ITEM("RemAddrTx", MPTCP_MIB_REMADDRTX), -+ SNMP_MIB_ITEM("MPJoinAlternatePort", MPTCP_MIB_JOINALTERNATEPORT), -+ SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB), -+ SNMP_MIB_SENTINEL -+}; -+ -+struct workqueue_struct *mptcp_wq; -+EXPORT_SYMBOL(mptcp_wq); -+ -+/* Output /proc/net/mptcp */ -+static int mptcp_pm_seq_show(struct seq_file *seq, void *v) -+{ -+ struct tcp_sock *meta_tp; -+ const struct net *net = seq->private; -+ unsigned int i, n = 0; -+ -+ seq_printf(seq, " sl loc_tok rem_tok v6 local_address remote_address st ns tx_queue rx_queue inode"); -+ seq_putc(seq, '\n'); -+ -+ for (i = 0; i <= mptcp_tk_htable.mask; i++) { -+ struct hlist_nulls_node *node; -+ rcu_read_lock(); -+ local_bh_disable(); -+ hlist_nulls_for_each_entry_rcu(meta_tp, node, -+ &mptcp_tk_htable.hashtable[i], -+ tk_table) { -+ struct sock *meta_sk = (struct sock *)meta_tp; -+ struct inet_sock *isk = inet_sk(meta_sk); -+ struct mptcp_cb *mpcb = meta_tp->mpcb; -+ -+ if (!mptcp(meta_tp) || !net_eq(net, sock_net(meta_sk))) -+ continue; -+ -+ if (!mpcb) -+ continue; -+ -+ if (capable(CAP_NET_ADMIN)) { -+ seq_printf(seq, "%4d: %04X %04X ", n++, -+ mpcb->mptcp_loc_token, -+ mpcb->mptcp_rem_token); -+ } else { -+ seq_printf(seq, "%4d: %04X %04X ", n++, -1, -1); -+ } -+ if (meta_sk->sk_family == AF_INET || -+ mptcp_v6_is_v4_mapped(meta_sk)) { -+ seq_printf(seq, " 0 %08X:%04X %08X:%04X ", -+ isk->inet_rcv_saddr, -+ ntohs(isk->inet_sport), -+ isk->inet_daddr, -+ ntohs(isk->inet_dport)); -+#if IS_ENABLED(CONFIG_IPV6) -+ } else if (meta_sk->sk_family == AF_INET6) { -+ struct in6_addr *src = &meta_sk->sk_v6_rcv_saddr; -+ struct in6_addr *dst = &meta_sk->sk_v6_daddr; -+ seq_printf(seq, " 1 %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X", -+ src->s6_addr32[0], src->s6_addr32[1], -+ src->s6_addr32[2], src->s6_addr32[3], -+ ntohs(isk->inet_sport), -+ dst->s6_addr32[0], dst->s6_addr32[1], -+ dst->s6_addr32[2], dst->s6_addr32[3], -+ ntohs(isk->inet_dport)); -+#endif -+ } -+ -+ seq_printf(seq, " %02X %02X %08X:%08X %lu", -+ meta_sk->sk_state, mptcp_subflow_count(mpcb), -+ meta_tp->write_seq - meta_tp->snd_una, -+ max_t(int, meta_tp->rcv_nxt - -+ meta_tp->copied_seq, 0), -+ sock_i_ino(meta_sk)); -+ seq_putc(seq, '\n'); -+ } -+ -+ local_bh_enable(); -+ rcu_read_unlock(); -+ } -+ -+ return 0; -+} -+ -+static int mptcp_snmp_seq_show(struct seq_file *seq, void *v) -+{ -+ struct net *net = seq->private; -+ int i; -+ -+ for (i = 0; mptcp_snmp_list[i].name != NULL; i++) -+ seq_printf(seq, "%-32s\t%ld\n", mptcp_snmp_list[i].name, -+ snmp_fold_field(net->mptcp.mptcp_statistics, -+ mptcp_snmp_list[i].entry)); -+ -+ return 
0; -+} -+ -+static int mptcp_pm_init_net(struct net *net) -+{ -+ net->mptcp.mptcp_statistics = alloc_percpu(struct mptcp_mib); -+ if (!net->mptcp.mptcp_statistics) -+ goto out_mptcp_mibs; -+ -+#ifdef CONFIG_PROC_FS -+ net->mptcp.proc_net_mptcp = proc_net_mkdir(net, "mptcp_net", net->proc_net); -+ if (!net->mptcp.proc_net_mptcp) -+ goto out_proc_net_mptcp; -+ if (!proc_create_net_single("mptcp", S_IRUGO, net->mptcp.proc_net_mptcp, -+ mptcp_pm_seq_show, NULL)) -+ goto out_mptcp_net_mptcp; -+ if (!proc_create_net_single("snmp", S_IRUGO, net->mptcp.proc_net_mptcp, -+ mptcp_snmp_seq_show, NULL)) -+ goto out_mptcp_net_snmp; -+#endif -+ -+ return 0; -+ -+#ifdef CONFIG_PROC_FS -+out_mptcp_net_snmp: -+ remove_proc_entry("mptcp", net->mptcp.proc_net_mptcp); -+out_mptcp_net_mptcp: -+ remove_proc_subtree("mptcp_net", net->proc_net); -+ net->mptcp.proc_net_mptcp = NULL; -+out_proc_net_mptcp: -+ free_percpu(net->mptcp.mptcp_statistics); -+#endif -+out_mptcp_mibs: -+ return -ENOMEM; -+} -+ -+static void mptcp_pm_exit_net(struct net *net) -+{ -+ remove_proc_entry("snmp", net->mptcp.proc_net_mptcp); -+ remove_proc_entry("mptcp", net->mptcp.proc_net_mptcp); -+ remove_proc_subtree("mptcp_net", net->proc_net); -+ free_percpu(net->mptcp.mptcp_statistics); -+} -+ -+static struct pernet_operations mptcp_pm_proc_ops = { -+ .init = mptcp_pm_init_net, -+ .exit = mptcp_pm_exit_net, -+}; -+ -+static unsigned long mptcp_htable_entries __initdata; -+ -+static int __init set_mptcp_htable_entries(char *str) -+{ -+ ssize_t ret; -+ -+ if (!str) -+ return 0; -+ -+ ret = kstrtoul(str, 0, &mptcp_htable_entries); -+ if (ret) -+ return 0; -+ -+ return 1; -+} -+__setup("mptcp_htable_entries=", set_mptcp_htable_entries); -+ -+/* General initialization of mptcp */ -+void __init mptcp_init(void) -+{ -+ unsigned int i; -+ struct ctl_table_header *mptcp_sysctl; -+ -+ mptcp_sock_cache = kmem_cache_create("mptcp_sock", -+ sizeof(struct mptcp_tcp_sock), -+ 0, SLAB_HWCACHE_ALIGN, -+ NULL); -+ if (!mptcp_sock_cache) -+ goto mptcp_sock_cache_failed; -+ -+ mptcp_cb_cache = kmem_cache_create("mptcp_cb", sizeof(struct mptcp_cb), -+ 0, SLAB_TYPESAFE_BY_RCU|SLAB_HWCACHE_ALIGN, -+ NULL); -+ if (!mptcp_cb_cache) -+ goto mptcp_cb_cache_failed; -+ -+ mptcp_tw_cache = kmem_cache_create("mptcp_tw", sizeof(struct mptcp_tw), -+ 0, SLAB_TYPESAFE_BY_RCU|SLAB_HWCACHE_ALIGN, -+ NULL); -+ if (!mptcp_tw_cache) -+ goto mptcp_tw_cache_failed; -+ -+ get_random_bytes(&mptcp_secret, sizeof(mptcp_secret)); -+ -+ mptcp_wq = alloc_workqueue("mptcp_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 8); -+ if (!mptcp_wq) -+ goto alloc_workqueue_failed; -+ -+ mptcp_tk_htable.hashtable = -+ alloc_large_system_hash("MPTCP tokens", -+ sizeof(mptcp_tk_htable.hashtable[0]), -+ mptcp_htable_entries, -+ 18, /* one slot per 256KB of memory */ -+ 0, -+ NULL, -+ &mptcp_tk_htable.mask, -+ 1024, -+ mptcp_htable_entries ? 0 : 1024 * 1024); -+ -+ for (i = 0; i <= mptcp_tk_htable.mask; i++) -+ INIT_HLIST_NULLS_HEAD(&mptcp_tk_htable.hashtable[i], i); -+ -+ mptcp_reqsk_tk_htb.hashtable = -+ alloc_large_system_hash("MPTCP request tokens", -+ sizeof(mptcp_reqsk_tk_htb.hashtable[0]), -+ mptcp_htable_entries, -+ 18, /* one slot per 256KB of memory */ -+ 0, -+ NULL, -+ &mptcp_reqsk_tk_htb.mask, -+ 1024, -+ mptcp_htable_entries ? 
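[Editor's sketch] mptcp_pm_init_net() above uses the standard kernel unwind idiom: acquire resources in order, and on failure jump to a label that releases only what was already acquired, in reverse order. A runnable userspace sketch of the same shape, where try_alloc() is a made-up stand-in for the percpu allocation and the proc registrations:

#include <stdio.h>
#include <stdlib.h>

static void *try_alloc(int fail)
{
	return fail ? NULL : malloc(64);
}

static int setup(void)
{
	void *stats, *procdir, *seqfile;

	stats = try_alloc(0);
	if (!stats)
		goto out;

	procdir = try_alloc(0);
	if (!procdir)
		goto out_free_stats;

	seqfile = try_alloc(1);		/* force the third step to fail */
	if (!seqfile)
		goto out_free_procdir;

	return 0;

out_free_procdir:			/* release in reverse order */
	free(procdir);
out_free_stats:
	free(stats);
out:
	return -1;
}

int main(void)
{
	printf("setup: %d\n", setup());	/* prints -1, nothing leaked */
	return 0;
}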
0 : 1024 * 1024); -+ -+ for (i = 0; i <= mptcp_reqsk_tk_htb.mask; i++) -+ INIT_HLIST_NULLS_HEAD(&mptcp_reqsk_tk_htb.hashtable[i], i); -+ -+ -+ spin_lock_init(&mptcp_tk_hashlock); -+ -+ if (register_pernet_subsys(&mptcp_pm_proc_ops)) -+ goto pernet_failed; -+ -+#if IS_ENABLED(CONFIG_IPV6) -+ if (mptcp_pm_v6_init()) -+ goto mptcp_pm_v6_failed; -+#endif -+ if (mptcp_pm_v4_init()) -+ goto mptcp_pm_v4_failed; -+ -+ mptcp_sysctl = register_net_sysctl(&init_net, "net/mptcp", mptcp_table); -+ if (!mptcp_sysctl) -+ goto register_sysctl_failed; -+ -+ if (mptcp_register_path_manager(&mptcp_pm_default)) -+ goto register_pm_failed; -+ -+ if (mptcp_register_scheduler(&mptcp_sched_default)) -+ goto register_sched_failed; -+ -+ pr_info("MPTCP: Unstable branch"); -+ -+ mptcp_init_failed = false; -+ -+ return; -+ -+register_sched_failed: -+ mptcp_unregister_path_manager(&mptcp_pm_default); -+register_pm_failed: -+ unregister_net_sysctl_table(mptcp_sysctl); -+register_sysctl_failed: -+ mptcp_pm_v4_undo(); -+mptcp_pm_v4_failed: -+#if IS_ENABLED(CONFIG_IPV6) -+ mptcp_pm_v6_undo(); -+mptcp_pm_v6_failed: -+#endif -+ unregister_pernet_subsys(&mptcp_pm_proc_ops); -+pernet_failed: -+ destroy_workqueue(mptcp_wq); -+alloc_workqueue_failed: -+ kmem_cache_destroy(mptcp_tw_cache); -+mptcp_tw_cache_failed: -+ kmem_cache_destroy(mptcp_cb_cache); -+mptcp_cb_cache_failed: -+ kmem_cache_destroy(mptcp_sock_cache); -+mptcp_sock_cache_failed: -+ mptcp_init_failed = true; -+} -diff --git a/net/mptcp/mptcp_ecf.c b/net/mptcp/mptcp_ecf.c -new file mode 100644 -index 000000000000..6b976b2b0c72 ---- /dev/null -+++ b/net/mptcp/mptcp_ecf.c -@@ -0,0 +1,195 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* MPTCP ECF Scheduler -+ * -+ * Algorithm Design: -+ * Yeon-sup Lim -+ * Don Towsley -+ * Erich M. Nahum -+ * Richard J. Gibbens -+ * -+ * Initial Implementation: -+ * Yeon-sup Lim -+ * -+ * Additional Authors: -+ * Daniel Weber -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ */ -+ -+#include -+#include -+ -+static unsigned int mptcp_ecf_r_beta __read_mostly = 4; /* beta = 1/r_beta = 0.25 */ -+module_param(mptcp_ecf_r_beta, int, 0644); -+MODULE_PARM_DESC(mptcp_ecf_r_beta, "beta for ECF"); -+ -+struct ecfsched_priv { -+ u32 last_rbuf_opti; -+}; -+ -+struct ecfsched_cb { -+ u32 switching_margin; /* this is "waiting" in algorithm description */ -+}; -+ -+static struct ecfsched_priv *ecfsched_get_priv(const struct tcp_sock *tp) -+{ -+ return (struct ecfsched_priv *)&tp->mptcp->mptcp_sched[0]; -+} -+ -+static struct ecfsched_cb *ecfsched_get_cb(const struct tcp_sock *tp) -+{ -+ return (struct ecfsched_cb *)&tp->mpcb->mptcp_sched[0]; -+} -+ -+/* This is the ECF scheduler. This function decides on which flow to send -+ * a given MSS. If all subflows are found to be busy or the currently best -+ * subflow is estimated to be slower than waiting for minsk, NULL is returned. -+ */ -+static struct sock *ecf_get_available_subflow(struct sock *meta_sk, -+ struct sk_buff *skb, -+ bool zero_wnd_test) -+{ -+ struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct sock *bestsk, *minsk = NULL; -+ struct tcp_sock *besttp; -+ struct mptcp_tcp_sock *mptcp; -+ struct ecfsched_cb *ecf_cb = ecfsched_get_cb(tcp_sk(meta_sk)); -+ u32 min_srtt = U32_MAX; -+ u32 sub_sndbuf = 0; -+ u32 sub_packets_out = 0; -+ -+ /* Answer data_fin on same subflow!!! 
*/ -+ if (meta_sk->sk_shutdown & RCV_SHUTDOWN && -+ skb && mptcp_is_data_fin(skb)) { -+ mptcp_for_each_sub(mpcb, mptcp) { -+ bestsk = mptcp_to_sock(mptcp); -+ -+ if (tcp_sk(bestsk)->mptcp->path_index == mpcb->dfin_path_index && -+ mptcp_is_available(bestsk, skb, zero_wnd_test)) -+ return bestsk; -+ } -+ } -+ -+ /* First, find the overall best (fastest) subflow */ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ bestsk = mptcp_to_sock(mptcp); -+ besttp = tcp_sk(bestsk); -+ -+ /* Set of states for which we are allowed to send data */ -+ if (!mptcp_sk_can_send(bestsk)) -+ continue; -+ -+ /* We do not send data on this subflow unless it is -+ * fully established, i.e. the 4th ack has been received. -+ */ -+ if (besttp->mptcp->pre_established) -+ continue; -+ -+ sub_sndbuf += bestsk->sk_wmem_queued; -+ sub_packets_out += besttp->packets_out; -+ -+ /* record minimal rtt */ -+ if (besttp->srtt_us < min_srtt) { -+ min_srtt = besttp->srtt_us; -+ minsk = bestsk; -+ } -+ } -+ -+ /* find the current best subflow according to the default scheduler */ -+ bestsk = get_available_subflow(meta_sk, skb, zero_wnd_test); -+ -+ /* if we decided to use a slower flow, we have the option of not using it at all */ -+ if (bestsk && minsk && bestsk != minsk) { -+ u32 mss = tcp_current_mss(bestsk); /* assuming equal MSS */ -+ u32 sndbuf_meta = meta_sk->sk_wmem_queued; -+ u32 sndbuf_minus = sub_sndbuf; -+ u32 sndbuf = 0; -+ -+ u32 cwnd_f = tcp_sk(minsk)->snd_cwnd; -+ u32 srtt_f = tcp_sk(minsk)->srtt_us >> 3; -+ u32 rttvar_f = tcp_sk(minsk)->rttvar_us >> 1; -+ -+ u32 cwnd_s = tcp_sk(bestsk)->snd_cwnd; -+ u32 srtt_s = tcp_sk(bestsk)->srtt_us >> 3; -+ u32 rttvar_s = tcp_sk(bestsk)->rttvar_us >> 1; -+ -+ u32 delta = max(rttvar_f, rttvar_s); -+ -+ u32 x_f; -+ u64 lhs, rhs; /* to avoid overflow, using u64 */ -+ -+ if (tcp_sk(meta_sk)->packets_out > sub_packets_out) -+ sndbuf_minus += (tcp_sk(meta_sk)->packets_out - sub_packets_out) * mss; -+ -+ if (sndbuf_meta > sndbuf_minus) -+ sndbuf = sndbuf_meta - sndbuf_minus; -+ -+ /* we have something to send. -+ * at least one time tx over fastest subflow is required -+ */ -+ x_f = sndbuf > cwnd_f * mss ? sndbuf : cwnd_f * mss; -+ lhs = srtt_f * (x_f + cwnd_f * mss); -+ rhs = cwnd_f * mss * (srtt_s + delta); -+ -+ if (mptcp_ecf_r_beta * lhs < mptcp_ecf_r_beta * rhs + ecf_cb->switching_margin * rhs) { -+ u32 x_s = sndbuf > cwnd_s * mss ? 
sndbuf : cwnd_s * mss; -+ u64 lhs_s = srtt_s * x_s; -+ u64 rhs_s = cwnd_s * mss * (2 * srtt_f + delta); -+ -+ if (lhs_s >= rhs_s) { -+ /* too slower than fastest */ -+ ecf_cb->switching_margin = 1; -+ return NULL; -+ } -+ } else { -+ /* use slower one */ -+ ecf_cb->switching_margin = 0; -+ } -+ } -+ -+ return bestsk; -+} -+ -+static void ecfsched_init(struct sock *sk) -+{ -+ struct ecfsched_priv *ecf_p = ecfsched_get_priv(tcp_sk(sk)); -+ struct ecfsched_cb *ecf_cb = ecfsched_get_cb(tcp_sk(mptcp_meta_sk(sk))); -+ -+ ecf_p->last_rbuf_opti = tcp_jiffies32; -+ ecf_cb->switching_margin = 0; -+} -+ -+struct mptcp_sched_ops mptcp_sched_ecf = { -+ .get_subflow = ecf_get_available_subflow, -+ .next_segment = mptcp_next_segment, -+ .init = ecfsched_init, -+ .name = "ecf", -+ .owner = THIS_MODULE, -+}; -+ -+static int __init ecf_register(void) -+{ -+ BUILD_BUG_ON(sizeof(struct ecfsched_priv) > MPTCP_SCHED_SIZE); -+ BUILD_BUG_ON(sizeof(struct ecfsched_cb) > MPTCP_SCHED_DATA_SIZE); -+ -+ if (mptcp_register_scheduler(&mptcp_sched_ecf)) -+ return -1; -+ -+ return 0; -+} -+ -+static void ecf_unregister(void) -+{ -+ mptcp_unregister_scheduler(&mptcp_sched_ecf); -+} -+ -+module_init(ecf_register); -+module_exit(ecf_unregister); -+ -+MODULE_AUTHOR("Yeon-sup Lim, Daniel Weber"); -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("ECF (Earliest Completion First) scheduler for MPTCP, based on default minimum RTT scheduler"); -+MODULE_VERSION("0.95"); -diff --git a/net/mptcp/mptcp_fullmesh.c b/net/mptcp/mptcp_fullmesh.c -new file mode 100644 -index 000000000000..5424960256e6 ---- /dev/null -+++ b/net/mptcp/mptcp_fullmesh.c -@@ -0,0 +1,1938 @@ -+#include -+#include -+ -+#include -+#include -+ -+#if IS_ENABLED(CONFIG_IPV6) -+#include -+#include -+#endif -+ -+enum { -+ MPTCP_EVENT_ADD = 1, -+ MPTCP_EVENT_DEL, -+ MPTCP_EVENT_MOD, -+}; -+ -+#define MPTCP_SUBFLOW_RETRY_DELAY 1000 -+ -+/* Max number of local or remote addresses we can store. -+ * When changing, see the bitfield below in fullmesh_rem4/6. -+ */ -+#define MPTCP_MAX_ADDR 8 -+ -+struct fullmesh_rem4 { -+ u8 rem4_id; -+ u8 bitfield; -+ u8 retry_bitfield; -+ __be16 port; -+ struct in_addr addr; -+}; -+ -+struct fullmesh_rem6 { -+ u8 rem6_id; -+ u8 bitfield; -+ u8 retry_bitfield; -+ __be16 port; -+ struct in6_addr addr; -+}; -+ -+struct mptcp_loc_addr { -+ struct mptcp_loc4 locaddr4[MPTCP_MAX_ADDR]; -+ u8 loc4_bits; -+ u8 next_v4_index; -+ -+ struct mptcp_loc6 locaddr6[MPTCP_MAX_ADDR]; -+ u8 loc6_bits; -+ u8 next_v6_index; -+ struct rcu_head rcu; -+}; -+ -+struct mptcp_addr_event { -+ struct list_head list; -+ unsigned short family; -+ u8 code:7, -+ low_prio:1; -+ int if_idx; -+ union inet_addr addr; -+}; -+ -+struct fullmesh_priv { -+ /* Worker struct for subflow establishment */ -+ struct work_struct subflow_work; -+ /* Delayed worker, when the routing-tables are not yet ready. */ -+ struct delayed_work subflow_retry_work; -+ -+ /* Remote addresses */ -+ struct fullmesh_rem4 remaddr4[MPTCP_MAX_ADDR]; -+ struct fullmesh_rem6 remaddr6[MPTCP_MAX_ADDR]; -+ -+ struct mptcp_cb *mpcb; -+ -+ u16 remove_addrs; /* Addresses to remove */ -+ u8 announced_addrs_v4; /* IPv4 Addresses we did announce */ -+ u8 announced_addrs_v6; /* IPv6 Addresses we did announce */ -+ -+ u8 add_addr; /* Are we sending an add_addr? */ -+ -+ u8 rem4_bits; -+ u8 rem6_bits; -+ -+ /* Have we established the additional subflows for primary pair? 
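[Editor's sketch] The heart of ecf_get_available_subflow() above is pure arithmetic: with f the fastest subflow (minimal srtt) and s the subflow the default scheduler picked, it estimates whether sending on s completes sooner than waiting one or more RTTs for f to drain its window. The standalone sketch below restates that decision rule only, not the scheduler around it; inputs are assumed pre-scaled the way the kernel passes them (srtt and rttvar in microseconds, windows in bytes), and the global models ecf_cb->switching_margin.

#include <stdint.h>
#include <stdio.h>

static int waiting;	/* models ecf_cb->switching_margin */

/* Return 1 to send on the slower subflow s, 0 to wait for the fastest f. */
static int ecf_use_slower(uint64_t sndbuf, uint64_t mss,
			  uint64_t cwnd_f, uint64_t srtt_f, uint64_t rttvar_f,
			  uint64_t cwnd_s, uint64_t srtt_s, uint64_t rttvar_s)
{
	const uint64_t r_beta = 4;	/* module parameter, beta = 1/4 */
	uint64_t delta = rttvar_f > rttvar_s ? rttvar_f : rttvar_s;
	uint64_t x_f = sndbuf > cwnd_f * mss ? sndbuf : cwnd_f * mss;
	uint64_t lhs = srtt_f * (x_f + cwnd_f * mss);
	uint64_t rhs = cwnd_f * mss * (srtt_s + delta);

	if (r_beta * lhs < r_beta * rhs + (uint64_t)waiting * rhs) {
		uint64_t x_s = sndbuf > cwnd_s * mss ? sndbuf : cwnd_s * mss;

		if (srtt_s * x_s >= cwnd_s * mss * (2 * srtt_f + delta)) {
			waiting = 1;	/* s is too slow: wait for f */
			return 0;
		}
	} else {
		waiting = 0;		/* s finishes early enough */
	}
	return 1;
}

int main(void)
{
	/* one window queued, fast path 20 ms RTT, slow path 200 ms RTT */
	printf("%s\n", ecf_use_slower(10 * 1448, 1448,
				      10, 20000, 2000,
				      10, 200000, 20000) ?
	       "send on slower subflow" : "wait for fastest subflow");
	return 0;
}

With these sample numbers the rule decides to wait for the fast subflow rather than commit a whole window to the slow one, which is exactly the case ECF was designed to catch.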
*/ -+ u8 first_pair:1; -+}; -+ -+struct mptcp_fm_ns { -+ struct mptcp_loc_addr __rcu *local; -+ spinlock_t local_lock; /* Protecting the above pointer */ -+ struct list_head events; -+ struct delayed_work address_worker; -+ -+ struct net *net; -+}; -+ -+static int num_subflows __read_mostly = 1; -+module_param(num_subflows, int, 0644); -+MODULE_PARM_DESC(num_subflows, "choose the number of subflows per pair of IP addresses of MPTCP connection"); -+ -+static int create_on_err __read_mostly; -+module_param(create_on_err, int, 0644); -+MODULE_PARM_DESC(create_on_err, "recreate the subflow upon a timeout"); -+ -+static struct mptcp_pm_ops full_mesh __read_mostly; -+ -+static void full_mesh_create_subflows(struct sock *meta_sk); -+ -+static struct mptcp_fm_ns *fm_get_ns(const struct net *net) -+{ -+ return (struct mptcp_fm_ns *)net->mptcp.path_managers[MPTCP_PM_FULLMESH]; -+} -+ -+static struct fullmesh_priv *fullmesh_get_priv(const struct mptcp_cb *mpcb) -+{ -+ return (struct fullmesh_priv *)&mpcb->mptcp_pm[0]; -+} -+ -+/* Find the first free index in the bitfield */ -+static int __mptcp_find_free_index(u8 bitfield, u8 base) -+{ -+ int i; -+ -+ /* There are anyways no free bits... */ -+ if (bitfield == 0xff) -+ goto exit; -+ -+ i = ffs(~(bitfield >> base)) - 1; -+ if (i < 0) -+ goto exit; -+ -+ /* No free bits when starting at base, try from 0 on */ -+ if (i + base >= sizeof(bitfield) * 8) -+ return __mptcp_find_free_index(bitfield, 0); -+ -+ return i + base; -+exit: -+ return -1; -+} -+ -+static int mptcp_find_free_index(u8 bitfield) -+{ -+ return __mptcp_find_free_index(bitfield, 0); -+} -+ -+static void mptcp_addv4_raddr(struct mptcp_cb *mpcb, -+ const struct in_addr *addr, -+ __be16 port, u8 id) -+{ -+ int i; -+ struct fullmesh_rem4 *rem4; -+ struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb); -+ -+ mptcp_for_each_bit_set(fmp->rem4_bits, i) { -+ rem4 = &fmp->remaddr4[i]; -+ -+ /* Address is already in the list --- continue */ -+ if (rem4->rem4_id == id && -+ rem4->addr.s_addr == addr->s_addr && rem4->port == port) -+ return; -+ -+ /* This may be the case, when the peer is behind a NAT. He is -+ * trying to JOIN, thus sending the JOIN with a certain ID. -+ * However the src_addr of the IP-packet has been changed. We -+ * update the addr in the list, because this is the address as -+ * OUR BOX sees it. -+ */ -+ if (rem4->rem4_id == id && rem4->addr.s_addr != addr->s_addr) { -+ /* update the address */ -+ mptcp_debug("%s: updating old addr:%pI4 to addr %pI4 with id:%d\n", -+ __func__, &rem4->addr.s_addr, -+ &addr->s_addr, id); -+ rem4->addr.s_addr = addr->s_addr; -+ rem4->port = port; -+ mpcb->list_rcvd = 1; -+ return; -+ } -+ } -+ -+ i = mptcp_find_free_index(fmp->rem4_bits); -+ /* Do we have already the maximum number of local/remote addresses? 
*/ -+ if (i < 0) { -+ mptcp_debug("%s: At max num of remote addresses: %d --- not adding address: %pI4\n", -+ __func__, MPTCP_MAX_ADDR, &addr->s_addr); -+ return; -+ } -+ -+ rem4 = &fmp->remaddr4[i]; -+ -+ /* Address is not known yet, store it */ -+ rem4->addr.s_addr = addr->s_addr; -+ rem4->port = port; -+ rem4->bitfield = 0; -+ rem4->retry_bitfield = 0; -+ rem4->rem4_id = id; -+ mpcb->list_rcvd = 1; -+ fmp->rem4_bits |= (1 << i); -+ -+ return; -+} -+ -+static void mptcp_addv6_raddr(struct mptcp_cb *mpcb, -+ const struct in6_addr *addr, -+ __be16 port, u8 id) -+{ -+ int i; -+ struct fullmesh_rem6 *rem6; -+ struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb); -+ -+ mptcp_for_each_bit_set(fmp->rem6_bits, i) { -+ rem6 = &fmp->remaddr6[i]; -+ -+ /* Address is already in the list --- continue */ -+ if (rem6->rem6_id == id && -+ ipv6_addr_equal(&rem6->addr, addr) && rem6->port == port) -+ return; -+ -+ /* This may be the case, when the peer is behind a NAT. He is -+ * trying to JOIN, thus sending the JOIN with a certain ID. -+ * However the src_addr of the IP-packet has been changed. We -+ * update the addr in the list, because this is the address as -+ * OUR BOX sees it. -+ */ -+ if (rem6->rem6_id == id) { -+ /* update the address */ -+ mptcp_debug("%s: updating old addr: %pI6 to addr %pI6 with id:%d\n", -+ __func__, &rem6->addr, addr, id); -+ rem6->addr = *addr; -+ rem6->port = port; -+ mpcb->list_rcvd = 1; -+ return; -+ } -+ } -+ -+ i = mptcp_find_free_index(fmp->rem6_bits); -+ /* Do we have already the maximum number of local/remote addresses? */ -+ if (i < 0) { -+ mptcp_debug("%s: At max num of remote addresses: %d --- not adding address: %pI6\n", -+ __func__, MPTCP_MAX_ADDR, addr); -+ return; -+ } -+ -+ rem6 = &fmp->remaddr6[i]; -+ -+ /* Address is not known yet, store it */ -+ rem6->addr = *addr; -+ rem6->port = port; -+ rem6->bitfield = 0; -+ rem6->retry_bitfield = 0; -+ rem6->rem6_id = id; -+ mpcb->list_rcvd = 1; -+ fmp->rem6_bits |= (1 << i); -+ -+ return; -+} -+ -+static void mptcp_v4_rem_raddress(struct mptcp_cb *mpcb, u8 id) -+{ -+ int i; -+ struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb); -+ -+ mptcp_for_each_bit_set(fmp->rem4_bits, i) { -+ if (fmp->remaddr4[i].rem4_id == id) { -+ /* remove address from bitfield */ -+ fmp->rem4_bits &= ~(1 << i); -+ -+ break; -+ } -+ } -+} -+ -+static void mptcp_v6_rem_raddress(const struct mptcp_cb *mpcb, u8 id) -+{ -+ int i; -+ struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb); -+ -+ mptcp_for_each_bit_set(fmp->rem6_bits, i) { -+ if (fmp->remaddr6[i].rem6_id == id) { -+ /* remove address from bitfield */ -+ fmp->rem6_bits &= ~(1 << i); -+ -+ break; -+ } -+ } -+} -+ -+/* Sets the bitfield of the remote-address field */ -+static void mptcp_v4_set_init_addr_bit(const struct mptcp_cb *mpcb, -+ const struct in_addr *addr, u8 index) -+{ -+ int i; -+ struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb); -+ -+ mptcp_for_each_bit_set(fmp->rem4_bits, i) { -+ if (fmp->remaddr4[i].addr.s_addr == addr->s_addr) { -+ fmp->remaddr4[i].bitfield |= (1 << index); -+ return; -+ } -+ } -+} -+ -+/* Sets the bitfield of the remote-address field */ -+static void mptcp_v6_set_init_addr_bit(struct mptcp_cb *mpcb, -+ const struct in6_addr *addr, u8 index) -+{ -+ int i; -+ struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb); -+ -+ mptcp_for_each_bit_set(fmp->rem6_bits, i) { -+ if (ipv6_addr_equal(&fmp->remaddr6[i].addr, addr)) { -+ fmp->remaddr6[i].bitfield |= (1 << index); -+ return; -+ } -+ } -+} -+ -+static void mptcp_set_init_addr_bit(struct mptcp_cb *mpcb, -+ 
const union inet_addr *addr, -+ sa_family_t family, u8 id) -+{ -+ if (family == AF_INET) -+ mptcp_v4_set_init_addr_bit(mpcb, &addr->in, id); -+ else -+ mptcp_v6_set_init_addr_bit(mpcb, &addr->in6, id); -+} -+ -+static void mptcp_v4_subflows(struct sock *meta_sk, -+ const struct mptcp_loc4 *loc, -+ struct mptcp_rem4 *rem) -+{ -+ int i; -+ -+ for (i = 1; i < num_subflows; i++) -+ mptcp_init4_subsockets(meta_sk, loc, rem); -+} -+ -+#if IS_ENABLED(CONFIG_IPV6) -+static void mptcp_v6_subflows(struct sock *meta_sk, -+ const struct mptcp_loc6 *loc, -+ struct mptcp_rem6 *rem) -+{ -+ int i; -+ -+ for (i = 1; i < num_subflows; i++) -+ mptcp_init6_subsockets(meta_sk, loc, rem); -+} -+#endif -+ -+static void retry_subflow_worker(struct work_struct *work) -+{ -+ struct delayed_work *delayed_work = container_of(work, -+ struct delayed_work, -+ work); -+ struct fullmesh_priv *fmp = container_of(delayed_work, -+ struct fullmesh_priv, -+ subflow_retry_work); -+ struct mptcp_cb *mpcb = fmp->mpcb; -+ struct sock *meta_sk = mpcb->meta_sk; -+ struct mptcp_loc_addr *mptcp_local; -+ struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk)); -+ int iter = 0, i; -+ -+ /* We need a local (stable) copy of the address-list. Really, it is not -+ * such a big deal, if the address-list is not 100% up-to-date. -+ */ -+ rcu_read_lock_bh(); -+ mptcp_local = rcu_dereference_bh(fm_ns->local); -+ mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local), GFP_ATOMIC); -+ rcu_read_unlock_bh(); -+ -+ if (!mptcp_local) -+ return; -+ -+next_subflow: -+ if (iter) { -+ release_sock(meta_sk); -+ mutex_unlock(&mpcb->mpcb_mutex); -+ -+ cond_resched(); -+ } -+ mutex_lock(&mpcb->mpcb_mutex); -+ lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING); -+ -+ if (!mptcp(tcp_sk(meta_sk))) -+ goto exit; -+ -+ iter++; -+ -+ if (sock_flag(meta_sk, SOCK_DEAD)) -+ goto exit; -+ -+ mptcp_for_each_bit_set(fmp->rem4_bits, i) { -+ struct fullmesh_rem4 *rem = &fmp->remaddr4[i]; -+ /* Do we need to retry establishing a subflow ? */ -+ if (rem->retry_bitfield) { -+ int i = mptcp_find_free_index(~rem->retry_bitfield); -+ struct mptcp_rem4 rem4; -+ -+ rem->bitfield |= (1 << i); -+ rem->retry_bitfield &= ~(1 << i); -+ -+ rem4.addr = rem->addr; -+ rem4.port = rem->port; -+ rem4.rem4_id = rem->rem4_id; -+ -+ mptcp_init4_subsockets(meta_sk, &mptcp_local->locaddr4[i], &rem4); -+ mptcp_v4_subflows(meta_sk, -+ &mptcp_local->locaddr4[i], -+ &rem4); -+ goto next_subflow; -+ } -+ } -+ -+#if IS_ENABLED(CONFIG_IPV6) -+ mptcp_for_each_bit_set(fmp->rem6_bits, i) { -+ struct fullmesh_rem6 *rem = &fmp->remaddr6[i]; -+ -+ /* Do we need to retry establishing a subflow ? */ -+ if (rem->retry_bitfield) { -+ int i = mptcp_find_free_index(~rem->retry_bitfield); -+ struct mptcp_rem6 rem6; -+ -+ rem->bitfield |= (1 << i); -+ rem->retry_bitfield &= ~(1 << i); -+ -+ rem6.addr = rem->addr; -+ rem6.port = rem->port; -+ rem6.rem6_id = rem->rem6_id; -+ -+ mptcp_init6_subsockets(meta_sk, &mptcp_local->locaddr6[i], &rem6); -+ mptcp_v6_subflows(meta_sk, -+ &mptcp_local->locaddr6[i], -+ &rem6); -+ goto next_subflow; -+ } -+ } -+#endif -+ -+exit: -+ kfree(mptcp_local); -+ release_sock(meta_sk); -+ mutex_unlock(&mpcb->mpcb_mutex); -+ mptcp_mpcb_put(mpcb); -+ sock_put(meta_sk); -+} -+ -+/** -+ * Create all new subflows, by doing calls to mptcp_initX_subsockets -+ * -+ * This function uses a goto next_subflow, to allow releasing the lock between -+ * new subflows and giving other processes a chance to do some work on the -+ * socket and potentially finishing the communication. 
-+ **/ -+static void create_subflow_worker(struct work_struct *work) -+{ -+ struct fullmesh_priv *fmp = container_of(work, struct fullmesh_priv, -+ subflow_work); -+ struct mptcp_cb *mpcb = fmp->mpcb; -+ struct sock *meta_sk = mpcb->meta_sk; -+ struct mptcp_loc_addr *mptcp_local; -+ const struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk)); -+ int iter = 0, retry = 0; -+ int i; -+ -+ /* We need a local (stable) copy of the address-list. Really, it is not -+ * such a big deal, if the address-list is not 100% up-to-date. -+ */ -+ rcu_read_lock_bh(); -+ mptcp_local = rcu_dereference_bh(fm_ns->local); -+ mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local), GFP_ATOMIC); -+ rcu_read_unlock_bh(); -+ -+ if (!mptcp_local) -+ return; -+ -+next_subflow: -+ if (iter) { -+ release_sock(meta_sk); -+ mutex_unlock(&mpcb->mpcb_mutex); -+ -+ cond_resched(); -+ } -+ mutex_lock(&mpcb->mpcb_mutex); -+ lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING); -+ -+ if (sock_flag(meta_sk, SOCK_DEAD) || !mptcp(tcp_sk(meta_sk))) -+ goto exit; -+ -+ if (mpcb->master_sk && -+ !tcp_sk(mpcb->master_sk)->mptcp->fully_established) -+ goto exit; -+ -+ /* Create the additional subflows for the first pair */ -+ if (fmp->first_pair == 0 && mpcb->master_sk) { -+ struct mptcp_loc4 loc; -+ struct mptcp_rem4 rem; -+ -+ loc.addr.s_addr = inet_sk(meta_sk)->inet_saddr; -+ loc.loc4_id = 0; -+ loc.low_prio = 0; -+ loc.if_idx = mpcb->master_sk->sk_bound_dev_if; -+ -+ rem.addr.s_addr = inet_sk(meta_sk)->inet_daddr; -+ rem.port = inet_sk(meta_sk)->inet_dport; -+ rem.rem4_id = 0; /* Default 0 */ -+ -+ mptcp_v4_subflows(meta_sk, &loc, &rem); -+ -+ fmp->first_pair = 1; -+ } -+ iter++; -+ -+ mptcp_for_each_bit_set(fmp->rem4_bits, i) { -+ struct fullmesh_rem4 *rem; -+ u8 remaining_bits; -+ -+ rem = &fmp->remaddr4[i]; -+ remaining_bits = ~(rem->bitfield) & mptcp_local->loc4_bits; -+ -+ /* Are there still combinations to handle? */ -+ if (remaining_bits) { -+ int i = mptcp_find_free_index(~remaining_bits); -+ struct mptcp_rem4 rem4; -+ -+ rem->bitfield |= (1 << i); -+ -+ rem4.addr = rem->addr; -+ rem4.port = rem->port; -+ rem4.rem4_id = rem->rem4_id; -+ -+ /* If a route is not yet available then retry once */ -+ if (mptcp_init4_subsockets(meta_sk, &mptcp_local->locaddr4[i], -+ &rem4) == -ENETUNREACH) -+ retry = rem->retry_bitfield |= (1 << i); -+ else -+ mptcp_v4_subflows(meta_sk, -+ &mptcp_local->locaddr4[i], -+ &rem4); -+ goto next_subflow; -+ } -+ } -+ -+#if IS_ENABLED(CONFIG_IPV6) -+ if (fmp->first_pair == 0 && mpcb->master_sk) { -+ struct mptcp_loc6 loc; -+ struct mptcp_rem6 rem; -+ -+ loc.addr = inet6_sk(meta_sk)->saddr; -+ loc.loc6_id = 0; -+ loc.low_prio = 0; -+ loc.if_idx = mpcb->master_sk->sk_bound_dev_if; -+ -+ rem.addr = meta_sk->sk_v6_daddr; -+ rem.port = inet_sk(meta_sk)->inet_dport; -+ rem.rem6_id = 0; /* Default 0 */ -+ -+ mptcp_v6_subflows(meta_sk, &loc, &rem); -+ -+ fmp->first_pair = 1; -+ } -+ mptcp_for_each_bit_set(fmp->rem6_bits, i) { -+ struct fullmesh_rem6 *rem; -+ u8 remaining_bits; -+ -+ rem = &fmp->remaddr6[i]; -+ remaining_bits = ~(rem->bitfield) & mptcp_local->loc6_bits; -+ -+ /* Are there still combinations to handle? 
*/ -+ if (remaining_bits) { -+ int i = mptcp_find_free_index(~remaining_bits); -+ struct mptcp_rem6 rem6; -+ -+ rem->bitfield |= (1 << i); -+ -+ rem6.addr = rem->addr; -+ rem6.port = rem->port; -+ rem6.rem6_id = rem->rem6_id; -+ -+ /* If a route is not yet available then retry once */ -+ if (mptcp_init6_subsockets(meta_sk, &mptcp_local->locaddr6[i], -+ &rem6) == -ENETUNREACH) -+ retry = rem->retry_bitfield |= (1 << i); -+ else -+ mptcp_v6_subflows(meta_sk, -+ &mptcp_local->locaddr6[i], -+ &rem6); -+ goto next_subflow; -+ } -+ } -+#endif -+ -+ if (retry && !delayed_work_pending(&fmp->subflow_retry_work)) { -+ sock_hold(meta_sk); -+ refcount_inc(&mpcb->mpcb_refcnt); -+ queue_delayed_work(mptcp_wq, &fmp->subflow_retry_work, -+ msecs_to_jiffies(MPTCP_SUBFLOW_RETRY_DELAY)); -+ } -+ -+exit: -+ kfree(mptcp_local); -+ release_sock(meta_sk); -+ mutex_unlock(&mpcb->mpcb_mutex); -+ mptcp_mpcb_put(mpcb); -+ sock_put(meta_sk); -+} -+ -+static void announce_remove_addr(u8 addr_id, struct sock *meta_sk) -+{ -+ struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb); -+ struct sock *sk = mptcp_select_ack_sock(meta_sk); -+ -+ fmp->remove_addrs |= (1 << addr_id); -+ mpcb->addr_signal = 1; -+ -+ if (sk) -+ tcp_send_ack(sk); -+} -+ -+static void update_addr_bitfields(struct sock *meta_sk, -+ const struct mptcp_loc_addr *mptcp_local) -+{ -+ struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb); -+ int i; -+ -+ /* The bits in announced_addrs_* always match with loc*_bits. So, a -+ * simple & operation unsets the correct bits, because these go from -+ * announced to non-announced -+ */ -+ fmp->announced_addrs_v4 &= mptcp_local->loc4_bits; -+ -+ mptcp_for_each_bit_set(fmp->rem4_bits, i) { -+ fmp->remaddr4[i].bitfield &= mptcp_local->loc4_bits; -+ fmp->remaddr4[i].retry_bitfield &= mptcp_local->loc4_bits; -+ } -+ -+ fmp->announced_addrs_v6 &= mptcp_local->loc6_bits; -+ -+ mptcp_for_each_bit_set(fmp->rem6_bits, i) { -+ fmp->remaddr6[i].bitfield &= mptcp_local->loc6_bits; -+ fmp->remaddr6[i].retry_bitfield &= mptcp_local->loc6_bits; -+ } -+} -+ -+static int mptcp_find_address(const struct mptcp_loc_addr *mptcp_local, -+ sa_family_t family, const union inet_addr *addr, -+ int if_idx) -+{ -+ int i; -+ u8 loc_bits; -+ bool found = false; -+ -+ if (family == AF_INET) -+ loc_bits = mptcp_local->loc4_bits; -+ else -+ loc_bits = mptcp_local->loc6_bits; -+ -+ mptcp_for_each_bit_set(loc_bits, i) { -+ if (family == AF_INET && -+ (!if_idx || mptcp_local->locaddr4[i].if_idx == if_idx) && -+ mptcp_local->locaddr4[i].addr.s_addr == addr->in.s_addr) { -+ found = true; -+ break; -+ } -+ if (family == AF_INET6 && -+ (!if_idx || mptcp_local->locaddr6[i].if_idx == if_idx) && -+ ipv6_addr_equal(&mptcp_local->locaddr6[i].addr, -+ &addr->in6)) { -+ found = true; -+ break; -+ } -+ } -+ -+ if (!found) -+ return -1; -+ -+ return i; -+} -+ -+static int mptcp_find_address_transp(const struct mptcp_loc_addr *mptcp_local, -+ sa_family_t family, int if_idx) -+{ -+ bool found = false; -+ u8 loc_bits; -+ int i; -+ -+ if (family == AF_INET) -+ loc_bits = mptcp_local->loc4_bits; -+ else -+ loc_bits = mptcp_local->loc6_bits; -+ -+ mptcp_for_each_bit_set(loc_bits, i) { -+ if (family == AF_INET && -+ (!if_idx || mptcp_local->locaddr4[i].if_idx == if_idx)) { -+ found = true; -+ break; -+ } -+ if (family == AF_INET6 && -+ (!if_idx || mptcp_local->locaddr6[i].if_idx == if_idx)) { -+ found = true; -+ break; -+ } -+ } -+ -+ if (!found) -+ return -1; -+ -+ return i; 
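[Editor's sketch] Both subflow workers above take a private snapshot of fm_ns->local (kmemdup() under rcu_read_lock_bh()) rather than holding a lock while creating subflows, and the writer side in mptcp_address_worker() below never edits the list in place: it copies, modifies the copy, publishes it with rcu_assign_pointer() and reclaims the old version with kfree_rcu(). A single-writer userspace model of that copy-update shape using C11 atomics; there is no grace period here, whereas real RCU defers the free until all readers have moved on.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct addr_list {
	int naddrs;
	int addrs[8];
};

static _Atomic(struct addr_list *) local;

static void publish_add(int addr)
{
	struct addr_list *old = atomic_load(&local);
	struct addr_list *new = malloc(sizeof(*new));

	memcpy(new, old, sizeof(*new));		/* copy ...            */
	new->addrs[new->naddrs++] = addr;	/* ... update the copy */
	atomic_store(&local, new);		/* ... publish it      */
	free(old);	/* the kernel defers this with kfree_rcu() */
}

int main(void)
{
	struct addr_list *snap;

	local = calloc(1, sizeof(struct addr_list));
	publish_add(42);

	snap = atomic_load(&local);	/* reader-side snapshot */
	printf("naddrs=%d first=%d\n", snap->naddrs, snap->addrs[0]);
	return 0;
}

The payoff is the same in both settings: readers never see a half-updated address list, and the slow subflow-creation work proceeds against a stable copy.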
-+} -+ -+static void mptcp_address_worker(struct work_struct *work) -+{ -+ const struct delayed_work *delayed_work = container_of(work, -+ struct delayed_work, -+ work); -+ struct mptcp_fm_ns *fm_ns = container_of(delayed_work, -+ struct mptcp_fm_ns, -+ address_worker); -+ struct net *net = fm_ns->net; -+ struct mptcp_addr_event *event = NULL; -+ struct mptcp_loc_addr *mptcp_local, *old; -+ int i, id = -1; /* id is used in the socket-code on a delete-event */ -+ bool success; /* Used to indicate if we succeeded handling the event */ -+ -+next_event: -+ success = false; -+ kfree(event); -+ -+ /* First, let's dequeue an event from our event-list */ -+ rcu_read_lock_bh(); -+ spin_lock(&fm_ns->local_lock); -+ -+ event = list_first_entry_or_null(&fm_ns->events, -+ struct mptcp_addr_event, list); -+ if (!event) { -+ spin_unlock(&fm_ns->local_lock); -+ rcu_read_unlock_bh(); -+ return; -+ } -+ -+ list_del(&event->list); -+ -+ mptcp_local = rcu_dereference_bh(fm_ns->local); -+ -+ if (event->code == MPTCP_EVENT_DEL) { -+ id = mptcp_find_address(mptcp_local, event->family, -+ &event->addr, event->if_idx); -+ -+ /* Not in the list - so we don't care */ -+ if (id < 0) { -+ mptcp_debug("%s could not find id\n", __func__); -+ goto duno; -+ } -+ -+ old = mptcp_local; -+ mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local), -+ GFP_ATOMIC); -+ if (!mptcp_local) -+ goto duno; -+ -+ if (event->family == AF_INET) -+ mptcp_local->loc4_bits &= ~(1 << id); -+ else -+ mptcp_local->loc6_bits &= ~(1 << id); -+ -+ rcu_assign_pointer(fm_ns->local, mptcp_local); -+ kfree_rcu(old, rcu); -+ } else { -+ int i = mptcp_find_address(mptcp_local, event->family, -+ &event->addr, event->if_idx); -+ int j = i; -+ -+ if (j < 0) { -+ /* Not in the list, so we have to find an empty slot */ -+ if (event->family == AF_INET) -+ i = __mptcp_find_free_index(mptcp_local->loc4_bits, -+ mptcp_local->next_v4_index); -+ if (event->family == AF_INET6) -+ i = __mptcp_find_free_index(mptcp_local->loc6_bits, -+ mptcp_local->next_v6_index); -+ -+ if (i < 0) { -+ mptcp_debug("%s no more space\n", __func__); -+ goto duno; -+ } -+ -+ /* It might have been a MOD-event. 
*/ -+ event->code = MPTCP_EVENT_ADD; -+ } else { -+ /* Let's check if anything changes */ -+ if (event->family == AF_INET && -+ event->low_prio == mptcp_local->locaddr4[i].low_prio) -+ goto duno; -+ -+ if (event->family == AF_INET6 && -+ event->low_prio == mptcp_local->locaddr6[i].low_prio) -+ goto duno; -+ } -+ -+ old = mptcp_local; -+ mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local), -+ GFP_ATOMIC); -+ if (!mptcp_local) -+ goto duno; -+ -+ if (event->family == AF_INET) { -+ mptcp_local->locaddr4[i].addr.s_addr = event->addr.in.s_addr; -+ mptcp_local->locaddr4[i].loc4_id = i + 1; -+ mptcp_local->locaddr4[i].low_prio = event->low_prio; -+ mptcp_local->locaddr4[i].if_idx = event->if_idx; -+ -+ mptcp_debug("%s updated IP %pI4 on ifidx %u prio %u id %u\n", -+ __func__, &event->addr.in.s_addr, -+ event->if_idx, event->low_prio, i + 1); -+ } else { -+ mptcp_local->locaddr6[i].addr = event->addr.in6; -+ mptcp_local->locaddr6[i].loc6_id = i + MPTCP_MAX_ADDR; -+ mptcp_local->locaddr6[i].low_prio = event->low_prio; -+ mptcp_local->locaddr6[i].if_idx = event->if_idx; -+ -+ mptcp_debug("%s updated IP %pI6 on ifidx %u prio %u id %u\n", -+ __func__, &event->addr.in6, -+ event->if_idx, event->low_prio, i + MPTCP_MAX_ADDR); -+ } -+ -+ if (j < 0) { -+ if (event->family == AF_INET) { -+ mptcp_local->loc4_bits |= (1 << i); -+ mptcp_local->next_v4_index = i + 1; -+ } else { -+ mptcp_local->loc6_bits |= (1 << i); -+ mptcp_local->next_v6_index = i + 1; -+ } -+ } -+ -+ rcu_assign_pointer(fm_ns->local, mptcp_local); -+ kfree_rcu(old, rcu); -+ } -+ success = true; -+ -+duno: -+ spin_unlock(&fm_ns->local_lock); -+ rcu_read_unlock_bh(); -+ -+ if (!success) -+ goto next_event; -+ -+ /* Now we iterate over the MPTCP-sockets and apply the event. */ -+ for (i = 0; i <= mptcp_tk_htable.mask; i++) { -+ const struct hlist_nulls_node *node; -+ struct tcp_sock *meta_tp; -+ -+ rcu_read_lock_bh(); -+ hlist_nulls_for_each_entry_rcu(meta_tp, node, -+ &mptcp_tk_htable.hashtable[i], -+ tk_table) { -+ struct sock *meta_sk = (struct sock *)meta_tp, *sk; -+ bool meta_v4 = meta_sk->sk_family == AF_INET; -+ struct mptcp_cb *mpcb; -+ -+ if (sock_net(meta_sk) != net) -+ continue; -+ -+ if (meta_v4) { -+ /* skip IPv6 events if meta is IPv4 */ -+ if (event->family == AF_INET6) -+ continue; -+ } else if (event->family == AF_INET && meta_sk->sk_ipv6only) { -+ /* skip IPv4 events if IPV6_V6ONLY is set */ -+ continue; -+ } -+ -+ if (unlikely(!refcount_inc_not_zero(&meta_sk->sk_refcnt))) -+ continue; -+ -+ bh_lock_sock(meta_sk); -+ -+ mpcb = meta_tp->mpcb; -+ if (!mpcb) -+ goto next; -+ -+ if (!mptcp(meta_tp) || !is_meta_sk(meta_sk) || -+ mptcp_in_infinite_mapping_weak(mpcb)) -+ goto next; -+ -+ /* May be that the pm has changed in-between */ -+ if (mpcb->pm_ops != &full_mesh) -+ goto next; -+ -+ if (sock_owned_by_user(meta_sk)) { -+ if (!test_and_set_bit(MPTCP_PATH_MANAGER_DEFERRED, -+ &meta_sk->sk_tsq_flags)) -+ sock_hold(meta_sk); -+ -+ goto next; -+ } -+ -+ if (event->code == MPTCP_EVENT_ADD) { -+ struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb); -+ -+ fmp->add_addr++; -+ mpcb->addr_signal = 1; -+ -+ sk = mptcp_select_ack_sock(meta_sk); -+ if (sk) -+ tcp_send_ack(sk); -+ -+ full_mesh_create_subflows(meta_sk); -+ } -+ -+ if (event->code == MPTCP_EVENT_DEL) { -+ struct mptcp_tcp_sock *mptcp; -+ struct mptcp_loc_addr *mptcp_local; -+ struct hlist_node *tmp; -+ bool found = false; -+ -+ mptcp_local = rcu_dereference_bh(fm_ns->local); -+ -+ /* In any case, we need to update our bitfields */ -+ if (id >= 0) -+ 
update_addr_bitfields(meta_sk, mptcp_local); -+ -+ /* Look for the socket and remove him */ -+ mptcp_for_each_sub_safe(mpcb, mptcp, tmp) { -+ struct sock *sk = mptcp_to_sock(mptcp); -+ -+ if ((event->family == AF_INET6 && -+ (sk->sk_family == AF_INET || -+ mptcp_v6_is_v4_mapped(sk))) || -+ (event->family == AF_INET && -+ (sk->sk_family == AF_INET6 && -+ !mptcp_v6_is_v4_mapped(sk)))) -+ continue; -+ -+ if (event->family == AF_INET && -+ (sk->sk_family == AF_INET || -+ mptcp_v6_is_v4_mapped(sk)) && -+ inet_sk(sk)->inet_saddr != event->addr.in.s_addr) -+ continue; -+ -+ if (event->family == AF_INET6 && -+ sk->sk_family == AF_INET6 && -+ !ipv6_addr_equal(&inet6_sk(sk)->saddr, &event->addr.in6)) -+ continue; -+ -+ /* Reinject, so that pf = 1 and so we -+ * won't select this one as the -+ * ack-sock. -+ */ -+ mptcp_reinject_data(sk, 0); -+ -+ /* We announce the removal of this id */ -+ announce_remove_addr(tcp_sk(sk)->mptcp->loc_id, meta_sk); -+ -+ mptcp_sub_force_close(sk); -+ found = true; -+ } -+ -+ if (found) -+ goto next; -+ -+ /* The id may have been given by the event, -+ * matching on a local address. And it may not -+ * have matched on one of the above sockets, -+ * because the client never created a subflow. -+ * So, we have to finally remove it here. -+ */ -+ if (id >= 0) { -+ u8 loc_id = id -+ + (event->family == AF_INET ? 1 : MPTCP_MAX_ADDR); -+ announce_remove_addr(loc_id, meta_sk); -+ } -+ } -+ -+ if (event->code == MPTCP_EVENT_MOD) { -+ struct mptcp_tcp_sock *mptcp; -+ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ struct sock *sk = mptcp_to_sock(mptcp); -+ struct tcp_sock *tp = tcp_sk(sk); -+ if (event->family == AF_INET && -+ (sk->sk_family == AF_INET || -+ mptcp_v6_is_v4_mapped(sk)) && -+ inet_sk(sk)->inet_saddr == event->addr.in.s_addr) { -+ if (event->low_prio != tp->mptcp->low_prio) { -+ tp->mptcp->send_mp_prio = 1; -+ tp->mptcp->low_prio = event->low_prio; -+ -+ tcp_send_ack(sk); -+ } -+ } -+ -+ if (event->family == AF_INET6 && -+ sk->sk_family == AF_INET6 && -+ !ipv6_addr_equal(&inet6_sk(sk)->saddr, &event->addr.in6)) { -+ if (event->low_prio != tp->mptcp->low_prio) { -+ tp->mptcp->send_mp_prio = 1; -+ tp->mptcp->low_prio = event->low_prio; -+ -+ tcp_send_ack(sk); -+ } -+ } -+ } -+ } -+next: -+ bh_unlock_sock(meta_sk); -+ sock_put(meta_sk); -+ } -+ rcu_read_unlock_bh(); -+ } -+ goto next_event; -+} -+ -+static struct mptcp_addr_event *lookup_similar_event(const struct net *net, -+ const struct mptcp_addr_event *event) -+{ -+ struct mptcp_addr_event *eventq; -+ struct mptcp_fm_ns *fm_ns = fm_get_ns(net); -+ -+ list_for_each_entry(eventq, &fm_ns->events, list) { -+ if (eventq->family != event->family) -+ continue; -+ if (eventq->if_idx != event->if_idx) -+ continue; -+ if (event->family == AF_INET) { -+ if (eventq->addr.in.s_addr == event->addr.in.s_addr) -+ return eventq; -+ } else { -+ if (ipv6_addr_equal(&eventq->addr.in6, &event->addr.in6)) -+ return eventq; -+ } -+ } -+ return NULL; -+} -+ -+/* We already hold the net-namespace MPTCP-lock */ -+static void add_pm_event(struct net *net, const struct mptcp_addr_event *event) -+{ -+ struct mptcp_addr_event *eventq = lookup_similar_event(net, event); -+ struct mptcp_fm_ns *fm_ns = fm_get_ns(net); -+ -+ if (eventq) { -+ switch (event->code) { -+ case MPTCP_EVENT_DEL: -+ mptcp_debug("%s del old_code %u\n", __func__, eventq->code); -+ list_del(&eventq->list); -+ kfree(eventq); -+ break; -+ case MPTCP_EVENT_ADD: -+ mptcp_debug("%s add old_code %u\n", __func__, eventq->code); -+ eventq->low_prio = event->low_prio; -+ eventq->code = 
MPTCP_EVENT_ADD; -+ return; -+ case MPTCP_EVENT_MOD: -+ mptcp_debug("%s mod old_code %u\n", __func__, eventq->code); -+ eventq->low_prio = event->low_prio; -+ eventq->code = MPTCP_EVENT_MOD; -+ return; -+ } -+ } -+ -+ /* OK, we have to add the new address to the wait queue */ -+ eventq = kmemdup(event, sizeof(struct mptcp_addr_event), GFP_ATOMIC); -+ if (!eventq) -+ return; -+ -+ list_add_tail(&eventq->list, &fm_ns->events); -+ -+ /* Create work-queue */ -+ if (!delayed_work_pending(&fm_ns->address_worker)) -+ queue_delayed_work(mptcp_wq, &fm_ns->address_worker, -+ msecs_to_jiffies(500)); -+} -+ -+static void addr4_event_handler(const struct in_ifaddr *ifa, unsigned long event, -+ struct net *net) -+{ -+ const struct net_device *netdev = ifa->ifa_dev->dev; -+ struct mptcp_fm_ns *fm_ns = fm_get_ns(net); -+ struct mptcp_addr_event mpevent; -+ -+ if (ifa->ifa_scope > RT_SCOPE_LINK || -+ ipv4_is_loopback(ifa->ifa_local)) -+ return; -+ -+ spin_lock_bh(&fm_ns->local_lock); -+ -+ mpevent.family = AF_INET; -+ mpevent.addr.in.s_addr = ifa->ifa_local; -+ mpevent.low_prio = (netdev->flags & IFF_MPBACKUP) ? 1 : 0; -+ mpevent.if_idx = netdev->ifindex; -+ -+ if (event == NETDEV_DOWN || !netif_running(netdev) || -+ (netdev->flags & IFF_NOMULTIPATH) || !(netdev->flags & IFF_UP)) -+ mpevent.code = MPTCP_EVENT_DEL; -+ else if (event == NETDEV_UP) -+ mpevent.code = MPTCP_EVENT_ADD; -+ else if (event == NETDEV_CHANGE) -+ mpevent.code = MPTCP_EVENT_MOD; -+ -+ mptcp_debug("%s created event for %pI4, code %u prio %u idx %u\n", __func__, -+ &ifa->ifa_local, mpevent.code, mpevent.low_prio, mpevent.if_idx); -+ add_pm_event(net, &mpevent); -+ -+ spin_unlock_bh(&fm_ns->local_lock); -+ return; -+} -+ -+/* React on IPv4-addr add/rem-events */ -+static int mptcp_pm_inetaddr_event(struct notifier_block *this, -+ unsigned long event, void *ptr) -+{ -+ const struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; -+ struct net *net = dev_net(ifa->ifa_dev->dev); -+ -+ if (!(event == NETDEV_UP || event == NETDEV_DOWN || -+ event == NETDEV_CHANGE)) -+ return NOTIFY_DONE; -+ -+ addr4_event_handler(ifa, event, net); -+ -+ return NOTIFY_DONE; -+} -+ -+static struct notifier_block mptcp_pm_inetaddr_notifier = { -+ .notifier_call = mptcp_pm_inetaddr_event, -+}; -+ -+#if IS_ENABLED(CONFIG_IPV6) -+ -+static int inet6_addr_event(struct notifier_block *this, unsigned long event, -+ void *ptr); -+ -+static void addr6_event_handler(const struct inet6_ifaddr *ifa, unsigned long event, -+ struct net *net) -+{ -+ const struct net_device *netdev = ifa->idev->dev; -+ int addr_type = ipv6_addr_type(&ifa->addr); -+ struct mptcp_fm_ns *fm_ns = fm_get_ns(net); -+ struct mptcp_addr_event mpevent; -+ -+ if (ifa->scope > RT_SCOPE_LINK || -+ addr_type == IPV6_ADDR_ANY || -+ (addr_type & IPV6_ADDR_LOOPBACK) || -+ (addr_type & IPV6_ADDR_LINKLOCAL)) -+ return; -+ -+ spin_lock_bh(&fm_ns->local_lock); -+ -+ mpevent.family = AF_INET6; -+ mpevent.addr.in6 = ifa->addr; -+ mpevent.low_prio = (netdev->flags & IFF_MPBACKUP) ? 
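[Editor's sketch] add_pm_event() above coalesces interface events so the 500 ms delayed worker sees at most one pending entry per address: a newer ADD or MOD updates the queued entry in place, while a DEL drops the stale entry and queues itself. A toy model of those merge rules, with a fixed-size table instead of the kernel's list and addresses as plain integers:

#include <stdio.h>

enum code { EV_ADD = 1, EV_DEL, EV_MOD };

struct event {
	unsigned int addr;
	enum code code;
	int queued;
};

static struct event queue[16];

static struct event *lookup_similar(unsigned int addr)
{
	for (int i = 0; i < 16; i++)
		if (queue[i].queued && queue[i].addr == addr)
			return &queue[i];
	return NULL;
}

static void add_pm_event(unsigned int addr, enum code code)
{
	struct event *e = lookup_similar(addr);

	if (e) {
		if (code == EV_DEL) {
			e->queued = 0;	/* cancel the stale pending entry */
		} else {
			e->code = code;	/* merge ADD/MOD into it in place */
			return;
		}
	}
	for (int i = 0; i < 16; i++)
		if (!queue[i].queued) {
			queue[i] = (struct event){ addr, code, 1 };
			return;
		}
}

int main(void)
{
	add_pm_event(0x0a000001, EV_ADD);
	add_pm_event(0x0a000001, EV_MOD);	/* merged into pending entry */
	add_pm_event(0x0a000001, EV_DEL);	/* replaces it with a DEL */
	printf("pending code=%d (EV_DEL=%d)\n",
	       lookup_similar(0x0a000001)->code, EV_DEL);
	return 0;
}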
1 : 0; -+ mpevent.if_idx = netdev->ifindex; -+ -+ if (event == NETDEV_DOWN || !netif_running(netdev) || -+ (netdev->flags & IFF_NOMULTIPATH) || !(netdev->flags & IFF_UP)) -+ mpevent.code = MPTCP_EVENT_DEL; -+ else if (event == NETDEV_UP) -+ mpevent.code = MPTCP_EVENT_ADD; -+ else if (event == NETDEV_CHANGE) -+ mpevent.code = MPTCP_EVENT_MOD; -+ -+ mptcp_debug("%s created event for %pI6, code %u prio %u idx %u\n", __func__, -+ &ifa->addr, mpevent.code, mpevent.low_prio, mpevent.if_idx); -+ add_pm_event(net, &mpevent); -+ -+ spin_unlock_bh(&fm_ns->local_lock); -+ return; -+} -+ -+/* React on IPv6-addr add/rem-events */ -+static int inet6_addr_event(struct notifier_block *this, unsigned long event, -+ void *ptr) -+{ -+ struct inet6_ifaddr *ifa6 = (struct inet6_ifaddr *)ptr; -+ struct net *net = dev_net(ifa6->idev->dev); -+ -+ if (!(event == NETDEV_UP || event == NETDEV_DOWN || -+ event == NETDEV_CHANGE)) -+ return NOTIFY_DONE; -+ -+ addr6_event_handler(ifa6, event, net); -+ -+ return NOTIFY_DONE; -+} -+ -+static struct notifier_block inet6_addr_notifier = { -+ .notifier_call = inet6_addr_event, -+}; -+ -+#endif -+ -+/* React on ifup/down-events */ -+static int netdev_event(struct notifier_block *this, unsigned long event, -+ void *ptr) -+{ -+ const struct net_device *dev = netdev_notifier_info_to_dev(ptr); -+ struct in_device *in_dev; -+#if IS_ENABLED(CONFIG_IPV6) -+ struct inet6_dev *in6_dev; -+#endif -+ -+ if (!(event == NETDEV_UP || event == NETDEV_DOWN || -+ event == NETDEV_CHANGE)) -+ return NOTIFY_DONE; -+ -+ rcu_read_lock(); -+ in_dev = __in_dev_get_rtnl(dev); -+ -+ if (in_dev) { -+ struct in_ifaddr *ifa; -+ -+ in_dev_for_each_ifa_rcu(ifa, in_dev) { -+ mptcp_pm_inetaddr_event(NULL, event, ifa); -+ } -+ } -+ -+#if IS_ENABLED(CONFIG_IPV6) -+ in6_dev = __in6_dev_get(dev); -+ -+ if (in6_dev) { -+ struct inet6_ifaddr *ifa6; -+ list_for_each_entry(ifa6, &in6_dev->addr_list, if_list) -+ inet6_addr_event(NULL, event, ifa6); -+ } -+#endif -+ -+ rcu_read_unlock(); -+ return NOTIFY_DONE; -+} -+ -+static struct notifier_block mptcp_pm_netdev_notifier = { -+ .notifier_call = netdev_event, -+}; -+ -+static void full_mesh_add_raddr(struct mptcp_cb *mpcb, -+ const union inet_addr *addr, -+ sa_family_t family, __be16 port, u8 id) -+{ -+ if (family == AF_INET) -+ mptcp_addv4_raddr(mpcb, &addr->in, port, id); -+ else -+ mptcp_addv6_raddr(mpcb, &addr->in6, port, id); -+} -+ -+static void full_mesh_new_session(const struct sock *meta_sk) -+{ -+ struct mptcp_loc_addr *mptcp_local; -+ struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb); -+ const struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk)); -+ struct tcp_sock *master_tp = tcp_sk(mpcb->master_sk); -+ int i, index, if_idx = 0; -+ union inet_addr saddr, daddr; -+ sa_family_t family = AF_INET; -+ bool meta_v4 = meta_sk->sk_family == AF_INET; -+ -+ /* Init local variables necessary for the rest */ -+ if (meta_sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(meta_sk)) { -+ saddr.ip = inet_sk(meta_sk)->inet_saddr; -+ daddr.ip = inet_sk(meta_sk)->inet_daddr; -+ if_idx = mpcb->master_sk->sk_bound_dev_if; -+ family = AF_INET; -+#if IS_ENABLED(CONFIG_IPV6) -+ } else { -+ saddr.in6 = inet6_sk(meta_sk)->saddr; -+ daddr.in6 = meta_sk->sk_v6_daddr; -+ if_idx = mpcb->master_sk->sk_bound_dev_if; -+ family = AF_INET6; -+#endif -+ } -+ -+ if (inet_sk(meta_sk)->transparent) -+ if_idx = inet_sk(meta_sk)->rx_dst_ifindex; -+ -+ rcu_read_lock_bh(); -+ mptcp_local = rcu_dereference(fm_ns->local); -+ -+ if 
(inet_sk(meta_sk)->transparent) -+ index = mptcp_find_address_transp(mptcp_local, family, if_idx); -+ else -+ index = mptcp_find_address(mptcp_local, family, &saddr, if_idx); -+ if (index < 0) -+ goto fallback; -+ -+ if (family == AF_INET) -+ master_tp->mptcp->low_prio = mptcp_local->locaddr4[index].low_prio; -+ else -+ master_tp->mptcp->low_prio = mptcp_local->locaddr6[index].low_prio; -+ master_tp->mptcp->send_mp_prio = master_tp->mptcp->low_prio; -+ -+ full_mesh_add_raddr(mpcb, &daddr, family, 0, 0); -+ mptcp_set_init_addr_bit(mpcb, &daddr, family, index); -+ -+ /* Initialize workqueue-struct */ -+ INIT_WORK(&fmp->subflow_work, create_subflow_worker); -+ INIT_DELAYED_WORK(&fmp->subflow_retry_work, retry_subflow_worker); -+ fmp->mpcb = mpcb; -+ -+ if (!meta_v4 && meta_sk->sk_ipv6only) -+ goto skip_ipv4; -+ -+ /* Look for the address among the local addresses */ -+ mptcp_for_each_bit_set(mptcp_local->loc4_bits, i) { -+ __be32 ifa_address = mptcp_local->locaddr4[i].addr.s_addr; -+ -+ /* We do not need to announce the initial subflow's address again */ -+ if (family == AF_INET && -+ (!if_idx || mptcp_local->locaddr4[i].if_idx == if_idx) && -+ saddr.ip == ifa_address) -+ continue; -+ -+ fmp->add_addr++; -+ mpcb->addr_signal = 1; -+ } -+ -+skip_ipv4: -+#if IS_ENABLED(CONFIG_IPV6) -+ /* skip IPv6 addresses if meta-socket is IPv4 */ -+ if (meta_v4) -+ goto skip_ipv6; -+ -+ mptcp_for_each_bit_set(mptcp_local->loc6_bits, i) { -+ const struct in6_addr *ifa6 = &mptcp_local->locaddr6[i].addr; -+ -+ /* We do not need to announce the initial subflow's address again */ -+ if (family == AF_INET6 && -+ (!if_idx || mptcp_local->locaddr6[i].if_idx == if_idx) && -+ ipv6_addr_equal(&saddr.in6, ifa6)) -+ continue; -+ -+ fmp->add_addr++; -+ mpcb->addr_signal = 1; -+ } -+ -+skip_ipv6: -+#endif -+ -+ rcu_read_unlock_bh(); -+ -+ if (family == AF_INET) -+ fmp->announced_addrs_v4 |= (1 << index); -+ else -+ fmp->announced_addrs_v6 |= (1 << index); -+ -+ for (i = fmp->add_addr; i && fmp->add_addr; i--) -+ tcp_send_ack(mpcb->master_sk); -+ -+ if (master_tp->mptcp->send_mp_prio) -+ tcp_send_ack(mpcb->master_sk); -+ -+ return; -+ -+fallback: -+ rcu_read_unlock_bh(); -+ mptcp_fallback_default(mpcb); -+ return; -+} -+ -+static void full_mesh_create_subflows(struct sock *meta_sk) -+{ -+ struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb); -+ -+ if (mptcp_in_infinite_mapping_weak(mpcb) || -+ mpcb->server_side || sock_flag(meta_sk, SOCK_DEAD)) -+ return; -+ -+ if (mpcb->master_sk && -+ !tcp_sk(mpcb->master_sk)->mptcp->fully_established) -+ return; -+ -+ if (!work_pending(&fmp->subflow_work)) { -+ sock_hold(meta_sk); -+ refcount_inc(&mpcb->mpcb_refcnt); -+ queue_work(mptcp_wq, &fmp->subflow_work); -+ } -+} -+ -+/* Called upon release_sock, if the socket was owned by the user during -+ * a path-management event. 
-+ */ -+static void full_mesh_release_sock(struct sock *meta_sk) -+{ -+ struct mptcp_loc_addr *mptcp_local; -+ struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb); -+ const struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk)); -+ bool meta_v4 = meta_sk->sk_family == AF_INET; -+ struct mptcp_tcp_sock *mptcp; -+ struct hlist_node *tmp; -+ int i; -+ -+ rcu_read_lock_bh(); -+ mptcp_local = rcu_dereference(fm_ns->local); -+ -+ if (!meta_v4 && meta_sk->sk_ipv6only) -+ goto skip_ipv4; -+ -+ /* First, detect modifications or additions */ -+ mptcp_for_each_bit_set(mptcp_local->loc4_bits, i) { -+ struct in_addr ifa = mptcp_local->locaddr4[i].addr; -+ bool found = false; -+ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ struct sock *sk = mptcp_to_sock(mptcp); -+ struct tcp_sock *tp = tcp_sk(sk); -+ -+ if (sk->sk_family == AF_INET6 && -+ !mptcp_v6_is_v4_mapped(sk)) -+ continue; -+ -+ if (inet_sk(sk)->inet_saddr != ifa.s_addr) -+ continue; -+ -+ found = true; -+ -+ if (mptcp_local->locaddr4[i].low_prio != tp->mptcp->low_prio) { -+ tp->mptcp->send_mp_prio = 1; -+ tp->mptcp->low_prio = mptcp_local->locaddr4[i].low_prio; -+ -+ tcp_send_ack(sk); -+ } -+ } -+ -+ if (!found) { -+ struct sock *sk; -+ -+ fmp->add_addr++; -+ mpcb->addr_signal = 1; -+ -+ sk = mptcp_select_ack_sock(meta_sk); -+ if (sk) -+ tcp_send_ack(sk); -+ full_mesh_create_subflows(meta_sk); -+ } -+ } -+ -+skip_ipv4: -+#if IS_ENABLED(CONFIG_IPV6) -+ /* skip IPv6 addresses if meta-socket is IPv4 */ -+ if (meta_v4) -+ goto removal; -+ -+ mptcp_for_each_bit_set(mptcp_local->loc6_bits, i) { -+ struct in6_addr ifa = mptcp_local->locaddr6[i].addr; -+ bool found = false; -+ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ struct sock *sk = mptcp_to_sock(mptcp); -+ struct tcp_sock *tp = tcp_sk(sk); -+ -+ if (sk->sk_family == AF_INET || -+ mptcp_v6_is_v4_mapped(sk)) -+ continue; -+ -+ if (!ipv6_addr_equal(&inet6_sk(sk)->saddr, &ifa)) -+ continue; -+ -+ found = true; -+ -+ if (mptcp_local->locaddr6[i].low_prio != tp->mptcp->low_prio) { -+ tp->mptcp->send_mp_prio = 1; -+ tp->mptcp->low_prio = mptcp_local->locaddr6[i].low_prio; -+ -+ tcp_send_ack(sk); -+ } -+ } -+ -+ if (!found) { -+ struct sock *sk; -+ -+ fmp->add_addr++; -+ mpcb->addr_signal = 1; -+ -+ sk = mptcp_select_ack_sock(meta_sk); -+ if (sk) -+ tcp_send_ack(sk); -+ full_mesh_create_subflows(meta_sk); -+ } -+ } -+ -+removal: -+#endif -+ -+ /* Now, detect address-removals */ -+ mptcp_for_each_sub_safe(mpcb, mptcp, tmp) { -+ struct sock *sk = mptcp_to_sock(mptcp); -+ bool shall_remove = true; -+ -+ if (sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(sk)) { -+ mptcp_for_each_bit_set(mptcp_local->loc4_bits, i) { -+ if (inet_sk(sk)->inet_saddr == mptcp_local->locaddr4[i].addr.s_addr) { -+ shall_remove = false; -+ break; -+ } -+ } -+ } else { -+ mptcp_for_each_bit_set(mptcp_local->loc6_bits, i) { -+ if (ipv6_addr_equal(&inet6_sk(sk)->saddr, &mptcp_local->locaddr6[i].addr)) { -+ shall_remove = false; -+ break; -+ } -+ } -+ } -+ -+ if (shall_remove) { -+ /* Reinject, so that pf = 1 and so we -+ * won't select this one as the -+ * ack-sock. -+ */ -+ mptcp_reinject_data(sk, 0); -+ -+ announce_remove_addr(tcp_sk(sk)->mptcp->loc_id, -+ meta_sk); -+ -+ mptcp_sub_force_close(sk); -+ } -+ } -+ -+ /* Just call it optimistically. 
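[Editor's sketch] full_mesh_release_sock() above reconciles the live subflows with the current local address list in three passes: signal priority changes via MP_PRIO, announce and build subflows for newly appeared addresses, and close subflows whose source address has vanished. A compact sketch of the removal pass only; the address tables here are made up and addresses are plain host integers.

#include <stdio.h>

/* Current local addresses (stand-ins for mptcp_local->locaddr4[]) */
static const unsigned int local_addrs[] = { 0x0a000001, 0xc0a80101 };

static int addr_still_local(unsigned int saddr)
{
	for (size_t i = 0; i < sizeof(local_addrs) / sizeof(local_addrs[0]); i++)
		if (local_addrs[i] == saddr)
			return 1;
	return 0;
}

int main(void)
{
	/* Source addresses of two live subflows; the second one vanished */
	const unsigned int subflow_saddr[] = { 0x0a000001, 0xac100001 };

	for (size_t i = 0; i < 2; i++)
		if (!addr_still_local(subflow_saddr[i]))
			printf("close subflow bound to %#x\n",
			       subflow_saddr[i]);
	return 0;
}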
It actually cannot do any harm */ -+ update_addr_bitfields(meta_sk, mptcp_local); -+ -+ rcu_read_unlock_bh(); -+} -+ -+static int full_mesh_get_local_id(const struct sock *meta_sk, -+ sa_family_t family, union inet_addr *addr, -+ bool *low_prio) -+{ -+ struct mptcp_loc_addr *mptcp_local; -+ const struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk)); -+ int index, id = -1; -+ -+ /* Handle the backup-flows */ -+ rcu_read_lock_bh(); -+ mptcp_local = rcu_dereference(fm_ns->local); -+ -+ index = mptcp_find_address(mptcp_local, family, addr, 0); -+ -+ if (index != -1) { -+ if (family == AF_INET) { -+ id = mptcp_local->locaddr4[index].loc4_id; -+ *low_prio = mptcp_local->locaddr4[index].low_prio; -+ } else { -+ id = mptcp_local->locaddr6[index].loc6_id; -+ *low_prio = mptcp_local->locaddr6[index].low_prio; -+ } -+ } -+ -+ -+ rcu_read_unlock_bh(); -+ -+ return id; -+} -+ -+static void full_mesh_addr_signal(struct sock *sk, unsigned *size, -+ struct tcp_out_options *opts, -+ struct sk_buff *skb) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ struct mptcp_cb *mpcb = tp->mpcb; -+ struct sock *meta_sk = mpcb->meta_sk; -+ struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb); -+ struct mptcp_loc_addr *mptcp_local; -+ struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(sk)); -+ int remove_addr_len; -+ u8 unannouncedv4 = 0, unannouncedv6 = 0; -+ bool meta_v4 = meta_sk->sk_family == AF_INET; -+ -+ mpcb->addr_signal = 0; -+ -+ if (likely(!fmp->add_addr)) -+ goto remove_addr; -+ -+ rcu_read_lock_bh(); -+ mptcp_local = rcu_dereference(fm_ns->local); -+ -+ if (!meta_v4 && meta_sk->sk_ipv6only) -+ goto skip_ipv4; -+ -+ /* IPv4 */ -+ unannouncedv4 = (~fmp->announced_addrs_v4) & mptcp_local->loc4_bits; -+ if (unannouncedv4 && -+ ((mpcb->mptcp_ver == MPTCP_VERSION_0 && -+ MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_ADD_ADDR4_ALIGN) || -+ (mpcb->mptcp_ver >= MPTCP_VERSION_1 && -+ MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_ADD_ADDR4_ALIGN_VER1))) { -+ int ind = mptcp_find_free_index(~unannouncedv4); -+ -+ opts->options |= OPTION_MPTCP; -+ opts->mptcp_options |= OPTION_ADD_ADDR; -+ opts->add_addr4.addr_id = mptcp_local->locaddr4[ind].loc4_id; -+ opts->add_addr4.addr = mptcp_local->locaddr4[ind].addr; -+ opts->add_addr_v4 = 1; -+ if (mpcb->mptcp_ver >= MPTCP_VERSION_1) { -+ u8 mptcp_hash_mac[SHA256_DIGEST_SIZE]; -+ -+ mptcp_hmac(mpcb->mptcp_ver, (u8 *)&mpcb->mptcp_loc_key, -+ (u8 *)&mpcb->mptcp_rem_key, mptcp_hash_mac, 2, -+ 1, (u8 *)&mptcp_local->locaddr4[ind].loc4_id, -+ 4, (u8 *)&opts->add_addr4.addr.s_addr); -+ opts->add_addr4.trunc_mac = *(u64 *)&mptcp_hash_mac[SHA256_DIGEST_SIZE - sizeof(u64)]; -+ } -+ -+ if (skb) { -+ fmp->announced_addrs_v4 |= (1 << ind); -+ fmp->add_addr--; -+ } -+ -+ if (mpcb->mptcp_ver < MPTCP_VERSION_1) -+ *size += MPTCP_SUB_LEN_ADD_ADDR4_ALIGN; -+ if (mpcb->mptcp_ver >= MPTCP_VERSION_1) -+ *size += MPTCP_SUB_LEN_ADD_ADDR4_ALIGN_VER1; -+ -+ goto skip_ipv6; -+ } -+ -+ if (meta_v4) -+ goto skip_ipv6; -+skip_ipv4: -+ /* IPv6 */ -+ unannouncedv6 = (~fmp->announced_addrs_v6) & mptcp_local->loc6_bits; -+ if (unannouncedv6 && -+ ((mpcb->mptcp_ver == MPTCP_VERSION_0 && -+ MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_ADD_ADDR6_ALIGN) || -+ (mpcb->mptcp_ver >= MPTCP_VERSION_1 && -+ MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_ADD_ADDR6_ALIGN_VER1))) { -+ int ind = mptcp_find_free_index(~unannouncedv6); -+ -+ opts->options |= OPTION_MPTCP; -+ opts->mptcp_options |= OPTION_ADD_ADDR; -+ opts->add_addr6.addr_id = mptcp_local->locaddr6[ind].loc6_id; -+ opts->add_addr6.addr = 
mptcp_local->locaddr6[ind].addr; -+ opts->add_addr_v6 = 1; -+ if (mpcb->mptcp_ver >= MPTCP_VERSION_1) { -+ u8 mptcp_hash_mac[SHA256_DIGEST_SIZE]; -+ -+ mptcp_hmac(mpcb->mptcp_ver, (u8 *)&mpcb->mptcp_loc_key, -+ (u8 *)&mpcb->mptcp_rem_key, mptcp_hash_mac, 2, -+ 1, (u8 *)&mptcp_local->locaddr6[ind].loc6_id, -+ 16, (u8 *)&opts->add_addr6.addr.s6_addr); -+ opts->add_addr6.trunc_mac = *(u64 *)&mptcp_hash_mac[SHA256_DIGEST_SIZE - sizeof(u64)]; -+ } -+ -+ if (skb) { -+ fmp->announced_addrs_v6 |= (1 << ind); -+ fmp->add_addr--; -+ } -+ if (mpcb->mptcp_ver < MPTCP_VERSION_1) -+ *size += MPTCP_SUB_LEN_ADD_ADDR6_ALIGN; -+ if (mpcb->mptcp_ver >= MPTCP_VERSION_1) -+ *size += MPTCP_SUB_LEN_ADD_ADDR6_ALIGN_VER1; -+ } -+ -+skip_ipv6: -+ rcu_read_unlock_bh(); -+ -+ if (!unannouncedv4 && !unannouncedv6 && skb) -+ fmp->add_addr--; -+ -+remove_addr: -+ if (likely(!fmp->remove_addrs)) -+ goto exit; -+ -+ remove_addr_len = mptcp_sub_len_remove_addr_align(fmp->remove_addrs); -+ if (MAX_TCP_OPTION_SPACE - *size < remove_addr_len) -+ goto exit; -+ -+ opts->options |= OPTION_MPTCP; -+ opts->mptcp_options |= OPTION_REMOVE_ADDR; -+ opts->remove_addrs = fmp->remove_addrs; -+ *size += remove_addr_len; -+ if (skb) -+ fmp->remove_addrs = 0; -+ -+exit: -+ mpcb->addr_signal = !!(fmp->add_addr || fmp->remove_addrs); -+} -+ -+static void full_mesh_rem_raddr(struct mptcp_cb *mpcb, u8 rem_id) -+{ -+ mptcp_v4_rem_raddress(mpcb, rem_id); -+ mptcp_v6_rem_raddress(mpcb, rem_id); -+} -+ -+static void full_mesh_delete_subflow(struct sock *sk) -+{ -+ struct fullmesh_priv *fmp = fullmesh_get_priv(tcp_sk(sk)->mpcb); -+ struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(sk)); -+ struct sock *meta_sk = mptcp_meta_sk(sk); -+ struct mptcp_loc_addr *mptcp_local; -+ int index, i; -+ -+ if (!create_on_err) -+ return; -+ -+ if (!mptcp_can_new_subflow(meta_sk)) -+ return; -+ -+ rcu_read_lock_bh(); -+ mptcp_local = rcu_dereference_bh(fm_ns->local); -+ -+ if (sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(sk)) { -+ union inet_addr saddr; -+ -+ saddr.ip = inet_sk(sk)->inet_saddr; -+ index = mptcp_find_address(mptcp_local, AF_INET, &saddr, -+ sk->sk_bound_dev_if); -+ if (index < 0) -+ goto out; -+ -+ mptcp_for_each_bit_set(fmp->rem4_bits, i) { -+ struct fullmesh_rem4 *rem4 = &fmp->remaddr4[i]; -+ -+ if (rem4->addr.s_addr != sk->sk_daddr) -+ continue; -+ -+ if (rem4->port && rem4->port != inet_sk(sk)->inet_dport) -+ continue; -+ -+ rem4->bitfield &= ~(1 << index); -+ } -+#if IS_ENABLED(CONFIG_IPV6) -+ } else { -+ union inet_addr saddr; -+ -+ saddr.in6 = inet6_sk(sk)->saddr; -+ index = mptcp_find_address(mptcp_local, AF_INET6, &saddr, -+ sk->sk_bound_dev_if); -+ if (index < 0) -+ goto out; -+ -+ mptcp_for_each_bit_set(fmp->rem6_bits, i) { -+ struct fullmesh_rem6 *rem6 = &fmp->remaddr6[i]; -+ -+ if (!ipv6_addr_equal(&rem6->addr, &sk->sk_v6_daddr)) -+ continue; -+ -+ if (rem6->port && rem6->port != inet_sk(sk)->inet_dport) -+ continue; -+ -+ rem6->bitfield &= ~(1 << index); -+ } -+#endif -+ } -+ -+out: -+ rcu_read_unlock_bh(); -+ -+ /* re-schedule the creation of failed subflows */ -+ if (tcp_sk(sk)->mptcp->sk_err == ETIMEDOUT || sk->sk_err == ETIMEDOUT) -+ full_mesh_create_subflows(meta_sk); -+} -+ -+/* Output /proc/net/mptcp_fullmesh */ -+static int mptcp_fm_seq_show(struct seq_file *seq, void *v) -+{ -+ const struct net *net = seq->private; -+ struct mptcp_loc_addr *mptcp_local; -+ const struct mptcp_fm_ns *fm_ns = fm_get_ns(net); -+ int i; -+ -+ seq_printf(seq, "Index, Address-ID, Backup, IP-address, if-idx\n"); -+ -+ rcu_read_lock_bh(); -+ 
mptcp_local = rcu_dereference(fm_ns->local); -+ -+ seq_printf(seq, "IPv4, next v4-index: %u\n", mptcp_local->next_v4_index); -+ -+ mptcp_for_each_bit_set(mptcp_local->loc4_bits, i) { -+ struct mptcp_loc4 *loc4 = &mptcp_local->locaddr4[i]; -+ -+ seq_printf(seq, "%u, %u, %u, %pI4, %u\n", i, loc4->loc4_id, -+ loc4->low_prio, &loc4->addr, loc4->if_idx); -+ } -+ -+ seq_printf(seq, "IPv6, next v6-index: %u\n", mptcp_local->next_v6_index); -+ -+ mptcp_for_each_bit_set(mptcp_local->loc6_bits, i) { -+ struct mptcp_loc6 *loc6 = &mptcp_local->locaddr6[i]; -+ -+ seq_printf(seq, "%u, %u, %u, %pI6, %u\n", i, loc6->loc6_id, -+ loc6->low_prio, &loc6->addr, loc6->if_idx); -+ } -+ rcu_read_unlock_bh(); -+ -+ return 0; -+} -+ -+static int mptcp_fm_init_net(struct net *net) -+{ -+ struct mptcp_loc_addr *mptcp_local; -+ struct mptcp_fm_ns *fm_ns; -+ int err = 0; -+ -+ fm_ns = kzalloc(sizeof(*fm_ns), GFP_KERNEL); -+ if (!fm_ns) -+ return -ENOBUFS; -+ -+ mptcp_local = kzalloc(sizeof(*mptcp_local), GFP_KERNEL); -+ if (!mptcp_local) { -+ err = -ENOBUFS; -+ goto err_mptcp_local; -+ } -+ -+ if (!proc_create_net_single("mptcp_fullmesh", S_IRUGO, net->proc_net, -+ mptcp_fm_seq_show, NULL)) { -+ err = -ENOMEM; -+ goto err_seq_fops; -+ } -+ -+ mptcp_local->next_v4_index = 1; -+ -+ rcu_assign_pointer(fm_ns->local, mptcp_local); -+ INIT_DELAYED_WORK(&fm_ns->address_worker, mptcp_address_worker); -+ INIT_LIST_HEAD(&fm_ns->events); -+ spin_lock_init(&fm_ns->local_lock); -+ fm_ns->net = net; -+ net->mptcp.path_managers[MPTCP_PM_FULLMESH] = fm_ns; -+ -+ return 0; -+err_seq_fops: -+ kfree(mptcp_local); -+err_mptcp_local: -+ kfree(fm_ns); -+ return err; -+} -+ -+static void mptcp_fm_exit_net(struct net *net) -+{ -+ struct mptcp_addr_event *eventq, *tmp; -+ struct mptcp_fm_ns *fm_ns; -+ struct mptcp_loc_addr *mptcp_local; -+ -+ fm_ns = fm_get_ns(net); -+ cancel_delayed_work_sync(&fm_ns->address_worker); -+ -+ rcu_read_lock_bh(); -+ -+ mptcp_local = rcu_dereference_bh(fm_ns->local); -+ kfree_rcu(mptcp_local, rcu); -+ -+ spin_lock(&fm_ns->local_lock); -+ list_for_each_entry_safe(eventq, tmp, &fm_ns->events, list) { -+ list_del(&eventq->list); -+ kfree(eventq); -+ } -+ spin_unlock(&fm_ns->local_lock); -+ -+ rcu_read_unlock_bh(); -+ -+ remove_proc_entry("mptcp_fullmesh", net->proc_net); -+ -+ kfree(fm_ns); -+} -+ -+static struct pernet_operations full_mesh_net_ops = { -+ .init = mptcp_fm_init_net, -+ .exit = mptcp_fm_exit_net, -+}; -+ -+static struct mptcp_pm_ops full_mesh __read_mostly = { -+ .new_session = full_mesh_new_session, -+ .release_sock = full_mesh_release_sock, -+ .fully_established = full_mesh_create_subflows, -+ .new_remote_address = full_mesh_create_subflows, -+ .get_local_id = full_mesh_get_local_id, -+ .addr_signal = full_mesh_addr_signal, -+ .add_raddr = full_mesh_add_raddr, -+ .rem_raddr = full_mesh_rem_raddr, -+ .delete_subflow = full_mesh_delete_subflow, -+ .name = "fullmesh", -+ .owner = THIS_MODULE, -+}; -+ -+/* General initialization of MPTCP_PM */ -+static int __init full_mesh_register(void) -+{ -+ int ret; -+ -+ BUILD_BUG_ON(sizeof(struct fullmesh_priv) > MPTCP_PM_SIZE); -+ -+ ret = register_pernet_subsys(&full_mesh_net_ops); -+ if (ret) -+ goto out; -+ -+ ret = register_inetaddr_notifier(&mptcp_pm_inetaddr_notifier); -+ if (ret) -+ goto err_reg_inetaddr; -+ ret = register_netdevice_notifier(&mptcp_pm_netdev_notifier); -+ if (ret) -+ goto err_reg_netdev; -+ -+#if IS_ENABLED(CONFIG_IPV6) -+ ret = register_inet6addr_notifier(&inet6_addr_notifier); -+ if (ret) -+ goto err_reg_inet6addr; -+#endif -+ -+ ret = 
mptcp_register_path_manager(&full_mesh); -+ if (ret) -+ goto err_reg_pm; -+ -+out: -+ return ret; -+ -+ -+err_reg_pm: -+#if IS_ENABLED(CONFIG_IPV6) -+ unregister_inet6addr_notifier(&inet6_addr_notifier); -+err_reg_inet6addr: -+#endif -+ unregister_netdevice_notifier(&mptcp_pm_netdev_notifier); -+err_reg_netdev: -+ unregister_inetaddr_notifier(&mptcp_pm_inetaddr_notifier); -+err_reg_inetaddr: -+ unregister_pernet_subsys(&full_mesh_net_ops); -+ goto out; -+} -+ -+static void full_mesh_unregister(void) -+{ -+#if IS_ENABLED(CONFIG_IPV6) -+ unregister_inet6addr_notifier(&inet6_addr_notifier); -+#endif -+ unregister_netdevice_notifier(&mptcp_pm_netdev_notifier); -+ unregister_inetaddr_notifier(&mptcp_pm_inetaddr_notifier); -+ unregister_pernet_subsys(&full_mesh_net_ops); -+ mptcp_unregister_path_manager(&full_mesh); -+} -+ -+module_init(full_mesh_register); -+module_exit(full_mesh_unregister); -+ -+MODULE_AUTHOR("Christoph Paasch"); -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("Full-Mesh MPTCP"); -+MODULE_VERSION("0.88"); -diff --git a/net/mptcp/mptcp_input.c b/net/mptcp/mptcp_input.c -new file mode 100644 -index 000000000000..ae9cc7209613 ---- /dev/null -+++ b/net/mptcp/mptcp_input.c -@@ -0,0 +1,2546 @@ -+/* -+ * MPTCP implementation - Sending side -+ * -+ * Initial Design & Implementation: -+ * Sébastien Barré -+ * -+ * Current Maintainer & Author: -+ * Christoph Paasch -+ * -+ * Additional authors: -+ * Jaakko Korkeaniemi -+ * Gregory Detal -+ * Fabien Duchêne -+ * Andreas Seelinger -+ * Lavkesh Lahngir -+ * Andreas Ripke -+ * Vlad Dogaru -+ * Octavian Purdila -+ * John Ronan -+ * Catalin Nicutar -+ * Brandon Heller -+ * -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ */ -+ -+#include -+ -+#include -+#include -+#include -+ -+#include -+ -+/* is seq1 < seq2 ? */ -+static inline bool before64(const u64 seq1, const u64 seq2) -+{ -+ return (s64)(seq1 - seq2) < 0; -+} -+ -+/* is seq1 > seq2 ? */ -+#define after64(seq1, seq2) before64(seq2, seq1) -+ -+static inline void mptcp_become_fully_estab(struct sock *sk) -+{ -+ tcp_sk(sk)->mptcp->fully_established = 1; -+ -+ if (is_master_tp(tcp_sk(sk)) && -+ tcp_sk(sk)->mpcb->pm_ops->fully_established) -+ tcp_sk(sk)->mpcb->pm_ops->fully_established(mptcp_meta_sk(sk)); -+} -+ -+/* Similar to tcp_tso_acked without any memory accounting */ -+static inline int mptcp_tso_acked_reinject(const struct sock *meta_sk, -+ struct sk_buff *skb) -+{ -+ const struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ u32 packets_acked, len, delta_truesize; -+ -+ BUG_ON(!after(TCP_SKB_CB(skb)->end_seq, meta_tp->snd_una)); -+ -+ packets_acked = tcp_skb_pcount(skb); -+ -+ if (skb_unclone(skb, GFP_ATOMIC)) -+ return 0; -+ -+ len = meta_tp->snd_una - TCP_SKB_CB(skb)->seq; -+ delta_truesize = __pskb_trim_head(skb, len); -+ -+ TCP_SKB_CB(skb)->seq += len; -+ skb->ip_summed = CHECKSUM_PARTIAL; -+ -+ if (delta_truesize) -+ skb->truesize -= delta_truesize; -+ -+ /* Any change of skb->len requires recalculation of tso factor. 
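The before64()/after64() helpers defined above rely on the classic serial-number trick: the unsigned difference, reinterpreted as signed, gives the right ordering even across a 64-bit wrap. A userspace restatement with a wrap-around check:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <assert.h>

/* Signed view of the unsigned difference orders sequence numbers
 * correctly across wrap-around.
 */
static inline bool before64(uint64_t seq1, uint64_t seq2)
{
	return (int64_t)(seq1 - seq2) < 0;
}
#define after64(seq1, seq2) before64(seq2, seq1)

int main(void)
{
	assert(before64(1, 2));
	/* UINT64_MAX is "just before" 0 once the space wraps. */
	assert(before64(UINT64_MAX, 0));
	assert(after64(0, UINT64_MAX));
	puts("sequence comparisons hold across wrap-around");
	return 0;
}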
*/ -+ if (tcp_skb_pcount(skb) > 1) -+ tcp_set_skb_tso_segs(skb, tcp_skb_mss(skb)); -+ packets_acked -= tcp_skb_pcount(skb); -+ -+ if (packets_acked) { -+ BUG_ON(tcp_skb_pcount(skb) == 0); -+ BUG_ON(!before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)); -+ } -+ -+ return packets_acked; -+} -+ -+/* Cleans the meta-socket retransmission queue and the reinject-queue. */ -+static void mptcp_clean_rtx_queue(struct sock *meta_sk, u32 prior_snd_una) -+{ -+ struct sk_buff *skb, *tmp, *next; -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ struct mptcp_cb *mpcb = meta_tp->mpcb; -+ bool fully_acked = true; -+ bool acked = false; -+ u32 acked_pcount; -+ -+ for (skb = skb_rb_first(&meta_sk->tcp_rtx_queue); skb; skb = next) { -+ struct tcp_skb_cb *scb = TCP_SKB_CB(skb); -+ -+ tcp_ack_tstamp(meta_sk, skb, prior_snd_una); -+ -+ if (after(scb->end_seq, meta_tp->snd_una)) { -+ if (tcp_skb_pcount(skb) == 1 || -+ !after(meta_tp->snd_una, scb->seq)) -+ break; -+ -+ acked_pcount = tcp_tso_acked(meta_sk, skb); -+ if (!acked_pcount) -+ break; -+ fully_acked = false; -+ } else { -+ acked_pcount = tcp_skb_pcount(skb); -+ } -+ -+ acked = true; -+ meta_tp->packets_out -= acked_pcount; -+ meta_tp->retrans_stamp = 0; -+ -+ if (!fully_acked) -+ break; -+ -+ next = skb_rb_next(skb); -+ -+ if (mptcp_is_data_fin(skb)) { -+ struct mptcp_tcp_sock *mptcp; -+ struct hlist_node *tmp; -+ -+ /* DATA_FIN has been acknowledged - now we can close -+ * the subflows -+ */ -+ mptcp_for_each_sub_safe(mpcb, mptcp, tmp) { -+ struct sock *sk_it = mptcp_to_sock(mptcp); -+ unsigned long delay = 0; -+ -+ /* If we are the passive closer, don't trigger -+ * subflow-fin until the subflow has been finned -+ * by the peer - thus we add a delay. -+ */ -+ if (mpcb->passive_close && -+ sk_it->sk_state == TCP_ESTABLISHED) -+ delay = inet_csk(sk_it)->icsk_rto << 3; -+ -+ mptcp_sub_close(sk_it, delay); -+ } -+ } -+ tcp_rtx_queue_unlink_and_free(skb, meta_sk); -+ } -+ /* Remove acknowledged data from the reinject queue */ -+ skb_queue_walk_safe(&mpcb->reinject_queue, skb, tmp) { -+ if (before(meta_tp->snd_una, TCP_SKB_CB(skb)->end_seq)) { -+ if (tcp_skb_pcount(skb) == 1 || -+ !after(meta_tp->snd_una, TCP_SKB_CB(skb)->seq)) -+ break; -+ -+ mptcp_tso_acked_reinject(meta_sk, skb); -+ break; -+ } -+ -+ __skb_unlink(skb, &mpcb->reinject_queue); -+ __kfree_skb(skb); -+ } -+ -+ if (likely(between(meta_tp->snd_up, prior_snd_una, meta_tp->snd_una))) -+ meta_tp->snd_up = meta_tp->snd_una; -+ -+ if (acked) { -+ tcp_rearm_rto(meta_sk); -+ /* Normally this is done in tcp_try_undo_loss - but MPTCP -+ * does not call this function. -+ */ -+ inet_csk(meta_sk)->icsk_retransmits = 0; -+ } -+} -+ -+/* Inspired by tcp_rcv_state_process */ -+/* Returns 0 if processing the packet can continue -+ * -1 if connection was closed with an active reset -+ * 1 if connection was closed and processing should stop. 
-+ */ -+static int mptcp_rcv_state_process(struct sock *meta_sk, struct sock *sk, -+ const struct sk_buff *skb, u32 data_seq, -+ u16 data_len) -+{ -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk), *tp = tcp_sk(sk); -+ const struct tcphdr *th = tcp_hdr(skb); -+ -+ /* State-machine handling if FIN has been enqueued and he has -+ * been acked (snd_una == write_seq) - it's important that this -+ * here is after sk_wmem_free_skb because otherwise -+ * sk_forward_alloc is wrong upon inet_csk_destroy_sock() -+ */ -+ switch (meta_sk->sk_state) { -+ case TCP_FIN_WAIT1: { -+ struct dst_entry *dst; -+ int tmo; -+ -+ if (meta_tp->snd_una != meta_tp->write_seq) -+ break; -+ -+ tcp_set_state(meta_sk, TCP_FIN_WAIT2); -+ meta_sk->sk_shutdown |= SEND_SHUTDOWN; -+ -+ dst = __sk_dst_get(sk); -+ if (dst) -+ dst_confirm(dst); -+ -+ if (!sock_flag(meta_sk, SOCK_DEAD)) { -+ /* Wake up lingering close() */ -+ meta_sk->sk_state_change(meta_sk); -+ break; -+ } -+ -+ if (meta_tp->linger2 < 0 || -+ (data_len && -+ after(data_seq + data_len - (mptcp_is_data_fin2(skb, tp) ? 1 : 0), -+ meta_tp->rcv_nxt))) { -+ mptcp_send_active_reset(meta_sk, GFP_ATOMIC); -+ tcp_done(meta_sk); -+ NET_INC_STATS(sock_net(meta_sk), LINUX_MIB_TCPABORTONDATA); -+ return -1; -+ } -+ -+ tmo = tcp_fin_time(meta_sk); -+ if (tmo > TCP_TIMEWAIT_LEN) { -+ inet_csk_reset_keepalive_timer(meta_sk, tmo - TCP_TIMEWAIT_LEN); -+ } else if (mptcp_is_data_fin2(skb, tp) || sock_owned_by_user(meta_sk)) { -+ /* Bad case. We could lose such FIN otherwise. -+ * It is not a big problem, but it looks confusing -+ * and not so rare event. We still can lose it now, -+ * if it spins in bh_lock_sock(), but it is really -+ * marginal case. -+ */ -+ inet_csk_reset_keepalive_timer(meta_sk, tmo); -+ } else { -+ meta_tp->ops->time_wait(meta_sk, TCP_FIN_WAIT2, tmo); -+ } -+ break; -+ } -+ case TCP_CLOSING: -+ case TCP_LAST_ACK: -+ if (meta_tp->snd_una == meta_tp->write_seq) { -+ tcp_done(meta_sk); -+ return 1; -+ } -+ break; -+ } -+ -+ /* step 7: process the segment text */ -+ switch (meta_sk->sk_state) { -+ case TCP_FIN_WAIT1: -+ case TCP_FIN_WAIT2: -+ /* RFC 793 says to queue data in these states, -+ * RFC 1122 says we MUST send a reset. -+ * BSD 4.4 also does reset. -+ */ -+ if (meta_sk->sk_shutdown & RCV_SHUTDOWN) { -+ if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && -+ after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt) && -+ !mptcp_is_data_fin2(skb, tp)) { -+ NET_INC_STATS(sock_net(meta_sk), LINUX_MIB_TCPABORTONDATA); -+ mptcp_send_active_reset(meta_sk, GFP_ATOMIC); -+ tcp_reset(meta_sk); -+ return -1; -+ } -+ } -+ break; -+ } -+ -+ return 0; -+} -+ -+/** -+ * @return: -+ * i) 1: Everything's fine. -+ * ii) -1: A reset has been sent on the subflow - csum-failure -+ * iii) 0: csum-failure but no reset sent, because it's the last subflow. -+ * Last packet should not be destroyed by the caller because it has -+ * been done here. 
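The checksum verification that follows has to feed a mapping that spans several skbs, some of odd length, into one running sum; the "overflowed" flag below carries the dangling byte of an odd chunk into the next one. A compact userspace model of that carry logic (this is a plain 16-bit one's-complement sum, not the kernel's 32-bit-word csum_partial):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

struct csum_state {
	uint32_t sum;
	int have_odd;      /* a byte from the previous chunk is pending */
	uint8_t odd_byte;
};

static void csum_feed(struct csum_state *st, const uint8_t *p, size_t len)
{
	if (st->have_odd && len) {
		/* Pair the pending byte with this chunk's first byte. */
		st->sum += (uint32_t)st->odd_byte << 8 | *p++;
		len--;
		st->have_odd = 0;
	}
	for (; len >= 2; len -= 2, p += 2)
		st->sum += (uint32_t)p[0] << 8 | p[1];
	if (len) {
		st->odd_byte = *p;
		st->have_odd = 1;
	}
}

static uint16_t csum_fold16(struct csum_state *st)
{
	uint32_t s = st->sum;

	if (st->have_odd)
		s += (uint32_t)st->odd_byte << 8;  /* pad final odd byte */
	while (s >> 16)
		s = (s & 0xffff) + (s >> 16);
	return ~s & 0xffff;
}

int main(void)
{
	struct csum_state a = {0}, b = {0};
	const uint8_t data[] = { 1, 2, 3, 4, 5 };

	csum_feed(&a, data, 5);      /* one chunk must equal ...     */
	csum_feed(&b, data, 3);      /* ... the same bytes split at  */
	csum_feed(&b, data + 3, 2);  /* an odd boundary              */
	printf("%#x == %#x\n", csum_fold16(&a), csum_fold16(&b));
	return 0;
}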
-+ */ -+static int mptcp_verif_dss_csum(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct sk_buff *tmp, *tmp1, *last = NULL; -+ __wsum csum_tcp = 0; /* cumulative checksum of pld + mptcp-header */ -+ int ans = 1, overflowed = 0, offset = 0, dss_csum_added = 0; -+ int iter = 0; -+ u32 next_seq, offset_seq; -+ -+ skb_queue_walk_safe(&sk->sk_receive_queue, tmp, tmp1) { -+ unsigned int csum_len; -+ -+ /* init next seq in first round */ -+ if (!iter) -+ next_seq = TCP_SKB_CB(tmp)->seq; -+ offset_seq = next_seq - TCP_SKB_CB(tmp)->seq; -+ -+ if (before(tp->mptcp->map_subseq + tp->mptcp->map_data_len, TCP_SKB_CB(tmp)->end_seq)) -+ /* Mapping ends in the middle of the packet - -+ * csum only these bytes -+ */ -+ csum_len = tp->mptcp->map_subseq + tp->mptcp->map_data_len - TCP_SKB_CB(tmp)->seq; -+ else -+ csum_len = tmp->len; -+ -+ csum_len -= offset_seq; -+ offset = 0; -+ if (overflowed) { -+ char first_word[4]; -+ first_word[0] = 0; -+ first_word[1] = 0; -+ first_word[2] = 0; -+ first_word[3] = *(tmp->data + offset_seq); -+ csum_tcp = csum_partial(first_word, 4, csum_tcp); -+ offset = 1; -+ csum_len--; -+ overflowed = 0; -+ } -+ -+ csum_tcp = skb_checksum(tmp, offset + offset_seq, csum_len, -+ csum_tcp); -+ -+ /* Was it on an odd-length? Then we have to merge the next byte -+ * correctly (see above) -+ */ -+ if (csum_len != (csum_len & (~1))) -+ overflowed = 1; -+ -+ if (mptcp_is_data_seq(tmp) && !dss_csum_added) { -+ __be32 data_seq = htonl((u32)(tp->mptcp->map_data_seq >> 32)); -+ -+ /* If a 64-bit dss is present, we increase the offset -+ * by 4 bytes, as the high-order 64-bits will be added -+ * in the final csum_partial-call. -+ */ -+ u32 offset = skb_transport_offset(tmp) + -+ TCP_SKB_CB(tmp)->dss_off; -+ if (TCP_SKB_CB(tmp)->mptcp_flags & MPTCPHDR_SEQ64_SET) -+ offset += 4; -+ -+ csum_tcp = skb_checksum(tmp, offset, -+ MPTCP_SUB_LEN_SEQ_CSUM, -+ csum_tcp); -+ -+ csum_tcp = csum_partial(&data_seq, -+ sizeof(data_seq), csum_tcp); -+ -+ dss_csum_added = 1; /* Just do it once */ -+ } else if (mptcp_is_data_mpcapable(tmp) && !dss_csum_added) { -+ u32 offset = skb_transport_offset(tmp) + TCP_SKB_CB(tmp)->dss_off; -+ __be64 data_seq = htonll(tp->mptcp->map_data_seq); -+ __be32 rel_seq = htonl(tp->mptcp->map_subseq - tp->mptcp->rcv_isn); -+ -+ csum_tcp = csum_partial(&data_seq, sizeof(data_seq), csum_tcp); -+ csum_tcp = csum_partial(&rel_seq, sizeof(rel_seq), csum_tcp); -+ -+ csum_tcp = skb_checksum(tmp, offset, 4, csum_tcp); -+ -+ dss_csum_added = 1; -+ } -+ last = tmp; -+ iter++; -+ -+ if (!skb_queue_is_last(&sk->sk_receive_queue, tmp) && -+ !before(TCP_SKB_CB(tmp1)->seq, -+ tp->mptcp->map_subseq + tp->mptcp->map_data_len)) -+ break; -+ next_seq = TCP_SKB_CB(tmp)->end_seq; -+ } -+ -+ /* Now, checksum must be 0 */ -+ if (unlikely(csum_fold(csum_tcp))) { -+ struct mptcp_tcp_sock *mptcp; -+ struct sock *sk_it = NULL; -+ -+ pr_debug("%s csum is wrong: %#x tcp-seq %u dss_csum_added %d overflowed %d iterations %d\n", -+ __func__, csum_fold(csum_tcp), TCP_SKB_CB(last)->seq, -+ dss_csum_added, overflowed, iter); -+ -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CSUMFAIL); -+ tp->mptcp->send_mp_fail = 1; -+ -+ /* map_data_seq is the data-seq number of the -+ * mapping we are currently checking -+ */ -+ tp->mpcb->csum_cutoff_seq = tp->mptcp->map_data_seq; -+ -+ /* Search for another subflow that is fully established */ -+ mptcp_for_each_sub(tp->mpcb, mptcp) { -+ sk_it = mptcp_to_sock(mptcp); -+ -+ if (sk_it != sk && -+ tcp_sk(sk_it)->mptcp->fully_established) -+ break; -+ -+ sk_it = NULL; -+ 
} -+ -+ if (sk_it) { -+ mptcp_send_reset(sk); -+ ans = -1; -+ } else { -+ tp->mpcb->send_infinite_mapping = 1; -+ -+ /* Need to purge the rcv-queue as it's no more valid */ -+ while ((tmp = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { -+ tp->copied_seq = TCP_SKB_CB(tmp)->end_seq; -+ kfree_skb(tmp); -+ } -+ -+ mptcp_fallback_close(tp->mpcb, sk); -+ -+ ans = 0; -+ } -+ } -+ -+ return ans; -+} -+ -+static inline void mptcp_prepare_skb(struct sk_buff *skb, -+ const struct sock *sk) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); -+ u32 inc = 0, end_seq = tcb->end_seq; -+ -+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) -+ end_seq--; -+ /* If skb is the end of this mapping (end is always at mapping-boundary -+ * thanks to the splitting/trimming), then we need to increase -+ * data-end-seq by 1 if this here is a data-fin. -+ * -+ * We need to do -1 because end_seq includes the subflow-FIN. -+ */ -+ if (tp->mptcp->map_data_fin && -+ end_seq == tp->mptcp->map_subseq + tp->mptcp->map_data_len) { -+ inc = 1; -+ -+ /* We manually set the fin-flag if it is a data-fin. For easy -+ * processing in tcp_recvmsg. -+ */ -+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; -+ } else { -+ /* We may have a subflow-fin with data but without data-fin */ -+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_FIN; -+ } -+ -+ /* Adapt data-seq's to the packet itself. We kinda transform the -+ * dss-mapping to a per-packet granularity. This is necessary to -+ * correctly handle overlapping mappings coming from different -+ * subflows. Otherwise it would be a complete mess. -+ */ -+ tcb->seq = ((u32)tp->mptcp->map_data_seq) + tcb->seq - tp->mptcp->map_subseq; -+ tcb->end_seq = tcb->seq + skb->len + inc; -+} -+ -+static inline void mptcp_reset_mapping(struct tcp_sock *tp, u32 old_copied_seq) -+{ -+ tp->mptcp->map_data_len = 0; -+ tp->mptcp->map_data_seq = 0; -+ tp->mptcp->map_subseq = 0; -+ tp->mptcp->map_data_fin = 0; -+ tp->mptcp->mapping_present = 0; -+ -+ /* In infinite mapping receiver mode, we have to advance the implied -+ * data-sequence number when we progress the subflow's data. -+ */ -+ if (tp->mpcb->infinite_mapping_rcv) -+ tp->mpcb->infinite_rcv_seq += (tp->copied_seq - old_copied_seq); -+} -+ -+/* The DSS-mapping received on the sk only covers the second half of the skb -+ * (cut at seq). We trim the head from the skb. -+ * Data will be freed upon kfree(). -+ * -+ * Inspired by tcp_trim_head(). -+ */ -+static void mptcp_skb_trim_head(struct sk_buff *skb, struct sock *sk, u32 seq) -+{ -+ int len = seq - TCP_SKB_CB(skb)->seq; -+ u32 new_seq = TCP_SKB_CB(skb)->seq + len; -+ u32 delta_truesize; -+ -+ delta_truesize = __pskb_trim_head(skb, len); -+ -+ TCP_SKB_CB(skb)->seq = new_seq; -+ -+ if (delta_truesize) { -+ skb->truesize -= delta_truesize; -+ atomic_sub(delta_truesize, &sk->sk_rmem_alloc); -+ sk_mem_uncharge(sk, delta_truesize); -+ } -+} -+ -+/* The DSS-mapping received on the sk only covers the first half of the skb -+ * (cut at seq). We create a second skb (@return), and queue it in the rcv-queue -+ * as further packets may resolve the mapping of the second half of data. -+ * -+ * Inspired by tcp_fragment(). -+ */ -+static int mptcp_skb_split_tail(struct sk_buff *skb, struct sock *sk, u32 seq) -+{ -+ struct sk_buff *buff; -+ int nsize; -+ int nlen, len; -+ u8 flags; -+ -+ len = seq - TCP_SKB_CB(skb)->seq; -+ nsize = skb_headlen(skb) - len + tcp_sk(sk)->tcp_header_len; -+ if (nsize < 0) -+ nsize = 0; -+ -+ /* Get a new skb... force flag on. 
*/ -+ buff = alloc_skb(nsize, GFP_ATOMIC); -+ if (buff == NULL) -+ return -ENOMEM; -+ -+ skb_reserve(buff, tcp_sk(sk)->tcp_header_len); -+ skb_reset_transport_header(buff); -+ -+ flags = TCP_SKB_CB(skb)->tcp_flags; -+ TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN); -+ TCP_SKB_CB(buff)->tcp_flags = flags; -+ -+ /* We absolutely need to call skb_set_owner_r before refreshing the -+ * truesize of buff, otherwise the moved data will account twice. -+ */ -+ skb_set_owner_r(buff, sk); -+ nlen = skb->len - len - nsize; -+ buff->truesize += nlen; -+ skb->truesize -= nlen; -+ -+ /* Correct the sequence numbers. */ -+ TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len; -+ TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; -+ TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; -+ -+ skb_split(skb, buff, len); -+ -+ __skb_queue_after(&sk->sk_receive_queue, skb, buff); -+ -+ return 0; -+} -+ -+/* @return: 0 everything is fine. Just continue processing -+ * 1 subflow is broken stop everything -+ * -1 this packet was broken - continue with the next one. -+ */ -+static int mptcp_prevalidate_skb(struct sock *sk, struct sk_buff *skb) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct mptcp_cb *mpcb = tp->mpcb; -+ -+ /* If we are in infinite mode, the subflow-fin is in fact a data-fin. */ -+ if (!skb->len && (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) && -+ !mptcp_is_data_fin(skb) && !mpcb->infinite_mapping_rcv) { -+ /* Remove a pure subflow-fin from the queue and increase -+ * copied_seq. -+ */ -+ tp->copied_seq = TCP_SKB_CB(skb)->end_seq; -+ __skb_unlink(skb, &sk->sk_receive_queue); -+ __kfree_skb(skb); -+ return -1; -+ } -+ -+ /* If we are not yet fully established and do not know the mapping for -+ * this segment, this path has to fall back to infinite or be torn down. -+ */ -+ if (!tp->mptcp->fully_established && !mptcp_is_data_seq(skb) && -+ !mptcp_is_data_mpcapable(skb) && -+ !tp->mptcp->mapping_present && !mpcb->infinite_mapping_rcv) { -+ pr_debug("%s %#x will fallback - pi %d from %pS, seq %u mptcp-flags %#x\n", -+ __func__, mpcb->mptcp_loc_token, -+ tp->mptcp->path_index, __builtin_return_address(0), -+ TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->mptcp_flags); -+ -+ if (!is_master_tp(tp)) { -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_FBDATASUB); -+ mptcp_send_reset(sk); -+ return 1; -+ } -+ -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_FBDATAINIT); -+ -+ mpcb->infinite_mapping_snd = 1; -+ mpcb->infinite_mapping_rcv = 1; -+ mpcb->infinite_rcv_seq = mptcp_get_rcv_nxt_64(mptcp_meta_tp(tp)); -+ -+ mptcp_fallback_close(mpcb, sk); -+ -+ /* We do a seamless fallback and should not send an infinite mapping. */ -+ mpcb->send_infinite_mapping = 0; -+ tp->mptcp->fully_established = 1; -+ } -+ -+ /* Receiver-side becomes fully established when a whole rcv-window has -+ * been received without the need to fall back due to the previous -+ * condition. -+ */ -+ if (!tp->mptcp->fully_established) { -+ tp->mptcp->init_rcv_wnd -= skb->len; -+ if (tp->mptcp->init_rcv_wnd < 0) -+ mptcp_become_fully_estab(sk); -+ } -+ -+ return 0; -+} -+ -+static void mptcp_restart_sending(struct sock *meta_sk) -+{ -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ struct mptcp_cb *mpcb = meta_tp->mpcb; -+ struct sk_buff *wq_head, *skb, *tmp; -+ -+ skb = tcp_rtx_queue_head(meta_sk); -+ -+ /* We resend everything that has not been acknowledged, thus we need -+ * to move it from the rtx-tree to the write-queue.
-+ */ -+ wq_head = tcp_write_queue_head(meta_sk); -+ -+ skb_rbtree_walk_from_safe(skb, tmp) { -+ list_del(&skb->tcp_tsorted_anchor); -+ tcp_rtx_queue_unlink(skb, meta_sk); -+ INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); -+ -+ if (wq_head) -+ __skb_queue_before(&meta_sk->sk_write_queue, wq_head, skb); -+ else -+ tcp_add_write_queue_tail(meta_sk, skb); -+ } -+ -+ /* We artificially restart the whole send-queue. Thus, -+ * it is as if no packets are in flight -+ */ -+ meta_tp->packets_out = 0; -+ -+ /* If the snd_nxt already wrapped around, we have to -+ * undo the wrapping, as we are restarting from snd_una -+ * on. -+ */ -+ if (meta_tp->snd_nxt < meta_tp->snd_una) { -+ mpcb->snd_high_order[mpcb->snd_hiseq_index] -= 2; -+ mpcb->snd_hiseq_index = mpcb->snd_hiseq_index ? 0 : 1; -+ } -+ meta_tp->snd_nxt = meta_tp->snd_una; -+ -+ /* Trigger a sending on the meta. */ -+ mptcp_push_pending_frames(meta_sk); -+} -+ -+/* @return: 0 everything is fine. Just continue processing -+ * 1 subflow is broken stop everything -+ * -1 this packet was broken - continue with the next one. -+ */ -+static int mptcp_detect_mapping(struct sock *sk, struct sk_buff *skb) -+{ -+ struct tcp_sock *tp = tcp_sk(sk), *meta_tp = mptcp_meta_tp(tp); -+ struct mptcp_cb *mpcb = tp->mpcb; -+ struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); -+ u32 *ptr; -+ u32 data_seq, sub_seq, data_len, tcp_end_seq; -+ bool set_infinite_rcv = false; -+ -+ /* If we are in infinite-mapping-mode, the subflow is guaranteed to be -+ * in-order at the data-level. Thus data-seq-numbers can be inferred -+ * from what is expected at the data-level. -+ */ -+ if (mpcb->infinite_mapping_rcv) { -+ /* copied_seq may be bigger than tcb->seq (e.g., when the peer -+ * retransmits data that actually has already been acknowledged with -+ * newer data, if he did not receive our acks). Thus, we need -+ * to account for this overlap as well. -+ */ -+ tp->mptcp->map_data_seq = mpcb->infinite_rcv_seq - (tp->copied_seq - tcb->seq); -+ tp->mptcp->map_subseq = tcb->seq; -+ tp->mptcp->map_data_len = skb->len; -+ tp->mptcp->map_data_fin = !!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN); -+ tp->mptcp->mapping_present = 1; -+ return 0; -+ } -+ -+ if (!tp->mptcp->mapping_present && mptcp_is_data_mpcapable(skb)) { -+ __u32 *ptr = (__u32 *)(skb_transport_header(skb) + TCP_SKB_CB(skb)->dss_off); -+ -+ sub_seq = 1 + tp->mptcp->rcv_isn; -+ data_seq = meta_tp->rcv_nxt; -+ data_len = get_unaligned_be16(ptr); -+ } else if (!mptcp_is_data_seq(skb)) { -+ /* No mapping here? -+ * Exit - it is either already set or still on its way -+ */ -+ if (!tp->mptcp->mapping_present && -+ tp->rcv_nxt - tp->copied_seq > 65536) { -+ /* Too many packets without a mapping, -+ * this subflow is broken -+ */ -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_NODSSWINDOW); -+ mptcp_send_reset(sk); -+ return 1; -+ } -+ -+ return 0; -+ } else { -+ /* Well, then the DSS-mapping is there. So, read it! */ -+ ptr = mptcp_skb_set_data_seq(skb, &data_seq, mpcb); -+ ptr++; -+ sub_seq = get_unaligned_be32(ptr) + tp->mptcp->rcv_isn; -+ ptr++; -+ data_len = get_unaligned_be16(ptr); -+ } -+ -+ /* If it's an empty skb with DATA_FIN, sub_seq must get fixed. -+ * The draft sets it to 0, but we really would like to have the -+ * real value, to have an easy handling afterwards here in this -+ * function. -+ */ -+ if (mptcp_is_data_fin(skb) && skb->len == 0) -+ sub_seq = TCP_SKB_CB(skb)->seq; -+ -+ /* If there is already a mapping - we check if it maps with the current -+ * one. If not - we reset. 
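The branch of mptcp_detect_mapping() that reads an explicit DSS option pulls a 32-bit data sequence, a 32-bit relative subflow sequence and a 16-bit length out of the option with get_unaligned_be32()/be16(). A portable sketch of those accessors over a made-up DSS fragment (the byte layout below is illustrative, not the exact wire format):

#include <stdint.h>
#include <stdio.h>

/* Byte-wise loads avoid both alignment traps and host-endianness
 * assumptions, which is what the get_unaligned_be*() helpers do.
 */
static uint32_t load_be32(const uint8_t *p)
{
	return (uint32_t)p[0] << 24 | (uint32_t)p[1] << 16 |
	       (uint32_t)p[2] << 8 | p[3];
}

static uint16_t load_be16(const uint8_t *p)
{
	return (uint16_t)(p[0] << 8 | p[1]);
}

int main(void)
{
	/* Illustrative fragment: data-seq, relative sub-seq, length. */
	const uint8_t wire[] = { 0x00, 0x00, 0x10, 0x00,
				 0x00, 0x00, 0x00, 0x01,
				 0x05, 0xb4 };

	printf("data_seq=%u rel_sub_seq=%u data_len=%u\n",
	       load_be32(wire), load_be32(wire + 4), load_be16(wire + 8));
	return 0;
}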
-+ */ -+ if (tp->mptcp->mapping_present && -+ (data_seq != (u32)tp->mptcp->map_data_seq || -+ sub_seq != tp->mptcp->map_subseq || -+ data_len != tp->mptcp->map_data_len + tp->mptcp->map_data_fin || -+ mptcp_is_data_fin(skb) != tp->mptcp->map_data_fin)) { -+ /* Mapping in packet is different from what we want */ -+ pr_debug("%s Mappings do not match!\n", __func__); -+ pr_debug("%s dseq %u mdseq %u, sseq %u msseq %u dlen %u mdlen %u dfin %d mdfin %d\n", -+ __func__, data_seq, (u32)tp->mptcp->map_data_seq, -+ sub_seq, tp->mptcp->map_subseq, data_len, -+ tp->mptcp->map_data_len, mptcp_is_data_fin(skb), -+ tp->mptcp->map_data_fin); -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DSSNOMATCH); -+ mptcp_send_reset(sk); -+ return 1; -+ } -+ -+ /* If the previous check was good, the current mapping is valid and we exit. */ -+ if (tp->mptcp->mapping_present) -+ return 0; -+ -+ /* Mapping not yet set on this subflow - we set it here! */ -+ -+ if (!data_len) { -+ mpcb->infinite_mapping_rcv = 1; -+ mpcb->send_infinite_mapping = 1; -+ tp->mptcp->fully_established = 1; -+ /* We need to repeat mp_fail's until the sender fell -+ * back to infinite-mapping - here we stop repeating it. -+ */ -+ tp->mptcp->send_mp_fail = 0; -+ -+ /* We have to fixup data_len - it must be the same as skb->len */ -+ data_len = skb->len + (mptcp_is_data_fin(skb) ? 1 : 0); -+ sub_seq = tcb->seq; -+ -+ mptcp_restart_sending(tp->meta_sk); -+ -+ mptcp_fallback_close(mpcb, sk); -+ -+ /* data_seq and so on are set correctly */ -+ -+ /* At this point, the meta-ofo-queue has to be emptied, -+ * as the following data is guaranteed to be in-order at -+ * the data and subflow-level -+ */ -+ skb_rbtree_purge(&meta_tp->out_of_order_queue); -+ -+ set_infinite_rcv = true; -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_INFINITEMAPRX); -+ } -+ -+ /* We are sending mp-fail's and thus are in fallback mode. -+ * Ignore packets which do not announce the fallback and still -+ * want to provide a mapping. -+ */ -+ if (tp->mptcp->send_mp_fail) { -+ tp->copied_seq = TCP_SKB_CB(skb)->end_seq; -+ __skb_unlink(skb, &sk->sk_receive_queue); -+ __kfree_skb(skb); -+ return -1; -+ } -+ -+ /* FIN increased the mapping-length by 1 */ -+ if (mptcp_is_data_fin(skb)) -+ data_len--; -+ -+ /* Subflow-sequences of the packet must -+ * (at least partially) be part of the DSS-mapping's -+ * subflow-sequence-space. -+ * -+ * Basically the mapping is not valid, if either of the -+ * following conditions is true: -+ * -+ * 1. It's not a data_fin and -+ * MPTCP-sub_seq >= TCP-end_seq -+ * -+ * 2. It's a data_fin and TCP-end_seq > TCP-seq and -+ * MPTCP-sub_seq >= TCP-end_seq -+ * -+ * The previous two can be merged into: -+ * TCP-end_seq > TCP-seq and MPTCP-sub_seq >= TCP-end_seq -+ * Because if it's not a data-fin, TCP-end_seq > TCP-seq -+ * -+ * 3. It's a data_fin and skb->len == 0 and -+ * MPTCP-sub_seq > TCP-end_seq -+ * -+ * 4. It's not a data_fin and TCP-end_seq > TCP-seq and -+ * MPTCP-sub_seq + MPTCP-data_len <= TCP-seq -+ */ -+ -+ /* subflow-fin is not part of the mapping - ignore it here! */ -+ tcp_end_seq = tcb->end_seq; -+ if (tcb->tcp_flags & TCPHDR_FIN) -+ tcp_end_seq--; -+ if ((!before(sub_seq, tcb->end_seq) && after(tcp_end_seq, tcb->seq)) || -+ (mptcp_is_data_fin(skb) && skb->len == 0 && after(sub_seq, tcb->end_seq)) || -+ (!after(sub_seq + data_len, tcb->seq) && after(tcp_end_seq, tcb->seq))) { -+ /* The packet's subflow-sequences differ from what is in the -+ * packet's dss-mapping. The peer is misbehaving - reset -+ */ -+ pr_debug("%s Packet's mapping does not map to the DSS sub_seq %u end_seq %u, tcp_end_seq %u seq %u dfin %u len %u data_len %u copied_seq %u\n", -+ __func__, sub_seq, tcb->end_seq, tcp_end_seq, -+ tcb->seq, mptcp_is_data_fin(skb), -+ skb->len, data_len, tp->copied_seq); -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DSSTCPMISMATCH); -+ mptcp_send_reset(sk); -+ return 1; -+ } -+ -+ /* Did the DSS have 64-bit seqnums? */ -+ if (!(tcb->mptcp_flags & MPTCPHDR_SEQ64_SET)) { -+ /* Wrapped around? */ -+ if (unlikely(after(data_seq, meta_tp->rcv_nxt) && data_seq < meta_tp->rcv_nxt)) { -+ tp->mptcp->map_data_seq = mptcp_get_data_seq_64(mpcb, !mpcb->rcv_hiseq_index, data_seq); -+ } else { -+ /* Else, access the default high-order bits */ -+ tp->mptcp->map_data_seq = mptcp_get_data_seq_64(mpcb, mpcb->rcv_hiseq_index, data_seq); -+ } -+ } else { -+ tp->mptcp->map_data_seq = mptcp_get_data_seq_64(mpcb, (tcb->mptcp_flags & MPTCPHDR_SEQ64_INDEX) ? 1 : 0, data_seq); -+ -+ if (unlikely(tcb->mptcp_flags & MPTCPHDR_SEQ64_OFO)) { -+ /* We make sure that the data_seq is invalid. -+ * It will be dropped later. -+ */ -+ tp->mptcp->map_data_seq += 0xFFFFFFFF; -+ tp->mptcp->map_data_seq += 0xFFFFFFFF; -+ } -+ } -+ -+ if (set_infinite_rcv) -+ mpcb->infinite_rcv_seq = tp->mptcp->map_data_seq; -+ -+ tp->mptcp->map_data_len = data_len; -+ tp->mptcp->map_subseq = sub_seq; -+ tp->mptcp->map_data_fin = mptcp_is_data_fin(skb) ? 1 : 0; -+ tp->mptcp->mapping_present = 1; -+ -+ return 0; -+} -+ -+/* Similar to tcp_sequence(...) */ -+static inline bool mptcp_sequence(const struct tcp_sock *meta_tp, -+ u64 data_seq, u64 end_data_seq) -+{ -+ const struct mptcp_cb *mpcb = meta_tp->mpcb; -+ u64 rcv_wup64; -+ -+ /* Wrap-around? */ -+ if (meta_tp->rcv_wup > meta_tp->rcv_nxt) { -+ rcv_wup64 = ((u64)(mpcb->rcv_high_order[mpcb->rcv_hiseq_index] - 1) << 32) | -+ meta_tp->rcv_wup; -+ } else { -+ rcv_wup64 = mptcp_get_data_seq_64(mpcb, mpcb->rcv_hiseq_index, -+ meta_tp->rcv_wup); -+ } -+ -+ return !before64(end_data_seq, rcv_wup64) && -+ !after64(data_seq, mptcp_get_rcv_nxt_64(meta_tp) + tcp_receive_window_now(meta_tp)); -+} -+ -+/* @return: 0 everything is fine. Just continue processing -+ * -1 this packet was broken - continue with the next one. -+ */ -+static int mptcp_validate_mapping(struct sock *sk, struct sk_buff *skb) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct sk_buff *tmp, *tmp1; -+ u32 tcp_end_seq; -+ -+ if (!tp->mptcp->mapping_present) -+ return 0; -+ -+ /* either, the new skb gave us the mapping and the first segment -+ * in the sub-rcv-queue has to be trimmed ... -+ */ -+ tmp = skb_peek(&sk->sk_receive_queue); -+ if (before(TCP_SKB_CB(tmp)->seq, tp->mptcp->map_subseq) && -+ after(TCP_SKB_CB(tmp)->end_seq, tp->mptcp->map_subseq)) { -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DSSTRIMHEAD); -+ mptcp_skb_trim_head(tmp, sk, tp->mptcp->map_subseq); -+ } -+ -+ /* ... or the new skb (tail) has to be split at the end. */ -+ tcp_end_seq = TCP_SKB_CB(skb)->end_seq; -+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) -+ tcp_end_seq--; -+ if (after(tcp_end_seq, tp->mptcp->map_subseq + tp->mptcp->map_data_len)) { -+ u32 seq = tp->mptcp->map_subseq + tp->mptcp->map_data_len; -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DSSSPLITTAIL); -+ if (mptcp_skb_split_tail(skb, sk, seq)) { /* Allocation failed */ -+ /* TODO: maybe handle this here better. -+ * We now just force meta-retransmission.
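The mptcp_get_data_seq_64() calls above extend the 32-bit data sequence carried in the option to the 64-bit data-level space using tracked high-order bits, picking the alternate index when the 32-bit space has already wrapped past rcv_nxt. A simplified sketch of that reconstruction (the helper's exact semantics are inferred from these callers, and the wrap test here is deliberately cruder than the kernel's):

#include <stdint.h>
#include <stdio.h>

/* Extend a 32-bit sequence with tracked high-order bits, bumping the
 * high word when the 32-bit space has wrapped.
 */
static uint64_t data_seq_64(uint32_t high_order, int wrapped, uint32_t seq32)
{
	return ((uint64_t)(high_order + (wrapped ? 1 : 0)) << 32) | seq32;
}

int main(void)
{
	uint32_t rcv_nxt = 0xfffffff0u;  /* receiver close to the wrap */
	uint32_t seq32 = 0x00000010u;    /* option carried a wrapped seq */
	int wrapped = seq32 < rcv_nxt;   /* simplified wrap detection */

	printf("%#llx\n",
	       (unsigned long long)data_seq_64(0, wrapped, seq32));
	return 0;
}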
-+ */ -+ tp->copied_seq = TCP_SKB_CB(skb)->end_seq; -+ __skb_unlink(skb, &sk->sk_receive_queue); -+ __kfree_skb(skb); -+ return -1; -+ } -+ } -+ -+ /* Now, remove old sk_buff's from the receive-queue. -+ * This may happen if the mapping has been lost for these segments and -+ * the next mapping has already been received. -+ */ -+ if (before(TCP_SKB_CB(skb_peek(&sk->sk_receive_queue))->seq, tp->mptcp->map_subseq)) { -+ skb_queue_walk_safe(&sk->sk_receive_queue, tmp1, tmp) { -+ if (!before(TCP_SKB_CB(tmp1)->seq, tp->mptcp->map_subseq)) -+ break; -+ -+ tp->copied_seq = TCP_SKB_CB(tmp1)->end_seq; -+ __skb_unlink(tmp1, &sk->sk_receive_queue); -+ -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PURGEOLD); -+ /* Impossible that we could free skb here, because his -+ * mapping is known to be valid from previous checks -+ */ -+ __kfree_skb(tmp1); -+ } -+ } -+ -+ return 0; -+} -+ -+/* @return: 0 everything is fine. Just continue processing -+ * 1 subflow is broken stop everything -+ * -1 this mapping has been put in the meta-receive-queue -+ * -2 this mapping has been eaten by the application -+ */ -+static int mptcp_queue_skb(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk), *meta_tp = mptcp_meta_tp(tp); -+ struct sock *meta_sk = mptcp_meta_sk(sk); -+ struct mptcp_cb *mpcb = tp->mpcb; -+ struct sk_buff *tmp, *tmp1; -+ u64 rcv_nxt64 = mptcp_get_rcv_nxt_64(meta_tp); -+ u32 old_copied_seq = tp->copied_seq; -+ bool data_queued = false; -+ -+ /* Have we not yet received the full mapping? */ -+ if (!tp->mptcp->mapping_present || -+ before(tp->rcv_nxt, tp->mptcp->map_subseq + tp->mptcp->map_data_len)) -+ return 0; -+ -+ /* Is this an overlapping mapping? rcv_nxt >= end_data_seq -+ * OR -+ * This mapping is out of window -+ */ -+ if (!before64(rcv_nxt64, tp->mptcp->map_data_seq + tp->mptcp->map_data_len + tp->mptcp->map_data_fin) || -+ !mptcp_sequence(meta_tp, tp->mptcp->map_data_seq, -+ tp->mptcp->map_data_seq + tp->mptcp->map_data_len + tp->mptcp->map_data_fin)) { -+ skb_queue_walk_safe(&sk->sk_receive_queue, tmp1, tmp) { -+ __skb_unlink(tmp1, &sk->sk_receive_queue); -+ tp->copied_seq = TCP_SKB_CB(tmp1)->end_seq; -+ __kfree_skb(tmp1); -+ -+ if (!skb_queue_empty(&sk->sk_receive_queue) && -+ !before(TCP_SKB_CB(tmp)->seq, -+ tp->mptcp->map_subseq + tp->mptcp->map_data_len)) -+ break; -+ } -+ -+ mptcp_reset_mapping(tp, old_copied_seq); -+ -+ return -1; -+ } -+ -+ /* Record it, because we want to send our data_fin on the same path */ -+ if (tp->mptcp->map_data_fin) { -+ mpcb->dfin_path_index = tp->mptcp->path_index; -+ mpcb->dfin_combined = !!(sk->sk_shutdown & RCV_SHUTDOWN); -+ } -+ -+ /* Verify the checksum */ -+ if (mpcb->dss_csum && !mpcb->infinite_mapping_rcv) { -+ int ret = mptcp_verif_dss_csum(sk); -+ -+ if (ret <= 0) { -+ mptcp_reset_mapping(tp, old_copied_seq); -+ return 1; -+ } -+ } -+ -+ if (before64(rcv_nxt64, tp->mptcp->map_data_seq)) { -+ /* Seg's have to go to the meta-ofo-queue */ -+ skb_queue_walk_safe(&sk->sk_receive_queue, tmp1, tmp) { -+ tp->copied_seq = TCP_SKB_CB(tmp1)->end_seq; -+ mptcp_prepare_skb(tmp1, sk); -+ __skb_unlink(tmp1, &sk->sk_receive_queue); -+ /* MUST be done here, because fragstolen may be true later. -+ * Then, kfree_skb_partial will not account the memory. 
-+ */ -+ skb_orphan(tmp1); -+ -+ if (!mpcb->in_time_wait) /* In time-wait, do not receive data */ -+ tcp_data_queue_ofo(meta_sk, tmp1); -+ else -+ __kfree_skb(tmp1); -+ -+ if (!skb_queue_empty(&sk->sk_receive_queue) && -+ !before(TCP_SKB_CB(tmp)->seq, -+ tp->mptcp->map_subseq + tp->mptcp->map_data_len)) -+ break; -+ } -+ -+ /* Quick ACK if more 3/4 of the receive window is filled */ -+ if (after64(tp->mptcp->map_data_seq, -+ rcv_nxt64 + 3 * (tcp_receive_window_now(meta_tp) >> 2))) -+ tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); -+ -+ } else { -+ /* Ready for the meta-rcv-queue */ -+ skb_queue_walk_safe(&sk->sk_receive_queue, tmp1, tmp) { -+ int eaten = 0; -+ bool fragstolen = false; -+ u32 old_rcv_nxt = meta_tp->rcv_nxt; -+ -+ tp->copied_seq = TCP_SKB_CB(tmp1)->end_seq; -+ mptcp_prepare_skb(tmp1, sk); -+ __skb_unlink(tmp1, &sk->sk_receive_queue); -+ /* MUST be done here, because fragstolen may be true. -+ * Then, kfree_skb_partial will not account the memory. -+ */ -+ skb_orphan(tmp1); -+ -+ /* This segment has already been received */ -+ if (!after(TCP_SKB_CB(tmp1)->end_seq, meta_tp->rcv_nxt)) { -+ __kfree_skb(tmp1); -+ goto next; -+ } -+ -+ if (mpcb->in_time_wait) /* In time-wait, do not receive data */ -+ eaten = 1; -+ -+ if (!eaten) -+ eaten = tcp_queue_rcv(meta_sk, tmp1, &fragstolen); -+ -+ meta_tp->rcv_nxt = TCP_SKB_CB(tmp1)->end_seq; -+ -+ if (TCP_SKB_CB(tmp1)->tcp_flags & TCPHDR_FIN) -+ mptcp_fin(meta_sk); -+ -+ /* Check if this fills a gap in the ofo queue */ -+ if (!RB_EMPTY_ROOT(&meta_tp->out_of_order_queue)) -+ tcp_ofo_queue(meta_sk); -+ -+ mptcp_check_rcvseq_wrap(meta_tp, old_rcv_nxt); -+ -+ if (eaten) -+ kfree_skb_partial(tmp1, fragstolen); -+ -+ data_queued = true; -+next: -+ if (!skb_queue_empty(&sk->sk_receive_queue) && -+ !before(TCP_SKB_CB(tmp)->seq, -+ tp->mptcp->map_subseq + tp->mptcp->map_data_len)) -+ break; -+ } -+ } -+ -+ inet_csk(meta_sk)->icsk_ack.lrcvtime = tcp_jiffies32; -+ mptcp_reset_mapping(tp, old_copied_seq); -+ -+ return data_queued ? -1 : -2; -+} -+ -+void mptcp_data_ready(struct sock *sk) -+{ -+ struct sock *meta_sk = mptcp_meta_sk(sk); -+ struct sk_buff *skb, *tmp; -+ int queued = 0; -+ -+ tcp_mstamp_refresh(tcp_sk(meta_sk)); -+ -+ /* restart before the check, because mptcp_fin might have changed the -+ * state. -+ */ -+restart: -+ /* If the meta cannot receive data, there is no point in pushing data. -+ * If we are in time-wait, we may still be waiting for the final FIN. -+ * So, we should proceed with the processing. -+ */ -+ if (!mptcp_sk_can_recv(meta_sk) && !tcp_sk(sk)->mpcb->in_time_wait) { -+ skb_queue_purge(&sk->sk_receive_queue); -+ tcp_sk(sk)->copied_seq = tcp_sk(sk)->rcv_nxt; -+ goto exit; -+ } -+ -+ /* Iterate over all segments, detect their mapping (if we don't have -+ * one yet), validate them and push everything one level higher. 
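The out-of-order branch above forces a quick ACK once the incoming mapping lands beyond three quarters of the receive window: rcv_nxt64 + 3 * (window >> 2) is the cheap integer form of rcv_nxt64 + 0.75 * window. A small worked example of that threshold:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

static bool after64(uint64_t a, uint64_t b) { return (int64_t)(b - a) < 0; }

int main(void)
{
	uint64_t rcv_nxt64 = 1000;
	uint32_t window = 400;
	uint64_t map_data_seq = 1320;  /* mapping lands 320 bytes ahead */

	/* threshold = 1000 + 3 * (400 >> 2) = 1300 */
	if (after64(map_data_seq, rcv_nxt64 + 3 * ((uint64_t)window >> 2)))
		puts("more than 3/4 of the window filled: quick ACK");
	else
		puts("gap still small: normal delayed ACK");
	return 0;
}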
-+ */ -+ skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) { -+ int ret; -+ /* Pre-validation - e.g., early fallback */ -+ ret = mptcp_prevalidate_skb(sk, skb); -+ if (ret < 0) -+ goto restart; -+ else if (ret > 0) -+ break; -+ -+ /* Set the current mapping */ -+ ret = mptcp_detect_mapping(sk, skb); -+ if (ret < 0) -+ goto restart; -+ else if (ret > 0) -+ break; -+ -+ /* Validation */ -+ if (mptcp_validate_mapping(sk, skb) < 0) -+ goto restart; -+ -+ /* Push a level higher */ -+ ret = mptcp_queue_skb(sk); -+ if (ret < 0) { -+ if (ret == -1) -+ queued = ret; -+ goto restart; -+ } else if (ret == 0) { -+ continue; -+ } else { /* ret == 1 */ -+ break; -+ } -+ } -+ -+exit: -+ if (tcp_sk(sk)->close_it && sk->sk_state == TCP_FIN_WAIT2) { -+ tcp_send_ack(sk); -+ tcp_sk(sk)->ops->time_wait(sk, TCP_TIME_WAIT, 0); -+ } -+ -+ if (queued == -1 && !sock_flag(meta_sk, SOCK_DEAD)) -+ meta_sk->sk_data_ready(meta_sk); -+} -+ -+struct mp_join *mptcp_find_join(const struct sk_buff *skb) -+{ -+ const struct tcphdr *th = tcp_hdr(skb); -+ unsigned char *ptr; -+ int length = (th->doff * 4) - sizeof(struct tcphdr); -+ -+ /* Jump through the options to check whether JOIN is there */ -+ ptr = (unsigned char *)(th + 1); -+ while (length > 0) { -+ int opcode = *ptr++; -+ int opsize; -+ -+ switch (opcode) { -+ case TCPOPT_EOL: -+ return NULL; -+ case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ -+ length--; -+ continue; -+ default: -+ opsize = *ptr++; -+ if (opsize < 2) /* "silly options" */ -+ return NULL; -+ if (opsize > length) -+ return NULL; /* don't parse partial options */ -+ if (opcode == TCPOPT_MPTCP && -+ ((struct mptcp_option *)(ptr - 2))->sub == MPTCP_SUB_JOIN) { -+ return (struct mp_join *)(ptr - 2); -+ } -+ ptr += opsize - 2; -+ length -= opsize; -+ } -+ } -+ return NULL; -+} -+ -+int mptcp_lookup_join(struct sk_buff *skb, struct inet_timewait_sock *tw) -+{ -+ struct sock *meta_sk; -+ u32 token; -+ bool meta_v4; -+ struct mp_join *join_opt = mptcp_find_join(skb); -+ if (!join_opt) -+ return 0; -+ -+ /* MPTCP structures were not initialized, so return error */ -+ if (mptcp_init_failed) -+ return -1; -+ -+ token = join_opt->u.syn.token; -+ meta_sk = mptcp_hash_find(dev_net(skb_dst(skb)->dev), token); -+ if (!meta_sk) { -+ MPTCP_INC_STATS(dev_net(skb_dst(skb)->dev), MPTCP_MIB_JOINNOTOKEN); -+ mptcp_debug("%s:mpcb not found:%x\n", __func__, token); -+ return -1; -+ } -+ -+ meta_v4 = meta_sk->sk_family == AF_INET; -+ if (meta_v4) { -+ if (skb->protocol == htons(ETH_P_IPV6)) { -+ mptcp_debug("SYN+MP_JOIN with IPV6 address on pure IPV4 meta\n"); -+ sock_put(meta_sk); /* Taken by mptcp_hash_find */ -+ return -1; -+ } -+ } else if (skb->protocol == htons(ETH_P_IP) && meta_sk->sk_ipv6only) { -+ mptcp_debug("SYN+MP_JOIN with IPV4 address on IPV6_V6ONLY meta\n"); -+ sock_put(meta_sk); /* Taken by mptcp_hash_find */ -+ return -1; -+ } -+ -+ /* Coming from time-wait-sock processing in tcp_v4_rcv. -+ * We have to deschedule it before continuing, because otherwise -+ * mptcp_v4_do_rcv will hit again on it inside tcp_v4_hnd_req. 
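mptcp_find_join() above is the standard TCP option walk: step option by option, honouring the special one-byte EOL and NOP kinds, and bail out on malformed ("silly") or truncated lengths. The same walk as a self-contained userspace function, checked against a hand-built option block:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define TCPOPT_EOL   0
#define TCPOPT_NOP   1
#define TCPOPT_MPTCP 30  /* IANA-assigned MPTCP option kind */

static const uint8_t *find_option(const uint8_t *ptr, int length, int wanted)
{
	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return NULL;
		case TCPOPT_NOP:         /* one byte, no length field */
			length--;
			continue;
		default:
			opsize = *ptr++;
			if (opsize < 2 || opsize > length)
				return NULL;  /* silly or truncated */
			if (opcode == wanted)
				return ptr - 2;  /* start of the option */
			ptr += opsize - 2;
			length -= opsize;
		}
	}
	return NULL;
}

int main(void)
{
	/* NOP, NOP, then a 12-byte MPTCP option (payload elided). */
	const uint8_t opts[] = { 1, 1, 30, 12,
				 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
	const uint8_t *mp = find_option(opts, sizeof(opts), TCPOPT_MPTCP);

	printf("MPTCP option %sfound\n", mp ? "" : "not ");
	return 0;
}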
-+ */ -+ if (tw) -+ inet_twsk_deschedule_put(tw); -+ -+ /* OK, this is a new syn/join, let's create a new open request and -+ * send syn+ack -+ */ -+ if (skb->protocol == htons(ETH_P_IP)) { -+ tcp_v4_do_rcv(meta_sk, skb); -+#if IS_ENABLED(CONFIG_IPV6) -+ } else { -+ tcp_v6_do_rcv(meta_sk, skb); -+#endif /* CONFIG_IPV6 */ -+ } -+ sock_put(meta_sk); /* Taken by mptcp_hash_find */ -+ return 1; -+} -+ -+int mptcp_do_join_short(struct sk_buff *skb, -+ const struct mptcp_options_received *mopt, -+ struct net *net) -+{ -+ struct sock *meta_sk; -+ u32 token; -+ bool meta_v4; -+ -+ token = mopt->mptcp_rem_token; -+ meta_sk = mptcp_hash_find(net, token); -+ if (!meta_sk) { -+ MPTCP_INC_STATS(dev_net(skb_dst(skb)->dev), MPTCP_MIB_JOINNOTOKEN); -+ mptcp_debug("%s:mpcb not found:%x\n", __func__, token); -+ return -1; -+ } -+ -+ meta_v4 = meta_sk->sk_family == AF_INET; -+ if (meta_v4) { -+ if (skb->protocol == htons(ETH_P_IPV6)) { -+ mptcp_debug("SYN+MP_JOIN with IPV6 address on pure IPV4 meta\n"); -+ sock_put(meta_sk); /* Taken by mptcp_hash_find */ -+ return -1; -+ } -+ } else if (skb->protocol == htons(ETH_P_IP) && meta_sk->sk_ipv6only) { -+ mptcp_debug("SYN+MP_JOIN with IPV4 address on IPV6_V6ONLY meta\n"); -+ sock_put(meta_sk); /* Taken by mptcp_hash_find */ -+ return -1; -+ } -+ -+ /* OK, this is a new syn/join, let's create a new open request and -+ * send syn+ack -+ */ -+ -+ /* mptcp_v4_do_rcv tries to free the skb - we prevent this, as -+ * the skb will finally be freed by tcp_v4_do_rcv (where we are -+ * coming from) -+ */ -+ skb_get(skb); -+ if (skb->protocol == htons(ETH_P_IP)) { -+ tcp_v4_do_rcv(meta_sk, skb); -+#if IS_ENABLED(CONFIG_IPV6) -+ } else { /* IPv6 */ -+ tcp_v6_do_rcv(meta_sk, skb); -+#endif /* CONFIG_IPV6 */ -+ } -+ -+ sock_put(meta_sk); /* Taken by mptcp_hash_find */ -+ return 0; -+} -+ -+/** -+ * Equivalent of tcp_fin() for MPTCP -+ * Can be called only when the FIN is validly part -+ * of the data seqnum space. Not before when we get holes. -+ */ -+void mptcp_fin(struct sock *meta_sk) -+{ -+ struct sock *sk = NULL; -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ struct mptcp_cb *mpcb = meta_tp->mpcb; -+ struct mptcp_tcp_sock *mptcp; -+ unsigned char state; -+ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ struct sock *sk_it = mptcp_to_sock(mptcp); -+ -+ if (tcp_sk(sk_it)->mptcp->path_index == mpcb->dfin_path_index) { -+ sk = sk_it; -+ break; -+ } -+ } -+ -+ if (!sk || sk->sk_state == TCP_CLOSE) -+ sk = mptcp_select_ack_sock(meta_sk); -+ -+ inet_csk_schedule_ack(sk); -+ -+ if (!mpcb->in_time_wait) { -+ meta_sk->sk_shutdown |= RCV_SHUTDOWN; -+ sock_set_flag(meta_sk, SOCK_DONE); -+ state = meta_sk->sk_state; -+ } else { -+ state = mpcb->mptw_state; -+ } -+ -+ switch (state) { -+ case TCP_SYN_RECV: -+ case TCP_ESTABLISHED: -+ /* Move to CLOSE_WAIT */ -+ tcp_set_state(meta_sk, TCP_CLOSE_WAIT); -+ inet_csk(sk)->icsk_ack.pingpong = 1; -+ break; -+ -+ case TCP_CLOSE_WAIT: -+ case TCP_CLOSING: -+ /* Received a retransmission of the FIN, do -+ * nothing. -+ */ -+ break; -+ case TCP_LAST_ACK: -+ /* RFC793: Remain in the LAST-ACK state. */ -+ break; -+ -+ case TCP_FIN_WAIT1: -+ /* This case occurs when a simultaneous close -+ * happens, we must ack the received FIN and -+ * enter the CLOSING state. -+ */ -+ tcp_send_ack(sk); -+ tcp_set_state(meta_sk, TCP_CLOSING); -+ break; -+ case TCP_FIN_WAIT2: -+ /* Received a FIN -- send ACK and enter TIME_WAIT. 
*/ -+ tcp_send_ack(sk); -+ meta_tp->ops->time_wait(meta_sk, TCP_TIME_WAIT, 0); -+ break; -+ default: -+ /* Only TCP_LISTEN and TCP_CLOSE are left, in these -+ * cases we should never reach this piece of code. -+ */ -+ pr_err("%s: Impossible, meta_sk->sk_state=%d\n", __func__, -+ meta_sk->sk_state); -+ break; -+ } -+ -+ /* It _is_ possible, that we have something out-of-order _after_ FIN. -+ * Probably, we should reset in this case. For now drop them. -+ */ -+ skb_rbtree_purge(&meta_tp->out_of_order_queue); -+ sk_mem_reclaim(meta_sk); -+ -+ if (!sock_flag(meta_sk, SOCK_DEAD)) { -+ meta_sk->sk_state_change(meta_sk); -+ -+ /* Do not send POLL_HUP for half duplex close. */ -+ if (meta_sk->sk_shutdown == SHUTDOWN_MASK || -+ meta_sk->sk_state == TCP_CLOSE) -+ sk_wake_async(meta_sk, SOCK_WAKE_WAITD, POLL_HUP); -+ else -+ sk_wake_async(meta_sk, SOCK_WAKE_WAITD, POLL_IN); -+ } -+ -+ return; -+} -+ -+/* Similar to tcp_xmit_retransmit_queue */ -+static void mptcp_xmit_retransmit_queue(struct sock *meta_sk) -+{ -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ struct sk_buff *skb, *rtx_head; -+ -+ if (!meta_tp->packets_out) -+ return; -+ -+ skb = rtx_head = tcp_rtx_queue_head(meta_sk); -+ skb_rbtree_walk_from(skb) { -+ if (mptcp_retransmit_skb(meta_sk, skb)) -+ return; -+ -+ if (skb == rtx_head) -+ inet_csk_reset_xmit_timer(meta_sk, ICSK_TIME_RETRANS, -+ inet_csk(meta_sk)->icsk_rto, -+ TCP_RTO_MAX); -+ } -+} -+ -+static void mptcp_snd_una_update(struct tcp_sock *meta_tp, u32 data_ack) -+{ -+ u32 delta = data_ack - meta_tp->snd_una; -+ -+ sock_owned_by_me((struct sock *)meta_tp); -+ meta_tp->bytes_acked += delta; -+ meta_tp->snd_una = data_ack; -+} -+ -+static void mptcp_stop_subflow_chronos(struct sock *meta_sk, -+ const enum tcp_chrono type) -+{ -+ const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct mptcp_tcp_sock *mptcp; -+ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ struct sock *sk_it = mptcp_to_sock(mptcp); -+ -+ tcp_chrono_stop(sk_it, type); -+ } -+} -+ -+/* Handle the DATA_ACK */ -+static bool mptcp_process_data_ack(struct sock *sk, const struct sk_buff *skb) -+{ -+ struct sock *meta_sk = mptcp_meta_sk(sk); -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk), *tp = tcp_sk(sk); -+ struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); -+ u32 prior_snd_una = meta_tp->snd_una; -+ int prior_packets; -+ u32 nwin, data_ack, data_seq; -+ u16 data_len = 0; -+ -+ /* A valid packet came in - subflow is operational again */ -+ tp->pf = 0; -+ -+ /* Even if there is no data-ack, we stop retransmitting. -+ * Except if this is a SYN/ACK. Then it is just a retransmission -+ */ -+ if (tp->mptcp->pre_established && !tcp_hdr(skb)->syn) { -+ tp->mptcp->pre_established = 0; -+ sk_stop_timer(sk, &tp->mptcp->mptcp_ack_timer); -+ -+ if (meta_tp->mpcb->pm_ops->established_subflow) -+ meta_tp->mpcb->pm_ops->established_subflow(sk); -+ } -+ -+ /* If we are in infinite mapping mode, rx_opt.data_ack has been -+ * set by mptcp_clean_rtx_infinite. -+ */ -+ if (!(tcb->mptcp_flags & MPTCPHDR_ACK) && !tp->mpcb->infinite_mapping_snd) -+ return false; -+ -+ if (unlikely(!tp->mptcp->fully_established) && -+ tp->mptcp->snt_isn + 1 != TCP_SKB_CB(skb)->ack_seq) -+ /* As soon as a subflow-data-ack (not acking syn, thus snt_isn + 1) -+ * includes a data-ack, we are fully established -+ */ -+ mptcp_become_fully_estab(sk); -+ -+ /* After we did the subflow-only processing (stopping timer and marking -+ * subflow as established), check if we can proceed with MPTCP-level -+ * processing. 
-+ */ -+ if (meta_sk->sk_state == TCP_CLOSE) -+ return false; -+ -+ /* Get the data_seq */ -+ if (mptcp_is_data_seq(skb)) { -+ data_seq = tp->mptcp->rx_opt.data_seq; -+ data_len = tp->mptcp->rx_opt.data_len; -+ } else { -+ data_seq = meta_tp->snd_wl1; -+ } -+ -+ data_ack = tp->mptcp->rx_opt.data_ack; -+ -+ /* If the ack is older than previous acks -+ * then we can probably ignore it. -+ */ -+ if (before(data_ack, prior_snd_una)) -+ goto exit; -+ -+ /* If the ack includes data we haven't sent yet, discard -+ * this segment (RFC793 Section 3.9). -+ */ -+ if (after(data_ack, meta_tp->snd_nxt)) -+ goto exit; -+ -+ /* First valid DATA_ACK, we can stop sending the special MP_CAPABLE */ -+ tp->mpcb->send_mptcpv1_mpcapable = 0; -+ -+ /*** Now, update the window - inspired by tcp_ack_update_window ***/ -+ nwin = ntohs(tcp_hdr(skb)->window); -+ -+ if (likely(!tcp_hdr(skb)->syn)) -+ nwin <<= tp->rx_opt.snd_wscale; -+ -+ if (tcp_may_update_window(meta_tp, data_ack, data_seq, nwin)) { -+ tcp_update_wl(meta_tp, data_seq); -+ -+ /* Draft v09, Section 3.3.5: -+ * [...] It should only update its local receive window values -+ * when the largest sequence number allowed (i.e. DATA_ACK + -+ * receive window) increases. [...] -+ */ -+ if (meta_tp->snd_wnd != nwin && -+ !before(data_ack + nwin, tcp_wnd_end(meta_tp))) { -+ meta_tp->snd_wnd = nwin; -+ -+ if (nwin > meta_tp->max_window) -+ meta_tp->max_window = nwin; -+ } -+ } -+ /*** Done, update the window ***/ -+ -+ /* We passed data and got it acked, remove any soft error -+ * log. Something worked... -+ */ -+ sk->sk_err_soft = 0; -+ inet_csk(meta_sk)->icsk_probes_out = 0; -+ meta_tp->rcv_tstamp = tcp_jiffies32; -+ prior_packets = meta_tp->packets_out; -+ if (!prior_packets) -+ goto no_queue; -+ -+ mptcp_snd_una_update(meta_tp, data_ack); -+ -+ mptcp_clean_rtx_queue(meta_sk, prior_snd_una); -+ -+ /* We are in loss-state, and something got acked, retransmit the whole -+ * queue now! -+ */ -+ if (inet_csk(meta_sk)->icsk_ca_state == TCP_CA_Loss && -+ after(data_ack, prior_snd_una)) { -+ mptcp_xmit_retransmit_queue(meta_sk); -+ inet_csk(meta_sk)->icsk_ca_state = TCP_CA_Open; -+ } -+ -+ /* Simplified version of tcp_new_space, because the snd-buffer -+ * is handled by all the subflows. -+ */ -+ if (sock_flag(meta_sk, SOCK_QUEUE_SHRUNK)) { -+ sock_reset_flag(meta_sk, SOCK_QUEUE_SHRUNK); -+ if (meta_sk->sk_socket && -+ test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags)) -+ meta_sk->sk_write_space(meta_sk); -+ -+ if (meta_sk->sk_socket && -+ !test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags)) { -+ tcp_chrono_stop(meta_sk, TCP_CHRONO_SNDBUF_LIMITED); -+ mptcp_stop_subflow_chronos(meta_sk, -+ TCP_CHRONO_SNDBUF_LIMITED); -+ } -+ } -+ -+ if (meta_sk->sk_state != TCP_ESTABLISHED) { -+ int ret = mptcp_rcv_state_process(meta_sk, sk, skb, data_seq, data_len); -+ -+ if (ret < 0) -+ return true; -+ else if (ret > 0) -+ return false; -+ } -+ -+exit: -+ mptcp_push_pending_frames(meta_sk); -+ -+ return false; -+ -+no_queue: -+ if (tcp_send_head(meta_sk)) -+ tcp_ack_probe(meta_sk); -+ -+ mptcp_push_pending_frames(meta_sk); -+ -+ return false; -+} -+ -+void mptcp_clean_rtx_infinite(const struct sk_buff *skb, struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk), *meta_tp = tcp_sk(mptcp_meta_sk(sk)); -+ -+ if (!tp->mpcb->infinite_mapping_snd) -+ return; -+ -+ /* The difference between both write_seq's represents the offset between -+ * data-sequence and subflow-sequence. As we are infinite, this must -+ * match. -+ * -+ * Thus, from this difference we can infer the meta snd_una. 
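Worked example of the inference described above: once the connection runs on an infinite mapping, data-level and subflow-level sequence spaces advance in lockstep, so the constant offset between the two write_seq counters turns a subflow ack into a meta-level data ack. The numbers below are made up for illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t meta_snd_nxt = 5000;  /* next meta-level byte to send */
	uint32_t sub_snd_nxt = 3200;   /* next subflow-level byte to send */
	uint32_t sub_snd_una = 3000;   /* subflow bytes acked so far */

	/* data_ack = meta_snd_nxt - sub_snd_nxt + sub_snd_una */
	uint32_t data_ack = meta_snd_nxt - sub_snd_nxt + sub_snd_una;

	/* 200 bytes are in flight on the subflow, so 200 meta-level
	 * bytes remain unacknowledged as well.
	 */
	printf("inferred meta data_ack = %u\n", data_ack);  /* 4800 */
	return 0;
}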
-+ */ -+ tp->mptcp->rx_opt.data_ack = meta_tp->snd_nxt - tp->snd_nxt + -+ tp->snd_una; -+ -+ mptcp_process_data_ack(sk, skb); -+} -+ -+/**** static functions used by mptcp_parse_options */ -+ -+static void mptcp_send_reset_rem_id(const struct mptcp_cb *mpcb, u8 rem_id) -+{ -+ struct mptcp_tcp_sock *mptcp; -+ struct hlist_node *tmp; -+ -+ mptcp_for_each_sub_safe(mpcb, mptcp, tmp) { -+ struct sock *sk_it = mptcp_to_sock(mptcp); -+ -+ if (tcp_sk(sk_it)->mptcp->rem_id == rem_id) { -+ mptcp_reinject_data(sk_it, 0); -+ mptcp_send_reset(sk_it); -+ } -+ } -+} -+ -+static inline bool is_valid_addropt_opsize(u8 mptcp_ver, -+ struct mp_add_addr *mpadd, -+ int opsize) -+{ -+#if IS_ENABLED(CONFIG_IPV6) -+ if (mptcp_ver < MPTCP_VERSION_1 && mpadd->u_bit.v0.ipver == 6) { -+ return opsize == MPTCP_SUB_LEN_ADD_ADDR6 || -+ opsize == MPTCP_SUB_LEN_ADD_ADDR6 + 2; -+ } -+ if (mptcp_ver >= MPTCP_VERSION_1) -+ return opsize == MPTCP_SUB_LEN_ADD_ADDR6_VER1 || -+ opsize == MPTCP_SUB_LEN_ADD_ADDR6_VER1 + 2 || -+ opsize == MPTCP_SUB_LEN_ADD_ADDR4_VER1 || -+ opsize == MPTCP_SUB_LEN_ADD_ADDR4_VER1 + 2; -+#endif -+ if (mptcp_ver < MPTCP_VERSION_1 && mpadd->u_bit.v0.ipver == 4) { -+ return opsize == MPTCP_SUB_LEN_ADD_ADDR4 || -+ opsize == MPTCP_SUB_LEN_ADD_ADDR4 + 2; -+ } -+ if (mptcp_ver >= MPTCP_VERSION_1) { -+ return opsize == MPTCP_SUB_LEN_ADD_ADDR4_VER1 || -+ opsize == MPTCP_SUB_LEN_ADD_ADDR4_VER1 + 2; -+ } -+ return false; -+} -+ -+void mptcp_parse_options(const uint8_t *ptr, int opsize, -+ struct mptcp_options_received *mopt, -+ const struct sk_buff *skb, -+ struct tcp_sock *tp) -+{ -+ const struct mptcp_option *mp_opt = (struct mptcp_option *)ptr; -+ const struct tcphdr *th = tcp_hdr(skb); -+ -+ /* If the socket is mp-capable we would have a mopt. */ -+ if (!mopt) -+ return; -+ -+ switch (mp_opt->sub) { -+ case MPTCP_SUB_CAPABLE: -+ { -+ const struct mp_capable *mpcapable = (struct mp_capable *)ptr; -+ -+ if (mpcapable->ver == MPTCP_VERSION_0 && -+ ((th->syn && opsize != MPTCP_SUB_LEN_CAPABLE_SYN) || -+ (!th->syn && th->ack && opsize != MPTCP_SUB_LEN_CAPABLE_ACK))) { -+ mptcp_debug("%s: mp_capable v0: bad option size %d\n", -+ __func__, opsize); -+ break; -+ } -+ -+ if (mpcapable->ver == MPTCP_VERSION_1 && -+ ((th->syn && !th->ack && opsize != MPTCPV1_SUB_LEN_CAPABLE_SYN) || -+ (th->syn && th->ack && opsize != MPTCPV1_SUB_LEN_CAPABLE_SYNACK) || -+ (!th->syn && th->ack && opsize != MPTCPV1_SUB_LEN_CAPABLE_ACK && -+ opsize != MPTCPV1_SUB_LEN_CAPABLE_DATA && -+ opsize != MPTCPV1_SUB_LEN_CAPABLE_DATA_CSUM))) { -+ mptcp_debug("%s: mp_capable v1: bad option size %d\n", -+ __func__, opsize); -+ break; -+ } -+ -+ /* MPTCP-RFC 6824: -+ * "If receiving a message with the 'B' flag set to 1, and this -+ * is not understood, then this SYN MUST be silently ignored; -+ */ -+ if (mpcapable->b) { -+ mopt->drop_me = 1; -+ break; -+ } -+ -+ /* MPTCP-RFC 6824: -+ * "An implementation that only supports this method MUST set -+ * bit "H" to 1, and bits "C" through "G" to 0." 
-+ */ -+ if (!mpcapable->h) -+ break; -+ -+ mopt->saw_mpc = 1; -+ mopt->dss_csum = sysctl_mptcp_checksum || mpcapable->a; -+ -+ if (mpcapable->ver == MPTCP_VERSION_0) { -+ if (opsize == MPTCP_SUB_LEN_CAPABLE_SYN) -+ mopt->mptcp_sender_key = mpcapable->sender_key; -+ -+ if (opsize == MPTCP_SUB_LEN_CAPABLE_ACK) { -+ mopt->mptcp_sender_key = mpcapable->sender_key; -+ mopt->mptcp_receiver_key = mpcapable->receiver_key; -+ } -+ } else if (mpcapable->ver == MPTCP_VERSION_1) { -+ if (opsize == MPTCPV1_SUB_LEN_CAPABLE_SYNACK) -+ mopt->mptcp_sender_key = mpcapable->sender_key; -+ -+ if (opsize == MPTCPV1_SUB_LEN_CAPABLE_ACK) { -+ mopt->mptcp_sender_key = mpcapable->sender_key; -+ mopt->mptcp_receiver_key = mpcapable->receiver_key; -+ } -+ -+ if (opsize == MPTCPV1_SUB_LEN_CAPABLE_DATA || -+ opsize == MPTCPV1_SUB_LEN_CAPABLE_DATA_CSUM) { -+ mopt->mptcp_sender_key = mpcapable->sender_key; -+ mopt->mptcp_receiver_key = mpcapable->receiver_key; -+ -+ TCP_SKB_CB(skb)->mptcp_flags |= MPTCPHDR_MPC_DATA; -+ -+ ptr += sizeof(struct mp_capable); -+ TCP_SKB_CB(skb)->dss_off = (ptr - skb_transport_header(skb)); -+ -+ /* Is a check-sum present? */ -+ if (opsize == MPTCPV1_SUB_LEN_CAPABLE_DATA_CSUM) -+ TCP_SKB_CB(skb)->mptcp_flags |= MPTCPHDR_DSS_CSUM; -+ } -+ } -+ -+ mopt->mptcp_ver = mpcapable->ver; -+ break; -+ } -+ case MPTCP_SUB_JOIN: -+ { -+ const struct mp_join *mpjoin = (struct mp_join *)ptr; -+ -+ if (opsize != MPTCP_SUB_LEN_JOIN_SYN && -+ opsize != MPTCP_SUB_LEN_JOIN_SYNACK && -+ opsize != MPTCP_SUB_LEN_JOIN_ACK) { -+ mptcp_debug("%s: mp_join: bad option size %d\n", -+ __func__, opsize); -+ break; -+ } -+ -+ /* saw_mpc must be set, because in tcp_check_req we assume that -+ * it is set to support falling back to reg. TCP if a rexmitted -+ * SYN has no MP_CAPABLE or MP_JOIN -+ */ -+ switch (opsize) { -+ case MPTCP_SUB_LEN_JOIN_SYN: -+ mopt->is_mp_join = 1; -+ mopt->saw_mpc = 1; -+ mopt->low_prio = mpjoin->b; -+ mopt->rem_id = mpjoin->addr_id; -+ mopt->mptcp_rem_token = mpjoin->u.syn.token; -+ mopt->mptcp_recv_nonce = mpjoin->u.syn.nonce; -+ break; -+ case MPTCP_SUB_LEN_JOIN_SYNACK: -+ mopt->saw_mpc = 1; -+ mopt->low_prio = mpjoin->b; -+ mopt->rem_id = mpjoin->addr_id; -+ mopt->mptcp_recv_tmac = mpjoin->u.synack.mac; -+ mopt->mptcp_recv_nonce = mpjoin->u.synack.nonce; -+ break; -+ case MPTCP_SUB_LEN_JOIN_ACK: -+ mopt->saw_mpc = 1; -+ mopt->join_ack = 1; -+ memcpy(mopt->mptcp_recv_mac, mpjoin->u.ack.mac, 20); -+ break; -+ } -+ break; -+ } -+ case MPTCP_SUB_DSS: -+ { -+ const struct mp_dss *mdss = (struct mp_dss *)ptr; -+ struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); -+ -+ /* We check opsize for the csum and non-csum case. We do this, -+ * because the draft says that the csum SHOULD be ignored if -+ * it has not been negotiated in the MP_CAPABLE but still is -+ * present in the data. -+ * -+ * It will get ignored later in mptcp_queue_skb. 
-+ */
-+ if (opsize != mptcp_sub_len_dss(mdss, 0) &&
-+ opsize != mptcp_sub_len_dss(mdss, 1)) {
-+ mptcp_debug("%s: mp_dss: bad option size %d\n",
-+ __func__, opsize);
-+ break;
-+ }
-+
-+ ptr += 4;
-+
-+ if (mdss->A) {
-+ tcb->mptcp_flags |= MPTCPHDR_ACK;
-+
-+ if (mdss->a) {
-+ mopt->data_ack = (u32) get_unaligned_be64(ptr);
-+ ptr += MPTCP_SUB_LEN_ACK_64;
-+ } else {
-+ mopt->data_ack = get_unaligned_be32(ptr);
-+ ptr += MPTCP_SUB_LEN_ACK;
-+ }
-+ }
-+
-+ tcb->dss_off = (ptr - skb_transport_header(skb));
-+
-+ if (mdss->M) {
-+ if (mdss->m) {
-+ u64 data_seq64 = get_unaligned_be64(ptr);
-+
-+ tcb->mptcp_flags |= MPTCPHDR_SEQ64_SET;
-+ mopt->data_seq = (u32) data_seq64;
-+
-+ ptr += 12; /* 64-bit dseq + subseq */
-+ } else {
-+ mopt->data_seq = get_unaligned_be32(ptr);
-+ ptr += 8; /* 32-bit dseq + subseq */
-+ }
-+ mopt->data_len = get_unaligned_be16(ptr);
-+
-+ tcb->mptcp_flags |= MPTCPHDR_SEQ;
-+
-+ /* Is a check-sum present? */
-+ if (opsize == mptcp_sub_len_dss(mdss, 1))
-+ tcb->mptcp_flags |= MPTCPHDR_DSS_CSUM;
-+
-+ /* DATA_FIN only possible with DSS-mapping */
-+ if (mdss->F)
-+ tcb->mptcp_flags |= MPTCPHDR_FIN;
-+ }
-+
-+ break;
-+ }
-+ case MPTCP_SUB_ADD_ADDR:
-+ {
-+ struct mp_add_addr *mpadd = (struct mp_add_addr *)ptr;
-+
-+ /* If tcp_sock is not available, MPTCP version can't be
-+ * retrieved and ADD_ADDR opsize validation is not possible.
-+ */
-+ if (!tp || !tp->mpcb)
-+ break;
-+
-+ if (!is_valid_addropt_opsize(tp->mpcb->mptcp_ver,
-+ mpadd, opsize)) {
-+ mptcp_debug("%s: mp_add_addr: bad option size %d\n",
-+ __func__, opsize);
-+ break;
-+ }
-+
-+ /* We have to manually parse the options if we got two of them. */
-+ if (mopt->saw_add_addr) {
-+ mopt->more_add_addr = 1;
-+ break;
-+ }
-+ mopt->saw_add_addr = 1;
-+ mopt->add_addr_ptr = ptr;
-+ break;
-+ }
-+ case MPTCP_SUB_REMOVE_ADDR:
-+ if ((opsize - MPTCP_SUB_LEN_REMOVE_ADDR) < 0) {
-+ mptcp_debug("%s: mp_remove_addr: bad option size %d\n",
-+ __func__, opsize);
-+ break;
-+ }
-+
-+ if (mopt->saw_rem_addr) {
-+ mopt->more_rem_addr = 1;
-+ break;
-+ }
-+ mopt->saw_rem_addr = 1;
-+ mopt->rem_addr_ptr = ptr;
-+ break;
-+ case MPTCP_SUB_PRIO:
-+ {
-+ const struct mp_prio *mpprio = (struct mp_prio *)ptr;
-+
-+ if (opsize != MPTCP_SUB_LEN_PRIO &&
-+ opsize != MPTCP_SUB_LEN_PRIO_ADDR) {
-+ mptcp_debug("%s: mp_prio: bad option size %d\n",
-+ __func__, opsize);
-+ break;
-+ }
-+
-+ mopt->saw_low_prio = 1;
-+ mopt->low_prio = mpprio->b;
-+
-+ if (opsize == MPTCP_SUB_LEN_PRIO_ADDR) {
-+ mopt->saw_low_prio = 2;
-+ mopt->prio_addr_id = mpprio->addr_id;
-+ }
-+ break;
-+ }
-+ case MPTCP_SUB_FAIL:
-+ if (opsize != MPTCP_SUB_LEN_FAIL) {
-+ mptcp_debug("%s: mp_fail: bad option size %d\n",
-+ __func__, opsize);
-+ break;
-+ }
-+ mopt->mp_fail = 1;
-+ break;
-+ case MPTCP_SUB_FCLOSE:
-+ if (opsize != MPTCP_SUB_LEN_FCLOSE) {
-+ mptcp_debug("%s: mp_fclose: bad option size %d\n",
-+ __func__, opsize);
-+ break;
-+ }
-+
-+ mopt->mp_fclose = 1;
-+ mopt->mptcp_sender_key = ((struct mp_fclose *)ptr)->key;
-+
-+ break;
-+ default:
-+ mptcp_debug("%s: Received unknown subtype: %d\n",
-+ __func__, mp_opt->sub);
-+ break;
-+ }
-+}
-+
-+/** Parse only MPTCP options */
-+void tcp_parse_mptcp_options(const struct sk_buff *skb,
-+ struct mptcp_options_received *mopt)
-+{
-+ const struct tcphdr *th = tcp_hdr(skb);
-+ int length = (th->doff * 4) - sizeof(struct tcphdr);
-+ const unsigned char *ptr = (const unsigned char *)(th + 1);
-+
-+ while (length > 0) {
-+ int opcode = *ptr++;
-+ int opsize;
-+
-+ switch (opcode) {
-+ case TCPOPT_EOL:
-+ return;
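tcp_parse_mptcp_options() (continued below) and mptcp_parse_addropt() further down both use the standard TCP option walk: EOL ends the list, NOP is a single padding byte, and everything else is kind/length/value with two sanity checks on the length. A self-contained sketch of that pattern; the demo buffer is illustrative, and TCPOPT_MPTCP is the IANA-assigned option kind 30:

/* Standalone illustration of the option walk used above and below. */
#include <stdio.h>

#define TCPOPT_EOL	0
#define TCPOPT_NOP	1
#define TCPOPT_MPTCP	30	/* IANA-assigned kind for MPTCP */

static void walk_options(const unsigned char *ptr, int length)
{
	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return;
		case TCPOPT_NOP:	/* one-byte padding */
			length--;
			continue;
		default:
			opsize = *ptr++;
			if (opsize < 2)		/* malformed ("silly") option */
				return;
			if (opsize > length)	/* don't parse partial options */
				return;
			if (opcode == TCPOPT_MPTCP)
				printf("MPTCP option, %d bytes\n", opsize);
			ptr += opsize - 2;
			length -= opsize;
		}
	}
}

int main(void)
{
	/* NOP, NOP, then a 12-byte option with kind 30. */
	unsigned char opts[] = { 1, 1, 30, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

	walk_options(opts, sizeof(opts));
	return 0;
}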
-+ case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ -+ length--; -+ continue; -+ default: -+ opsize = *ptr++; -+ if (opsize < 2) /* "silly options" */ -+ return; -+ if (opsize > length) -+ return; /* don't parse partial options */ -+ if (opcode == TCPOPT_MPTCP) -+ mptcp_parse_options(ptr - 2, opsize, mopt, skb, NULL); -+ } -+ ptr += opsize - 2; -+ length -= opsize; -+ } -+} -+ -+bool mptcp_check_rtt(const struct tcp_sock *tp, int time) -+{ -+ struct mptcp_cb *mpcb = tp->mpcb; -+ struct mptcp_tcp_sock *mptcp; -+ u32 rtt_max = 0; -+ -+ /* In MPTCP, we take the max delay across all flows, -+ * in order to take into account meta-reordering buffers. -+ */ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ struct sock *sk = mptcp_to_sock(mptcp); -+ -+ if (!mptcp_sk_can_recv(sk)) -+ continue; -+ -+ if (rtt_max < tcp_sk(sk)->rcv_rtt_est.rtt_us) -+ rtt_max = tcp_sk(sk)->rcv_rtt_est.rtt_us; -+ } -+ if (time < (rtt_max >> 3) || !rtt_max) -+ return true; -+ -+ return false; -+} -+ -+static void mptcp_handle_add_addr(const unsigned char *ptr, struct sock *sk) -+{ -+ struct mp_add_addr *mpadd = (struct mp_add_addr *)ptr; -+ struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb; -+ union inet_addr addr; -+ sa_family_t family; -+ __be16 port = 0; -+ bool is_v4; -+ -+ if (mpcb->mptcp_ver < MPTCP_VERSION_1) { -+ is_v4 = mpadd->u_bit.v0.ipver == 4; -+ } else { -+ is_v4 = mpadd->len == MPTCP_SUB_LEN_ADD_ADDR4_VER1 || -+ mpadd->len == MPTCP_SUB_LEN_ADD_ADDR4_VER1 + 2; -+ -+ /* TODO: support ADD_ADDRv1 retransmissions */ -+ if (mpadd->u_bit.v1.echo) -+ return; -+ } -+ -+ if (is_v4) { -+ u8 hash_mac_check[SHA256_DIGEST_SIZE]; -+ __be16 hmacport = 0; -+ char *recv_hmac; -+ -+ if (mpcb->mptcp_ver < MPTCP_VERSION_1) -+ goto skip_hmac_v4; -+ -+ recv_hmac = (char *)mpadd->u.v4.mac; -+ if (mpadd->len == MPTCP_SUB_LEN_ADD_ADDR4_VER1) { -+ recv_hmac -= sizeof(mpadd->u.v4.port); -+ } else if (mpadd->len == MPTCP_SUB_LEN_ADD_ADDR4_VER1 + 2) { -+ hmacport = mpadd->u.v4.port; -+ } -+ mptcp_hmac(mpcb->mptcp_ver, (u8 *)&mpcb->mptcp_rem_key, -+ (u8 *)&mpcb->mptcp_loc_key, hash_mac_check, 3, -+ 1, (u8 *)&mpadd->addr_id, -+ 4, (u8 *)&mpadd->u.v4.addr.s_addr, -+ 2, (u8 *)&hmacport); -+ if (memcmp(&hash_mac_check[SHA256_DIGEST_SIZE - sizeof(u64)], recv_hmac, 8) != 0) -+ /* ADD_ADDR2 discarded */ -+ return; -+skip_hmac_v4: -+ if ((mpcb->mptcp_ver == MPTCP_VERSION_0 && -+ mpadd->len == MPTCP_SUB_LEN_ADD_ADDR4 + 2) || -+ (mpcb->mptcp_ver == MPTCP_VERSION_1 && -+ mpadd->len == MPTCP_SUB_LEN_ADD_ADDR4_VER1 + 2)) -+ port = mpadd->u.v4.port; -+ family = AF_INET; -+ addr.in = mpadd->u.v4.addr; -+#if IS_ENABLED(CONFIG_IPV6) -+ } else { -+ u8 hash_mac_check[SHA256_DIGEST_SIZE]; -+ __be16 hmacport = 0; -+ char *recv_hmac; -+ -+ if (mpcb->mptcp_ver < MPTCP_VERSION_1) -+ goto skip_hmac_v6; -+ -+ recv_hmac = (char *)mpadd->u.v6.mac; -+ if (mpadd->len == MPTCP_SUB_LEN_ADD_ADDR6_VER1) { -+ recv_hmac -= sizeof(mpadd->u.v6.port); -+ } else if (mpadd->len == MPTCP_SUB_LEN_ADD_ADDR6_VER1 + 2) { -+ hmacport = mpadd->u.v6.port; -+ } -+ mptcp_hmac(mpcb->mptcp_ver, (u8 *)&mpcb->mptcp_rem_key, -+ (u8 *)&mpcb->mptcp_loc_key, hash_mac_check, 3, -+ 1, (u8 *)&mpadd->addr_id, -+ 16, (u8 *)&mpadd->u.v6.addr.s6_addr, -+ 2, (u8 *)&hmacport); -+ if (memcmp(&hash_mac_check[SHA256_DIGEST_SIZE - sizeof(u64)], recv_hmac, 8) != 0) -+ /* ADD_ADDR2 discarded */ -+ return; -+skip_hmac_v6: -+ if ((mpcb->mptcp_ver == MPTCP_VERSION_0 && -+ mpadd->len == MPTCP_SUB_LEN_ADD_ADDR6 + 2) || -+ (mpcb->mptcp_ver == MPTCP_VERSION_1 && -+ mpadd->len == MPTCP_SUB_LEN_ADD_ADDR6_VER1 + 2)) -+ port = 
mpadd->u.v6.port; -+ family = AF_INET6; -+ addr.in6 = mpadd->u.v6.addr; -+#endif /* CONFIG_IPV6 */ -+ } -+ -+ if (mpcb->pm_ops->add_raddr) -+ mpcb->pm_ops->add_raddr(mpcb, &addr, family, port, mpadd->addr_id); -+ -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRRX); -+} -+ -+static void mptcp_handle_rem_addr(const unsigned char *ptr, struct sock *sk) -+{ -+ struct mp_remove_addr *mprem = (struct mp_remove_addr *)ptr; -+ int i; -+ u8 rem_id; -+ struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb; -+ -+ for (i = 0; i <= mprem->len - MPTCP_SUB_LEN_REMOVE_ADDR; i++) { -+ rem_id = (&mprem->addrs_id)[i]; -+ -+ if (mpcb->pm_ops->rem_raddr) -+ mpcb->pm_ops->rem_raddr(mpcb, rem_id); -+ mptcp_send_reset_rem_id(mpcb, rem_id); -+ -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_REMADDRSUB); -+ } -+ -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_REMADDRRX); -+} -+ -+static void mptcp_parse_addropt(const struct sk_buff *skb, struct sock *sk) -+{ -+ struct tcphdr *th = tcp_hdr(skb); -+ unsigned char *ptr; -+ int length = (th->doff * 4) - sizeof(struct tcphdr); -+ -+ /* Jump through the options to check whether ADD_ADDR is there */ -+ ptr = (unsigned char *)(th + 1); -+ while (length > 0) { -+ int opcode = *ptr++; -+ int opsize; -+ -+ switch (opcode) { -+ case TCPOPT_EOL: -+ return; -+ case TCPOPT_NOP: -+ length--; -+ continue; -+ default: -+ opsize = *ptr++; -+ if (opsize < 2) -+ return; -+ if (opsize > length) -+ return; /* don't parse partial options */ -+ if (opcode == TCPOPT_MPTCP && -+ ((struct mptcp_option *)ptr)->sub == MPTCP_SUB_ADD_ADDR) { -+ u8 mptcp_ver = tcp_sk(sk)->mpcb->mptcp_ver; -+ struct mp_add_addr *mpadd = (struct mp_add_addr *)ptr; -+ -+ if (!is_valid_addropt_opsize(mptcp_ver, mpadd, -+ opsize)) -+ goto cont; -+ -+ mptcp_handle_add_addr(ptr, sk); -+ } -+ if (opcode == TCPOPT_MPTCP && -+ ((struct mptcp_option *)ptr)->sub == MPTCP_SUB_REMOVE_ADDR) { -+ if ((opsize - MPTCP_SUB_LEN_REMOVE_ADDR) < 0) -+ goto cont; -+ -+ mptcp_handle_rem_addr(ptr, sk); -+ } -+cont: -+ ptr += opsize - 2; -+ length -= opsize; -+ } -+ } -+ return; -+} -+ -+static bool mptcp_mp_fastclose_rcvd(struct sock *sk) -+{ -+ struct mptcp_tcp_sock *mptcp = tcp_sk(sk)->mptcp; -+ struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb; -+ -+ if (likely(!mptcp->rx_opt.mp_fclose)) -+ return false; -+ -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_FASTCLOSERX); -+ mptcp->rx_opt.mp_fclose = 0; -+ if (mptcp->rx_opt.mptcp_sender_key != mpcb->mptcp_loc_key) -+ return false; -+ -+ mptcp_sub_force_close_all(mpcb, NULL); -+ -+ tcp_reset(mptcp_meta_sk(sk)); -+ -+ return true; -+} -+ -+static void mptcp_mp_fail_rcvd(struct sock *sk, const struct tcphdr *th) -+{ -+ struct mptcp_tcp_sock *mptcp = tcp_sk(sk)->mptcp; -+ struct sock *meta_sk = mptcp_meta_sk(sk); -+ struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb; -+ -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILRX); -+ mptcp->rx_opt.mp_fail = 0; -+ -+ if (!th->rst && !mpcb->infinite_mapping_snd) { -+ mpcb->send_infinite_mapping = 1; -+ -+ mptcp_restart_sending(meta_sk); -+ -+ mptcp_fallback_close(mpcb, sk); -+ } -+} -+ -+static inline void mptcp_path_array_check(struct sock *meta_sk) -+{ -+ struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ -+ if (unlikely(mpcb->list_rcvd)) { -+ mpcb->list_rcvd = 0; -+ if (mpcb->pm_ops->new_remote_address) -+ mpcb->pm_ops->new_remote_address(meta_sk); -+ } -+} -+ -+bool mptcp_handle_options(struct sock *sk, const struct tcphdr *th, -+ const struct sk_buff *skb) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct mptcp_options_received *mopt = &tp->mptcp->rx_opt; -+ struct mptcp_cb *mpcb = tp->mpcb; 
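The ADD_ADDR paths above validate the peer's HMAC by comparing the rightmost 64 bits of a locally computed HMAC-SHA256 against the truncated digest carried in the option. A sketch of the shape of that check built on OpenSSL; the 16-byte key layout and the (addr_id || address || port) message format are simplified assumptions for the demo, not the kernel's mptcp_hmac():

#include <string.h>
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>
#include <openssl/hmac.h>
#include <openssl/evp.h>

static int addr_hmac_ok(const uint8_t key[16], uint8_t addr_id,
			const uint8_t addr[4], uint16_t port_be,
			const uint8_t recv_hmac[8])
{
	uint8_t msg[1 + 4 + 2];
	uint8_t mac[EVP_MAX_MD_SIZE];
	unsigned int mac_len = 0;

	msg[0] = addr_id;
	memcpy(&msg[1], addr, 4);
	memcpy(&msg[5], &port_be, 2);

	HMAC(EVP_sha256(), key, 16, msg, sizeof(msg), mac, &mac_len);

	/* Compare only the trailing 64 bits: the option carries a
	 * truncated HMAC, just like the memcmp() against
	 * hash_mac_check[SHA256_DIGEST_SIZE - sizeof(u64)] above.
	 */
	return memcmp(&mac[mac_len - 8], recv_hmac, 8) == 0;
}

int main(void)
{
	uint8_t key[16] = { 0 };
	uint8_t addr[4] = { 192, 0, 2, 1 };
	uint8_t mac[EVP_MAX_MD_SIZE], msg[7] = { 5, 192, 0, 2, 1, 0, 80 };
	unsigned int len;

	/* Self-consistent demo: compute the digest, then verify it. */
	HMAC(EVP_sha256(), key, 16, msg, sizeof(msg), mac, &len);
	printf("valid: %d\n",
	       addr_hmac_ok(key, 5, addr, htons(80), &mac[len - 8]));
	return 0;
}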
-+
-+ if (tp->mpcb->infinite_mapping_rcv || tp->mpcb->infinite_mapping_snd)
-+ return false;
-+
-+ if (mptcp_mp_fastclose_rcvd(sk))
-+ return true;
-+
-+ if (sk->sk_state == TCP_RST_WAIT && !th->rst)
-+ return true;
-+
-+ if (mopt->saw_mpc && !tp->mpcb->rem_key_set)
-+ mptcp_initialize_recv_vars(mptcp_meta_tp(tp), tp->mpcb,
-+ mopt->mptcp_sender_key);
-+
-+ if (unlikely(mopt->mp_fail))
-+ mptcp_mp_fail_rcvd(sk, th);
-+
-+ /* RFC 6824, Section 3.3:
-+ * If a checksum is not present when its use has been negotiated, the
-+ * receiver MUST close the subflow with a RST as it is considered broken.
-+ */
-+ if ((mptcp_is_data_seq(skb) || mptcp_is_data_mpcapable(skb)) &&
-+ tp->mpcb->dss_csum &&
-+ !(TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_DSS_CSUM)) {
-+ mptcp_send_reset(sk);
-+ return true;
-+ }
-+
-+ /* We have to acknowledge retransmissions of the third
-+ * ack.
-+ */
-+ if (mopt->join_ack) {
-+ tcp_send_delayed_ack(sk);
-+ mopt->join_ack = 0;
-+ }
-+
-+ if (mopt->saw_add_addr || mopt->saw_rem_addr) {
-+ if (mopt->more_add_addr || mopt->more_rem_addr) {
-+ mptcp_parse_addropt(skb, sk);
-+ } else {
-+ if (mopt->saw_add_addr)
-+ mptcp_handle_add_addr(mopt->add_addr_ptr, sk);
-+ if (mopt->saw_rem_addr)
-+ mptcp_handle_rem_addr(mopt->rem_addr_ptr, sk);
-+ }
-+
-+ mopt->more_add_addr = 0;
-+ mopt->saw_add_addr = 0;
-+ mopt->more_rem_addr = 0;
-+ mopt->saw_rem_addr = 0;
-+ }
-+ if (mopt->saw_low_prio) {
-+ if (mopt->saw_low_prio == 1) {
-+ tp->mptcp->rcv_low_prio = mopt->low_prio;
-+ if (mpcb->pm_ops->prio_changed)
-+ mpcb->pm_ops->prio_changed(sk, mopt->low_prio);
-+ } else {
-+ struct mptcp_tcp_sock *mptcp;
-+
-+ mptcp_for_each_sub(tp->mpcb, mptcp) {
-+ if (mptcp->rem_id == mopt->prio_addr_id) {
-+ mptcp->rcv_low_prio = mopt->low_prio;
-+ if (mpcb->pm_ops->prio_changed)
-+ mpcb->pm_ops->prio_changed(sk,
-+ mopt->low_prio);
-+ }
-+ }
-+ }
-+ mopt->saw_low_prio = 0;
-+ }
-+
-+ if (mptcp_process_data_ack(sk, skb))
-+ return true;
-+
-+ mptcp_path_array_check(mptcp_meta_sk(sk));
-+ /* Socket may have been mp_killed by a REMOVE_ADDR */
-+ if (tp->mp_killed)
-+ return true;
-+
-+ return false;
-+}
-+
-+static void _mptcp_rcv_synsent_fastopen(struct sock *meta_sk,
-+ struct sk_buff *skb, bool rtx_queue)
-+{
-+ struct tcp_sock *meta_tp = tcp_sk(meta_sk);
-+ struct tcp_sock *master_tp = tcp_sk(meta_tp->mpcb->master_sk);
-+ u32 new_mapping = meta_tp->write_seq - master_tp->snd_una;
-+
-+ /* If the server acknowledges only part of the data sent in
-+ * the SYN, we need to trim the acknowledged part because
-+ * we don't want to retransmit this already received data.
-+ * When we reach this point, tcp_ack() has already cleaned up
-+ * fully acked segments. However, TCP trims partially acked
-+ * segments only when retransmitting. Since MPTCP comes into
-+ * play only now, we will fake an initial transmit, and
-+ * retransmit_skb() will not be called. The following fragment
-+ * comes from __tcp_retransmit_skb().
-+ */
-+ if (before(TCP_SKB_CB(skb)->seq, master_tp->snd_una)) {
-+ BUG_ON(before(TCP_SKB_CB(skb)->end_seq, master_tp->snd_una));
-+ /* tcp_trim_head can only return ENOMEM if skb is
-+ * cloned. It is not the case here (see
-+ * tcp_send_syn_data).
-+ */ -+ BUG_ON(tcp_trim_head(meta_sk, skb, master_tp->snd_una - -+ TCP_SKB_CB(skb)->seq)); -+ } -+ -+ TCP_SKB_CB(skb)->seq += new_mapping; -+ TCP_SKB_CB(skb)->end_seq += new_mapping; -+ TCP_SKB_CB(skb)->sacked = 0; -+ -+ list_del(&skb->tcp_tsorted_anchor); -+ -+ if (rtx_queue) -+ tcp_rtx_queue_unlink(skb, meta_sk); -+ -+ INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); -+ -+ if (rtx_queue) -+ tcp_add_write_queue_tail(meta_sk, skb); -+} -+ -+/* In case of fastopen, some data can already be in the write queue. -+ * We need to update the sequence number of the segments as they -+ * were initially TCP sequence numbers. -+ */ -+static void mptcp_rcv_synsent_fastopen(struct sock *meta_sk) -+{ -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ struct tcp_sock *master_tp = tcp_sk(meta_tp->mpcb->master_sk); -+ struct sk_buff *skb_write_head, *skb_rtx_head, *tmp; -+ -+ skb_write_head = tcp_write_queue_head(meta_sk); -+ skb_rtx_head = tcp_rtx_queue_head(meta_sk); -+ -+ if (!(skb_write_head || skb_rtx_head)) -+ return; -+ -+ /* There should only be one skb in {write, rtx} queue: the data not -+ * acknowledged in the SYN+ACK. In this case, we need to map -+ * this data to data sequence numbers. -+ */ -+ -+ BUG_ON(skb_write_head && skb_rtx_head); -+ -+ if (skb_write_head) { -+ skb_queue_walk_from_safe(&meta_sk->sk_write_queue, -+ skb_write_head, tmp) { -+ _mptcp_rcv_synsent_fastopen(meta_sk, skb_write_head, -+ false); -+ } -+ } -+ -+ if (skb_rtx_head) { -+ skb_rbtree_walk_from_safe(skb_rtx_head, tmp) { -+ _mptcp_rcv_synsent_fastopen(meta_sk, skb_rtx_head, -+ true); -+ } -+ } -+ -+ /* We can advance write_seq by the number of bytes unacknowledged -+ * and that were mapped in the previous loop. -+ */ -+ meta_tp->write_seq += master_tp->write_seq - master_tp->snd_una; -+ -+ /* The packets from the master_sk will be entailed to it later -+ * Until that time, its write queue is empty, and -+ * write_seq must align with snd_una -+ */ -+ master_tp->snd_nxt = master_tp->write_seq = master_tp->snd_una; -+ master_tp->packets_out = 0; -+ tcp_clear_retrans(meta_tp); -+ tcp_clear_retrans(master_tp); -+ tcp_set_ca_state(meta_tp->mpcb->master_sk, TCP_CA_Open); -+ tcp_set_ca_state(meta_sk, TCP_CA_Open); -+} -+ -+/* The skptr is needed, because if we become MPTCP-capable, we have to switch -+ * from meta-socket to master-socket. -+ * -+ * @return: 1 - we want to reset this connection -+ * 2 - we want to discard the received syn/ack -+ * 0 - everything is fine - continue -+ */ -+int mptcp_rcv_synsent_state_process(struct sock *sk, struct sock **skptr, -+ const struct sk_buff *skb, -+ const struct mptcp_options_received *mopt) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ -+ if (mptcp(tp)) { -+ u8 hash_mac_check[SHA256_DIGEST_SIZE]; -+ struct mptcp_cb *mpcb = tp->mpcb; -+ -+ mptcp_hmac(mpcb->mptcp_ver, (u8 *)&mpcb->mptcp_rem_key, -+ (u8 *)&mpcb->mptcp_loc_key, hash_mac_check, 2, -+ 4, (u8 *)&tp->mptcp->rx_opt.mptcp_recv_nonce, -+ 4, (u8 *)&tp->mptcp->mptcp_loc_nonce); -+ if (memcmp(hash_mac_check, -+ (char *)&tp->mptcp->rx_opt.mptcp_recv_tmac, 8)) { -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKMAC); -+ mptcp_sub_force_close(sk); -+ return 1; -+ } -+ -+ /* Set this flag in order to postpone data sending -+ * until the 4th ack arrives. 
-+ */ -+ tp->mptcp->pre_established = 1; -+ tp->mptcp->rcv_low_prio = tp->mptcp->rx_opt.low_prio; -+ -+ mptcp_hmac(mpcb->mptcp_ver, (u8 *)&mpcb->mptcp_loc_key, -+ (u8 *)&mpcb->mptcp_rem_key, -+ tp->mptcp->sender_mac, 2, -+ 4, (u8 *)&tp->mptcp->mptcp_loc_nonce, -+ 4, (u8 *)&tp->mptcp->rx_opt.mptcp_recv_nonce); -+ -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX); -+ } else if (mopt->saw_mpc) { -+ struct sock *meta_sk = sk; -+ -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK); -+ if (mopt->mptcp_ver > tcp_sk(sk)->mptcp_ver) -+ /* TODO Consider adding new MPTCP_INC_STATS entry */ -+ goto fallback; -+ if (tcp_sk(sk)->mptcp_ver == MPTCP_VERSION_1 && -+ mopt->mptcp_ver < MPTCP_VERSION_1) -+ /* TODO Consider adding new MPTCP_INC_STATS entry */ -+ /* TODO - record this in the cache - use v0 next time */ -+ goto fallback; -+ -+ if (mptcp_create_master_sk(sk, mopt->mptcp_sender_key, 1, -+ mopt->mptcp_ver, -+ ntohs(tcp_hdr(skb)->window))) -+ return 2; -+ -+ sk = tcp_sk(sk)->mpcb->master_sk; -+ *skptr = sk; -+ tp = tcp_sk(sk); -+ -+ /* If fastopen was used data might be in the send queue. We -+ * need to update their sequence number to MPTCP-level seqno. -+ * Note that it can happen in rare cases that fastopen_req is -+ * NULL and syn_data is 0 but fastopen indeed occurred and -+ * data has been queued in the write queue (but not sent). -+ * Example of such rare cases: connect is non-blocking and -+ * TFO is configured to work without cookies. -+ */ -+ mptcp_rcv_synsent_fastopen(meta_sk); -+ -+ /* -1, because the SYN consumed 1 byte. In case of TFO, we -+ * start the subflow-sequence number as if the data of the SYN -+ * is not part of any mapping. -+ */ -+ tp->mptcp->snt_isn = tp->snd_una - 1; -+ tp->mpcb->dss_csum = mopt->dss_csum; -+ if (tp->mpcb->dss_csum) -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CSUMENABLED); -+ -+ if (tp->mpcb->mptcp_ver >= MPTCP_VERSION_1) -+ tp->mpcb->send_mptcpv1_mpcapable = 1; -+ -+ tp->mptcp->include_mpc = 1; -+ -+ sk_set_socket(sk, meta_sk->sk_socket); -+ sk->sk_wq = meta_sk->sk_wq; -+ -+ bh_unlock_sock(sk); -+ /* hold in sk_clone_lock due to initialization to 2 */ -+ sock_put(sk); -+ } else { -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEFALLBACK); -+fallback: -+ tp->request_mptcp = 0; -+ -+ if (tp->inside_tk_table) -+ mptcp_hash_remove_bh(tp); -+ } -+ -+ if (mptcp(tp)) -+ tp->mptcp->rcv_isn = TCP_SKB_CB(skb)->seq; -+ -+ return 0; -+} -+ -+/* Similar to tcp_should_expand_sndbuf */ -+bool mptcp_should_expand_sndbuf(const struct sock *sk) -+{ -+ const struct sock *meta_sk = mptcp_meta_sk(sk); -+ const struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ const struct mptcp_tcp_sock *mptcp; -+ -+ /* We circumvent this check in tcp_check_space, because we want to -+ * always call sk_write_space. So, we reproduce the check here. -+ */ -+ if (!meta_sk->sk_socket || -+ !test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags)) -+ return false; -+ -+ /* If the user specified a specific send buffer setting, do -+ * not modify it. -+ */ -+ if (meta_sk->sk_userlocks & SOCK_SNDBUF_LOCK) -+ return false; -+ -+ /* If we are under global TCP memory pressure, do not expand. */ -+ if (tcp_under_memory_pressure(meta_sk)) -+ return false; -+ -+ /* If we are under soft global TCP memory pressure, do not expand. */ -+ if (sk_memory_allocated(meta_sk) >= sk_prot_mem_limits(meta_sk, 0)) -+ return false; -+ -+ /* For MPTCP we look for a subsocket that could send data. -+ * If we found one, then we update the send-buffer. 
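mptcp_should_expand_sndbuf() finishes below by scanning the subflows for congestion-window headroom: the meta send buffer only grows while at least one subflow can still put data on the wire. A simplified standalone version of that final test (the types are invented stand-ins):

#include <stdbool.h>
#include <stddef.h>

struct subflow {
	unsigned int packets_in_flight;
	unsigned int snd_cwnd;
};

/* Mirrors the loop below: true as soon as one subflow has
 * tcp_packets_in_flight() < snd_cwnd, i.e. room to send.
 */
static bool should_expand_sndbuf(const struct subflow *subs, size_t n)
{
	for (size_t i = 0; i < n; i++)
		if (subs[i].packets_in_flight < subs[i].snd_cwnd)
			return true;
	return false;
}

int main(void)
{
	struct subflow subs[] = { { 10, 10 }, { 3, 8 } };	/* second one has headroom */

	return should_expand_sndbuf(subs, 2) ? 0 : 1;
}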
-+ */ -+ mptcp_for_each_sub(meta_tp->mpcb, mptcp) { -+ const struct sock *sk_it = mptcp_to_sock(mptcp); -+ const struct tcp_sock *tp_it = tcp_sk(sk_it); -+ -+ if (!mptcp_sk_can_send(sk_it)) -+ continue; -+ -+ if (tcp_packets_in_flight(tp_it) < tp_it->snd_cwnd) -+ return true; -+ } -+ -+ return false; -+} -+ -+void mptcp_tcp_set_rto(struct sock *sk) -+{ -+ tcp_set_rto(sk); -+ mptcp_set_rto(sk); -+} -diff --git a/net/mptcp/mptcp_ipv4.c b/net/mptcp/mptcp_ipv4.c -new file mode 100644 -index 000000000000..0370a7680d47 ---- /dev/null -+++ b/net/mptcp/mptcp_ipv4.c -@@ -0,0 +1,431 @@ -+/* -+ * MPTCP implementation - IPv4-specific functions -+ * -+ * Initial Design & Implementation: -+ * Sébastien Barré -+ * -+ * Current Maintainer: -+ * Christoph Paasch -+ * -+ * Additional authors: -+ * Jaakko Korkeaniemi -+ * Gregory Detal -+ * Fabien Duchêne -+ * Andreas Seelinger -+ * Lavkesh Lahngir -+ * Andreas Ripke -+ * Vlad Dogaru -+ * Octavian Purdila -+ * John Ronan -+ * Catalin Nicutar -+ * Brandon Heller -+ * -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+u32 mptcp_v4_get_nonce(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport) -+{ -+ return siphash_4u32((__force u32)saddr, (__force u32)daddr, -+ (__force u32)sport << 16 | (__force u32)dport, -+ mptcp_seed++, &mptcp_secret); -+} -+ -+u64 mptcp_v4_get_key(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, -+ u32 seed) -+{ -+ return siphash_2u64((__force u64)saddr << 32 | (__force u64)daddr, -+ (__force u64)seed << 32 | (__force u64)sport << 16 | (__force u64)dport, -+ &mptcp_secret); -+} -+ -+ -+static void mptcp_v4_reqsk_destructor(struct request_sock *req) -+{ -+ mptcp_reqsk_destructor(req); -+ -+ tcp_v4_reqsk_destructor(req); -+} -+ -+static int mptcp_v4_init_req(struct request_sock *req, const struct sock *sk, -+ struct sk_buff *skb, bool want_cookie) -+{ -+ tcp_request_sock_ipv4_ops.init_req(req, sk, skb, want_cookie); -+ -+ mptcp_rsk(req)->hash_entry.pprev = NULL; -+ mptcp_rsk(req)->is_sub = 0; -+ inet_rsk(req)->mptcp_rqsk = 1; -+ -+ /* In case of SYN-cookies, we wait for the isn to be generated - it is -+ * input to the key-generation. -+ */ -+ if (!want_cookie) -+ mptcp_reqsk_init(req, sk, skb, false); -+ -+ return 0; -+} -+ -+#ifdef CONFIG_SYN_COOKIES -+static u32 mptcp_v4_cookie_init_seq(struct request_sock *req, const struct sock *sk, -+ const struct sk_buff *skb, __u16 *mssp) -+{ -+ __u32 isn = cookie_v4_init_sequence(req, sk, skb, mssp); -+ -+ tcp_rsk(req)->snt_isn = isn; -+ -+ mptcp_reqsk_init(req, sk, skb, true); -+ -+ return isn; -+} -+#endif -+ -+/* May be called without holding the meta-level lock */ -+static int mptcp_v4_join_init_req(struct request_sock *req, const struct sock *meta_sk, -+ struct sk_buff *skb, bool want_cookie) -+{ -+ struct mptcp_request_sock *mtreq = mptcp_rsk(req); -+ const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ union inet_addr addr; -+ int loc_id; -+ bool low_prio = false; -+ -+ if (!mpcb->rem_key_set) -+ return -1; -+ -+ /* We need to do this as early as possible. 
Because if we fail later
-+ * (e.g., get_local_id), then reqsk_free tries to remove the
-+ * request-socket from the htb in mptcp_hash_request_remove as pprev
-+ * may be different from NULL.
-+ */
-+ mtreq->hash_entry.pprev = NULL;
-+
-+ tcp_request_sock_ipv4_ops.init_req(req, meta_sk, skb, want_cookie);
-+
-+ mtreq->mptcp_loc_nonce = mptcp_v4_get_nonce(ip_hdr(skb)->saddr,
-+ ip_hdr(skb)->daddr,
-+ tcp_hdr(skb)->source,
-+ tcp_hdr(skb)->dest);
-+ addr.ip = inet_rsk(req)->ir_loc_addr;
-+ loc_id = mpcb->pm_ops->get_local_id(meta_sk, AF_INET, &addr, &low_prio);
-+ if (loc_id == -1)
-+ return -1;
-+ mtreq->loc_id = loc_id;
-+ mtreq->low_prio = low_prio;
-+
-+ mptcp_join_reqsk_init(mpcb, req, skb);
-+
-+ return 0;
-+}
-+
-+/* Similar to tcp_request_sock_ops */
-+struct request_sock_ops mptcp_request_sock_ops __read_mostly = {
-+ .family = PF_INET,
-+ .obj_size = sizeof(struct mptcp_request_sock),
-+ .rtx_syn_ack = tcp_rtx_synack,
-+ .send_ack = tcp_v4_reqsk_send_ack,
-+ .destructor = mptcp_v4_reqsk_destructor,
-+ .send_reset = tcp_v4_send_reset,
-+ .syn_ack_timeout = tcp_syn_ack_timeout,
-+};
-+
-+/* Similar to: tcp_v4_conn_request
-+ * May be called without holding the meta-level lock
-+ */
-+static int mptcp_v4_join_request(struct sock *meta_sk, struct sk_buff *skb)
-+{
-+ return tcp_conn_request(&mptcp_request_sock_ops,
-+ &mptcp_join_request_sock_ipv4_ops,
-+ meta_sk, skb);
-+}
-+
-+/* Similar to: tcp_v4_do_rcv
-+ * We only process join requests here (either the SYN or the final ACK).
-+ */
-+int mptcp_v4_do_rcv(struct sock *meta_sk, struct sk_buff *skb)
-+{
-+ const struct tcphdr *th = tcp_hdr(skb);
-+ const struct iphdr *iph = ip_hdr(skb);
-+ struct sock *child, *rsk = NULL, *sk;
-+ int ret;
-+
-+ sk = inet_lookup_established(sock_net(meta_sk), &tcp_hashinfo,
-+ iph->saddr, th->source, iph->daddr,
-+ th->dest, inet_iif(skb));
-+
-+ if (!sk)
-+ goto new_subflow;
-+
-+ if (is_meta_sk(sk)) {
-+ WARN("%s Did not find a sub-sk - but found the meta!\n", __func__);
-+ sock_put(sk);
-+ goto discard;
-+ }
-+
-+ if (sk->sk_state == TCP_TIME_WAIT) {
-+ inet_twsk_put(inet_twsk(sk));
-+ goto discard;
-+ }
-+
-+ if (sk->sk_state == TCP_NEW_SYN_RECV) {
-+ struct request_sock *req = inet_reqsk(sk);
-+ bool req_stolen;
-+
-+ if (!mptcp_can_new_subflow(meta_sk))
-+ goto reset_and_discard;
-+
-+ local_bh_disable();
-+ child = tcp_check_req(meta_sk, skb, req, false, &req_stolen);
-+ if (!child) {
-+ reqsk_put(req);
-+ local_bh_enable();
-+ goto discard;
-+ }
-+
-+ if (child != meta_sk) {
-+ ret = mptcp_finish_handshake(child, skb);
-+ if (ret) {
-+ rsk = child;
-+ local_bh_enable();
-+ goto reset_and_discard;
-+ }
-+
-+ bh_unlock_sock(meta_sk);
-+ local_bh_enable();
-+ return 0;
-+ }
-+
-+ /* tcp_check_req failed */
-+ reqsk_put(req);
-+
-+ local_bh_enable();
-+ goto discard;
-+ }
-+
-+ ret = tcp_v4_do_rcv(sk, skb);
-+ sock_put(sk);
-+
-+ return ret;
-+
-+new_subflow:
-+ if (!mptcp_can_new_subflow(meta_sk))
-+ goto reset_and_discard;
-+
-+ child = tcp_v4_cookie_check(meta_sk, skb);
-+ if (!child)
-+ goto discard;
-+
-+ if (child != meta_sk) {
-+ ret = mptcp_finish_handshake(child, skb);
-+ if (ret) {
-+ rsk = child;
-+ goto reset_and_discard;
-+ }
-+ }
-+
-+ if (tcp_hdr(skb)->syn) {
-+ local_bh_disable();
-+ mptcp_v4_join_request(meta_sk, skb);
-+ local_bh_enable();
-+ }
-+
-+discard:
-+ kfree_skb(skb);
-+ return 0;
-+
-+reset_and_discard:
-+ tcp_v4_send_reset(rsk, skb);
-+ goto discard;
-+}
-+
-+/* Create a new IPv4 subflow.
-+ *
-+ * We are in user context and the meta-sock-lock is held.
-+ */ -+int __mptcp_init4_subsockets(struct sock *meta_sk, const struct mptcp_loc4 *loc, -+ __be16 sport, struct mptcp_rem4 *rem, -+ struct sock **subsk) -+{ -+ struct tcp_sock *tp; -+ struct sock *sk; -+ struct sockaddr_in loc_in, rem_in; -+ struct socket_alloc sock_full; -+ struct socket *sock = (struct socket *)&sock_full; -+ int ret; -+ -+ /** First, create and prepare the new socket */ -+ memcpy(&sock_full, meta_sk->sk_socket, sizeof(sock_full)); -+ sock->state = SS_UNCONNECTED; -+ sock->ops = NULL; -+ -+ ret = inet_create(sock_net(meta_sk), sock, IPPROTO_TCP, 1); -+ if (unlikely(ret < 0)) { -+ net_err_ratelimited("%s inet_create failed ret: %d\n", -+ __func__, ret); -+ return ret; -+ } -+ -+ sk = sock->sk; -+ tp = tcp_sk(sk); -+ -+ /* All subsockets need the MPTCP-lock-class */ -+ lockdep_set_class_and_name(&(sk)->sk_lock.slock, &meta_slock_key, meta_slock_key_name); -+ lockdep_init_map(&(sk)->sk_lock.dep_map, meta_key_name, &meta_key, 0); -+ -+ ret = mptcp_add_sock(meta_sk, sk, loc->loc4_id, rem->rem4_id, GFP_KERNEL); -+ if (ret) { -+ net_err_ratelimited("%s mptcp_add_sock failed ret: %d\n", -+ __func__, ret); -+ goto error; -+ } -+ -+ tp->mptcp->slave_sk = 1; -+ tp->mptcp->low_prio = loc->low_prio; -+ -+ /* Initializing the timer for an MPTCP subflow */ -+ timer_setup(&tp->mptcp->mptcp_ack_timer, mptcp_ack_handler, 0); -+ -+ /** Then, connect the socket to the peer */ -+ loc_in.sin_family = AF_INET; -+ rem_in.sin_family = AF_INET; -+ loc_in.sin_port = sport; -+ if (rem->port) -+ rem_in.sin_port = rem->port; -+ else -+ rem_in.sin_port = inet_sk(meta_sk)->inet_dport; -+ loc_in.sin_addr = loc->addr; -+ rem_in.sin_addr = rem->addr; -+ -+ if (loc->if_idx) -+ sk->sk_bound_dev_if = loc->if_idx; -+ -+ ret = kernel_bind(sock, (struct sockaddr *)&loc_in, -+ sizeof(struct sockaddr_in)); -+ if (ret < 0) { -+ net_err_ratelimited("%s: token %#x bind() to %pI4 index %d failed, error %d\n", -+ __func__, tcp_sk(meta_sk)->mpcb->mptcp_loc_token, -+ &loc_in.sin_addr, loc->if_idx, ret); -+ goto error; -+ } -+ -+ mptcp_debug("%s: token %#x pi %d src_addr:%pI4:%d dst_addr:%pI4:%d ifidx: %d\n", -+ __func__, tcp_sk(meta_sk)->mpcb->mptcp_loc_token, -+ tp->mptcp->path_index, &loc_in.sin_addr, -+ ntohs(loc_in.sin_port), &rem_in.sin_addr, -+ ntohs(rem_in.sin_port), loc->if_idx); -+ -+ if (tcp_sk(meta_sk)->mpcb->pm_ops->init_subsocket_v4) -+ tcp_sk(meta_sk)->mpcb->pm_ops->init_subsocket_v4(sk, rem->addr); -+ -+ ret = kernel_connect(sock, (struct sockaddr *)&rem_in, -+ sizeof(struct sockaddr_in), O_NONBLOCK); -+ if (ret < 0 && ret != -EINPROGRESS) { -+ net_err_ratelimited("%s: MPTCP subsocket connect() failed, error %d\n", -+ __func__, ret); -+ goto error; -+ } -+ -+ MPTCP_INC_STATS(sock_net(meta_sk), MPTCP_MIB_JOINSYNTX); -+ -+ sk_set_socket(sk, meta_sk->sk_socket); -+ sk->sk_wq = meta_sk->sk_wq; -+ -+ if (subsk) -+ *subsk = sk; -+ -+ return 0; -+ -+error: -+ /* May happen if mptcp_add_sock fails first */ -+ if (!mptcp(tp)) { -+ tcp_close(sk, 0); -+ } else { -+ local_bh_disable(); -+ mptcp_sub_force_close(sk); -+ local_bh_enable(); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__mptcp_init4_subsockets); -+ -+const struct inet_connection_sock_af_ops mptcp_v4_specific = { -+ .queue_xmit = ip_queue_xmit, -+ .send_check = tcp_v4_send_check, -+ .rebuild_header = inet_sk_rebuild_header, -+ .sk_rx_dst_set = inet_sk_rx_dst_set, -+ .conn_request = mptcp_conn_request, -+ .syn_recv_sock = tcp_v4_syn_recv_sock, -+ .net_header_len = sizeof(struct iphdr), -+ .setsockopt = ip_setsockopt, -+ .getsockopt = ip_getsockopt, -+ 
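The subflow bring-up in __mptcp_init4_subsockets() above boils down to bind-before-connect on an ordinary TCP socket: pin the local address (and optionally a device), then issue a non-blocking connect. A userspace analogue of that sequence; the addresses are examples from the documentation ranges:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>

int main(void)
{
	struct sockaddr_in loc = { .sin_family = AF_INET },
			   rem = { .sin_family = AF_INET };
	int fd, ret;

	fd = socket(AF_INET, SOCK_STREAM, 0);
	if (fd < 0)
		return 1;

	/* Pin the source address, like kernel_bind() on the subflow. */
	loc.sin_port = 0;	/* let the stack pick a source port */
	inet_pton(AF_INET, "192.0.2.10", &loc.sin_addr);
	if (bind(fd, (struct sockaddr *)&loc, sizeof(loc)) < 0)
		perror("bind");

	/* Non-blocking connect, like kernel_connect(..., O_NONBLOCK):
	 * EINPROGRESS is the expected "still connecting" result.
	 */
	fcntl(fd, F_SETFL, O_NONBLOCK);
	rem.sin_port = htons(80);
	inet_pton(AF_INET, "198.51.100.20", &rem.sin_addr);
	ret = connect(fd, (struct sockaddr *)&rem, sizeof(rem));
	if (ret < 0 && errno != EINPROGRESS)
		perror("connect");

	close(fd);
	return 0;
}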
.addr2sockaddr = inet_csk_addr2sockaddr, -+ .sockaddr_len = sizeof(struct sockaddr_in), -+#ifdef CONFIG_COMPAT -+ .compat_setsockopt = compat_ip_setsockopt, -+ .compat_getsockopt = compat_ip_getsockopt, -+#endif -+ .mtu_reduced = tcp_v4_mtu_reduced, -+}; -+ -+struct tcp_request_sock_ops mptcp_request_sock_ipv4_ops; -+struct tcp_request_sock_ops mptcp_join_request_sock_ipv4_ops; -+ -+/* General initialization of IPv4 for MPTCP */ -+int mptcp_pm_v4_init(void) -+{ -+ int ret = 0; -+ struct request_sock_ops *ops = &mptcp_request_sock_ops; -+ -+ mptcp_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops; -+ mptcp_request_sock_ipv4_ops.init_req = mptcp_v4_init_req; -+#ifdef CONFIG_SYN_COOKIES -+ mptcp_request_sock_ipv4_ops.cookie_init_seq = mptcp_v4_cookie_init_seq; -+#endif -+ mptcp_join_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops; -+ mptcp_join_request_sock_ipv4_ops.init_req = mptcp_v4_join_init_req; -+ -+ ops->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", "MPTCP"); -+ if (ops->slab_name == NULL) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ ops->slab = kmem_cache_create(ops->slab_name, ops->obj_size, 0, -+ SLAB_TYPESAFE_BY_RCU|SLAB_HWCACHE_ALIGN, -+ NULL); -+ -+ if (ops->slab == NULL) { -+ ret = -ENOMEM; -+ goto err_reqsk_create; -+ } -+ -+out: -+ return ret; -+ -+err_reqsk_create: -+ kfree(ops->slab_name); -+ ops->slab_name = NULL; -+ goto out; -+} -+ -+void mptcp_pm_v4_undo(void) -+{ -+ kmem_cache_destroy(mptcp_request_sock_ops.slab); -+ kfree(mptcp_request_sock_ops.slab_name); -+} -diff --git a/net/mptcp/mptcp_ipv6.c b/net/mptcp/mptcp_ipv6.c -new file mode 100644 -index 000000000000..8af32df4fd5f ---- /dev/null -+++ b/net/mptcp/mptcp_ipv6.c -@@ -0,0 +1,479 @@ -+/* -+ * MPTCP implementation - IPv6-specific functions -+ * -+ * Initial Design & Implementation: -+ * Sébastien Barré -+ * -+ * Current Maintainer: -+ * Jaakko Korkeaniemi -+ * -+ * Additional authors: -+ * Jaakko Korkeaniemi -+ * Gregory Detal -+ * Fabien Duchêne -+ * Andreas Seelinger -+ * Lavkesh Lahngir -+ * Andreas Ripke -+ * Vlad Dogaru -+ * Octavian Purdila -+ * John Ronan -+ * Catalin Nicutar -+ * Brandon Heller -+ * -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. 
-+ */ -+ -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+__u32 mptcp_v6_get_nonce(const __be32 *saddr, const __be32 *daddr, -+ __be16 sport, __be16 dport) -+{ -+ const struct { -+ struct in6_addr saddr; -+ struct in6_addr daddr; -+ u32 seed; -+ __be16 sport; -+ __be16 dport; -+ } __aligned(SIPHASH_ALIGNMENT) combined = { -+ .saddr = *(struct in6_addr *)saddr, -+ .daddr = *(struct in6_addr *)daddr, -+ .seed = mptcp_seed++, -+ .sport = sport, -+ .dport = dport -+ }; -+ -+ return siphash(&combined, offsetofend(typeof(combined), dport), -+ &mptcp_secret); -+} -+ -+u64 mptcp_v6_get_key(const __be32 *saddr, const __be32 *daddr, -+ __be16 sport, __be16 dport, u32 seed) -+{ -+ const struct { -+ struct in6_addr saddr; -+ struct in6_addr daddr; -+ u32 seed; -+ __be16 sport; -+ __be16 dport; -+ } __aligned(SIPHASH_ALIGNMENT) combined = { -+ .saddr = *(struct in6_addr *)saddr, -+ .daddr = *(struct in6_addr *)daddr, -+ .seed = seed, -+ .sport = sport, -+ .dport = dport -+ }; -+ -+ return siphash(&combined, offsetofend(typeof(combined), dport), -+ &mptcp_secret); -+} -+ -+static void mptcp_v6_reqsk_destructor(struct request_sock *req) -+{ -+ mptcp_reqsk_destructor(req); -+ -+ tcp_v6_reqsk_destructor(req); -+} -+ -+static int mptcp_v6_init_req(struct request_sock *req, const struct sock *sk, -+ struct sk_buff *skb, bool want_cookie) -+{ -+ tcp_request_sock_ipv6_ops.init_req(req, sk, skb, want_cookie); -+ -+ mptcp_rsk(req)->hash_entry.pprev = NULL; -+ mptcp_rsk(req)->is_sub = 0; -+ inet_rsk(req)->mptcp_rqsk = 1; -+ -+ /* In case of SYN-cookies, we wait for the isn to be generated - it is -+ * input to the key-generation. -+ */ -+ if (!want_cookie) -+ mptcp_reqsk_init(req, sk, skb, false); -+ -+ return 0; -+} -+ -+#ifdef CONFIG_SYN_COOKIES -+static u32 mptcp_v6_cookie_init_seq(struct request_sock *req, const struct sock *sk, -+ const struct sk_buff *skb, __u16 *mssp) -+{ -+ __u32 isn = cookie_v6_init_sequence(req, sk, skb, mssp); -+ -+ tcp_rsk(req)->snt_isn = isn; -+ -+ mptcp_reqsk_init(req, sk, skb, true); -+ -+ return isn; -+} -+#endif -+ -+/* May be called without holding the meta-level lock */ -+static int mptcp_v6_join_init_req(struct request_sock *req, const struct sock *meta_sk, -+ struct sk_buff *skb, bool want_cookie) -+{ -+ struct mptcp_request_sock *mtreq = mptcp_rsk(req); -+ const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ union inet_addr addr; -+ int loc_id; -+ bool low_prio = false; -+ -+ if (!mpcb->rem_key_set) -+ return -1; -+ -+ /* We need to do this as early as possible. Because, if we fail later -+ * (e.g., get_local_id), then reqsk_free tries to remove the -+ * request-socket from the htb in mptcp_hash_request_remove as pprev -+ * may be different from NULL. 
-+ */
-+ mtreq->hash_entry.pprev = NULL;
-+
-+ tcp_request_sock_ipv6_ops.init_req(req, meta_sk, skb, want_cookie);
-+
-+ mtreq->mptcp_loc_nonce = mptcp_v6_get_nonce(ipv6_hdr(skb)->saddr.s6_addr32,
-+ ipv6_hdr(skb)->daddr.s6_addr32,
-+ tcp_hdr(skb)->source,
-+ tcp_hdr(skb)->dest);
-+ addr.in6 = inet_rsk(req)->ir_v6_loc_addr;
-+ loc_id = mpcb->pm_ops->get_local_id(meta_sk, AF_INET6, &addr, &low_prio);
-+ if (loc_id == -1)
-+ return -1;
-+ mtreq->loc_id = loc_id;
-+ mtreq->low_prio = low_prio;
-+
-+ mptcp_join_reqsk_init(mpcb, req, skb);
-+
-+ return 0;
-+}
-+
-+/* Similar to tcp6_request_sock_ops */
-+struct request_sock_ops mptcp6_request_sock_ops __read_mostly = {
-+ .family = AF_INET6,
-+ .obj_size = sizeof(struct mptcp_request_sock),
-+ .rtx_syn_ack = tcp_rtx_synack,
-+ .send_ack = tcp_v6_reqsk_send_ack,
-+ .destructor = mptcp_v6_reqsk_destructor,
-+ .send_reset = tcp_v6_send_reset,
-+ .syn_ack_timeout = tcp_syn_ack_timeout,
-+};
-+
-+/* Similar to: tcp_v6_conn_request
-+ * May be called without holding the meta-level lock
-+ */
-+static int mptcp_v6_join_request(struct sock *meta_sk, struct sk_buff *skb)
-+{
-+ return tcp_conn_request(&mptcp6_request_sock_ops,
-+ &mptcp_join_request_sock_ipv6_ops,
-+ meta_sk, skb);
-+}
-+
-+int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb)
-+{
-+ const struct tcphdr *th = tcp_hdr(skb);
-+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
-+ struct sock *child, *rsk = NULL, *sk;
-+ int ret;
-+
-+ sk = __inet6_lookup_established(sock_net(meta_sk),
-+ &tcp_hashinfo,
-+ &ip6h->saddr, th->source,
-+ &ip6h->daddr, ntohs(th->dest),
-+ tcp_v6_iif(skb), tcp_v6_sdif(skb));
-+
-+ if (!sk)
-+ goto new_subflow;
-+
-+ if (is_meta_sk(sk)) {
-+ WARN("%s Did not find a sub-sk - but found the meta!\n", __func__);
-+ sock_put(sk);
-+ goto discard;
-+ }
-+
-+ if (sk->sk_state == TCP_TIME_WAIT) {
-+ inet_twsk_put(inet_twsk(sk));
-+ goto discard;
-+ }
-+
-+ if (sk->sk_state == TCP_NEW_SYN_RECV) {
-+ struct request_sock *req = inet_reqsk(sk);
-+ bool req_stolen;
-+
-+ if (!mptcp_can_new_subflow(meta_sk))
-+ goto reset_and_discard;
-+
-+ local_bh_disable();
-+ child = tcp_check_req(meta_sk, skb, req, false, &req_stolen);
-+ if (!child) {
-+ reqsk_put(req);
-+ local_bh_enable();
-+ goto discard;
-+ }
-+
-+ if (child != meta_sk) {
-+ ret = mptcp_finish_handshake(child, skb);
-+ if (ret) {
-+ rsk = child;
-+ local_bh_enable();
-+ goto reset_and_discard;
-+ }
-+
-+ bh_unlock_sock(meta_sk);
-+ local_bh_enable();
-+ return 0;
-+ }
-+
-+ /* tcp_check_req failed */
-+ reqsk_put(req);
-+
-+ local_bh_enable();
-+ goto discard;
-+ }
-+
-+ ret = tcp_v6_do_rcv(sk, skb);
-+ sock_put(sk);
-+
-+ return ret;
-+
-+new_subflow:
-+ if (!mptcp_can_new_subflow(meta_sk))
-+ goto reset_and_discard;
-+
-+ child = tcp_v6_cookie_check(meta_sk, skb);
-+ if (!child)
-+ goto discard;
-+
-+ if (child != meta_sk) {
-+ ret = mptcp_finish_handshake(child, skb);
-+ if (ret) {
-+ rsk = child;
-+ goto reset_and_discard;
-+ }
-+ }
-+
-+ if (tcp_hdr(skb)->syn) {
-+ local_bh_disable();
-+ mptcp_v6_join_request(meta_sk, skb);
-+ local_bh_enable();
-+ }
-+
-+discard:
-+ kfree_skb(skb);
-+ return 0;
-+
-+reset_and_discard:
-+ tcp_v6_send_reset(rsk, skb);
-+ goto discard;
-+}
-+
-+/* Create a new IPv6 subflow.
-+ *
-+ * We are in user context and the meta-sock-lock is held.
-+ */ -+int __mptcp_init6_subsockets(struct sock *meta_sk, const struct mptcp_loc6 *loc, -+ __be16 sport, struct mptcp_rem6 *rem, -+ struct sock **subsk) -+{ -+ struct tcp_sock *tp; -+ struct sock *sk; -+ struct sockaddr_in6 loc_in, rem_in; -+ struct socket_alloc sock_full; -+ struct socket *sock = (struct socket *)&sock_full; -+ int ret; -+ -+ /** First, create and prepare the new socket */ -+ memcpy(&sock_full, meta_sk->sk_socket, sizeof(sock_full)); -+ sock->state = SS_UNCONNECTED; -+ sock->ops = NULL; -+ -+ ret = inet6_create(sock_net(meta_sk), sock, IPPROTO_TCP, 1); -+ if (unlikely(ret < 0)) { -+ net_err_ratelimited("%s inet6_create failed ret: %d\n", -+ __func__, ret); -+ return ret; -+ } -+ -+ sk = sock->sk; -+ tp = tcp_sk(sk); -+ -+ /* All subsockets need the MPTCP-lock-class */ -+ lockdep_set_class_and_name(&(sk)->sk_lock.slock, &meta_slock_key, meta_slock_key_name); -+ lockdep_init_map(&(sk)->sk_lock.dep_map, meta_key_name, &meta_key, 0); -+ -+ ret = mptcp_add_sock(meta_sk, sk, loc->loc6_id, rem->rem6_id, GFP_KERNEL); -+ if (ret) { -+ net_err_ratelimited("%s mptcp_add_sock failed ret: %d\n", -+ __func__, ret); -+ goto error; -+ } -+ -+ tp->mptcp->slave_sk = 1; -+ tp->mptcp->low_prio = loc->low_prio; -+ -+ /* Initializing the timer for an MPTCP subflow */ -+ timer_setup(&tp->mptcp->mptcp_ack_timer, mptcp_ack_handler, 0); -+ -+ /** Then, connect the socket to the peer */ -+ loc_in.sin6_family = AF_INET6; -+ rem_in.sin6_family = AF_INET6; -+ loc_in.sin6_port = sport; -+ if (rem->port) -+ rem_in.sin6_port = rem->port; -+ else -+ rem_in.sin6_port = inet_sk(meta_sk)->inet_dport; -+ loc_in.sin6_addr = loc->addr; -+ rem_in.sin6_addr = rem->addr; -+ -+ if (loc->if_idx) -+ sk->sk_bound_dev_if = loc->if_idx; -+ -+ ret = kernel_bind(sock, (struct sockaddr *)&loc_in, -+ sizeof(struct sockaddr_in6)); -+ if (ret < 0) { -+ net_err_ratelimited("%s: token %#x bind() to %pI6 index %d failed, error %d\n", -+ __func__, tcp_sk(meta_sk)->mpcb->mptcp_loc_token, -+ &loc_in.sin6_addr, loc->if_idx, ret); -+ goto error; -+ } -+ -+ mptcp_debug("%s: token %#x pi %d src_addr:%pI6:%d dst_addr:%pI6:%d ifidx: %u\n", -+ __func__, tcp_sk(meta_sk)->mpcb->mptcp_loc_token, -+ tp->mptcp->path_index, &loc_in.sin6_addr, -+ ntohs(loc_in.sin6_port), &rem_in.sin6_addr, -+ ntohs(rem_in.sin6_port), loc->if_idx); -+ -+ if (tcp_sk(meta_sk)->mpcb->pm_ops->init_subsocket_v6) -+ tcp_sk(meta_sk)->mpcb->pm_ops->init_subsocket_v6(sk, rem->addr); -+ -+ ret = kernel_connect(sock, (struct sockaddr *)&rem_in, -+ sizeof(struct sockaddr_in6), O_NONBLOCK); -+ if (ret < 0 && ret != -EINPROGRESS) { -+ net_err_ratelimited("%s: MPTCP subsocket connect() failed, error %d\n", -+ __func__, ret); -+ goto error; -+ } -+ -+ MPTCP_INC_STATS(sock_net(meta_sk), MPTCP_MIB_JOINSYNTX); -+ -+ sk_set_socket(sk, meta_sk->sk_socket); -+ sk->sk_wq = meta_sk->sk_wq; -+ -+ if (subsk) -+ *subsk = sk; -+ -+ return 0; -+ -+error: -+ /* May happen if mptcp_add_sock fails first */ -+ if (!mptcp(tp)) { -+ tcp_close(sk, 0); -+ } else { -+ local_bh_disable(); -+ mptcp_sub_force_close(sk); -+ local_bh_enable(); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__mptcp_init6_subsockets); -+ -+const struct inet_connection_sock_af_ops mptcp_v6_specific = { -+ .queue_xmit = inet6_csk_xmit, -+ .send_check = tcp_v6_send_check, -+ .rebuild_header = inet6_sk_rebuild_header, -+ .sk_rx_dst_set = inet6_sk_rx_dst_set, -+ .conn_request = mptcp_conn_request, -+ .syn_recv_sock = tcp_v6_syn_recv_sock, -+ .net_header_len = sizeof(struct ipv6hdr), -+ .net_frag_header_len = sizeof(struct frag_hdr), 
-+ .setsockopt = ipv6_setsockopt, -+ .getsockopt = ipv6_getsockopt, -+ .addr2sockaddr = inet6_csk_addr2sockaddr, -+ .sockaddr_len = sizeof(struct sockaddr_in6), -+#ifdef CONFIG_COMPAT -+ .compat_setsockopt = compat_ipv6_setsockopt, -+ .compat_getsockopt = compat_ipv6_getsockopt, -+#endif -+ .mtu_reduced = tcp_v6_mtu_reduced, -+}; -+ -+const struct inet_connection_sock_af_ops mptcp_v6_mapped = { -+ .queue_xmit = ip_queue_xmit, -+ .send_check = tcp_v4_send_check, -+ .rebuild_header = inet_sk_rebuild_header, -+ .sk_rx_dst_set = inet_sk_rx_dst_set, -+ .conn_request = mptcp_conn_request, -+ .syn_recv_sock = tcp_v6_syn_recv_sock, -+ .net_header_len = sizeof(struct iphdr), -+ .setsockopt = ipv6_setsockopt, -+ .getsockopt = ipv6_getsockopt, -+ .addr2sockaddr = inet6_csk_addr2sockaddr, -+ .sockaddr_len = sizeof(struct sockaddr_in6), -+#ifdef CONFIG_COMPAT -+ .compat_setsockopt = compat_ipv6_setsockopt, -+ .compat_getsockopt = compat_ipv6_getsockopt, -+#endif -+ .mtu_reduced = tcp_v4_mtu_reduced, -+}; -+ -+struct tcp_request_sock_ops mptcp_request_sock_ipv6_ops; -+struct tcp_request_sock_ops mptcp_join_request_sock_ipv6_ops; -+ -+int mptcp_pm_v6_init(void) -+{ -+ int ret = 0; -+ struct request_sock_ops *ops = &mptcp6_request_sock_ops; -+ -+ mptcp_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops; -+ mptcp_request_sock_ipv6_ops.init_req = mptcp_v6_init_req; -+#ifdef CONFIG_SYN_COOKIES -+ mptcp_request_sock_ipv6_ops.cookie_init_seq = mptcp_v6_cookie_init_seq; -+#endif -+ -+ mptcp_join_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops; -+ mptcp_join_request_sock_ipv6_ops.init_req = mptcp_v6_join_init_req; -+ -+ ops->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", "MPTCP6"); -+ if (ops->slab_name == NULL) { -+ ret = -ENOMEM; -+ goto out; -+ } -+ -+ ops->slab = kmem_cache_create(ops->slab_name, ops->obj_size, 0, -+ SLAB_TYPESAFE_BY_RCU|SLAB_HWCACHE_ALIGN, -+ NULL); -+ -+ if (ops->slab == NULL) { -+ ret = -ENOMEM; -+ goto err_reqsk_create; -+ } -+ -+out: -+ return ret; -+ -+err_reqsk_create: -+ kfree(ops->slab_name); -+ ops->slab_name = NULL; -+ goto out; -+} -+ -+void mptcp_pm_v6_undo(void) -+{ -+ kmem_cache_destroy(mptcp6_request_sock_ops.slab); -+ kfree(mptcp6_request_sock_ops.slab_name); -+} -diff --git a/net/mptcp/mptcp_ndiffports.c b/net/mptcp/mptcp_ndiffports.c -new file mode 100644 -index 000000000000..cf019990447c ---- /dev/null -+++ b/net/mptcp/mptcp_ndiffports.c -@@ -0,0 +1,174 @@ -+#include -+ -+#include -+#include -+ -+#if IS_ENABLED(CONFIG_IPV6) -+#include -+#endif -+ -+struct ndiffports_priv { -+ /* Worker struct for subflow establishment */ -+ struct work_struct subflow_work; -+ -+ struct mptcp_cb *mpcb; -+}; -+ -+static int num_subflows __read_mostly = 2; -+module_param(num_subflows, int, 0644); -+MODULE_PARM_DESC(num_subflows, "choose the number of subflows per MPTCP connection"); -+ -+/** -+ * Create all new subflows, by doing calls to mptcp_initX_subsockets -+ * -+ * This function uses a goto next_subflow, to allow releasing the lock between -+ * new subflows and giving other processes a chance to do some work on the -+ * socket and potentially finishing the communication. 
-+ **/ -+static void create_subflow_worker(struct work_struct *work) -+{ -+ const struct ndiffports_priv *pm_priv = container_of(work, -+ struct ndiffports_priv, -+ subflow_work); -+ struct mptcp_cb *mpcb = pm_priv->mpcb; -+ struct sock *meta_sk = mpcb->meta_sk; -+ int iter = 0; -+ -+next_subflow: -+ if (iter) { -+ release_sock(meta_sk); -+ mutex_unlock(&mpcb->mpcb_mutex); -+ -+ cond_resched(); -+ } -+ mutex_lock(&mpcb->mpcb_mutex); -+ lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING); -+ -+ if (!mptcp(tcp_sk(meta_sk))) -+ goto exit; -+ -+ iter++; -+ -+ if (sock_flag(meta_sk, SOCK_DEAD)) -+ goto exit; -+ -+ if (mpcb->master_sk && -+ !tcp_sk(mpcb->master_sk)->mptcp->fully_established) -+ goto exit; -+ -+ if (num_subflows > iter && num_subflows > mptcp_subflow_count(mpcb)) { -+ if (meta_sk->sk_family == AF_INET || -+ mptcp_v6_is_v4_mapped(meta_sk)) { -+ struct mptcp_loc4 loc; -+ struct mptcp_rem4 rem; -+ -+ loc.addr.s_addr = inet_sk(meta_sk)->inet_saddr; -+ loc.loc4_id = 0; -+ loc.low_prio = 0; -+ if (mpcb->master_sk) -+ loc.if_idx = mpcb->master_sk->sk_bound_dev_if; -+ else -+ loc.if_idx = 0; -+ -+ rem.addr.s_addr = inet_sk(meta_sk)->inet_daddr; -+ rem.port = inet_sk(meta_sk)->inet_dport; -+ rem.rem4_id = 0; /* Default 0 */ -+ -+ mptcp_init4_subsockets(meta_sk, &loc, &rem); -+ } else { -+#if IS_ENABLED(CONFIG_IPV6) -+ struct mptcp_loc6 loc; -+ struct mptcp_rem6 rem; -+ -+ loc.addr = inet6_sk(meta_sk)->saddr; -+ loc.loc6_id = 0; -+ loc.low_prio = 0; -+ if (mpcb->master_sk) -+ loc.if_idx = mpcb->master_sk->sk_bound_dev_if; -+ else -+ loc.if_idx = 0; -+ -+ rem.addr = meta_sk->sk_v6_daddr; -+ rem.port = inet_sk(meta_sk)->inet_dport; -+ rem.rem6_id = 0; /* Default 0 */ -+ -+ mptcp_init6_subsockets(meta_sk, &loc, &rem); -+#endif -+ } -+ goto next_subflow; -+ } -+ -+exit: -+ release_sock(meta_sk); -+ mutex_unlock(&mpcb->mpcb_mutex); -+ mptcp_mpcb_put(mpcb); -+ sock_put(meta_sk); -+} -+ -+static void ndiffports_new_session(const struct sock *meta_sk) -+{ -+ struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct ndiffports_priv *fmp = (struct ndiffports_priv *)&mpcb->mptcp_pm[0]; -+ -+ /* Initialize workqueue-struct */ -+ INIT_WORK(&fmp->subflow_work, create_subflow_worker); -+ fmp->mpcb = mpcb; -+} -+ -+static void ndiffports_create_subflows(struct sock *meta_sk) -+{ -+ struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct ndiffports_priv *pm_priv = (struct ndiffports_priv *)&mpcb->mptcp_pm[0]; -+ -+ if (mptcp_in_infinite_mapping_weak(mpcb) || -+ mpcb->server_side || sock_flag(meta_sk, SOCK_DEAD)) -+ return; -+ -+ if (!work_pending(&pm_priv->subflow_work)) { -+ sock_hold(meta_sk); -+ refcount_inc(&mpcb->mpcb_refcnt); -+ queue_work(mptcp_wq, &pm_priv->subflow_work); -+ } -+} -+ -+static int ndiffports_get_local_id(const struct sock *meta_sk, -+ sa_family_t family, union inet_addr *addr, -+ bool *low_prio) -+{ -+ return 0; -+} -+ -+static struct mptcp_pm_ops ndiffports __read_mostly = { -+ .new_session = ndiffports_new_session, -+ .fully_established = ndiffports_create_subflows, -+ .get_local_id = ndiffports_get_local_id, -+ .name = "ndiffports", -+ .owner = THIS_MODULE, -+}; -+ -+/* General initialization of MPTCP_PM */ -+static int __init ndiffports_register(void) -+{ -+ BUILD_BUG_ON(sizeof(struct ndiffports_priv) > MPTCP_PM_SIZE); -+ -+ if (mptcp_register_path_manager(&ndiffports)) -+ goto exit; -+ -+ return 0; -+ -+exit: -+ return -1; -+} -+ -+static void ndiffports_unregister(void) -+{ -+ mptcp_unregister_path_manager(&ndiffports); -+} -+ -+module_init(ndiffports_register); 
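mptcp_register_path_manager() above plugs ndiffports into the core through a table of callbacks (new_session, fully_established, get_local_id). A minimal userspace sketch of that registration pattern; all names here are invented for the demo:

#include <stdio.h>

struct pm_ops {
	const char *name;
	void (*new_session)(int conn_id);
};

static const struct pm_ops *active_pm;

/* Reject incomplete tables, then make the manager current. */
static int register_pm(const struct pm_ops *ops)
{
	if (!ops->name || !ops->new_session)
		return -1;
	active_pm = ops;
	return 0;
}

static void demo_new_session(int conn_id)
{
	printf("pm '%s': new session %d, schedule extra subflows\n",
	       active_pm->name, conn_id);
}

static const struct pm_ops demo_pm = {
	.name = "ndiffports-demo",
	.new_session = demo_new_session,
};

int main(void)
{
	if (register_pm(&demo_pm) == 0)
		active_pm->new_session(1);	/* the core would call this on connect */
	return 0;
}

On the mptcp.org kernels the active manager is then typically selected at runtime (e.g. sysctl net.mptcp.mptcp_path_manager=ndiffports), with the subflow count adjustable through the num_subflows module parameter shown above.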
-+module_exit(ndiffports_unregister); -+ -+MODULE_AUTHOR("Christoph Paasch"); -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("NDIFF-PORTS MPTCP"); -+MODULE_VERSION("0.88"); -diff --git a/net/mptcp/mptcp_netlink.c b/net/mptcp/mptcp_netlink.c -new file mode 100644 -index 000000000000..dd696841ea85 ---- /dev/null -+++ b/net/mptcp/mptcp_netlink.c -@@ -0,0 +1,1272 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* MPTCP implementation - Netlink Path Manager -+ * -+ * Analysis, Design and Implementation: -+ * - Gregory Detal -+ * - Sébastien Barré -+ * - Matthieu Baerts -+ * - Pau Espin Pedrol -+ * - Detlev Casanova -+ * - David Verbeiren -+ * - Frank Vanbever -+ * - Antoine Maes -+ * - Tim Froidcoeur -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ */ -+ -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+#include -+#include -+#include -+#include -+#include -+#if IS_ENABLED(CONFIG_IPV6) -+#include -+#endif -+ -+#define MPTCP_MAX_ADDR 8 -+ -+struct mptcp_nl_priv { -+ /* Unfortunately we need to store this to generate MP_JOINs in case -+ * of the peer generating a subflow (see get_local_id). -+ */ -+ u8 loc4_bits; -+ u8 announced4; -+ struct mptcp_loc4 locaddr4[MPTCP_MAX_ADDR]; -+ -+#if IS_ENABLED(CONFIG_IPV6) -+ u8 loc6_bits; -+ u8 announced6; -+ struct mptcp_loc6 locaddr6[MPTCP_MAX_ADDR]; -+#endif -+ -+ u16 remove_addrs; -+ -+ bool is_closed; -+}; -+ -+static struct genl_family mptcp_genl_family; -+ -+#define MPTCP_GENL_EV_GRP_OFFSET 0 -+#define MPTCP_GENL_CMD_GRP_OFFSET 1 -+ -+static const struct genl_multicast_group mptcp_mcgrps[] = { -+ [MPTCP_GENL_EV_GRP_OFFSET] = { .name = MPTCP_GENL_EV_GRP_NAME, }, -+ [MPTCP_GENL_CMD_GRP_OFFSET] = { .name = MPTCP_GENL_CMD_GRP_NAME, }, -+}; -+ -+static const struct nla_policy mptcp_nl_genl_policy[MPTCP_ATTR_MAX + 1] = { -+ [MPTCP_ATTR_TOKEN] = { .type = NLA_U32, }, -+ [MPTCP_ATTR_FAMILY] = { .type = NLA_U16, }, -+ [MPTCP_ATTR_LOC_ID] = { .type = NLA_U8, }, -+ [MPTCP_ATTR_REM_ID] = { .type = NLA_U8, }, -+ [MPTCP_ATTR_SADDR4] = { .type = NLA_U32, }, -+ [MPTCP_ATTR_SADDR6] = { .type = NLA_BINARY, -+ .len = sizeof(struct in6_addr), }, -+ [MPTCP_ATTR_DADDR4] = { .type = NLA_U32, }, -+ [MPTCP_ATTR_DADDR6] = { .type = NLA_BINARY, -+ .len = sizeof(struct in6_addr), }, -+ [MPTCP_ATTR_SPORT] = { .type = NLA_U16, }, -+ [MPTCP_ATTR_DPORT] = { .type = NLA_U16, }, -+ [MPTCP_ATTR_BACKUP] = { .type = NLA_U8, }, -+ [MPTCP_ATTR_FLAGS] = { .type = NLA_U16, }, -+ [MPTCP_ATTR_TIMEOUT] = { .type = NLA_U32, }, -+ [MPTCP_ATTR_IF_IDX] = { .type = NLA_S32, }, -+}; -+ -+/* Defines the userspace PM filter on events. Set events are ignored. */ -+static u16 mptcp_nl_event_filter; -+ -+static inline struct mptcp_nl_priv * -+mptcp_nl_priv(const struct sock *meta_sk) -+{ -+ return (struct mptcp_nl_priv *)&tcp_sk(meta_sk)->mpcb->mptcp_pm[0]; -+} -+ -+static inline bool -+mptcp_nl_must_notify(u16 event, const struct sock *meta_sk) -+{ -+ struct mptcp_nl_priv *priv = mptcp_nl_priv(meta_sk); -+ -+ /* close_session() can be called before other events because it is -+ * also called when doing a fallback to TCP. We don't want to send -+ * events to the user-space after having sent the CLOSED event. 
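mptcp_nl_must_notify() (completed below) gates every netlink event on the is_closed latch and on the userspace-configured event mask, before checking for listeners. A compact sketch of that gating logic with invented event bits; the listener check is omitted:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EV_CREATED	(1u << 0)
#define EV_ESTABLISHED	(1u << 1)
#define EV_CLOSED	(1u << 2)

static uint16_t event_filter = EV_ESTABLISHED;	/* events to suppress */
static bool session_closed;

static bool must_notify(uint16_t event)
{
	if (session_closed)		/* nothing after CLOSED went out */
		return false;
	if (event == EV_CLOSED)
		session_closed = true;	/* the last event we ever send */
	if (event_filter & event)
		return false;
	return true;
}

int main(void)
{
	printf("%d %d %d %d\n", must_notify(EV_CREATED),
	       must_notify(EV_ESTABLISHED), must_notify(EV_CLOSED),
	       must_notify(EV_CREATED));	/* prints: 1 0 1 0 */
	return 0;
}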
-+ */ -+ if (priv->is_closed) -+ return false; -+ -+ if (event == MPTCPF_EVENT_CLOSED) -+ priv->is_closed = true; -+ -+ if (mptcp_nl_event_filter & event) -+ return false; -+ -+ if (!genl_has_listeners(&mptcp_genl_family, sock_net(meta_sk), 0)) -+ return false; -+ -+ return true; -+} -+ -+/* Find the first free index in the bitfield starting from 0 */ -+static int -+mptcp_nl_find_free_index(u8 bitfield) -+{ -+ int i; -+ -+ /* There are anyways no free bits... */ -+ if (bitfield == 0xff) -+ return -1; -+ -+ i = ffs(~bitfield) - 1; -+ if (i < 0) -+ return -1; -+ -+ return i; -+} -+ -+static inline int -+mptcp_nl_put_subsk(struct sk_buff *msg, struct sock *sk) -+{ -+ struct inet_sock *isk = inet_sk(sk); -+ struct sock *meta_sk = mptcp_meta_sk(sk); -+ u8 backup; -+ u8 sk_err; -+ -+ if (nla_put_u16(msg, MPTCP_ATTR_FAMILY, sk->sk_family)) -+ goto nla_put_failure; -+ -+ if (nla_put_u8(msg, MPTCP_ATTR_LOC_ID, tcp_sk(sk)->mptcp->loc_id)) -+ goto nla_put_failure; -+ -+ if (nla_put_u8(msg, MPTCP_ATTR_REM_ID, tcp_sk(sk)->mptcp->rem_id)) -+ goto nla_put_failure; -+ -+ switch (sk->sk_family) { -+ case AF_INET: -+ if (nla_put_u32(msg, MPTCP_ATTR_SADDR4, isk->inet_saddr)) -+ goto nla_put_failure; -+ -+ if (nla_put_u32(msg, MPTCP_ATTR_DADDR4, isk->inet_daddr)) -+ goto nla_put_failure; -+ break; -+#if IS_ENABLED(CONFIG_IPV6) -+ case AF_INET6: { -+ struct ipv6_pinfo *np = inet6_sk(sk); -+ -+ if (nla_put(msg, MPTCP_ATTR_SADDR6, sizeof(np->saddr), -+ &np->saddr)) -+ goto nla_put_failure; -+ -+ if (nla_put(msg, MPTCP_ATTR_DADDR6, sizeof(sk->sk_v6_daddr), -+ &sk->sk_v6_daddr)) -+ goto nla_put_failure; -+ break; -+ } -+#endif -+ default: -+ goto nla_put_failure; -+ } -+ -+ if (nla_put_u16(msg, MPTCP_ATTR_SPORT, ntohs(isk->inet_sport))) -+ goto nla_put_failure; -+ -+ if (nla_put_u16(msg, MPTCP_ATTR_DPORT, ntohs(isk->inet_dport))) -+ goto nla_put_failure; -+ -+ backup = !!(tcp_sk(sk)->mptcp->rcv_low_prio || -+ tcp_sk(sk)->mptcp->low_prio); -+ -+ if (nla_put_u8(msg, MPTCP_ATTR_BACKUP, backup)) -+ goto nla_put_failure; -+ -+ if (nla_put_s32(msg, MPTCP_ATTR_IF_IDX, sk->sk_bound_dev_if)) -+ goto nla_put_failure; -+ -+ sk_err = sk->sk_err ? 
: tcp_sk(sk)->mptcp->sk_err; -+ if (unlikely(sk_err != 0) && meta_sk->sk_state == TCP_ESTABLISHED && -+ nla_put_u8(msg, MPTCP_ATTR_ERROR, sk_err)) -+ goto nla_put_failure; -+ -+ return 0; -+ -+nla_put_failure: -+ return -1; -+} -+ -+static inline struct sk_buff * -+mptcp_nl_mcast_prepare(struct mptcp_cb *mpcb, struct sock *sk, int cmd, -+ void **hdr) -+{ -+ struct sk_buff *msg; -+ -+ /* possible optimisation: use the needed size */ -+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); -+ if (!msg) -+ return NULL; -+ -+ *hdr = genlmsg_put(msg, 0, 0, &mptcp_genl_family, 0, cmd); -+ if (!*hdr) -+ goto free_msg; -+ -+ if (nla_put_u32(msg, MPTCP_ATTR_TOKEN, mpcb->mptcp_loc_token)) -+ goto nla_put_failure; -+ -+ if (sk && mptcp_nl_put_subsk(msg, sk)) -+ goto nla_put_failure; -+ -+ return msg; -+ -+nla_put_failure: -+ genlmsg_cancel(msg, *hdr); -+free_msg: -+ nlmsg_free(msg); -+ return NULL; -+} -+ -+static inline int -+mptcp_nl_mcast_send(struct mptcp_cb *mpcb, struct sk_buff *msg, void *hdr) -+{ -+ int ret; -+ struct sock *meta_sk = mpcb->meta_sk; -+ -+ genlmsg_end(msg, hdr); -+ -+ ret = genlmsg_multicast_netns(&mptcp_genl_family, sock_net(meta_sk), -+ msg, 0, MPTCP_GENL_EV_GRP_OFFSET, -+ GFP_ATOMIC); -+ if (ret && ret != -ESRCH) -+ pr_err("%s: genlmsg_multicast failed with %d\n", __func__, ret); -+ return ret; -+} -+ -+static inline void -+mptcp_nl_mcast(struct mptcp_cb *mpcb, struct sock *sk, int cmd) -+{ -+ void *hdr; -+ struct sk_buff *msg; -+ -+ msg = mptcp_nl_mcast_prepare(mpcb, sk, cmd, &hdr); -+ if (msg) -+ mptcp_nl_mcast_send(mpcb, msg, hdr); -+ else -+ pr_warn("%s: unable to prepare multicast message\n", __func__); -+} -+ -+static inline void -+mptcp_nl_mcast_fail(struct sk_buff *msg, void *hdr) -+{ -+ genlmsg_cancel(msg, hdr); -+ nlmsg_free(msg); -+} -+ -+static void -+mptcp_nl_new(const struct sock *meta_sk, bool established) -+{ -+ struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ -+ mptcp_nl_mcast(mpcb, mpcb->master_sk, -+ established ? MPTCP_EVENT_ESTABLISHED -+ : MPTCP_EVENT_CREATED); -+} -+ -+static void -+mptcp_nl_pm_new_session(const struct sock *meta_sk) -+{ -+ if (!mptcp_nl_must_notify(MPTCPF_EVENT_CREATED, meta_sk)) -+ return; -+ -+ mptcp_nl_new(meta_sk, false); -+} -+ -+static inline int -+mptcp_nl_loc_id_to_index_lookup(struct sock *meta_sk, sa_family_t family, -+ u8 addr_id) -+{ -+ struct mptcp_nl_priv *priv = mptcp_nl_priv(meta_sk); -+ int i; -+ -+ switch (family) { -+ case AF_INET: -+ mptcp_for_each_bit_set(priv->loc4_bits, i) { -+ if (priv->locaddr4[i].loc4_id == addr_id) -+ return i; -+ } -+ break; -+#if IS_ENABLED(CONFIG_IPV6) -+ case AF_INET6: -+ mptcp_for_each_bit_set(priv->loc6_bits, i) { -+ if (priv->locaddr6[i].loc6_id == addr_id) -+ return i; -+ } -+ break; -+#endif -+ } -+ return -1; -+} -+ -+static inline void -+mptcp_nl_sk_setup_locaddr(struct sock *meta_sk, struct sock *sk) -+{ -+ struct mptcp_nl_priv *priv = mptcp_nl_priv(meta_sk); -+ bool backup = !!(tcp_sk(sk)->mptcp->rcv_low_prio || -+ tcp_sk(sk)->mptcp->low_prio); -+ sa_family_t family = mptcp_v6_is_v4_mapped(sk) ? AF_INET -+ : sk->sk_family; -+ u8 addr_id = tcp_sk(sk)->mptcp->loc_id; -+ int idx = mptcp_nl_loc_id_to_index_lookup(meta_sk, family, -+ addr_id); -+ -+ /* Same as in mptcp_fullmesh.c: exception for transparent sockets */ -+ int if_idx = inet_sk(sk)->transparent ? 
inet_sk(sk)->rx_dst_ifindex : -+ sk->sk_bound_dev_if; -+ -+ switch (family) { -+ case AF_INET: { -+ struct inet_sock *isk = inet_sk(sk); -+ -+ if (idx == -1) -+ idx = mptcp_nl_find_free_index(priv->loc4_bits); -+ if (idx == -1) { -+ pr_warn("No free index for sk loc_id v4\n"); -+ return; -+ } -+ priv->locaddr4[idx].addr.s_addr = isk->inet_saddr; -+ priv->locaddr4[idx].loc4_id = addr_id; -+ priv->locaddr4[idx].low_prio = backup; -+ priv->locaddr4[idx].if_idx = if_idx; -+ priv->loc4_bits |= 1 << idx; -+ priv->announced4 |= 1 << idx; -+ break; -+ } -+#if IS_ENABLED(CONFIG_IPV6) -+ case AF_INET6: { -+ struct ipv6_pinfo *np = inet6_sk(sk); -+ -+ if (idx == -1) -+ idx = mptcp_nl_find_free_index(priv->loc6_bits); -+ if (idx == -1) { -+ pr_warn("No free index for sk loc_id v6\n"); -+ return; -+ } -+ priv->locaddr6[idx].addr = np->saddr; -+ priv->locaddr6[idx].loc6_id = addr_id; -+ priv->locaddr6[idx].low_prio = backup; -+ priv->locaddr6[idx].if_idx = if_idx; -+ priv->loc6_bits |= 1 << idx; -+ priv->announced6 |= 1 << idx; -+ break; -+ } -+#endif -+ } -+} -+ -+static void -+mptcp_nl_pm_fully_established(struct sock *meta_sk) -+{ -+ mptcp_nl_sk_setup_locaddr(meta_sk, tcp_sk(meta_sk)->mpcb->master_sk); -+ -+ if (!mptcp_nl_must_notify(MPTCPF_EVENT_ESTABLISHED, meta_sk)) -+ return; -+ -+ mptcp_nl_new(meta_sk, true); -+} -+ -+static void -+mptcp_nl_pm_close_session(struct sock *meta_sk) -+{ -+ if (!mptcp_nl_must_notify(MPTCPF_EVENT_CLOSED, meta_sk)) -+ return; -+ -+ mptcp_nl_mcast(tcp_sk(meta_sk)->mpcb, NULL, MPTCP_EVENT_CLOSED); -+} -+ -+static void -+mptcp_nl_pm_established_subflow(struct sock *sk) -+{ -+ struct sock *meta_sk = mptcp_meta_sk(sk); -+ -+ mptcp_nl_sk_setup_locaddr(meta_sk, sk); -+ -+ if (!mptcp_nl_must_notify(MPTCPF_EVENT_SUB_ESTABLISHED, meta_sk)) -+ return; -+ -+ mptcp_nl_mcast(tcp_sk(meta_sk)->mpcb, sk, MPTCP_EVENT_SUB_ESTABLISHED); -+} -+ -+static void -+mptcp_nl_pm_delete_subflow(struct sock *sk) -+{ -+ struct sock *meta_sk = mptcp_meta_sk(sk); -+ -+ if (!mptcp_nl_must_notify(MPTCPF_EVENT_SUB_CLOSED, meta_sk)) -+ return; -+ -+ mptcp_nl_mcast(tcp_sk(meta_sk)->mpcb, sk, MPTCP_EVENT_SUB_CLOSED); -+} -+ -+static void -+mptcp_nl_pm_add_raddr(struct mptcp_cb *mpcb, const union inet_addr *addr, -+ sa_family_t family, __be16 port, u8 id) -+{ -+ struct sk_buff *msg; -+ void *hdr; -+ -+ if (!mptcp_nl_must_notify(MPTCPF_EVENT_ANNOUNCED, mpcb->meta_sk)) -+ return; -+ -+ msg = mptcp_nl_mcast_prepare(mpcb, NULL, MPTCP_EVENT_ANNOUNCED, &hdr); -+ if (!msg) -+ return; -+ -+ if (nla_put_u8(msg, MPTCP_ATTR_REM_ID, id)) -+ goto nla_put_failure; -+ -+ if (nla_put_u16(msg, MPTCP_ATTR_FAMILY, family)) -+ goto nla_put_failure; -+ -+ switch (family) { -+ case AF_INET: -+ if (nla_put_u32(msg, MPTCP_ATTR_DADDR4, addr->ip)) -+ goto nla_put_failure; -+ break; -+#if IS_ENABLED(CONFIG_IPV6) -+ case AF_INET6: -+ if (nla_put(msg, MPTCP_ATTR_DADDR6, sizeof(addr->ip6), -+ &addr->ip6)) -+ goto nla_put_failure; -+ break; -+#endif -+ default: -+ goto nla_put_failure; -+ } -+ -+ if (nla_put_u16(msg, MPTCP_ATTR_DPORT, ntohs(port))) -+ goto nla_put_failure; -+ -+ mptcp_nl_mcast_send(mpcb, msg, hdr); -+ -+ return; -+ -+nla_put_failure: -+ mptcp_nl_mcast_fail(msg, hdr); -+} -+ -+static void -+mptcp_nl_pm_rem_raddr(struct mptcp_cb *mpcb, u8 id) -+{ -+ struct sk_buff *msg; -+ void *hdr; -+ -+ if (!mptcp_nl_must_notify(MPTCPF_EVENT_REMOVED, mpcb->meta_sk)) -+ return; -+ -+ msg = mptcp_nl_mcast_prepare(mpcb, NULL, MPTCP_EVENT_REMOVED, &hdr); -+ -+ if (!msg) -+ return; -+ -+ if (nla_put_u8(msg, MPTCP_ATTR_REM_ID, id)) -+ goto 
nla_put_failure; -+ -+ mptcp_nl_mcast_send(mpcb, msg, hdr); -+ -+ return; -+ -+nla_put_failure: -+ mptcp_nl_mcast_fail(msg, hdr); -+} -+ -+static int -+mptcp_nl_pm_get_local_id(const struct sock *meta_sk, sa_family_t family, -+ union inet_addr *addr, bool *low_prio) -+{ -+ struct mptcp_nl_priv *priv = mptcp_nl_priv(meta_sk); -+ int i, id = 0; -+ -+ switch (family) { -+ case AF_INET: -+ mptcp_for_each_bit_set(priv->loc4_bits, i) { -+ if (addr->in.s_addr == priv->locaddr4[i].addr.s_addr) { -+ id = priv->locaddr4[i].loc4_id; -+ *low_prio = priv->locaddr4[i].low_prio; -+ goto out; -+ } -+ } -+ break; -+#if IS_ENABLED(CONFIG_IPV6) -+ case AF_INET6: -+ mptcp_for_each_bit_set(priv->loc6_bits, i) { -+ if (ipv6_addr_equal(&addr->in6, -+ &priv->locaddr6[i].addr)) { -+ id = priv->locaddr6[i].loc6_id; -+ *low_prio = priv->locaddr6[i].low_prio; -+ goto out; -+ } -+ } -+ break; -+#endif -+ } -+ return -1; -+ -+out: -+ return id; -+} -+ -+static void -+mptcp_nl_pm_addr_signal(struct sock *sk, unsigned *size, -+ struct tcp_out_options *opts, struct sk_buff *skb) -+{ -+ struct mptcp_nl_priv *priv = mptcp_nl_priv(sk); -+ struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb; -+ u8 unannounced; -+ int remove_addr_len; -+ -+ unannounced = (~priv->announced4) & priv->loc4_bits; -+ if (unannounced && -+ MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_ADD_ADDR4_ALIGN) { -+ int i = mptcp_nl_find_free_index(~unannounced); -+ -+ opts->options |= OPTION_MPTCP; -+ opts->mptcp_options |= OPTION_ADD_ADDR; -+ opts->add_addr4.addr_id = priv->locaddr4[i].loc4_id; -+ opts->add_addr4.addr = priv->locaddr4[i].addr; -+ opts->add_addr_v4 = 1; -+ -+ if (skb) -+ priv->announced4 |= (1 << i); -+ *size += MPTCP_SUB_LEN_ADD_ADDR4_ALIGN; -+ } -+ -+#if IS_ENABLED(CONFIG_IPV6) -+ unannounced = (~priv->announced6) & priv->loc6_bits; -+ if (unannounced && -+ MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_ADD_ADDR6_ALIGN) { -+ int i = mptcp_nl_find_free_index(~unannounced); -+ -+ opts->options |= OPTION_MPTCP; -+ opts->mptcp_options |= OPTION_ADD_ADDR; -+ opts->add_addr6.addr_id = priv->locaddr6[i].loc6_id; -+ opts->add_addr6.addr = priv->locaddr6[i].addr; -+ opts->add_addr_v6 = 1; -+ -+ if (skb) -+ priv->announced6 |= (1 << i); -+ *size += MPTCP_SUB_LEN_ADD_ADDR6_ALIGN; -+ } -+#endif -+ -+ if (likely(!priv->remove_addrs)) -+ goto exit; -+ -+ remove_addr_len = mptcp_sub_len_remove_addr_align(priv->remove_addrs); -+ if (MAX_TCP_OPTION_SPACE - *size < remove_addr_len) -+ goto exit; -+ -+ opts->options |= OPTION_MPTCP; -+ opts->mptcp_options |= OPTION_REMOVE_ADDR; -+ opts->remove_addrs = priv->remove_addrs; -+ -+ if (skb) -+ priv->remove_addrs = 0; -+ *size += remove_addr_len; -+ -+exit: -+ mpcb->addr_signal = !!((~priv->announced4) & priv->loc4_bits || -+#if IS_ENABLED(CONFIG_IPV6) -+ (~priv->announced6) & priv->loc6_bits || -+#endif -+ priv->remove_addrs); -+} -+ -+static void -+mptcp_nl_pm_prio_changed(struct sock *sk, int low_prio) -+{ -+ struct sock *meta_sk = mptcp_meta_sk(sk); -+ -+ if (!mptcp_nl_must_notify(MPTCPF_EVENT_SUB_PRIORITY, meta_sk)) -+ return; -+ -+ mptcp_nl_mcast(tcp_sk(meta_sk)->mpcb, sk, MPTCP_EVENT_SUB_PRIORITY); -+} -+ -+static int -+mptcp_nl_genl_announce(struct sk_buff *skb, struct genl_info *info) -+{ -+ struct sock *meta_sk, *subsk; -+ struct mptcp_cb *mpcb; -+ struct mptcp_nl_priv *priv; -+ u32 token; -+ u8 addr_id, backup = 0; -+ u16 family; -+ int i, ret = 0; -+ union inet_addr saddr; -+ int if_idx = 0; -+ bool useless; /* unused out parameter "low_prio" */ -+ -+ if (!info->attrs[MPTCP_ATTR_TOKEN] || 
!info->attrs[MPTCP_ATTR_FAMILY] || -+ !info->attrs[MPTCP_ATTR_LOC_ID]) -+ return -EINVAL; -+ -+ token = nla_get_u32(info->attrs[MPTCP_ATTR_TOKEN]); -+ meta_sk = mptcp_hash_find(genl_info_net(info), token); -+ if (!meta_sk) -+ return -EINVAL; -+ -+ mpcb = tcp_sk(meta_sk)->mpcb; -+ priv = mptcp_nl_priv(meta_sk); -+ family = nla_get_u16(info->attrs[MPTCP_ATTR_FAMILY]); -+ addr_id = nla_get_u8(info->attrs[MPTCP_ATTR_LOC_ID]); -+ -+ if (info->attrs[MPTCP_ATTR_BACKUP]) -+ backup = nla_get_u8(info->attrs[MPTCP_ATTR_BACKUP]); -+ -+ if (info->attrs[MPTCP_ATTR_IF_IDX]) -+ if_idx = nla_get_s32(info->attrs[MPTCP_ATTR_IF_IDX]); -+ -+ mutex_lock(&mpcb->mpcb_mutex); -+ lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING); -+ -+ switch (family) { -+ case AF_INET: -+ if (!info->attrs[MPTCP_ATTR_SADDR4]) { -+ ret = -EINVAL; -+ goto exit; -+ } -+ -+ saddr.in.s_addr = nla_get_u32(info->attrs[MPTCP_ATTR_SADDR4]); -+ i = mptcp_nl_pm_get_local_id(meta_sk, family, -+ &saddr, &useless); -+ if (i < 0) { -+ i = mptcp_nl_find_free_index(priv->loc4_bits); -+ if (i < 0) { -+ ret = -ENOBUFS; -+ goto exit; -+ } -+ } else if (i != addr_id) { -+ ret = -EINVAL; -+ goto exit; -+ } -+ -+ priv->locaddr4[i].addr.s_addr = saddr.in.s_addr; -+ priv->locaddr4[i].loc4_id = addr_id; -+ priv->locaddr4[i].low_prio = !!backup; -+ priv->locaddr4[i].if_idx = if_idx; -+ priv->loc4_bits |= 1 << i; -+ priv->announced4 &= ~(1 << i); -+ break; -+#if IS_ENABLED(CONFIG_IPV6) -+ case AF_INET6: -+ if (!info->attrs[MPTCP_ATTR_SADDR6]) { -+ ret = -EINVAL; -+ goto exit; -+ } -+ -+ saddr.in6 = *(struct in6_addr *) -+ nla_data(info->attrs[MPTCP_ATTR_SADDR6]); -+ i = mptcp_nl_pm_get_local_id(meta_sk, family, &saddr, &useless); -+ if (i < 0) { -+ i = mptcp_nl_find_free_index(priv->loc6_bits); -+ if (i < 0) { -+ ret = -ENOBUFS; -+ goto exit; -+ } -+ } else if (i != addr_id) { -+ ret = -EINVAL; -+ goto exit; -+ } -+ -+ priv->locaddr6[i].addr = saddr.in6; -+ priv->locaddr6[i].loc6_id = addr_id; -+ priv->locaddr6[i].low_prio = !!backup; -+ priv->locaddr6[i].if_idx = if_idx; -+ priv->loc6_bits |= 1 << i; -+ priv->announced6 &= ~(1 << i); -+ break; -+#endif -+ default: -+ ret = -EINVAL; -+ goto exit; -+ } -+ -+ mpcb->addr_signal = 1; -+ -+ rcu_read_lock_bh(); -+ subsk = mptcp_select_ack_sock(meta_sk); -+ if (subsk) -+ tcp_send_ack(subsk); -+ rcu_read_unlock_bh(); -+ -+exit: -+ release_sock(meta_sk); -+ mutex_unlock(&mpcb->mpcb_mutex); -+ sock_put(meta_sk); -+ return ret; -+} -+ -+static int -+mptcp_nl_genl_remove(struct sk_buff *skb, struct genl_info *info) -+{ -+ struct sock *meta_sk, *subsk; -+ struct mptcp_cb *mpcb; -+ struct mptcp_nl_priv *priv; -+ u32 token; -+ u8 addr_id; -+ int i; -+ int retcode; -+ bool found = false; -+ -+ if (!info->attrs[MPTCP_ATTR_TOKEN] || !info->attrs[MPTCP_ATTR_LOC_ID]) -+ return -EINVAL; -+ -+ token = nla_get_u32(info->attrs[MPTCP_ATTR_TOKEN]); -+ meta_sk = mptcp_hash_find(genl_info_net(info), token); -+ if (!meta_sk) -+ return -EINVAL; -+ -+ mpcb = tcp_sk(meta_sk)->mpcb; -+ priv = mptcp_nl_priv(meta_sk); -+ addr_id = nla_get_u8(info->attrs[MPTCP_ATTR_LOC_ID]); -+ -+ mutex_lock(&mpcb->mpcb_mutex); -+ lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING); -+ -+ mptcp_for_each_bit_set(priv->loc4_bits, i) { -+ if (priv->locaddr4[i].loc4_id == addr_id) { -+ priv->loc4_bits &= ~(1 << i); -+ found = true; -+ break; -+ } -+ } -+ -+#if IS_ENABLED(CONFIG_IPV6) -+ if (!found) { -+ mptcp_for_each_bit_set(priv->loc6_bits, i) { -+ if (priv->locaddr6[i].loc6_id == addr_id) { -+ priv->loc6_bits &= ~(1 << i); -+ found = true; -+ break; -+ } -+ } -+ } 
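The announce and remove handlers around this point keep per-connection address slots in small bitfields: loc4_bits/loc6_bits mark which of the MPTCP_MAX_ADDR slots are occupied, announced4/announced6 mark which of those have already been advertised, and a free slot is the lowest clear bit, found via ffs() on the complement exactly as in mptcp_nl_find_free_index(). A standalone sketch of that bookkeeping, with hypothetical userspace names:

#include <stdio.h>
#include <strings.h> /* ffs() */

static int find_free_index(unsigned char bitfield)
{
	if (bitfield == 0xff)
		return -1;                /* all 8 slots in use */
	return ffs(~bitfield & 0xff) - 1; /* lowest clear bit */
}

int main(void)
{
	unsigned char loc_bits = 0, announced_bits = 0;
	int i;

	/* announce two addresses: claim a slot, mark it unannounced so
	 * the addr_signal path will emit an ADD_ADDR for it
	 */
	for (i = 0; i < 2; i++) {
		int idx = find_free_index(loc_bits);

		loc_bits |= 1u << idx;
		announced_bits &= ~(1u << idx);
		printf("claimed slot %d, loc_bits=%#x\n", idx,
		       (unsigned int)loc_bits);
	}

	/* release slot 0, as mptcp_nl_genl_remove() does for a loc_id */
	loc_bits &= ~(1u << 0);
	printf("after remove: next free slot is %d\n",
	       find_free_index(loc_bits));
	return 0;
}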
-+#endif -+ -+ if (found) { -+ priv->remove_addrs |= 1 << addr_id; -+ mpcb->addr_signal = 1; -+ -+ rcu_read_lock_bh(); -+ subsk = mptcp_select_ack_sock(meta_sk); -+ if (subsk) -+ tcp_send_ack(subsk); -+ rcu_read_unlock_bh(); -+ retcode = 0; -+ } else { -+ retcode = -EINVAL; -+ } -+ -+ release_sock(meta_sk); -+ mutex_unlock(&mpcb->mpcb_mutex); -+ sock_put(meta_sk); -+ return retcode; -+} -+ -+static int -+mptcp_nl_genl_create(struct sk_buff *skb, struct genl_info *info) -+{ -+ struct sock *meta_sk, *subsk = NULL; -+ struct mptcp_cb *mpcb; -+ struct mptcp_nl_priv *priv; -+ u32 token; -+ u16 family, sport; -+ u8 loc_id, rem_id, backup = 0; -+ int i, ret = 0; -+ int if_idx; -+ -+ if (!info->attrs[MPTCP_ATTR_TOKEN] || !info->attrs[MPTCP_ATTR_FAMILY] || -+ !info->attrs[MPTCP_ATTR_LOC_ID] || !info->attrs[MPTCP_ATTR_REM_ID]) -+ return -EINVAL; -+ -+ token = nla_get_u32(info->attrs[MPTCP_ATTR_TOKEN]); -+ meta_sk = mptcp_hash_find(genl_info_net(info), token); -+ if (!meta_sk) -+ /* We use a more specific value than EINVAL here so that -+ * userspace can handle this specific case easily. This is -+ * useful to check the case in which userspace tries to create a -+ * subflow for a connection which was already destroyed recently -+ * in kernelspace, but userspace didn't have time to realize -+ * about it because there is a gap of time between kernel -+ * destroying the connection and userspace receiving the event -+ * through Netlink. It can easily happen for short life-time -+ * conns. -+ */ -+ return -EBADR; -+ -+ mpcb = tcp_sk(meta_sk)->mpcb; -+ -+ mutex_lock(&mpcb->mpcb_mutex); -+ lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING); -+ -+ if (sock_flag(meta_sk, SOCK_DEAD)) { -+ /* Same as for the EBADR case. In this case, though, we know for -+ * sure the conn owner of the subflow existed at some point (no -+ * invalid token possibility) -+ */ -+ ret = -EOWNERDEAD; -+ goto unlock; -+ } -+ -+ if (!mptcp_can_new_subflow(meta_sk)) { -+ /* Same as for the EBADR and EOWNERDEAD case but here, the MPTCP -+ * session has just been stopped, it is no longer possible to -+ * create new subflows. -+ */ -+ ret = -ENOTCONN; -+ goto unlock; -+ } -+ -+ if (mpcb->master_sk && -+ !tcp_sk(mpcb->master_sk)->mptcp->fully_established) { -+ /* First condition is not only in there for safely purposes, it -+ * can also be triggered in the same scenario as in EBADR and -+ * EOWNERDEAD -+ */ -+ ret = -EAGAIN; -+ goto unlock; -+ } -+ -+ priv = mptcp_nl_priv(meta_sk); -+ -+ family = nla_get_u16(info->attrs[MPTCP_ATTR_FAMILY]); -+ loc_id = nla_get_u8(info->attrs[MPTCP_ATTR_LOC_ID]); -+ rem_id = nla_get_u8(info->attrs[MPTCP_ATTR_REM_ID]); -+ -+ sport = info->attrs[MPTCP_ATTR_SPORT] -+ ? htons(nla_get_u16(info->attrs[MPTCP_ATTR_SPORT])) : 0; -+ backup = info->attrs[MPTCP_ATTR_BACKUP] -+ ? nla_get_u8(info->attrs[MPTCP_ATTR_BACKUP]) : 0; -+ if_idx = info->attrs[MPTCP_ATTR_IF_IDX] -+ ? 
nla_get_s32(info->attrs[MPTCP_ATTR_IF_IDX]) : 0; -+ -+ switch (family) { -+ case AF_INET: { -+ struct mptcp_rem4 rem = { -+ .rem4_id = rem_id, -+ }; -+ struct mptcp_loc4 loc = { -+ .loc4_id = loc_id, -+ }; -+ -+ if (!info->attrs[MPTCP_ATTR_DADDR4] || -+ !info->attrs[MPTCP_ATTR_DPORT]) { -+ goto create_failed; -+ } else { -+ rem.addr.s_addr = -+ nla_get_u32(info->attrs[MPTCP_ATTR_DADDR4]); -+ rem.port = -+ ntohs(nla_get_u16(info->attrs[MPTCP_ATTR_DPORT])); -+ } -+ -+ if (!info->attrs[MPTCP_ATTR_SADDR4]) { -+ bool found = false; -+ -+ mptcp_for_each_bit_set(priv->loc4_bits, i) { -+ if (priv->locaddr4[i].loc4_id == loc_id) { -+ loc.addr = priv->locaddr4[i].addr; -+ loc.low_prio = -+ priv->locaddr4[i].low_prio; -+ loc.if_idx = -+ priv->locaddr4[i].if_idx; -+ found = true; -+ break; -+ } -+ } -+ -+ if (!found) -+ goto create_failed; -+ } else { -+ loc.addr.s_addr = -+ nla_get_u32(info->attrs[MPTCP_ATTR_SADDR4]); -+ loc.low_prio = backup; -+ loc.if_idx = if_idx; -+ } -+ -+ ret = __mptcp_init4_subsockets(meta_sk, &loc, sport, &rem, -+ &subsk); -+ if (ret < 0) -+ goto unlock; -+ break; -+ } -+#if IS_ENABLED(CONFIG_IPV6) -+ case AF_INET6: { -+ struct mptcp_rem6 rem = { -+ .rem6_id = rem_id, -+ }; -+ struct mptcp_loc6 loc = { -+ .loc6_id = loc_id, -+ }; -+ -+ if (!info->attrs[MPTCP_ATTR_DADDR6] || -+ !info->attrs[MPTCP_ATTR_DPORT]) { -+ goto create_failed; -+ } else { -+ rem.addr = *(struct in6_addr *) -+ nla_data(info->attrs[MPTCP_ATTR_DADDR6]); -+ rem.port = -+ ntohs(nla_get_u16(info->attrs[MPTCP_ATTR_DPORT])); -+ } -+ -+ if (!info->attrs[MPTCP_ATTR_SADDR6]) { -+ bool found = false; -+ -+ mptcp_for_each_bit_set(priv->loc6_bits, i) { -+ if (priv->locaddr6[i].loc6_id == loc_id) { -+ loc.addr = priv->locaddr6[i].addr; -+ loc.low_prio = -+ priv->locaddr6[i].low_prio; -+ loc.if_idx = -+ priv->locaddr6[i].if_idx; -+ -+ found = true; -+ break; -+ } -+ } -+ -+ if (!found) -+ goto create_failed; -+ } else { -+ loc.addr = *(struct in6_addr *) -+ nla_data(info->attrs[MPTCP_ATTR_SADDR6]); -+ loc.low_prio = backup; -+ loc.if_idx = if_idx; -+ } -+ -+ ret = __mptcp_init6_subsockets(meta_sk, &loc, sport, &rem, -+ &subsk); -+ if (ret < 0) -+ goto unlock; -+ break; -+ } -+#endif -+ default: -+ goto create_failed; -+ } -+ -+unlock: -+ release_sock(meta_sk); -+ mutex_unlock(&mpcb->mpcb_mutex); -+ sock_put(meta_sk); -+ return ret; -+ -+create_failed: -+ ret = -EINVAL; -+ goto unlock; -+} -+ -+static struct sock * -+mptcp_nl_subsk_lookup(struct mptcp_cb *mpcb, struct nlattr **attrs) -+{ -+ struct sock *sk; -+ struct mptcp_tcp_sock *mptcp; -+ struct hlist_node *tmp; -+ u16 family; -+ __be16 sport, dport; -+ -+ if (!attrs[MPTCP_ATTR_FAMILY] || !attrs[MPTCP_ATTR_SPORT] || -+ !attrs[MPTCP_ATTR_DPORT]) -+ goto exit; -+ -+ family = nla_get_u16(attrs[MPTCP_ATTR_FAMILY]); -+ sport = htons(nla_get_u16(attrs[MPTCP_ATTR_SPORT])); -+ dport = htons(nla_get_u16(attrs[MPTCP_ATTR_DPORT])); -+ -+ switch (family) { -+ case AF_INET: { -+ __be32 saddr, daddr; -+ -+ if (!attrs[MPTCP_ATTR_SADDR4] || !attrs[MPTCP_ATTR_DADDR4]) -+ break; -+ -+ saddr = nla_get_u32(attrs[MPTCP_ATTR_SADDR4]); -+ daddr = nla_get_u32(attrs[MPTCP_ATTR_DADDR4]); -+ -+ mptcp_for_each_sub_safe(mpcb, mptcp, tmp) { -+ struct sock *subsk = mptcp_to_sock(mptcp); -+ struct inet_sock *isk = inet_sk(subsk); -+ -+ if (subsk->sk_family != AF_INET) -+ continue; -+ -+ if (isk->inet_saddr == saddr && -+ isk->inet_daddr == daddr && -+ isk->inet_sport == sport && -+ isk->inet_dport == dport) { -+ sk = subsk; -+ goto found; -+ } -+ } -+ break; -+ } -+#if IS_ENABLED(CONFIG_IPV6) -+ 
case AF_INET6: { -+ struct in6_addr saddr, daddr; -+ -+ if (!attrs[MPTCP_ATTR_SADDR6] || !attrs[MPTCP_ATTR_DADDR6]) -+ break; -+ -+ saddr = *(struct in6_addr *)nla_data(attrs[MPTCP_ATTR_SADDR6]); -+ daddr = *(struct in6_addr *)nla_data(attrs[MPTCP_ATTR_DADDR6]); -+ -+ mptcp_for_each_sub_safe(mpcb, mptcp, tmp) { -+ struct sock *subsk = mptcp_to_sock(mptcp); -+ struct inet_sock *isk = inet_sk(subsk); -+ struct ipv6_pinfo *np; -+ -+ if (subsk->sk_family != AF_INET6) -+ continue; -+ -+ np = inet6_sk(subsk); -+ if (ipv6_addr_equal(&saddr, &np->saddr) && -+ ipv6_addr_equal(&daddr, &subsk->sk_v6_daddr) && -+ isk->inet_sport == sport && -+ isk->inet_dport == dport) { -+ sk = subsk; -+ goto found; -+ } -+ } -+ break; -+ } -+#endif -+ } -+ -+exit: -+ sk = NULL; -+found: -+ return sk; -+} -+ -+static int -+mptcp_nl_genl_destroy(struct sk_buff *skb, struct genl_info *info) -+{ -+ struct sock *meta_sk, *subsk; -+ struct mptcp_cb *mpcb; -+ int ret = 0; -+ u32 token; -+ -+ if (!info->attrs[MPTCP_ATTR_TOKEN]) -+ return -EINVAL; -+ -+ token = nla_get_u32(info->attrs[MPTCP_ATTR_TOKEN]); -+ -+ meta_sk = mptcp_hash_find(genl_info_net(info), token); -+ if (!meta_sk) -+ return -EINVAL; -+ -+ mpcb = tcp_sk(meta_sk)->mpcb; -+ -+ mutex_lock(&mpcb->mpcb_mutex); -+ lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING); -+ -+ subsk = mptcp_nl_subsk_lookup(mpcb, info->attrs); -+ if (subsk) { -+ local_bh_disable(); -+ mptcp_reinject_data(subsk, 0); -+ mptcp_send_reset(subsk); -+ local_bh_enable(); -+ } else { -+ ret = -EINVAL; -+ } -+ -+ release_sock(meta_sk); -+ mutex_unlock(&mpcb->mpcb_mutex); -+ sock_put(meta_sk); -+ return ret; -+} -+ -+static int -+mptcp_nl_genl_conn_exists(struct sk_buff *skb, struct genl_info *info) -+{ -+ struct sock *meta_sk; -+ u32 token; -+ -+ if (!info->attrs[MPTCP_ATTR_TOKEN]) -+ return -EINVAL; -+ -+ token = nla_get_u32(info->attrs[MPTCP_ATTR_TOKEN]); -+ -+ meta_sk = mptcp_hash_find(genl_info_net(info), token); -+ if (!meta_sk) -+ return -ENOTCONN; -+ -+ sock_put(meta_sk); -+ return 0; -+} -+ -+static int -+mptcp_nl_genl_priority(struct sk_buff *skb, struct genl_info *info) -+{ -+ struct sock *meta_sk, *subsk; -+ struct mptcp_cb *mpcb; -+ int ret = 0; -+ u32 token; -+ u8 backup = 0; -+ -+ if (!info->attrs[MPTCP_ATTR_TOKEN]) -+ return -EINVAL; -+ -+ token = nla_get_u32(info->attrs[MPTCP_ATTR_TOKEN]); -+ if (info->attrs[MPTCP_ATTR_BACKUP]) -+ backup = nla_get_u8(info->attrs[MPTCP_ATTR_BACKUP]); -+ -+ meta_sk = mptcp_hash_find(genl_info_net(info), token); -+ if (!meta_sk) -+ return -EINVAL; -+ -+ mpcb = tcp_sk(meta_sk)->mpcb; -+ -+ mutex_lock(&mpcb->mpcb_mutex); -+ lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING); -+ -+ subsk = mptcp_nl_subsk_lookup(mpcb, info->attrs); -+ if (subsk) { -+ tcp_sk(subsk)->mptcp->send_mp_prio = 1; -+ tcp_sk(subsk)->mptcp->low_prio = !!backup; -+ -+ local_bh_disable(); -+ if (mptcp_sk_can_send_ack(subsk)) -+ tcp_send_ack(subsk); -+ else -+ ret = -ENOTCONN; -+ local_bh_enable(); -+ } else { -+ ret = -EINVAL; -+ } -+ -+ release_sock(meta_sk); -+ mutex_unlock(&mpcb->mpcb_mutex); -+ sock_put(meta_sk); -+ return ret; -+} -+ -+static int -+mptcp_nl_genl_set_filter(struct sk_buff *skb, struct genl_info *info) -+{ -+ u16 flags; -+ -+ if (!info->attrs[MPTCP_ATTR_FLAGS]) -+ return -EINVAL; -+ -+ flags = nla_get_u16(info->attrs[MPTCP_ATTR_FLAGS]); -+ -+ /* Only want to receive events that correspond to these flags */ -+ mptcp_nl_event_filter = ~flags; -+ -+ return 0; -+} -+ -+static struct genl_ops mptcp_genl_ops[] = { -+ { -+ .cmd = MPTCP_CMD_ANNOUNCE, -+ .doit = 
mptcp_nl_genl_announce, -+ .flags = GENL_ADMIN_PERM, -+ }, -+ { -+ .cmd = MPTCP_CMD_REMOVE, -+ .doit = mptcp_nl_genl_remove, -+ .flags = GENL_ADMIN_PERM, -+ }, -+ { -+ .cmd = MPTCP_CMD_SUB_CREATE, -+ .doit = mptcp_nl_genl_create, -+ .flags = GENL_ADMIN_PERM, -+ }, -+ { -+ .cmd = MPTCP_CMD_SUB_DESTROY, -+ .doit = mptcp_nl_genl_destroy, -+ .flags = GENL_ADMIN_PERM, -+ }, -+ { -+ .cmd = MPTCP_CMD_SUB_PRIORITY, -+ .doit = mptcp_nl_genl_priority, -+ .flags = GENL_ADMIN_PERM, -+ }, -+ { -+ .cmd = MPTCP_CMD_SET_FILTER, -+ .doit = mptcp_nl_genl_set_filter, -+ .flags = GENL_ADMIN_PERM, -+ }, -+ { -+ .cmd = MPTCP_CMD_EXIST, -+ .doit = mptcp_nl_genl_conn_exists, -+ .flags = GENL_ADMIN_PERM, -+ }, -+}; -+ -+static struct mptcp_pm_ops mptcp_nl_pm_ops = { -+ .new_session = mptcp_nl_pm_new_session, -+ .close_session = mptcp_nl_pm_close_session, -+ .fully_established = mptcp_nl_pm_fully_established, -+ .established_subflow = mptcp_nl_pm_established_subflow, -+ .delete_subflow = mptcp_nl_pm_delete_subflow, -+ .add_raddr = mptcp_nl_pm_add_raddr, -+ .rem_raddr = mptcp_nl_pm_rem_raddr, -+ .get_local_id = mptcp_nl_pm_get_local_id, -+ .addr_signal = mptcp_nl_pm_addr_signal, -+ .prio_changed = mptcp_nl_pm_prio_changed, -+ .name = "netlink", -+ .owner = THIS_MODULE, -+}; -+ -+static struct genl_family mptcp_genl_family = { -+ .hdrsize = 0, -+ .name = MPTCP_GENL_NAME, -+ .version = MPTCP_GENL_VER, -+ .maxattr = MPTCP_ATTR_MAX, -+ .policy = mptcp_nl_genl_policy, -+ .netnsok = true, -+ .module = THIS_MODULE, -+ .ops = mptcp_genl_ops, -+ .n_ops = ARRAY_SIZE(mptcp_genl_ops), -+ .mcgrps = mptcp_mcgrps, -+ .n_mcgrps = ARRAY_SIZE(mptcp_mcgrps), -+}; -+ -+static int __init -+mptcp_nl_init(void) -+{ -+ int ret; -+ -+ BUILD_BUG_ON(sizeof(struct mptcp_nl_priv) > MPTCP_PM_SIZE); -+ -+ ret = genl_register_family(&mptcp_genl_family); -+ if (ret) -+ goto out_genl; -+ -+ ret = mptcp_register_path_manager(&mptcp_nl_pm_ops); -+ if (ret) -+ goto out_pm; -+ -+ return 0; -+out_pm: -+ genl_unregister_family(&mptcp_genl_family); -+out_genl: -+ return ret; -+} -+ -+static void __exit -+mptcp_nl_exit(void) -+{ -+ mptcp_unregister_path_manager(&mptcp_nl_pm_ops); -+ genl_unregister_family(&mptcp_genl_family); -+} -+ -+module_init(mptcp_nl_init); -+module_exit(mptcp_nl_exit); -+ -+MODULE_AUTHOR("Gregory Detal "); -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("MPTCP netlink-based path manager"); -+MODULE_ALIAS_GENL_FAMILY(MPTCP_GENL_NAME); -diff --git a/net/mptcp/mptcp_olia.c b/net/mptcp/mptcp_olia.c -new file mode 100644 -index 000000000000..c44eb9208581 ---- /dev/null -+++ b/net/mptcp/mptcp_olia.c -@@ -0,0 +1,318 @@ -+/* -+ * MPTCP implementation - OPPORTUNISTIC LINKED INCREASES CONGESTION CONTROL: -+ * -+ * Algorithm design: -+ * Ramin Khalili -+ * Nicolas Gast -+ * Jean-Yves Le Boudec -+ * -+ * Implementation: -+ * Ramin Khalili -+ * -+ * Ported to the official MPTCP-kernel: -+ * Christoph Paasch -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. 
-+ */ -+ -+ -+#include -+#include -+ -+#include -+ -+static int scale = 10; -+ -+struct mptcp_olia { -+ u32 mptcp_loss1; -+ u32 mptcp_loss2; -+ u32 mptcp_loss3; -+ int epsilon_num; -+ u32 epsilon_den; -+ int mptcp_snd_cwnd_cnt; -+}; -+ -+static inline int mptcp_olia_sk_can_send(const struct sock *sk) -+{ -+ return mptcp_sk_can_send(sk) && tcp_sk(sk)->srtt_us; -+} -+ -+static inline u64 mptcp_olia_scale(u64 val, int scale) -+{ -+ return (u64) val << scale; -+} -+ -+/* take care of artificially inflate (see RFC5681) -+ * of cwnd during fast-retransmit phase -+ */ -+static u32 mptcp_get_crt_cwnd(struct sock *sk) -+{ -+ const struct inet_connection_sock *icsk = inet_csk(sk); -+ -+ if (icsk->icsk_ca_state == TCP_CA_Recovery) -+ return tcp_sk(sk)->snd_ssthresh; -+ else -+ return tcp_sk(sk)->snd_cwnd; -+} -+ -+/* return the dominator of the first term of the increasing term */ -+static u64 mptcp_get_rate(const struct mptcp_cb *mpcb , u32 path_rtt) -+{ -+ struct mptcp_tcp_sock *mptcp; -+ u64 rate = 1; /* We have to avoid a zero-rate because it is used as a divisor */ -+ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ struct sock *sk = mptcp_to_sock(mptcp); -+ struct tcp_sock *tp = tcp_sk(sk); -+ u64 scaled_num; -+ u32 tmp_cwnd; -+ -+ if (!mptcp_olia_sk_can_send(sk)) -+ continue; -+ -+ tmp_cwnd = mptcp_get_crt_cwnd(sk); -+ scaled_num = mptcp_olia_scale(tmp_cwnd, scale) * path_rtt; -+ rate += div_u64(scaled_num , tp->srtt_us); -+ } -+ rate *= rate; -+ return rate; -+} -+ -+/* find the maximum cwnd, used to find set M */ -+static u32 mptcp_get_max_cwnd(const struct mptcp_cb *mpcb) -+{ -+ struct mptcp_tcp_sock *mptcp; -+ u32 best_cwnd = 0; -+ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ struct sock *sk = mptcp_to_sock(mptcp); -+ u32 tmp_cwnd; -+ -+ if (!mptcp_olia_sk_can_send(sk)) -+ continue; -+ -+ tmp_cwnd = mptcp_get_crt_cwnd(sk); -+ if (tmp_cwnd > best_cwnd) -+ best_cwnd = tmp_cwnd; -+ } -+ return best_cwnd; -+} -+ -+static void mptcp_get_epsilon(const struct mptcp_cb *mpcb) -+{ -+ struct mptcp_tcp_sock *mptcp; -+ struct mptcp_olia *ca; -+ struct tcp_sock *tp; -+ struct sock *sk; -+ u64 tmp_int, tmp_rtt, best_int = 0, best_rtt = 1; -+ u32 max_cwnd, tmp_cwnd, established_cnt = 0; -+ u8 M = 0, B_not_M = 0; -+ -+ /* TODO - integrate this in the following loop - we just want to iterate once */ -+ -+ max_cwnd = mptcp_get_max_cwnd(mpcb); -+ -+ /* find the best path */ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ sk = mptcp_to_sock(mptcp); -+ tp = tcp_sk(sk); -+ ca = inet_csk_ca(sk); -+ -+ if (!mptcp_olia_sk_can_send(sk)) -+ continue; -+ -+ established_cnt++; -+ -+ tmp_rtt = (u64)tp->srtt_us * tp->srtt_us; -+ /* TODO - check here and rename variables */ -+ tmp_int = max(ca->mptcp_loss3 - ca->mptcp_loss2, -+ ca->mptcp_loss2 - ca->mptcp_loss1); -+ -+ if ((u64)tmp_int * best_rtt >= (u64)best_int * tmp_rtt) { -+ best_rtt = tmp_rtt; -+ best_int = tmp_int; -+ } -+ } -+ -+ /* TODO - integrate this here in mptcp_get_max_cwnd and in the previous loop */ -+ /* find the size of M and B_not_M */ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ sk = mptcp_to_sock(mptcp); -+ tp = tcp_sk(sk); -+ ca = inet_csk_ca(sk); -+ -+ if (!mptcp_olia_sk_can_send(sk)) -+ continue; -+ -+ tmp_cwnd = mptcp_get_crt_cwnd(sk); -+ if (tmp_cwnd == max_cwnd) { -+ M++; -+ } else { -+ tmp_rtt = (u64)tp->srtt_us * tp->srtt_us; -+ tmp_int = max(ca->mptcp_loss3 - ca->mptcp_loss2, -+ ca->mptcp_loss2 - ca->mptcp_loss1); -+ -+ if ((u64)tmp_int * best_rtt == (u64)best_int * tmp_rtt) -+ B_not_M++; -+ } -+ } -+ -+ /* check if the path is in M or B_not_M and set the value of 
epsilon accordingly */ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ sk = mptcp_to_sock(mptcp); -+ tp = tcp_sk(sk); -+ ca = inet_csk_ca(sk); -+ -+ if (!mptcp_olia_sk_can_send(sk)) -+ continue; -+ -+ if (B_not_M == 0) { -+ ca->epsilon_num = 0; -+ ca->epsilon_den = 1; -+ } else { -+ tmp_rtt = (u64)tp->srtt_us * tp->srtt_us; -+ tmp_int = max(ca->mptcp_loss3 - ca->mptcp_loss2, -+ ca->mptcp_loss2 - ca->mptcp_loss1); -+ tmp_cwnd = mptcp_get_crt_cwnd(sk); -+ -+ if (tmp_cwnd < max_cwnd && -+ (u64)tmp_int * best_rtt == (u64)best_int * tmp_rtt) { -+ ca->epsilon_num = 1; -+ ca->epsilon_den = established_cnt * B_not_M; -+ } else if (tmp_cwnd == max_cwnd) { -+ ca->epsilon_num = -1; -+ ca->epsilon_den = established_cnt * M; -+ } else { -+ ca->epsilon_num = 0; -+ ca->epsilon_den = 1; -+ } -+ } -+ } -+} -+ -+/* setting the initial values */ -+static void mptcp_olia_init(struct sock *sk) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ struct mptcp_olia *ca = inet_csk_ca(sk); -+ -+ if (mptcp(tp)) { -+ ca->mptcp_loss1 = tp->snd_una; -+ ca->mptcp_loss2 = tp->snd_una; -+ ca->mptcp_loss3 = tp->snd_una; -+ ca->mptcp_snd_cwnd_cnt = 0; -+ ca->epsilon_num = 0; -+ ca->epsilon_den = 1; -+ } -+} -+ -+/* updating inter-loss distance and ssthresh */ -+static void mptcp_olia_set_state(struct sock *sk, u8 new_state) -+{ -+ if (!mptcp(tcp_sk(sk))) -+ return; -+ -+ if (new_state == TCP_CA_Loss || -+ new_state == TCP_CA_Recovery || new_state == TCP_CA_CWR) { -+ struct mptcp_olia *ca = inet_csk_ca(sk); -+ -+ if (ca->mptcp_loss3 != ca->mptcp_loss2 && -+ !inet_csk(sk)->icsk_retransmits) { -+ ca->mptcp_loss1 = ca->mptcp_loss2; -+ ca->mptcp_loss2 = ca->mptcp_loss3; -+ } -+ } -+} -+ -+/* main algorithm */ -+static void mptcp_olia_cong_avoid(struct sock *sk, u32 ack, u32 acked) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct mptcp_olia *ca = inet_csk_ca(sk); -+ const struct mptcp_cb *mpcb = tp->mpcb; -+ -+ u64 inc_num, inc_den, rate, cwnd_scaled; -+ -+ if (!mptcp(tp)) { -+ tcp_reno_cong_avoid(sk, ack, acked); -+ return; -+ } -+ -+ ca->mptcp_loss3 = tp->snd_una; -+ -+ if (!tcp_is_cwnd_limited(sk)) -+ return; -+ -+ /* slow start if it is in the safe area */ -+ if (tcp_in_slow_start(tp)) { -+ tcp_slow_start(tp, acked); -+ return; -+ } -+ -+ mptcp_get_epsilon(mpcb); -+ rate = mptcp_get_rate(mpcb, tp->srtt_us); -+ cwnd_scaled = mptcp_olia_scale(tp->snd_cwnd, scale); -+ inc_den = ca->epsilon_den * tp->snd_cwnd * rate ? 
: 1; -+ -+ /* calculate the increasing term, scaling is used to reduce the rounding effect */ -+ if (ca->epsilon_num == -1) { -+ if (ca->epsilon_den * cwnd_scaled * cwnd_scaled < rate) { -+ inc_num = rate - ca->epsilon_den * -+ cwnd_scaled * cwnd_scaled; -+ ca->mptcp_snd_cwnd_cnt -= div64_u64( -+ mptcp_olia_scale(inc_num , scale) , inc_den); -+ } else { -+ inc_num = ca->epsilon_den * -+ cwnd_scaled * cwnd_scaled - rate; -+ ca->mptcp_snd_cwnd_cnt += div64_u64( -+ mptcp_olia_scale(inc_num , scale) , inc_den); -+ } -+ } else { -+ inc_num = ca->epsilon_num * rate + -+ ca->epsilon_den * cwnd_scaled * cwnd_scaled; -+ ca->mptcp_snd_cwnd_cnt += div64_u64( -+ mptcp_olia_scale(inc_num , scale) , inc_den); -+ } -+ -+ -+ if (ca->mptcp_snd_cwnd_cnt >= (1 << scale) - 1) { -+ if (tp->snd_cwnd < tp->snd_cwnd_clamp) -+ tp->snd_cwnd++; -+ ca->mptcp_snd_cwnd_cnt = 0; -+ } else if (ca->mptcp_snd_cwnd_cnt <= 0 - (1 << scale) + 1) { -+ tp->snd_cwnd = max((int) 1 , (int) tp->snd_cwnd - 1); -+ ca->mptcp_snd_cwnd_cnt = 0; -+ } -+} -+ -+static struct tcp_congestion_ops mptcp_olia = { -+ .init = mptcp_olia_init, -+ .ssthresh = tcp_reno_ssthresh, -+ .cong_avoid = mptcp_olia_cong_avoid, -+ .undo_cwnd = tcp_reno_undo_cwnd, -+ .set_state = mptcp_olia_set_state, -+ .owner = THIS_MODULE, -+ .name = "olia", -+}; -+ -+static int __init mptcp_olia_register(void) -+{ -+ BUILD_BUG_ON(sizeof(struct mptcp_olia) > ICSK_CA_PRIV_SIZE); -+ return tcp_register_congestion_control(&mptcp_olia); -+} -+ -+static void __exit mptcp_olia_unregister(void) -+{ -+ tcp_unregister_congestion_control(&mptcp_olia); -+} -+ -+module_init(mptcp_olia_register); -+module_exit(mptcp_olia_unregister); -+ -+MODULE_AUTHOR("Ramin Khalili, Nicolas Gast, Jean-Yves Le Boudec"); -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("MPTCP COUPLED CONGESTION CONTROL"); -+MODULE_VERSION("0.1"); -diff --git a/net/mptcp/mptcp_output.c b/net/mptcp/mptcp_output.c -new file mode 100644 -index 000000000000..39eae2199802 ---- /dev/null -+++ b/net/mptcp/mptcp_output.c -@@ -0,0 +1,2009 @@ -+/* -+ * MPTCP implementation - Sending side -+ * -+ * Initial Design & Implementation: -+ * Sébastien Barré -+ * -+ * Current Maintainer & Author: -+ * Christoph Paasch -+ * -+ * Additional authors: -+ * Jaakko Korkeaniemi -+ * Gregory Detal -+ * Fabien Duchêne -+ * Andreas Seelinger -+ * Lavkesh Lahngir -+ * Andreas Ripke -+ * Vlad Dogaru -+ * Octavian Purdila -+ * John Ronan -+ * Catalin Nicutar -+ * Brandon Heller -+ * -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. 
-+ */ -+ -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+ -+static const int mptcp_dss_len = MPTCP_SUB_LEN_DSS_ALIGN + -+ MPTCP_SUB_LEN_ACK_ALIGN + -+ MPTCP_SUB_LEN_SEQ_ALIGN; -+ -+static inline int mptcp_sub_len_remove_addr(u16 bitfield) -+{ -+ unsigned int c; -+ for (c = 0; bitfield; c++) -+ bitfield &= bitfield - 1; -+ return MPTCP_SUB_LEN_REMOVE_ADDR + c - 1; -+} -+ -+int mptcp_sub_len_remove_addr_align(u16 bitfield) -+{ -+ return ALIGN(mptcp_sub_len_remove_addr(bitfield), 4); -+} -+EXPORT_SYMBOL(mptcp_sub_len_remove_addr_align); -+ -+/* get the data-seq and end-data-seq and store them again in the -+ * tcp_skb_cb -+ */ -+static bool mptcp_reconstruct_mapping(struct sk_buff *skb) -+{ -+ const struct mp_dss *mpdss = (struct mp_dss *)TCP_SKB_CB(skb)->dss; -+ __be32 *p32; -+ __be16 *p16; -+ -+ if (!mptcp_is_data_seq(skb)) -+ return false; -+ -+ if (!mpdss->M) -+ return false; -+ -+ /* Move the pointer to the data-seq */ -+ p32 = (__be32 *)mpdss; -+ p32++; -+ if (mpdss->A) { -+ p32++; -+ if (mpdss->a) -+ p32++; -+ } -+ -+ TCP_SKB_CB(skb)->seq = ntohl(*p32); -+ -+ /* Get the data_len to calculate the end_data_seq */ -+ p32++; -+ p32++; -+ p16 = (__be16 *)p32; -+ TCP_SKB_CB(skb)->end_seq = ntohs(*p16) + TCP_SKB_CB(skb)->seq; -+ -+ return true; -+} -+ -+static bool mptcp_is_reinjected(const struct sk_buff *skb) -+{ -+ return TCP_SKB_CB(skb)->mptcp_flags & MPTCP_REINJECT; -+} -+ -+static void mptcp_find_and_set_pathmask(struct sock *meta_sk, struct sk_buff *skb) -+{ -+ struct rb_node **p = &meta_sk->tcp_rtx_queue.rb_node; -+ struct rb_node *parent; -+ struct sk_buff *skb_it; -+ -+ while (*p) { -+ parent = *p; -+ skb_it = rb_to_skb(parent); -+ if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb_it)->seq)) { -+ p = &parent->rb_left; -+ continue; -+ } -+ if (after(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb_it)->seq)) { -+ p = &parent->rb_right; -+ continue; -+ } -+ -+ TCP_SKB_CB(skb)->path_mask = TCP_SKB_CB(skb_it)->path_mask; -+ break; -+ } -+} -+ -+/* Reinject data from one TCP subflow to the meta_sk. If sk == NULL, we are -+ * coming from the meta-retransmit-timer -+ */ -+static void __mptcp_reinject_data(struct sk_buff *orig_skb, struct sock *meta_sk, -+ struct sock *sk, int clone_it, -+ enum tcp_queue tcp_queue) -+{ -+ struct sk_buff *skb, *skb1; -+ const struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ struct mptcp_cb *mpcb = meta_tp->mpcb; -+ u32 seq, end_seq; -+ -+ if (clone_it) { -+ /* pskb_copy is necessary here, because the TCP/IP-headers -+ * will be changed when it's going to be reinjected on another -+ * subflow. 
-+ */ -+ tcp_skb_tsorted_save(orig_skb) { -+ skb = pskb_copy_for_clone(orig_skb, GFP_ATOMIC); -+ } tcp_skb_tsorted_restore(orig_skb); -+ } else { -+ if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE) { -+ __skb_unlink(orig_skb, &sk->sk_write_queue); -+ } else { -+ list_del(&orig_skb->tcp_tsorted_anchor); -+ tcp_rtx_queue_unlink(orig_skb, sk); -+ INIT_LIST_HEAD(&orig_skb->tcp_tsorted_anchor); -+ } -+ sock_set_flag(sk, SOCK_QUEUE_SHRUNK); -+ sk->sk_wmem_queued -= orig_skb->truesize; -+ sk_mem_uncharge(sk, orig_skb->truesize); -+ skb = orig_skb; -+ } -+ if (unlikely(!skb)) -+ return; -+ -+ /* Make sure that this list is clean */ -+ tcp_skb_tsorted_anchor_cleanup(skb); -+ -+ if (sk && !mptcp_reconstruct_mapping(skb)) { -+ __kfree_skb(skb); -+ return; -+ } -+ -+ skb->sk = meta_sk; -+ -+ /* Reset subflow-specific TCP control-data */ -+ TCP_SKB_CB(skb)->sacked = 0; -+ TCP_SKB_CB(skb)->tcp_flags &= (TCPHDR_ACK | TCPHDR_PSH); -+ -+ /* If it reached already the destination, we don't have to reinject it */ -+ if (!after(TCP_SKB_CB(skb)->end_seq, meta_tp->snd_una)) { -+ __kfree_skb(skb); -+ return; -+ } -+ -+ /* Only reinject segments that are fully covered by the mapping */ -+ if (skb->len + (mptcp_is_data_fin(skb) ? 1 : 0) != -+ TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq) { -+ struct rb_node *parent, **p = &meta_sk->tcp_rtx_queue.rb_node; -+ u32 end_seq = TCP_SKB_CB(skb)->end_seq; -+ u32 seq = TCP_SKB_CB(skb)->seq; -+ -+ __kfree_skb(skb); -+ -+ /* Ok, now we have to look for the full mapping in the meta -+ * send-queue :S -+ */ -+ -+ /* First, find the first skb that covers us */ -+ while (*p) { -+ parent = *p; -+ skb = rb_to_skb(parent); -+ -+ /* Not yet at the mapping? */ -+ if (!after(end_seq, TCP_SKB_CB(skb)->seq)) { -+ p = &parent->rb_left; -+ continue; -+ } -+ -+ if (!before(seq, TCP_SKB_CB(skb)->end_seq)) { -+ p = &parent->rb_right; -+ continue; -+ } -+ -+ break; -+ } -+ -+ if (*p) { -+ /* We found it, now let's reinject everything */ -+ skb = rb_to_skb(*p); -+ -+ skb_rbtree_walk_from(skb) { -+ if (after(TCP_SKB_CB(skb)->end_seq, end_seq)) -+ return; -+ __mptcp_reinject_data(skb, meta_sk, NULL, 1, -+ TCP_FRAG_IN_RTX_QUEUE); -+ } -+ } -+ return; -+ } -+ -+ /* Segment goes back to the MPTCP-layer. So, we need to zero the -+ * path_mask/dss. -+ */ -+ memset(TCP_SKB_CB(skb)->dss, 0 , mptcp_dss_len); -+ -+ /* We need to find out the path-mask from the meta-write-queue -+ * to properly select a subflow. -+ */ -+ mptcp_find_and_set_pathmask(meta_sk, skb); -+ -+ /* If it's empty, just add */ -+ if (skb_queue_empty(&mpcb->reinject_queue)) { -+ skb_queue_head(&mpcb->reinject_queue, skb); -+ return; -+ } -+ -+ /* Find place to insert skb - or even we can 'drop' it, as the -+ * data is already covered by other skb's in the reinject-queue. -+ * -+ * This is inspired by code from tcp_data_queue. -+ */ -+ -+ skb1 = skb_peek_tail(&mpcb->reinject_queue); -+ seq = TCP_SKB_CB(skb)->seq; -+ while (1) { -+ if (!after(TCP_SKB_CB(skb1)->seq, seq)) -+ break; -+ if (skb_queue_is_first(&mpcb->reinject_queue, skb1)) { -+ skb1 = NULL; -+ break; -+ } -+ skb1 = skb_queue_prev(&mpcb->reinject_queue, skb1); -+ } -+ -+ /* Do skb overlap to previous one? */ -+ end_seq = TCP_SKB_CB(skb)->end_seq; -+ if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { -+ if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { -+ /* All the bits are present. 
Don't reinject */ -+ __kfree_skb(skb); -+ return; -+ } -+ if (seq == TCP_SKB_CB(skb1)->seq) { -+ if (skb_queue_is_first(&mpcb->reinject_queue, skb1)) -+ skb1 = NULL; -+ else -+ skb1 = skb_queue_prev(&mpcb->reinject_queue, skb1); -+ } -+ } -+ if (!skb1) -+ __skb_queue_head(&mpcb->reinject_queue, skb); -+ else -+ __skb_queue_after(&mpcb->reinject_queue, skb1, skb); -+ -+ /* And clean segments covered by new one as whole. */ -+ while (!skb_queue_is_last(&mpcb->reinject_queue, skb)) { -+ skb1 = skb_queue_next(&mpcb->reinject_queue, skb); -+ -+ if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) -+ break; -+ -+ if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) -+ break; -+ -+ __skb_unlink(skb1, &mpcb->reinject_queue); -+ __kfree_skb(skb1); -+ } -+ return; -+} -+ -+/* Inserts data into the reinject queue */ -+void mptcp_reinject_data(struct sock *sk, int clone_it) -+{ -+ struct sock *meta_sk = mptcp_meta_sk(sk); -+ struct sk_buff *skb_it, *tmp; -+ enum tcp_queue tcp_queue; -+ -+ /* It has already been closed - there is really no point in reinjecting */ -+ if (meta_sk->sk_state == TCP_CLOSE) -+ return; -+ -+ skb_queue_walk_safe(&sk->sk_write_queue, skb_it, tmp) { -+ struct tcp_skb_cb *tcb = TCP_SKB_CB(skb_it); -+ /* Subflow syn's and fin's are not reinjected. -+ * -+ * As well as empty subflow-fins with a data-fin. -+ * They are reinjected below (without the subflow-fin-flag) -+ */ -+ if (tcb->tcp_flags & TCPHDR_SYN || -+ (tcb->tcp_flags & TCPHDR_FIN && !mptcp_is_data_fin(skb_it)) || -+ (tcb->tcp_flags & TCPHDR_FIN && mptcp_is_data_fin(skb_it) && !skb_it->len)) -+ continue; -+ -+ if (mptcp_is_reinjected(skb_it)) -+ continue; -+ -+ tcb->mptcp_flags |= MPTCP_REINJECT; -+ __mptcp_reinject_data(skb_it, meta_sk, sk, clone_it, -+ TCP_FRAG_IN_WRITE_QUEUE); -+ } -+ -+ skb_it = tcp_rtx_queue_head(sk); -+ skb_rbtree_walk_from_safe(skb_it, tmp) { -+ struct tcp_skb_cb *tcb = TCP_SKB_CB(skb_it); -+ -+ /* Subflow syn's and fin's are not reinjected. -+ * -+ * As well as empty subflow-fins with a data-fin. -+ * They are reinjected below (without the subflow-fin-flag) -+ */ -+ if (tcb->tcp_flags & TCPHDR_SYN || -+ (tcb->tcp_flags & TCPHDR_FIN && !mptcp_is_data_fin(skb_it)) || -+ (tcb->tcp_flags & TCPHDR_FIN && mptcp_is_data_fin(skb_it) && !skb_it->len)) -+ continue; -+ -+ if (mptcp_is_reinjected(skb_it)) -+ continue; -+ -+ tcb->mptcp_flags |= MPTCP_REINJECT; -+ __mptcp_reinject_data(skb_it, meta_sk, sk, clone_it, -+ TCP_FRAG_IN_RTX_QUEUE); -+ } -+ -+ skb_it = tcp_write_queue_tail(meta_sk); -+ tcp_queue = TCP_FRAG_IN_WRITE_QUEUE; -+ -+ if (!skb_it) { -+ skb_it = skb_rb_last(&meta_sk->tcp_rtx_queue); -+ tcp_queue = TCP_FRAG_IN_RTX_QUEUE; -+ } -+ -+ /* If sk has sent the empty data-fin, we have to reinject it too. */ -+ if (skb_it && mptcp_is_data_fin(skb_it) && skb_it->len == 0 && -+ TCP_SKB_CB(skb_it)->path_mask & mptcp_pi_to_flag(tcp_sk(sk)->mptcp->path_index)) { -+ __mptcp_reinject_data(skb_it, meta_sk, NULL, 1, tcp_queue); -+ } -+ -+ tcp_sk(sk)->pf = 1; -+ -+ mptcp_push_pending_frames(meta_sk); -+} -+EXPORT_SYMBOL(mptcp_reinject_data); -+ -+static void mptcp_combine_dfin(const struct sk_buff *skb, -+ const struct sock *meta_sk, -+ struct sock *subsk) -+{ -+ const struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ const struct mptcp_cb *mpcb = meta_tp->mpcb; -+ -+ /* In infinite mapping we always try to combine */ -+ if (mpcb->infinite_mapping_snd) -+ goto combine; -+ -+ /* Don't combine, if they didn't combine when closing - otherwise we end -+ * up in TIME_WAIT, even if our app is smart enough to avoid it. 
-+ */ -+ if (!mptcp_sk_can_recv(meta_sk) && !mpcb->dfin_combined) -+ return; -+ -+ /* Don't combine if there is still outstanding data that remains to be -+ * DATA_ACKed, because otherwise we may never be able to deliver this. -+ */ -+ if (meta_tp->snd_una != TCP_SKB_CB(skb)->seq) -+ return; -+ -+combine: -+ if (tcp_close_state(subsk)) { -+ subsk->sk_shutdown |= SEND_SHUTDOWN; -+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; -+ } -+} -+ -+static int mptcp_write_dss_mapping(const struct tcp_sock *tp, const struct sk_buff *skb, -+ __be32 *ptr) -+{ -+ const struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); -+ __be32 *start = ptr; -+ __u16 data_len; -+ -+ *ptr++ = htonl(tcb->seq); /* data_seq */ -+ -+ /* If it's a non-data DATA_FIN, we set subseq to 0 (draft v7) */ -+ if (mptcp_is_data_fin(skb) && skb->len == 0) -+ *ptr++ = 0; /* subseq */ -+ else -+ *ptr++ = htonl(tp->write_seq - tp->mptcp->snt_isn); /* subseq */ -+ -+ if (tcb->mptcp_flags & MPTCPHDR_INF) -+ data_len = 0; -+ else -+ data_len = tcb->end_seq - tcb->seq; -+ -+ if (tp->mpcb->dss_csum && data_len) { -+ __sum16 *p16 = (__sum16 *)ptr; -+ __be32 hdseq = mptcp_get_highorder_sndbits(skb, tp->mpcb); -+ __wsum csum; -+ -+ *ptr = htonl(((data_len) << 16) | -+ (TCPOPT_EOL << 8) | -+ (TCPOPT_EOL)); -+ csum = csum_partial(ptr - 2, 12, skb->csum); -+ p16++; -+ *p16++ = csum_fold(csum_partial(&hdseq, sizeof(hdseq), csum)); -+ } else { -+ *ptr++ = htonl(((data_len) << 16) | -+ (TCPOPT_NOP << 8) | -+ (TCPOPT_NOP)); -+ } -+ -+ return ptr - start; -+} -+ -+static int mptcp_write_dss_data_ack(const struct tcp_sock *tp, const struct sk_buff *skb, -+ __be32 *ptr) -+{ -+ struct mp_dss *mdss = (struct mp_dss *)ptr; -+ __be32 *start = ptr; -+ -+ mdss->kind = TCPOPT_MPTCP; -+ mdss->sub = MPTCP_SUB_DSS; -+ mdss->rsv1 = 0; -+ mdss->rsv2 = 0; -+ mdss->F = mptcp_is_data_fin(skb) ? 1 : 0; -+ mdss->m = 0; -+ mdss->M = mptcp_is_data_seq(skb) ? 1 : 0; -+ mdss->a = 0; -+ mdss->A = 1; -+ mdss->len = mptcp_sub_len_dss(mdss, tp->mpcb->dss_csum); -+ ptr++; -+ -+ *ptr++ = htonl(mptcp_meta_tp(tp)->rcv_nxt); -+ -+ return ptr - start; -+} -+ -+/* RFC6824 states that once a particular subflow mapping has been sent -+ * out it must never be changed. However, packets may be split while -+ * they are in the retransmission queue (due to SACK or ACKs) and that -+ * arguably means that we would change the mapping (e.g. it splits it, -+ * our sends out a subset of the initial mapping). -+ * -+ * Furthermore, the skb checksum is not always preserved across splits -+ * (e.g. mptcp_fragment) which would mean that we need to recompute -+ * the DSS checksum in this case. -+ * -+ * To avoid this we save the initial DSS mapping which allows us to -+ * send the same DSS mapping even for fragmented retransmits. 
-+ */ -+static void mptcp_save_dss_data_seq(const struct tcp_sock *tp, struct sk_buff *skb) -+{ -+ struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); -+ __be32 *ptr = (__be32 *)tcb->dss; -+ -+ tcb->mptcp_flags |= MPTCPHDR_SEQ; -+ -+ ptr += mptcp_write_dss_data_ack(tp, skb, ptr); -+ ptr += mptcp_write_dss_mapping(tp, skb, ptr); -+} -+ -+/* Write the MP_CAPABLE with data-option */ -+static int mptcp_write_mpcapable_data(const struct tcp_sock *tp, -+ struct sk_buff *skb, -+ __be32 *ptr) -+{ -+ struct mp_capable *mpc = (struct mp_capable *)ptr; -+ u8 length; -+ -+ if (tp->mpcb->dss_csum) -+ length = MPTCPV1_SUB_LEN_CAPABLE_DATA_CSUM; -+ else -+ length = MPTCPV1_SUB_LEN_CAPABLE_DATA; -+ -+ mpc->kind = TCPOPT_MPTCP; -+ mpc->len = length; -+ mpc->sub = MPTCP_SUB_CAPABLE; -+ mpc->ver = MPTCP_VERSION_1; -+ mpc->a = tp->mpcb->dss_csum; -+ mpc->b = 0; -+ mpc->rsv = 0; -+ mpc->h = 1; -+ -+ ptr++; -+ memcpy(ptr, TCP_SKB_CB(skb)->dss, mptcp_dss_len); -+ -+ mpc->sender_key = tp->mpcb->mptcp_loc_key; -+ mpc->receiver_key = tp->mpcb->mptcp_rem_key; -+ -+ /* dss is in a union with inet_skb_parm and -+ * the IP layer expects zeroed IPCB fields. -+ */ -+ memset(TCP_SKB_CB(skb)->dss, 0, mptcp_dss_len); -+ -+ return MPTCPV1_SUB_LEN_CAPABLE_DATA_ALIGN / sizeof(*ptr); -+} -+ -+/* Write the saved DSS mapping to the header */ -+static int mptcp_write_dss_data_seq(const struct tcp_sock *tp, struct sk_buff *skb, -+ __be32 *ptr) -+{ -+ int length; -+ __be32 *start = ptr; -+ -+ if (tp->mpcb->rem_key_set) { -+ memcpy(ptr, TCP_SKB_CB(skb)->dss, mptcp_dss_len); -+ -+ /* update the data_ack */ -+ start[1] = htonl(mptcp_meta_tp(tp)->rcv_nxt); -+ -+ length = mptcp_dss_len / sizeof(*ptr); -+ } else { -+ memcpy(ptr, TCP_SKB_CB(skb)->dss, MPTCP_SUB_LEN_DSS_ALIGN); -+ -+ ptr++; -+ memcpy(ptr, TCP_SKB_CB(skb)->dss + 2, MPTCP_SUB_LEN_SEQ_ALIGN); -+ -+ length = (MPTCP_SUB_LEN_DSS_ALIGN + MPTCP_SUB_LEN_SEQ_ALIGN) / sizeof(*ptr); -+ } -+ -+ /* dss is in a union with inet_skb_parm and -+ * the IP layer expects zeroed IPCB fields. -+ */ -+ memset(TCP_SKB_CB(skb)->dss, 0 , mptcp_dss_len); -+ -+ return length; -+} -+ -+static bool mptcp_skb_entail(struct sock *sk, struct sk_buff *skb, int reinject) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ const struct sock *meta_sk = mptcp_meta_sk(sk); -+ struct mptcp_cb *mpcb = tp->mpcb; -+ struct tcp_skb_cb *tcb; -+ struct sk_buff *subskb = NULL; -+ -+ if (!reinject) -+ TCP_SKB_CB(skb)->mptcp_flags |= (mpcb->snd_hiseq_index ? -+ MPTCPHDR_SEQ64_INDEX : 0); -+ -+ tcp_skb_tsorted_save(skb) { -+ subskb = pskb_copy_for_clone(skb, GFP_ATOMIC); -+ } tcp_skb_tsorted_restore(skb); -+ if (!subskb) -+ return false; -+ -+ /* At the subflow-level we need to call again tcp_init_tso_segs. We -+ * force this, by setting pcount to 0. It has been set to 1 prior to -+ * the call to mptcp_skb_entail. 
-+ */ -+ tcp_skb_pcount_set(subskb, 0); -+ -+ TCP_SKB_CB(skb)->path_mask |= mptcp_pi_to_flag(tp->mptcp->path_index); -+ -+ /* Compute checksum */ -+ if (tp->mpcb->dss_csum) -+ subskb->csum = skb->csum = skb_checksum(skb, 0, skb->len, 0); -+ -+ tcb = TCP_SKB_CB(subskb); -+ -+ if (tp->mpcb->send_infinite_mapping && -+ !tp->mpcb->infinite_mapping_snd && -+ !before(tcb->seq, mptcp_meta_tp(tp)->snd_nxt)) { -+ tp->mptcp->fully_established = 1; -+ tp->mpcb->infinite_mapping_snd = 1; -+ tp->mptcp->infinite_cutoff_seq = tp->write_seq; -+ tcb->mptcp_flags |= MPTCPHDR_INF; -+ } -+ -+ if (mptcp_is_data_fin(subskb)) -+ mptcp_combine_dfin(subskb, meta_sk, sk); -+ -+ mptcp_save_dss_data_seq(tp, subskb); -+ -+ if (mpcb->send_mptcpv1_mpcapable) { -+ TCP_SKB_CB(subskb)->mptcp_flags |= MPTCPHDR_MPC_DATA; -+ mpcb->send_mptcpv1_mpcapable = 0; -+ } -+ -+ tcb->seq = tp->write_seq; -+ -+ /* Take into account seg len */ -+ tp->write_seq += subskb->len + ((tcb->tcp_flags & TCPHDR_FIN) ? 1 : 0); -+ tcb->end_seq = tp->write_seq; -+ -+ /* txstamp_ack is handled at the meta-level */ -+ tcb->txstamp_ack = 0; -+ -+ /* If it's a non-payload DATA_FIN (also no subflow-fin), the -+ * segment is not part of the subflow but on a meta-only-level. -+ */ -+ if (!mptcp_is_data_fin(subskb) || tcb->end_seq != tcb->seq) { -+ /* Make sure that this list is clean */ -+ INIT_LIST_HEAD(&subskb->tcp_tsorted_anchor); -+ -+ tcp_add_write_queue_tail(sk, subskb); -+ sk->sk_wmem_queued += subskb->truesize; -+ sk_mem_charge(sk, subskb->truesize); -+ } else { -+ /* Necessary to initialize for tcp_transmit_skb. mss of 1, as -+ * skb->len = 0 will force tso_segs to 1. -+ */ -+ tcp_init_tso_segs(subskb, 1); -+ -+ /* Empty data-fins are sent immediatly on the subflow */ -+ if (tcp_transmit_skb(sk, subskb, 0, GFP_ATOMIC)) -+ return false; -+ } -+ -+ if (!tp->mptcp->fully_established) { -+ tp->mptcp->second_packet = 1; -+ tp->mptcp->last_end_data_seq = TCP_SKB_CB(skb)->end_seq; -+ } -+ -+ return true; -+} -+ -+/* Fragment an skb and update the mptcp meta-data. Due to reinject, we -+ * might need to undo some operations done by tcp_fragment. -+ * -+ * Be careful, the skb may come from 3 different places: -+ * - The send-queue (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE) -+ * - The retransmit-queue (tcp_queue == TCP_FRAG_IN_RTX_QUEUE) -+ * - The reinject-queue (reinject == -1) -+ */ -+static int mptcp_fragment(struct sock *meta_sk, enum tcp_queue tcp_queue, -+ struct sk_buff *skb, u32 len, -+ gfp_t gfp, int reinject) -+{ -+ int ret, diff, old_factor; -+ struct sk_buff *buff; -+ u8 flags; -+ -+ if (skb_headlen(skb) < len) -+ diff = skb->len - len; -+ else -+ diff = skb->data_len; -+ old_factor = tcp_skb_pcount(skb); -+ -+ /* The mss_now in tcp_fragment is used to set the tso_segs of the skb. -+ * At the MPTCP-level we do not care about the absolute value. All we -+ * care about is that it is set to 1 for accurate packets_out -+ * accounting. -+ */ -+ ret = tcp_fragment(meta_sk, tcp_queue, skb, len, UINT_MAX, gfp); -+ if (ret) -+ return ret; -+ -+ if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE) -+ buff = skb->next; -+ else -+ buff = skb_rb_next(skb); -+ -+ flags = TCP_SKB_CB(skb)->mptcp_flags; -+ TCP_SKB_CB(skb)->mptcp_flags = flags & ~(MPTCPHDR_FIN); -+ TCP_SKB_CB(buff)->mptcp_flags = flags; -+ TCP_SKB_CB(buff)->path_mask = TCP_SKB_CB(skb)->path_mask; -+ -+ /* If reinject == 1, the buff will be added to the reinject -+ * queue, which is currently not part of memory accounting. So -+ * undo the changes done by tcp_fragment and update the -+ * reinject queue. 
Also, undo changes to the packet counters. -+ */ -+ if (reinject == 1) { -+ int undo = buff->truesize - diff; -+ meta_sk->sk_wmem_queued -= undo; -+ sk_mem_uncharge(meta_sk, undo); -+ -+ tcp_sk(meta_sk)->mpcb->reinject_queue.qlen++; -+ if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE) -+ meta_sk->sk_write_queue.qlen--; -+ -+ if (!before(tcp_sk(meta_sk)->snd_nxt, TCP_SKB_CB(buff)->end_seq)) { -+ undo = old_factor - tcp_skb_pcount(skb) - -+ tcp_skb_pcount(buff); -+ if (undo) -+ tcp_adjust_pcount(meta_sk, skb, -undo); -+ } -+ -+ /* tcp_fragment's call to sk_stream_alloc_skb initializes the -+ * tcp_tsorted_anchor. We need to revert this as it clashes -+ * with the refdst pointer. -+ */ -+ tcp_skb_tsorted_anchor_cleanup(buff); -+ } -+ -+ return 0; -+} -+ -+/* Inspired by tcp_write_wakeup */ -+int mptcp_write_wakeup(struct sock *meta_sk, int mib) -+{ -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ struct sk_buff *skb; -+ int ans = 0; -+ -+ if (meta_sk->sk_state == TCP_CLOSE) -+ return -1; -+ -+ skb = tcp_send_head(meta_sk); -+ if (skb && -+ before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(meta_tp))) { -+ unsigned int mss; -+ unsigned int seg_size = tcp_wnd_end(meta_tp) - TCP_SKB_CB(skb)->seq; -+ struct sock *subsk = meta_tp->mpcb->sched_ops->get_subflow(meta_sk, skb, true); -+ struct tcp_sock *subtp; -+ -+ WARN_ON(TCP_SKB_CB(skb)->sacked); -+ -+ if (!subsk) -+ goto window_probe; -+ subtp = tcp_sk(subsk); -+ mss = tcp_current_mss(subsk); -+ -+ seg_size = min(tcp_wnd_end(meta_tp) - TCP_SKB_CB(skb)->seq, -+ tcp_wnd_end(subtp) - subtp->write_seq); -+ -+ if (before(meta_tp->pushed_seq, TCP_SKB_CB(skb)->end_seq)) -+ meta_tp->pushed_seq = TCP_SKB_CB(skb)->end_seq; -+ -+ /* We are probing the opening of a window -+ * but the window size is != 0 -+ * must have been a result SWS avoidance ( sender ) -+ */ -+ if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq || -+ skb->len > mss) { -+ seg_size = min(seg_size, mss); -+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; -+ if (mptcp_fragment(meta_sk, TCP_FRAG_IN_WRITE_QUEUE, -+ skb, seg_size, GFP_ATOMIC, 0)) -+ return -1; -+ } else if (!tcp_skb_pcount(skb)) { -+ /* see mptcp_write_xmit on why we use UINT_MAX */ -+ tcp_set_skb_tso_segs(skb, UINT_MAX); -+ } -+ -+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; -+ if (!mptcp_skb_entail(subsk, skb, 0)) -+ return -1; -+ -+ mptcp_check_sndseq_wrap(meta_tp, TCP_SKB_CB(skb)->end_seq - -+ TCP_SKB_CB(skb)->seq); -+ tcp_event_new_data_sent(meta_sk, skb); -+ -+ __tcp_push_pending_frames(subsk, mss, TCP_NAGLE_PUSH); -+ tcp_update_skb_after_send(meta_sk, skb, meta_tp->tcp_wstamp_ns); -+ meta_tp->lsndtime = tcp_jiffies32; -+ -+ return 0; -+ } else { -+ struct mptcp_tcp_sock *mptcp; -+ -+window_probe: -+ if (between(meta_tp->snd_up, meta_tp->snd_una + 1, -+ meta_tp->snd_una + 0xFFFF)) { -+ mptcp_for_each_sub(meta_tp->mpcb, mptcp) { -+ struct sock *sk_it = mptcp_to_sock(mptcp); -+ -+ if (mptcp_sk_can_send_ack(sk_it)) -+ tcp_xmit_probe_skb(sk_it, 1, mib); -+ } -+ } -+ -+ /* At least one of the tcp_xmit_probe_skb's has to succeed */ -+ mptcp_for_each_sub(meta_tp->mpcb, mptcp) { -+ struct sock *sk_it = mptcp_to_sock(mptcp); -+ int ret; -+ -+ if (!mptcp_sk_can_send_ack(sk_it)) -+ continue; -+ -+ ret = tcp_xmit_probe_skb(sk_it, 0, mib); -+ if (unlikely(ret > 0)) -+ ans = ret; -+ } -+ return ans; -+ } -+} -+ -+bool mptcp_write_xmit(struct sock *meta_sk, unsigned int mss_now, int nonagle, -+ int push_one, gfp_t gfp) -+{ -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk), *subtp; -+ bool is_rwnd_limited = false; -+ struct mptcp_tcp_sock *mptcp; -+ 
-+bool mptcp_write_xmit(struct sock *meta_sk, unsigned int mss_now, int nonagle, -+ int push_one, gfp_t gfp) -+{ -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk), *subtp; -+ bool is_rwnd_limited = false; -+ struct mptcp_tcp_sock *mptcp; -+ struct sock *subsk = NULL; -+ struct mptcp_cb *mpcb = meta_tp->mpcb; -+ struct sk_buff *skb; -+ int reinject = 0; -+ unsigned int sublimit; -+ __u32 path_mask = 0; -+ -+ tcp_mstamp_refresh(meta_tp); -+ -+ if (inet_csk(meta_sk)->icsk_retransmits) { -+ /* If the timer already once fired, retransmit the head of the -+ * queue to unblock us ASAP. -+ */ -+ if (meta_tp->packets_out && !mpcb->infinite_mapping_snd) -+ mptcp_retransmit_skb(meta_sk, tcp_rtx_queue_head(meta_sk)); -+ } -+ -+ while ((skb = mpcb->sched_ops->next_segment(meta_sk, &reinject, &subsk, -+ &sublimit))) { -+ enum tcp_queue tcp_queue = TCP_FRAG_IN_WRITE_QUEUE; -+ unsigned int limit; -+ -+ WARN(TCP_SKB_CB(skb)->sacked, "sacked: %u reinject: %u", -+ TCP_SKB_CB(skb)->sacked, reinject); -+ -+ subtp = tcp_sk(subsk); -+ mss_now = tcp_current_mss(subsk); -+ -+ if (reinject == 1) { -+ if (!after(TCP_SKB_CB(skb)->end_seq, meta_tp->snd_una)) { -+ /* Segment already reached the peer, take the next one */ -+ __skb_unlink(skb, &mpcb->reinject_queue); -+ __kfree_skb(skb); -+ continue; -+ } -+ } else if (reinject == -1) { -+ tcp_queue = TCP_FRAG_IN_RTX_QUEUE; -+ } -+ -+ /* If the segment was cloned (e.g. a meta retransmission), -+ * the header must be expanded/copied so that there is no -+ * corruption of TSO information. -+ */ -+ if (skb_unclone(skb, GFP_ATOMIC)) -+ break; -+ -+ if (unlikely(!tcp_snd_wnd_test(meta_tp, skb, mss_now))) { -+ is_rwnd_limited = true; -+ break; -+ } -+ -+ /* Force tso_segs to 1 by using UINT_MAX. -+ * We actually don't care about the exact number of segments -+ * emitted on the subflow. We need just to set tso_segs, because -+ * we still need an accurate packets_out count in -+ * tcp_event_new_data_sent. -+ */ -+ tcp_set_skb_tso_segs(skb, UINT_MAX); -+
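The UINT_MAX trick used here (and referenced from mptcp_write_wakeup() above) is simple integer arithmetic; a stand-alone sketch, with an illustrative skb length:

```c
/* Why UINT_MAX shows up twice in this path:
 * - passed as the "mss" when setting tso_segs: ceil(len / UINT_MAX) == 1
 *   for any realistic skb length, so packets_out accounting stays exact
 *   without caring about real TSO segmentation;
 * - passed as max_segs to the split point: UINT_MAX / mss_now segments
 *   of mss_now bytes cover ~4 GiB, so only the send window binds.
 */
#include <limits.h>
#include <stdio.h>

static unsigned int div_round_up(unsigned long long n, unsigned long long d)
{
	return (unsigned int)((n + d - 1) / d);
}

int main(void)
{
	unsigned int skb_len = 60000;	/* hypothetical meta-level skb */
	unsigned int mss_now = 1428;

	printf("tso_segs with mss=UINT_MAX: %u\n",
	       div_round_up(skb_len, UINT_MAX));		/* always 1 */
	printf("bytes representable via max_segs: %llu\n",
	       (unsigned long long)(UINT_MAX / mss_now) * mss_now);
	return 0;
}
```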
-+ -+ /* Check for nagle, regardless of tso_segs. If the segment is -+ * actually larger than mss_now (TSO segment), then -+ * tcp_nagle_check will have partial == false and always trigger -+ * the transmission. -+ * tcp_write_xmit has a TSO-level nagle check which is not -+ * subject to the MPTCP-level. It is based on the properties of -+ * the subflow, not the MPTCP-level. -+ * When the segment is a reinjection or redundant scheduled -+ * segment, nagle check at meta-level may prevent -+ * sending. This could hurt with certain schedulers, as they -+ * resort to reinjection to recover from a window-stall or reduce latency. -+ * Therefore, Nagle check should be disabled in that case. -+ */ -+ if (!reinject && -+ unlikely(!tcp_nagle_test(meta_tp, skb, mss_now, -+ (tcp_skb_is_last(meta_sk, skb) ? -+ nonagle : TCP_NAGLE_PUSH)))) -+ break; -+ -+ limit = mss_now; -+ /* skb->len > mss_now is the equivalent of tso_segs > 1 in -+ * tcp_write_xmit. Otherwise split-point would return 0. -+ */ -+ if (skb->len > mss_now && !tcp_urg_mode(meta_tp)) -+ /* We limit the size of the skb so that it fits into the -+ * window. Call tcp_mss_split_point to avoid duplicating -+ * code. -+ * We really only care about fitting the skb into the -+ * window. That's why we use UINT_MAX. If the skb does -+ * not fit into the cwnd_quota or the NIC's max-segs -+ * limitation, it will be split by the subflow's -+ * tcp_write_xmit which does the appropriate call to -+ * tcp_mss_split_point. -+ */ -+ limit = tcp_mss_split_point(meta_sk, skb, mss_now, -+ UINT_MAX / mss_now, -+ nonagle); -+ -+ if (sublimit) -+ limit = min(limit, sublimit); -+ -+ if (skb->len > limit && -+ unlikely(mptcp_fragment(meta_sk, tcp_queue, -+ skb, limit, gfp, reinject))) -+ break; -+ -+ if (!mptcp_skb_entail(subsk, skb, reinject)) -+ break; -+ -+ if (reinject <= 0) -+ tcp_update_skb_after_send(meta_sk, skb, meta_tp->tcp_wstamp_ns); -+ meta_tp->lsndtime = tcp_jiffies32; -+ -+ path_mask |= mptcp_pi_to_flag(subtp->mptcp->path_index); -+ -+ if (!reinject) { -+ mptcp_check_sndseq_wrap(meta_tp, -+ TCP_SKB_CB(skb)->end_seq - -+ TCP_SKB_CB(skb)->seq); -+ tcp_event_new_data_sent(meta_sk, skb); -+ } -+ -+ tcp_minshall_update(meta_tp, mss_now, skb); -+ -+ if (reinject > 0) { -+ __skb_unlink(skb, &mpcb->reinject_queue); -+ kfree_skb(skb); -+ } -+ -+ if (push_one) -+ break; -+ } -+ -+ if (is_rwnd_limited) -+ tcp_chrono_start(meta_sk, TCP_CHRONO_RWND_LIMITED); -+ else -+ tcp_chrono_stop(meta_sk, TCP_CHRONO_RWND_LIMITED); -+ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ subsk = mptcp_to_sock(mptcp); -+ subtp = tcp_sk(subsk); -+ -+ if (!(path_mask & mptcp_pi_to_flag(subtp->mptcp->path_index))) -+ continue; -+ -+ mss_now = tcp_current_mss(subsk); -+ -+ /* Nagle is handled at the MPTCP-layer, so -+ * always push on the subflow -+ */ -+ __tcp_push_pending_frames(subsk, mss_now, TCP_NAGLE_PUSH); -+ } -+ -+ return !meta_tp->packets_out && tcp_send_head(meta_sk); -+} -+ -+void mptcp_write_space(struct sock *sk) -+{ -+ mptcp_push_pending_frames(mptcp_meta_sk(sk)); -+} -+ -+u32 __mptcp_select_window(struct sock *sk) -+{ -+ struct inet_connection_sock *icsk = inet_csk(sk); -+ struct tcp_sock *tp = tcp_sk(sk), *meta_tp = mptcp_meta_tp(tp); -+ struct sock *meta_sk = mptcp_meta_sk(sk); -+ int mss, free_space, full_space, window; -+ -+ /* MSS for the peer's data. Previous versions used mss_clamp -+ * here. I don't know if the value based on our guesses -+ * of peer's MSS is better for the performance. It's more correct -+ * but may be worse for the performance because of rcv_mss -+ * fluctuations. --SAW 1998/11/1 -+ */ -+ mss = icsk->icsk_ack.rcv_mss; -+ free_space = tcp_space(meta_sk); -+ full_space = min_t(int, meta_tp->window_clamp, -+ tcp_full_space(meta_sk)); -+ -+ if (mss > full_space) -+ mss = full_space; -+ -+ if (free_space < (full_space >> 1)) { -+ /* If free_space is decreasing due to mostly meta-level -+ * out-of-order packets, don't turn off the quick-ack mode. -+ */ -+ if (meta_tp->rcv_nxt - meta_tp->copied_seq > ((full_space - free_space) >> 1)) -+ icsk->icsk_ack.quick = 0; -+ -+ if (tcp_memory_pressure) -+ /* TODO this has to be adapted when we support different -+ * MSS's among the subflows. -+ */ -+ meta_tp->rcv_ssthresh = min(meta_tp->rcv_ssthresh, -+ 4U * meta_tp->advmss); -+ -+ if (free_space < mss) -+ return 0; -+ } -+ -+ if (free_space > meta_tp->rcv_ssthresh) -+ free_space = meta_tp->rcv_ssthresh; -+ -+ /* Don't do rounding if we are using window scaling, since the -+ * scaled window will not line up with the MSS boundary anyway. -+ */ -+ window = meta_tp->rcv_wnd; -+ if (tp->rx_opt.rcv_wscale) { -+ window = free_space; -+ -+ /* Advertise enough space so that it won't get scaled away. -+ * Important case: prevent zero window announcement if -+ * 1<<rcv_wscale > mss. -+ */ -+ if (((window >> tp->rx_opt.rcv_wscale) << tp-> -+ rx_opt.rcv_wscale) != window) -+ window = (((window >> tp->rx_opt.rcv_wscale) + 1) -+ << tp->rx_opt.rcv_wscale);
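The rounding just above is plain bit arithmetic: a window is advertised in units of 2^rcv_wscale, so anything below one unit would be truncated to a zero-window announcement. A self-contained sketch with made-up values:

```c
#include <stdio.h>

static unsigned int scale_window(unsigned int window, unsigned int wscale)
{
	/* If the window is not a multiple of 2^wscale, round it up so the
	 * receiver does not see it truncated to a smaller (or zero) value.
	 */
	if (((window >> wscale) << wscale) != window)
		window = ((window >> wscale) + 1) << wscale;
	return window;
}

int main(void)
{
	/* With wscale = 7, a 100-byte window would scale away to 0. */
	printf("%u\n", scale_window(100, 7));	/* prints 128 */
	printf("%u\n", scale_window(256, 7));	/* already aligned: 256 */
	return 0;
}
```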
-+ } else { -+ /* Get the largest window that is a nice multiple of mss. -+ * Window clamp already applied above. -+ * If our current window offering is within 1 mss of the -+ * free space we just keep it. This prevents the divide -+ * and multiply from happening most of the time. -+ * We also don't do any window rounding when the free space -+ * is too small. -+ */ -+ if (window <= free_space - mss || window > free_space) -+ window = (free_space / mss) * mss; -+ else if (mss == full_space && -+ free_space > window + (full_space >> 1)) -+ window = free_space; -+ } -+ -+ return window; -+} -+ -+void mptcp_syn_options(const struct sock *sk, struct tcp_out_options *opts, -+ unsigned *remaining) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ -+ opts->options |= OPTION_MPTCP; -+ if (is_master_tp(tp)) { -+ opts->mptcp_options |= OPTION_MP_CAPABLE | OPTION_TYPE_SYN; -+ opts->mptcp_ver = tp->mptcp_ver; -+ -+ if (tp->mptcp_ver >= MPTCP_VERSION_1) -+ *remaining -= MPTCPV1_SUB_LEN_CAPABLE_SYN_ALIGN; -+ else -+ *remaining -= MPTCP_SUB_LEN_CAPABLE_SYN_ALIGN; -+ -+ opts->mp_capable.sender_key = tp->mptcp_loc_key; -+ opts->dss_csum = !!sysctl_mptcp_checksum; -+ } else { -+ const struct mptcp_cb *mpcb = tp->mpcb; -+ -+ opts->mptcp_options |= OPTION_MP_JOIN | OPTION_TYPE_SYN; -+ *remaining -= MPTCP_SUB_LEN_JOIN_SYN_ALIGN; -+ opts->mp_join_syns.token = mpcb->mptcp_rem_token; -+ opts->mp_join_syns.low_prio = tp->mptcp->low_prio; -+ opts->addr_id = tp->mptcp->loc_id; -+ opts->mp_join_syns.sender_nonce = tp->mptcp->mptcp_loc_nonce; -+ } -+} -+ -+void mptcp_synack_options(struct request_sock *req, -+ struct tcp_out_options *opts, unsigned *remaining) -+{ -+ struct mptcp_request_sock *mtreq; -+ mtreq = mptcp_rsk(req); -+ -+ opts->options |= OPTION_MPTCP; -+ /* MPCB not yet set - thus it's a new MPTCP-session */ -+ if (!mtreq->is_sub) { -+ opts->mptcp_options |= OPTION_MP_CAPABLE | OPTION_TYPE_SYNACK; -+ opts->mptcp_ver = mtreq->mptcp_ver; -+ opts->mp_capable.sender_key = mtreq->mptcp_loc_key; -+ opts->dss_csum = !!sysctl_mptcp_checksum || mtreq->dss_csum; -+ if (mtreq->mptcp_ver >= MPTCP_VERSION_1) { -+ *remaining -= MPTCPV1_SUB_LEN_CAPABLE_SYNACK_ALIGN; -+ } else { -+ *remaining -= MPTCP_SUB_LEN_CAPABLE_SYN_ALIGN; -+ } -+ } else { -+ opts->mptcp_options |= OPTION_MP_JOIN | OPTION_TYPE_SYNACK; -+ opts->mp_join_syns.sender_truncated_mac = -+ mtreq->mptcp_hash_tmac; -+ opts->mp_join_syns.sender_nonce = mtreq->mptcp_loc_nonce; -+ opts->mp_join_syns.low_prio = mtreq->low_prio; -+ opts->addr_id = mtreq->loc_id; -+ *remaining -= MPTCP_SUB_LEN_JOIN_SYNACK_ALIGN; -+ } -+} -+ -+void mptcp_established_options(struct sock *sk, struct sk_buff *skb, -+ struct tcp_out_options *opts, unsigned *size) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct mptcp_cb *mpcb = tp->mpcb; -+ const struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; -+ -+ /* We are coming from tcp_current_mss with the meta_sk as an argument. -+ * It does not make sense to check for the options, because when the -+ * segment gets sent, another subflow will be chosen. -+ */ -+ if (!skb && is_meta_sk(sk)) -+ return; -+ -+ if (unlikely(tp->send_mp_fclose)) { -+ opts->options |= OPTION_MPTCP; -+ opts->mptcp_options |= OPTION_MP_FCLOSE; -+ opts->mp_capable.receiver_key = mpcb->mptcp_rem_key; -+ *size += MPTCP_SUB_LEN_FCLOSE_ALIGN; -+ return; -+ } -+ -+ /* 1. If we are the sender of the infinite-mapping, we need the -+ * MPTCPHDR_INF-flag, because a retransmission of the -+ * infinite-announcement still needs the mptcp-option.
-+ * -+ * We need infinite_cutoff_seq, because retransmissions from before -+ * the infinite-cutoff-moment still need the MPTCP-signalling to stay -+ * consistent. -+ * -+ * 2. If we are the receiver of the infinite-mapping, we always skip -+ * mptcp-options, because acknowledgments from before the -+ * infinite-mapping point have already been sent out. -+ * -+ * I know, the whole infinite-mapping stuff is ugly... -+ * -+ * TODO: Handle wrapped data-sequence numbers -+ * (even if it's very unlikely) -+ */ -+ if (unlikely(mpcb->infinite_mapping_snd) && -+ ((mpcb->send_infinite_mapping && tcb && -+ mptcp_is_data_seq(skb) && -+ !(tcb->mptcp_flags & MPTCPHDR_INF) && -+ !before(tcb->seq, tp->mptcp->infinite_cutoff_seq)) || -+ !mpcb->send_infinite_mapping)) -+ return; -+ -+ if (unlikely(tp->mptcp->include_mpc)) { -+ opts->options |= OPTION_MPTCP; -+ opts->mptcp_options |= OPTION_MP_CAPABLE | -+ OPTION_TYPE_ACK; -+ -+ if (mpcb->mptcp_ver >= MPTCP_VERSION_1) -+ *size += MPTCPV1_SUB_LEN_CAPABLE_ACK_ALIGN; -+ else -+ *size += MPTCP_SUB_LEN_CAPABLE_ACK_ALIGN; -+ -+ opts->mptcp_ver = mpcb->mptcp_ver; -+ opts->mp_capable.sender_key = mpcb->mptcp_loc_key; -+ opts->mp_capable.receiver_key = mpcb->mptcp_rem_key; -+ opts->dss_csum = mpcb->dss_csum; -+ -+ if (skb) -+ tp->mptcp->include_mpc = 0; -+ } -+ if (unlikely(tp->mptcp->pre_established) && -+ (!skb || !(tcb->tcp_flags & (TCPHDR_FIN | TCPHDR_RST)))) { -+ opts->options |= OPTION_MPTCP; -+ opts->mptcp_options |= OPTION_MP_JOIN | OPTION_TYPE_ACK; -+ *size += MPTCP_SUB_LEN_JOIN_ACK_ALIGN; -+ } -+ -+ if (unlikely(mpcb->addr_signal) && mpcb->pm_ops->addr_signal && -+ mpcb->mptcp_ver >= MPTCP_VERSION_1 && skb && !mptcp_is_data_seq(skb)) { -+ mpcb->pm_ops->addr_signal(sk, size, opts, skb); -+ -+ if (opts->add_addr_v6) -+ /* Skip subsequent options */ -+ return; -+ } -+ -+ if (!tp->mptcp->include_mpc && !tp->mptcp->pre_established) { -+ opts->options |= OPTION_MPTCP; -+ opts->mptcp_options |= OPTION_DATA_ACK; -+ /* If !skb, we come from tcp_current_mss and thus we always -+ * assume that the DSS-option will be set for the data-packet. -+ */ -+ if (skb && !mptcp_is_data_seq(skb) && mpcb->rem_key_set) { -+ *size += MPTCP_SUB_LEN_ACK_ALIGN; -+ } else if ((skb && mptcp_is_data_mpcapable(skb)) || -+ (!skb && tp->mpcb->send_mptcpv1_mpcapable)) { -+ *size += MPTCPV1_SUB_LEN_CAPABLE_DATA_ALIGN; -+ } else { -+ /* Doesn't matter, if csum included or not. 
It will be -+ * either 10 or 12, and thus aligned = 12 -+ */ -+ if (mpcb->rem_key_set) -+ *size += MPTCP_SUB_LEN_ACK_ALIGN + -+ MPTCP_SUB_LEN_SEQ_ALIGN; -+ else -+ *size += MPTCP_SUB_LEN_SEQ_ALIGN; -+ } -+ -+ *size += MPTCP_SUB_LEN_DSS_ALIGN; -+ } -+ -+ /* In fallback mp_fail-mode, we have to repeat it until the fallback -+ * has been done by the sender -+ */ -+ if (unlikely(tp->mptcp->send_mp_fail) && skb && -+ MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_FAIL) { -+ opts->options |= OPTION_MPTCP; -+ opts->mptcp_options |= OPTION_MP_FAIL; -+ *size += MPTCP_SUB_LEN_FAIL; -+ } -+ -+ if (unlikely(mpcb->addr_signal) && mpcb->pm_ops->addr_signal && -+ mpcb->mptcp_ver < MPTCP_VERSION_1) -+ mpcb->pm_ops->addr_signal(sk, size, opts, skb); -+ -+ if (unlikely(tp->mptcp->send_mp_prio) && -+ MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_PRIO_ALIGN) { -+ opts->options |= OPTION_MPTCP; -+ opts->mptcp_options |= OPTION_MP_PRIO; -+ if (skb) -+ tp->mptcp->send_mp_prio = 0; -+ *size += MPTCP_SUB_LEN_PRIO_ALIGN; -+ } -+ -+ return; -+} -+ -+u16 mptcp_select_window(struct sock *sk) -+{ -+ u16 new_win = tcp_select_window(sk); -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct tcp_sock *meta_tp = mptcp_meta_tp(tp); -+ -+ meta_tp->rcv_wnd = tp->rcv_wnd; -+ meta_tp->rcv_wup = meta_tp->rcv_nxt; -+ /* no need to use tcp_update_rcv_right_edge, because at the meta level -+ * right edge cannot go back -+ */ -+ meta_tp->rcv_right_edge = meta_tp->rcv_wnd + meta_tp->rcv_wup; -+ -+ return new_win; -+} -+ -+void mptcp_options_write(__be32 *ptr, struct tcp_sock *tp, -+ const struct tcp_out_options *opts, -+ struct sk_buff *skb) -+{ -+ if (unlikely(OPTION_MP_CAPABLE & opts->mptcp_options)) { -+ struct mp_capable *mpc = (struct mp_capable *)ptr; -+ -+ mpc->kind = TCPOPT_MPTCP; -+ -+ if (OPTION_TYPE_SYN & opts->mptcp_options) { -+ mpc->ver = opts->mptcp_ver; -+ -+ if (mpc->ver >= MPTCP_VERSION_1) { -+ mpc->len = MPTCPV1_SUB_LEN_CAPABLE_SYN; -+ ptr += MPTCPV1_SUB_LEN_CAPABLE_SYN_ALIGN >> 2; -+ } else { -+ mpc->sender_key = opts->mp_capable.sender_key; -+ mpc->len = MPTCP_SUB_LEN_CAPABLE_SYN; -+ ptr += MPTCP_SUB_LEN_CAPABLE_SYN_ALIGN >> 2; -+ } -+ } else if (OPTION_TYPE_SYNACK & opts->mptcp_options) { -+ mpc->ver = opts->mptcp_ver; -+ -+ if (mpc->ver >= MPTCP_VERSION_1) { -+ mpc->len = MPTCPV1_SUB_LEN_CAPABLE_SYNACK; -+ ptr += MPTCPV1_SUB_LEN_CAPABLE_SYNACK_ALIGN >> 2; -+ } else { -+ mpc->len = MPTCP_SUB_LEN_CAPABLE_SYN; -+ ptr += MPTCP_SUB_LEN_CAPABLE_SYN_ALIGN >> 2; -+ } -+ -+ mpc->sender_key = opts->mp_capable.sender_key; -+ } else if (OPTION_TYPE_ACK & opts->mptcp_options) { -+ mpc->len = MPTCP_SUB_LEN_CAPABLE_ACK; -+ mpc->ver = opts->mptcp_ver; -+ ptr += MPTCP_SUB_LEN_CAPABLE_ACK_ALIGN >> 2; -+ -+ mpc->sender_key = opts->mp_capable.sender_key; -+ mpc->receiver_key = opts->mp_capable.receiver_key; -+ } -+ -+ mpc->sub = MPTCP_SUB_CAPABLE; -+ mpc->a = opts->dss_csum; -+ mpc->b = 0; -+ mpc->rsv = 0; -+ mpc->h = 1; -+ } -+ if (unlikely(OPTION_MP_JOIN & opts->mptcp_options)) { -+ struct mp_join *mpj = (struct mp_join *)ptr; -+ -+ mpj->kind = TCPOPT_MPTCP; -+ mpj->sub = MPTCP_SUB_JOIN; -+ mpj->rsv = 0; -+ -+ if (OPTION_TYPE_SYN & opts->mptcp_options) { -+ mpj->len = MPTCP_SUB_LEN_JOIN_SYN; -+ mpj->u.syn.token = opts->mp_join_syns.token; -+ mpj->u.syn.nonce = opts->mp_join_syns.sender_nonce; -+ mpj->b = opts->mp_join_syns.low_prio; -+ mpj->addr_id = opts->addr_id; -+ ptr += MPTCP_SUB_LEN_JOIN_SYN_ALIGN >> 2; -+ } else if (OPTION_TYPE_SYNACK & opts->mptcp_options) { -+ mpj->len = MPTCP_SUB_LEN_JOIN_SYNACK; -+ mpj->u.synack.mac = -+ 
opts->mp_join_syns.sender_truncated_mac; -+ mpj->u.synack.nonce = opts->mp_join_syns.sender_nonce; -+ mpj->b = opts->mp_join_syns.low_prio; -+ mpj->addr_id = opts->addr_id; -+ ptr += MPTCP_SUB_LEN_JOIN_SYNACK_ALIGN >> 2; -+ } else if (OPTION_TYPE_ACK & opts->mptcp_options) { -+ mpj->len = MPTCP_SUB_LEN_JOIN_ACK; -+ mpj->addr_id = 0; /* addr_id is rsv (RFC 6824, p. 21) */ -+ memcpy(mpj->u.ack.mac, &tp->mptcp->sender_mac[0], 20); -+ ptr += MPTCP_SUB_LEN_JOIN_ACK_ALIGN >> 2; -+ } -+ } -+ if (unlikely(OPTION_ADD_ADDR & opts->mptcp_options)) { -+ struct mp_add_addr *mpadd = (struct mp_add_addr *)ptr; -+ struct mptcp_cb *mpcb = tp->mpcb; -+ -+ mpadd->kind = TCPOPT_MPTCP; -+ if (opts->add_addr_v4) { -+ mpadd->addr_id = opts->add_addr4.addr_id; -+ mpadd->u.v4.addr = opts->add_addr4.addr; -+ if (mpcb->mptcp_ver < MPTCP_VERSION_1) { -+ mpadd->u_bit.v0.sub = MPTCP_SUB_ADD_ADDR; -+ mpadd->u_bit.v0.ipver = 4; -+ mpadd->len = MPTCP_SUB_LEN_ADD_ADDR4; -+ ptr += MPTCP_SUB_LEN_ADD_ADDR4_ALIGN >> 2; -+ } else { -+ mpadd->u_bit.v1.sub = MPTCP_SUB_ADD_ADDR; -+ mpadd->u_bit.v1.rsv = 0; -+ mpadd->u_bit.v1.echo = 0; -+ memcpy((char *)mpadd->u.v4.mac - 2, -+ (char *)&opts->add_addr4.trunc_mac, 8); -+ mpadd->len = MPTCP_SUB_LEN_ADD_ADDR4_VER1; -+ ptr += MPTCP_SUB_LEN_ADD_ADDR4_ALIGN_VER1 >> 2; -+ } -+ } else if (opts->add_addr_v6) { -+ mpadd->addr_id = opts->add_addr6.addr_id; -+ memcpy(&mpadd->u.v6.addr, &opts->add_addr6.addr, -+ sizeof(mpadd->u.v6.addr)); -+ if (mpcb->mptcp_ver < MPTCP_VERSION_1) { -+ mpadd->u_bit.v0.sub = MPTCP_SUB_ADD_ADDR; -+ mpadd->u_bit.v0.ipver = 6; -+ mpadd->len = MPTCP_SUB_LEN_ADD_ADDR6; -+ ptr += MPTCP_SUB_LEN_ADD_ADDR6_ALIGN >> 2; -+ } else { -+ mpadd->u_bit.v1.sub = MPTCP_SUB_ADD_ADDR; -+ mpadd->u_bit.v1.rsv = 0; -+ mpadd->u_bit.v1.echo = 0; -+ memcpy((char *)mpadd->u.v6.mac - 2, -+ (char *)&opts->add_addr6.trunc_mac, 8); -+ mpadd->len = MPTCP_SUB_LEN_ADD_ADDR6_VER1; -+ ptr += MPTCP_SUB_LEN_ADD_ADDR6_ALIGN_VER1 >> 2; -+ } -+ } -+ -+ MPTCP_INC_STATS(sock_net((struct sock *)tp), MPTCP_MIB_ADDADDRTX); -+ } -+ if (unlikely(OPTION_REMOVE_ADDR & opts->mptcp_options)) { -+ struct mp_remove_addr *mprem = (struct mp_remove_addr *)ptr; -+ u8 *addrs_id; -+ int id, len, len_align; -+ -+ len = mptcp_sub_len_remove_addr(opts->remove_addrs); -+ len_align = mptcp_sub_len_remove_addr_align(opts->remove_addrs); -+ -+ mprem->kind = TCPOPT_MPTCP; -+ mprem->len = len; -+ mprem->sub = MPTCP_SUB_REMOVE_ADDR; -+ mprem->rsv = 0; -+ addrs_id = &mprem->addrs_id; -+ -+ mptcp_for_each_bit_set(opts->remove_addrs, id) -+ *(addrs_id++) = id; -+ -+ /* Fill the rest with NOP's */ -+ if (len_align > len) { -+ int i; -+ for (i = 0; i < len_align - len; i++) -+ *(addrs_id++) = TCPOPT_NOP; -+ } -+ -+ ptr += len_align >> 2; -+ -+ MPTCP_INC_STATS(sock_net((struct sock *)tp), MPTCP_MIB_REMADDRTX); -+ } -+ if (unlikely(OPTION_MP_FAIL & opts->mptcp_options)) { -+ struct mp_fail *mpfail = (struct mp_fail *)ptr; -+ -+ mpfail->kind = TCPOPT_MPTCP; -+ mpfail->len = MPTCP_SUB_LEN_FAIL; -+ mpfail->sub = MPTCP_SUB_FAIL; -+ mpfail->rsv1 = 0; -+ mpfail->rsv2 = 0; -+ mpfail->data_seq = htonll(tp->mpcb->csum_cutoff_seq); -+ -+ ptr += MPTCP_SUB_LEN_FAIL_ALIGN >> 2; -+ } -+ if (unlikely(OPTION_MP_FCLOSE & opts->mptcp_options)) { -+ struct mp_fclose *mpfclose = (struct mp_fclose *)ptr; -+ -+ mpfclose->kind = TCPOPT_MPTCP; -+ mpfclose->len = MPTCP_SUB_LEN_FCLOSE; -+ mpfclose->sub = MPTCP_SUB_FCLOSE; -+ mpfclose->rsv1 = 0; -+ mpfclose->rsv2 = 0; -+ mpfclose->key = opts->mp_capable.receiver_key; -+ -+ ptr += MPTCP_SUB_LEN_FCLOSE_ALIGN >> 2; -+ 
} -+ -+ if (OPTION_DATA_ACK & opts->mptcp_options) { -+ if (!mptcp_is_data_seq(skb) && tp->mpcb->rem_key_set) -+ ptr += mptcp_write_dss_data_ack(tp, skb, ptr); -+ else if (mptcp_is_data_mpcapable(skb)) -+ ptr += mptcp_write_mpcapable_data(tp, skb, ptr); -+ else -+ ptr += mptcp_write_dss_data_seq(tp, skb, ptr); -+ } -+ if (unlikely(OPTION_MP_PRIO & opts->mptcp_options)) { -+ struct mp_prio *mpprio = (struct mp_prio *)ptr; -+ -+ mpprio->kind = TCPOPT_MPTCP; -+ mpprio->len = MPTCP_SUB_LEN_PRIO; -+ mpprio->sub = MPTCP_SUB_PRIO; -+ mpprio->rsv = 0; -+ mpprio->b = tp->mptcp->low_prio; -+ mpprio->addr_id = TCPOPT_NOP; -+ -+ ptr += MPTCP_SUB_LEN_PRIO_ALIGN >> 2; -+ } -+} -+ -+/* Sends the datafin */ -+void mptcp_send_fin(struct sock *meta_sk) -+{ -+ struct sk_buff *skb, *tskb = tcp_write_queue_tail(meta_sk); -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ int mss_now; -+ -+ if ((1 << meta_sk->sk_state) & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) -+ meta_tp->mpcb->passive_close = 1; -+ -+ /* Optimization, tack on the FIN if we have a queue of -+ * unsent frames. But be careful about outgoing SACKS -+ * and IP options. -+ */ -+ mss_now = mptcp_current_mss(meta_sk); -+ -+ if (tskb) { -+ TCP_SKB_CB(tskb)->mptcp_flags |= MPTCPHDR_FIN; -+ TCP_SKB_CB(tskb)->end_seq++; -+ meta_tp->write_seq++; -+ } else { -+ /* Socket is locked, keep trying until memory is available. */ -+ for (;;) { -+ skb = alloc_skb_fclone(MAX_TCP_HEADER, -+ meta_sk->sk_allocation); -+ if (skb) -+ break; -+ yield(); -+ } -+ /* Reserve space for headers and prepare control bits. */ -+ INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); -+ skb_reserve(skb, MAX_TCP_HEADER); -+ -+ tcp_init_nondata_skb(skb, meta_tp->write_seq, TCPHDR_ACK); -+ TCP_SKB_CB(skb)->end_seq++; -+ TCP_SKB_CB(skb)->mptcp_flags |= MPTCPHDR_FIN; -+ tcp_queue_skb(meta_sk, skb); -+ } -+ __tcp_push_pending_frames(meta_sk, mss_now, TCP_NAGLE_OFF); -+} -+ -+void mptcp_send_active_reset(struct sock *meta_sk, gfp_t priority) -+{ -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ struct mptcp_cb *mpcb = meta_tp->mpcb; -+ struct sock *sk; -+ -+ if (hlist_empty(&mpcb->conn_list)) -+ return; -+ -+ WARN_ON(meta_tp->send_mp_fclose); -+ -+ /* First - select a socket */ -+ sk = mptcp_select_ack_sock(meta_sk); -+ -+ /* May happen if no subflow is in an appropriate state, OR -+ * we are in infinite mode or about to go there - just send a reset -+ */ -+ if (!sk || mptcp_in_infinite_mapping_weak(mpcb)) { -+ /* tcp_done must be handled with bh disabled */ -+ if (!in_serving_softirq()) -+ local_bh_disable(); -+ -+ mptcp_sub_force_close_all(mpcb, NULL); -+ -+ if (!in_serving_softirq()) -+ local_bh_enable(); -+ return; -+ } -+ -+ tcp_mstamp_refresh(meta_tp); -+ -+ tcp_sk(sk)->send_mp_fclose = 1; -+ /** Reset all other subflows */ -+ -+ /* tcp_done must be handled with bh disabled */ -+ if (!in_serving_softirq()) -+ local_bh_disable(); -+ -+ mptcp_sub_force_close_all(mpcb, sk); -+ -+ tcp_set_state(sk, TCP_RST_WAIT); -+ -+ if (!in_serving_softirq()) -+ local_bh_enable(); -+ -+ tcp_send_ack(sk); -+ tcp_clear_xmit_timers(sk); -+ inet_csk_reset_keepalive_timer(sk, inet_csk(sk)->icsk_rto); -+ -+ meta_tp->send_mp_fclose = 1; -+ inet_csk(sk)->icsk_retransmits = 0; -+ -+ /* Prevent exp backoff reverting on ICMP dest unreachable */ -+ inet_csk(sk)->icsk_backoff = 0; -+ -+ MPTCP_INC_STATS(sock_net(meta_sk), MPTCP_MIB_FASTCLOSETX); -+} -+ -+static void mptcp_ack_retransmit_timer(struct sock *sk) -+{ -+ struct inet_connection_sock *icsk = inet_csk(sk); -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct net *net = 
sock_net(sk); -+ struct sk_buff *skb; -+ -+ if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) -+ goto out; /* Routing failure or similar */ -+ -+ tcp_mstamp_refresh(tp); -+ -+ if (tcp_write_timeout(sk)) { -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKRTO); -+ tp->mptcp->pre_established = 0; -+ sk_stop_timer(sk, &tp->mptcp->mptcp_ack_timer); -+ tp->ops->send_active_reset(sk, GFP_ATOMIC); -+ goto out; -+ } -+ -+ skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); -+ if (skb == NULL) { -+ sk_reset_timer(sk, &tp->mptcp->mptcp_ack_timer, -+ jiffies + icsk->icsk_rto); -+ return; -+ } -+ -+ /* Reserve space for headers and prepare control bits */ -+ skb_reserve(skb, MAX_TCP_HEADER); -+ tcp_init_nondata_skb(skb, tp->snd_una, TCPHDR_ACK); -+ -+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKRXMIT); -+ -+ if (tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC) > 0) { -+ /* Retransmission failed because of local congestion, -+ * do not backoff. -+ */ -+ if (!icsk->icsk_retransmits) -+ icsk->icsk_retransmits = 1; -+ sk_reset_timer(sk, &tp->mptcp->mptcp_ack_timer, -+ jiffies + icsk->icsk_rto); -+ return; -+ } -+ -+ if (!tp->retrans_stamp) -+ tp->retrans_stamp = tcp_time_stamp(tp) ? : 1; -+ -+ icsk->icsk_retransmits++; -+ icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); -+ sk_reset_timer(sk, &tp->mptcp->mptcp_ack_timer, -+ jiffies + icsk->icsk_rto); -+ if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0)) -+ __sk_dst_reset(sk); -+ -+out:; -+} -+ -+void mptcp_ack_handler(struct timer_list *t) -+{ -+ struct mptcp_tcp_sock *mptcp = from_timer(mptcp, t, mptcp_ack_timer); -+ struct sock *sk = (struct sock *)mptcp->tp; -+ struct sock *meta_sk = mptcp_meta_sk(sk); -+ -+ bh_lock_sock(meta_sk); -+ if (sock_owned_by_user(meta_sk)) { -+ /* Try again later */ -+ sk_reset_timer(sk, &tcp_sk(sk)->mptcp->mptcp_ack_timer, -+ jiffies + (HZ / 20)); -+ goto out_unlock; -+ } -+ -+ if (sk->sk_state == TCP_CLOSE) -+ goto out_unlock; -+ if (!tcp_sk(sk)->mptcp->pre_established) -+ goto out_unlock; -+ -+ mptcp_ack_retransmit_timer(sk); -+ -+ sk_mem_reclaim(sk); -+ -+out_unlock: -+ bh_unlock_sock(meta_sk); -+ sock_put(sk); -+} -+
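The MP_JOIN ACK retransmit timer above uses the classic exponential backoff: double the RTO on every expiry, clamped at TCP_RTO_MAX. A sketch of that progression with illustrative HZ and initial-RTO values:

```c
#include <stdio.h>

#define HZ		1000		/* assumed: 1 jiffy == 1 ms */
#define TCP_RTO_MAX	(120 * HZ)

int main(void)
{
	unsigned int rto = HZ / 5;	/* hypothetical initial 200 ms RTO */
	int retries;

	for (retries = 1; retries <= 12; retries++) {
		/* cf. icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX) */
		rto <<= 1;
		if (rto > TCP_RTO_MAX)
			rto = TCP_RTO_MAX;
		printf("retry %2d: next timer in %u ms\n", retries, rto);
	}
	return 0;
}
```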
-+/* Similar to tcp_retransmit_skb -+ * -+ * The diff is that we handle the retransmission-stats (retrans_stamp) at the -+ * meta-level. -+ */ -+int mptcp_retransmit_skb(struct sock *meta_sk, struct sk_buff *skb) -+{ -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ struct sock *subsk; -+ unsigned int limit, mss_now; -+ int err = -1; -+ -+ WARN_ON(TCP_SKB_CB(skb)->sacked); -+ -+ /* Do not send more than we have queued. 1/4 is reserved for possible -+ * copying overhead: fragmentation, tunneling, mangling etc. -+ * -+ * This is a meta-retransmission thus we check on the meta-socket. -+ */ -+ if (refcount_read(&meta_sk->sk_wmem_alloc) > -+ min(meta_sk->sk_wmem_queued + (meta_sk->sk_wmem_queued >> 2), meta_sk->sk_sndbuf)) { -+ return -EAGAIN; -+ } -+ -+ /* We need to make sure that the retransmitted segment can be sent on a -+ * subflow right now. If it is too big, it needs to be fragmented. -+ */ -+ subsk = meta_tp->mpcb->sched_ops->get_subflow(meta_sk, skb, false); -+ if (!subsk) { -+ /* We want to increase icsk_retransmits, thus return 0, so that -+ * mptcp_meta_retransmit_timer enters the desired branch. -+ */ -+ err = 0; -+ goto failed; -+ } -+ mss_now = tcp_current_mss(subsk); -+ -+ /* If the segment was cloned (e.g. a meta retransmission), the header -+ * must be expanded/copied so that there is no corruption of TSO -+ * information. -+ */ -+ if (skb_unclone(skb, GFP_ATOMIC)) { -+ err = -ENOMEM; -+ goto failed; -+ } -+ -+ /* Must have been set by mptcp_write_xmit before */ -+ BUG_ON(!tcp_skb_pcount(skb)); -+ -+ limit = mss_now; -+ /* skb->len > mss_now is the equivalent of tso_segs > 1 in -+ * tcp_write_xmit. Otherwise split-point would return 0. -+ */ -+ if (skb->len > mss_now && !tcp_urg_mode(meta_tp)) -+ limit = tcp_mss_split_point(meta_sk, skb, mss_now, -+ UINT_MAX / mss_now, -+ TCP_NAGLE_OFF); -+ -+ limit = min(limit, tcp_wnd_end(meta_tp) - TCP_SKB_CB(skb)->seq); -+ -+ if (skb->len > limit && -+ unlikely(mptcp_fragment(meta_sk, TCP_FRAG_IN_RTX_QUEUE, skb, -+ limit, GFP_ATOMIC, 0))) -+ goto failed; -+ -+ if (!mptcp_skb_entail(subsk, skb, -1)) -+ goto failed; -+ -+ /* Update global TCP statistics. */ -+ MPTCP_INC_STATS(sock_net(meta_sk), MPTCP_MIB_RETRANSSEGS); -+ -+ /* Diff to tcp_retransmit_skb */ -+ -+ /* Save stamp of the first retransmit. */ -+ if (!meta_tp->retrans_stamp) { -+ tcp_mstamp_refresh(meta_tp); -+ meta_tp->retrans_stamp = tcp_time_stamp(meta_tp); -+ } -+ -+ __tcp_push_pending_frames(subsk, mss_now, TCP_NAGLE_PUSH); -+ tcp_update_skb_after_send(meta_sk, skb, meta_tp->tcp_wstamp_ns); -+ meta_tp->lsndtime = tcp_jiffies32; -+ -+ return 0; -+ -+failed: -+ NET_INC_STATS(sock_net(meta_sk), LINUX_MIB_TCPRETRANSFAIL); -+ return err; -+} -+ -+/* Similar to tcp_retransmit_timer -+ * -+ * The diff is that we have to handle retransmissions of the FAST_CLOSE-message -+ * and that we don't have an srtt estimation at the meta-level. -+ */ -+void mptcp_meta_retransmit_timer(struct sock *meta_sk) -+{ -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ struct mptcp_cb *mpcb = meta_tp->mpcb; -+ struct inet_connection_sock *meta_icsk = inet_csk(meta_sk); -+ int err; -+ -+ /* In fallback, retransmission is handled at the subflow-level */ -+ if (!meta_tp->packets_out || mpcb->infinite_mapping_snd) -+ return; -+ -+ WARN_ON(tcp_rtx_queue_empty(meta_sk)); -+ -+ if (!meta_tp->snd_wnd && !sock_flag(meta_sk, SOCK_DEAD) && -+ !((1 << meta_sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { -+ /* Receiver dastardly shrinks window. Our retransmits -+ * become zero probes, but we should not timeout this -+ * connection. If the socket is an orphan, time it out, -+ * we cannot allow such beasts to hang infinitely. -+ */ -+ struct inet_sock *meta_inet = inet_sk(meta_sk); -+ if (meta_sk->sk_family == AF_INET) { -+ net_dbg_ratelimited("MPTCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", -+ &meta_inet->inet_daddr, -+ ntohs(meta_inet->inet_dport), -+ meta_inet->inet_num, meta_tp->snd_una, -+ meta_tp->snd_nxt); -+ } -+#if IS_ENABLED(CONFIG_IPV6) -+ else if (meta_sk->sk_family == AF_INET6) { -+ net_dbg_ratelimited("MPTCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", -+ &meta_sk->sk_v6_daddr, -+ ntohs(meta_inet->inet_dport), -+ meta_inet->inet_num, meta_tp->snd_una, -+ meta_tp->snd_nxt); -+ } -+#endif -+ if (tcp_jiffies32 - meta_tp->rcv_tstamp > TCP_RTO_MAX) { -+ tcp_write_err(meta_sk); -+ return; -+ } -+ -+ mptcp_retransmit_skb(meta_sk, tcp_rtx_queue_head(meta_sk)); -+ goto out_reset_timer; -+ } -+ -+ if (tcp_write_timeout(meta_sk)) -+ return; -+ -+ if (meta_icsk->icsk_retransmits == 0) -+ NET_INC_STATS(sock_net(meta_sk), LINUX_MIB_TCPTIMEOUTS); -+ -+ meta_icsk->icsk_ca_state = TCP_CA_Loss; -+ -+ err = mptcp_retransmit_skb(meta_sk, tcp_rtx_queue_head(meta_sk)); -+ if (err > 0) { -+ /* Retransmission failed because of local congestion, -+ * do not backoff.
-+ */ -+ if (!meta_icsk->icsk_retransmits) -+ meta_icsk->icsk_retransmits = 1; -+ inet_csk_reset_xmit_timer(meta_sk, ICSK_TIME_RETRANS, -+ min(meta_icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL), -+ TCP_RTO_MAX); -+ return; -+ } -+ -+ /* Increase the timeout each time we retransmit. Note that -+ * we do not increase the rtt estimate. rto is initialized -+ * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests -+ * that doubling rto each time is the least we can get away with. -+ * In KA9Q, Karn uses this for the first few times, and then -+ * goes to quadratic. netBSD doubles, but only goes up to *64, -+ * and clamps at 1 to 64 sec afterwards. Note that 120 sec is -+ * defined in the protocol as the maximum possible RTT. I guess -+ * we'll have to use something other than TCP to talk to the -+ * University of Mars. -+ * -+ * PAWS allows us longer timeouts and large windows, so once -+ * implemented ftp to mars will work nicely. We will have to fix -+ * the 120 second clamps though! -+ */ -+ meta_icsk->icsk_backoff++; -+ meta_icsk->icsk_retransmits++; -+ -+out_reset_timer: -+ /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is -+ * used to reset timer, set to 0. Recalculate 'icsk_rto' as this -+ * might be increased if the stream oscillates between thin and thick, -+ * thus the old value might already be too high compared to the value -+ * set by 'tcp_set_rto' in tcp_input.c which resets the rto without -+ * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating -+ * exponential backoff behaviour to avoid continuing to hammer -+ * linear-timeout retransmissions into a black hole -+ */ -+ if (meta_sk->sk_state == TCP_ESTABLISHED && -+ (meta_tp->thin_lto || sock_net(meta_sk)->ipv4.sysctl_tcp_thin_linear_timeouts) && -+ tcp_stream_is_thin(meta_tp) && -+ meta_icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { -+ meta_icsk->icsk_backoff = 0; -+ /* We cannot do the same as in tcp_write_timer because the -+ * srtt is not set here. -+ */ -+ mptcp_set_rto(meta_sk); -+ } else { -+ /* Use normal (exponential) backoff */ -+ meta_icsk->icsk_rto = min(meta_icsk->icsk_rto << 1, TCP_RTO_MAX); -+ } -+ inet_csk_reset_xmit_timer(meta_sk, ICSK_TIME_RETRANS, meta_icsk->icsk_rto, TCP_RTO_MAX); -+ -+ return; -+} -+ -+void mptcp_sub_retransmit_timer(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ -+ tcp_retransmit_timer(sk); -+ -+ if (!tp->fastopen_rsk) { -+ mptcp_reinject_data(sk, 1); -+ mptcp_set_rto(sk); -+ } -+} -+ -+/* Modify values to an mptcp-level for the initial window of new subflows */ -+void mptcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, -+ __u32 *rcv_wnd, __u32 *window_clamp, -+ int wscale_ok, __u8 *rcv_wscale, -+ __u32 init_rcv_wnd) -+{ -+ const struct mptcp_cb *mpcb = tcp_sk(sk)->mpcb; -+ -+ *window_clamp = mpcb->orig_window_clamp; -+ __space = tcp_win_from_space(sk, mpcb->orig_sk_rcvbuf); -+ -+ tcp_select_initial_window(sk, __space, mss, rcv_wnd, window_clamp, -+ wscale_ok, rcv_wscale, init_rcv_wnd); -+} -+ -+static inline u64 mptcp_calc_rate(const struct sock *meta_sk, unsigned int mss) -+{ -+ struct mptcp_tcp_sock *mptcp; -+ u64 rate = 0; -+ -+ mptcp_for_each_sub(tcp_sk(meta_sk)->mpcb, mptcp) { -+ struct sock *sk = mptcp_to_sock(mptcp); -+ struct tcp_sock *tp = tcp_sk(sk); -+ int this_mss; -+ u64 this_rate; -+ -+ if (!mptcp_sk_can_send(sk)) -+ continue; -+ -+ /* Do not consider subflows without a RTT estimation yet -+ * otherwise this_rate >>> rate. -+ */ -+ if (unlikely(!tp->srtt_us)) -+ continue; -+ -+ this_mss = tcp_current_mss(sk); -+
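A worked instance of the overhead that the comment below formalizes, using its own 1428/1420 numbers as plain user-space arithmetic:

```c
/* ratio = mss / (ceil(mss / mss_sub) * mss_sub)
 * With mss = 1428 and mss_sub = 1420, each 1428-byte meta-segment is
 * split into a 1420-byte and an 8-byte subflow segment, so two cwnd
 * slots carry 1428 bytes where they could have carried 2840.
 */
#include <stdio.h>

int main(void)
{
	unsigned int mss = 1428, mss_sub = 1420;
	unsigned int chunks = (mss + mss_sub - 1) / mss_sub;	/* ceil -> 2 */
	double ratio = (double)mss / (chunks * mss_sub);

	printf("chunks=%u ratio=%.3f\n", chunks, ratio);	/* ~0.503 */
	return 0;
}
```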
-+ /* If this_mss is smaller than mss, it means that a segment will -+ * be split in two (or more) when pushed on this subflow. If -+ * you consider that mss = 1428 and this_mss = 1420 then two -+ * segments will be generated: a 1420-byte and 8-byte segment. -+ * The latter will introduce a large overhead as for a single -+ * data segment 2 slots will be used in the congestion window, -+ * thereby reducing the potential throughput of this subflow -+ * by ~2. Indeed, 1428 will be sent while 2840 could have been -+ * sent if mss == 1420, reducing the throughput by a factor of -+ * 2840 / 1428. -+ * -+ * The following algorithm takes this overhead into account -+ * when computing the potential throughput that MPTCP can -+ * achieve when generating mss-byte segments. -+ * -+ * The formula is the following: -+ * \sum_{\forall sub} ratio * \frac{mss * cwnd_sub}{rtt_sub} -+ * Where ratio is computed as follows: -+ * \frac{mss}{\ceil{mss / mss_sub} * mss_sub} -+ * -+ * ratio gives the reduction factor of the theoretical -+ * throughput a subflow can achieve if MPTCP uses a specific -+ * MSS value. -+ */ -+ this_rate = div64_u64((u64)mss * mss * (USEC_PER_SEC << 3) * -+ max(tp->snd_cwnd, tp->packets_out), -+ (u64)tp->srtt_us * -+ DIV_ROUND_UP(mss, this_mss) * this_mss); -+ rate += this_rate; -+ } -+ -+ return rate; -+} -+ -+static unsigned int __mptcp_current_mss(const struct sock *meta_sk) -+{ -+ struct mptcp_tcp_sock *mptcp; -+ unsigned int mss = 0; -+ u64 rate = 0; -+ -+ mptcp_for_each_sub(tcp_sk(meta_sk)->mpcb, mptcp) { -+ struct sock *sk = mptcp_to_sock(mptcp); -+ int this_mss; -+ u64 this_rate; -+ -+ if (!mptcp_sk_can_send(sk)) -+ continue; -+ -+ this_mss = tcp_current_mss(sk); -+ -+ /* Same mss values will produce the same throughput. */ -+ if (this_mss == mss) -+ continue; -+ -+ /* See whether using this mss value can theoretically improve -+ * the performance. -+ */ -+ this_rate = mptcp_calc_rate(meta_sk, this_mss); -+ if (this_rate >= rate) { -+ mss = this_mss; -+ rate = this_rate; -+ } -+ } -+ -+ return mss; -+} -+ -+unsigned int mptcp_current_mss(struct sock *meta_sk) -+{ -+ unsigned int mss = __mptcp_current_mss(meta_sk); -+ -+ /* If no subflow is available, we take a default-mss from the -+ * meta-socket. -+ */ -+ return !mss ?
tcp_current_mss(meta_sk) : mss; -+} -+ -+int mptcp_check_snd_buf(const struct tcp_sock *tp) -+{ -+ const struct mptcp_tcp_sock *mptcp; -+ u32 rtt_max = tp->srtt_us; -+ u64 bw_est; -+ -+ if (!tp->srtt_us) -+ return tp->reordering + 1; -+ -+ mptcp_for_each_sub(tp->mpcb, mptcp) { -+ const struct sock *sk = mptcp_to_sock(mptcp); -+ -+ if (!mptcp_sk_can_send(sk)) -+ continue; -+ -+ if (rtt_max < tcp_sk(sk)->srtt_us) -+ rtt_max = tcp_sk(sk)->srtt_us; -+ } -+ -+ bw_est = div64_u64(((u64)tp->snd_cwnd * rtt_max) << 16, -+ (u64)tp->srtt_us); -+ -+ return max_t(unsigned int, (u32)(bw_est >> 16), -+ tp->reordering + 1); -+} -+ -+unsigned int mptcp_xmit_size_goal(const struct sock *meta_sk, u32 mss_now, -+ int large_allowed) -+{ -+ u32 xmit_size_goal = 0; -+ -+ if (large_allowed && !tcp_sk(meta_sk)->mpcb->dss_csum) { -+ struct mptcp_tcp_sock *mptcp; -+ -+ mptcp_for_each_sub(tcp_sk(meta_sk)->mpcb, mptcp) { -+ struct sock *sk = mptcp_to_sock(mptcp); -+ int this_size_goal; -+ -+ if (!mptcp_sk_can_send(sk)) -+ continue; -+ -+ this_size_goal = tcp_xmit_size_goal(sk, mss_now, 1); -+ if (this_size_goal > xmit_size_goal) -+ xmit_size_goal = this_size_goal; -+ } -+ } -+ -+ return max(xmit_size_goal, mss_now); -+} -+ -diff --git a/net/mptcp/mptcp_pm.c b/net/mptcp/mptcp_pm.c -new file mode 100644 -index 000000000000..0e24e0aaa70a ---- /dev/null -+++ b/net/mptcp/mptcp_pm.c -@@ -0,0 +1,226 @@ -+/* -+ * MPTCP implementation - MPTCP-subflow-management -+ * -+ * Initial Design & Implementation: -+ * Sébastien Barré -+ * -+ * Current Maintainer & Author: -+ * Christoph Paasch -+ * -+ * Additional authors: -+ * Jaakko Korkeaniemi -+ * Gregory Detal -+ * Fabien Duchêne -+ * Andreas Seelinger -+ * Lavkesh Lahngir -+ * Andreas Ripke -+ * Vlad Dogaru -+ * Octavian Purdila -+ * John Ronan -+ * Catalin Nicutar -+ * Brandon Heller -+ * -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ */ -+ -+ -+#include -+#include -+ -+static DEFINE_SPINLOCK(mptcp_pm_list_lock); -+static LIST_HEAD(mptcp_pm_list); -+ -+static int mptcp_default_id(const struct sock *meta_sk, sa_family_t family, -+ union inet_addr *addr, bool *low_prio) -+{ -+ return 0; -+} -+ -+struct mptcp_pm_ops mptcp_pm_default = { -+ .get_local_id = mptcp_default_id, /* We do not care */ -+ .name = "default", -+ .owner = THIS_MODULE, -+}; -+ -+static struct mptcp_pm_ops *mptcp_pm_find(const char *name) -+{ -+ struct mptcp_pm_ops *e; -+ -+ list_for_each_entry_rcu(e, &mptcp_pm_list, list) { -+ if (strcmp(e->name, name) == 0) -+ return e; -+ } -+ -+ return NULL; -+} -+ -+int mptcp_register_path_manager(struct mptcp_pm_ops *pm) -+{ -+ int ret = 0; -+ -+ if (!pm->get_local_id) -+ return -EINVAL; -+ -+ spin_lock(&mptcp_pm_list_lock); -+ if (mptcp_pm_find(pm->name)) { -+ pr_notice("%s already registered\n", pm->name); -+ ret = -EEXIST; -+ } else { -+ list_add_tail_rcu(&pm->list, &mptcp_pm_list); -+ pr_info("%s registered\n", pm->name); -+ } -+ spin_unlock(&mptcp_pm_list_lock); -+ -+ return ret; -+} -+EXPORT_SYMBOL_GPL(mptcp_register_path_manager); -+ -+void mptcp_unregister_path_manager(struct mptcp_pm_ops *pm) -+{ -+ spin_lock(&mptcp_pm_list_lock); -+ list_del_rcu(&pm->list); -+ spin_unlock(&mptcp_pm_list_lock); -+ -+ /* Wait for outstanding readers to complete before the -+ * module gets removed entirely. 
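The registration machinery above (mptcp_register_path_manager and friends) follows the standard pluggable-ops pattern: entries on a list, looked up by name, with the head of the list acting as the default. A stripped-down user-space sketch of the same scheme; locking, RCU, and module refcounting are deliberately omitted:

```c
#include <stdio.h>
#include <string.h>

struct pm_ops {
	const char *name;
	struct pm_ops *next;
};

static struct pm_ops *pm_list;

static struct pm_ops *pm_find(const char *name)
{
	struct pm_ops *e;

	for (e = pm_list; e; e = e->next)
		if (!strcmp(e->name, name))
			return e;
	return NULL;
}

static int pm_register(struct pm_ops *pm)
{
	if (pm_find(pm->name))
		return -1;	/* -EEXIST in the kernel code */
	pm->next = pm_list;	/* the kernel uses list_add_tail_rcu() */
	pm_list = pm;
	return 0;
}

int main(void)
{
	static struct pm_ops def = { .name = "default" };

	pm_register(&def);
	printf("found: %s\n", pm_find("default")->name);
	return 0;
}
```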
-+ * -+ * A try_module_get() should fail by now as our module is -+ * in "going" state since no refs are held anymore and -+ * module_exit() handler being called. -+ */ -+ synchronize_rcu(); -+} -+EXPORT_SYMBOL_GPL(mptcp_unregister_path_manager); -+ -+void mptcp_get_default_path_manager(char *name) -+{ -+ struct mptcp_pm_ops *pm; -+ -+ BUG_ON(list_empty(&mptcp_pm_list)); -+ -+ rcu_read_lock(); -+ pm = list_entry(mptcp_pm_list.next, struct mptcp_pm_ops, list); -+ strncpy(name, pm->name, MPTCP_PM_NAME_MAX); -+ rcu_read_unlock(); -+} -+ -+int mptcp_set_default_path_manager(const char *name) -+{ -+ struct mptcp_pm_ops *pm; -+ int ret = -ENOENT; -+ -+ spin_lock(&mptcp_pm_list_lock); -+ pm = mptcp_pm_find(name); -+#ifdef CONFIG_MODULES -+ if (!pm && capable(CAP_NET_ADMIN)) { -+ spin_unlock(&mptcp_pm_list_lock); -+ -+ request_module("mptcp_%s", name); -+ spin_lock(&mptcp_pm_list_lock); -+ pm = mptcp_pm_find(name); -+ } -+#endif -+ -+ if (pm) { -+ list_move(&pm->list, &mptcp_pm_list); -+ ret = 0; -+ } else { -+ pr_info("%s is not available\n", name); -+ } -+ spin_unlock(&mptcp_pm_list_lock); -+ -+ return ret; -+} -+ -+static struct mptcp_pm_ops *__mptcp_pm_find_autoload(const char *name) -+{ -+ struct mptcp_pm_ops *pm = mptcp_pm_find(name); -+#ifdef CONFIG_MODULES -+ if (!pm && capable(CAP_NET_ADMIN)) { -+ rcu_read_unlock(); -+ request_module("mptcp_%s", name); -+ rcu_read_lock(); -+ pm = mptcp_pm_find(name); -+ } -+#endif -+ return pm; -+} -+ -+void mptcp_init_path_manager(struct mptcp_cb *mpcb) -+{ -+ struct mptcp_pm_ops *pm; -+ struct sock *meta_sk = mpcb->meta_sk; -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ -+ rcu_read_lock(); -+ /* if path manager was set using socket option */ -+ if (meta_tp->mptcp_pm_setsockopt) { -+ pm = __mptcp_pm_find_autoload(meta_tp->mptcp_pm_name); -+ if (pm && try_module_get(pm->owner)) { -+ mpcb->pm_ops = pm; -+ goto out; -+ } -+ } -+ -+ list_for_each_entry_rcu(pm, &mptcp_pm_list, list) { -+ if (try_module_get(pm->owner)) { -+ mpcb->pm_ops = pm; -+ break; -+ } -+ } -+out: -+ rcu_read_unlock(); -+} -+ -+/* Change path manager for socket */ -+int mptcp_set_path_manager(struct sock *sk, const char *name) -+{ -+ struct mptcp_pm_ops *pm; -+ int err = 0; -+ -+ rcu_read_lock(); -+ pm = __mptcp_pm_find_autoload(name); -+ -+ if (!pm) { -+ err = -ENOENT; -+ } else if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { -+ err = -EPERM; -+ } else { -+ strcpy(tcp_sk(sk)->mptcp_pm_name, name); -+ tcp_sk(sk)->mptcp_pm_setsockopt = 1; -+ } -+ rcu_read_unlock(); -+ -+ return err; -+} -+ -+/* Manage refcounts on socket close. */ -+void mptcp_cleanup_path_manager(struct mptcp_cb *mpcb) -+{ -+ module_put(mpcb->pm_ops->owner); -+} -+ -+/* Fallback to the default path-manager. */ -+void mptcp_fallback_default(struct mptcp_cb *mpcb) -+{ -+ struct mptcp_pm_ops *pm; -+ -+ mptcp_cleanup_path_manager(mpcb); -+ pm = mptcp_pm_find("default"); -+ -+ /* Cannot fail - it's the default module */ -+ try_module_get(pm->owner); -+ mpcb->pm_ops = pm; -+} -+EXPORT_SYMBOL_GPL(mptcp_fallback_default); -+ -+/* Set default value from kernel configuration at bootup */ -+static int __init mptcp_path_manager_default(void) -+{ -+ return mptcp_set_default_path_manager(CONFIG_DEFAULT_MPTCP_PM); -+} -+late_initcall(mptcp_path_manager_default); -diff --git a/net/mptcp/mptcp_redundant.c b/net/mptcp/mptcp_redundant.c -new file mode 100644 -index 000000000000..3db4e69acef2 ---- /dev/null -+++ b/net/mptcp/mptcp_redundant.c -@@ -0,0 +1,395 @@ -+/* -+ * MPTCP Scheduler to reduce latency and jitter. 
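The schedulers in this and the following files keep their per-subflow and per-connection state in a fixed scratch array (mptcp_sched[]) and overlay a private struct on it, with a compile-time size check at registration. A sketch of the idiom with invented sizes; the kernel uses MPTCP_SCHED_SIZE/MPTCP_SCHED_DATA_SIZE and BUILD_BUG_ON() instead:

```c
#include <assert.h>
#include <stdio.h>

#define SCHED_SCRATCH_SIZE 16	/* hypothetical scratch-area size */

struct subflow {
	unsigned char sched_scratch[SCHED_SCRATCH_SIZE];
};

struct sched_priv_demo {	/* hypothetical per-subflow state */
	unsigned int skb_start_seq;
	unsigned int skb_end_seq;
};

static struct sched_priv_demo *priv_of(struct subflow *sf)
{
	/* cf. BUILD_BUG_ON(sizeof(...) > MPTCP_SCHED_SIZE) in the kernel */
	static_assert(sizeof(struct sched_priv_demo) <= SCHED_SCRATCH_SIZE,
		      "private state must fit the scratch area");
	return (struct sched_priv_demo *)sf->sched_scratch;
}

int main(void)
{
	struct subflow sf = { 0 };

	priv_of(&sf)->skb_start_seq = 1000;
	printf("%u\n", priv_of(&sf)->skb_start_seq);
	return 0;
}
```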
-+ * -+ * This scheduler sends all packets redundantly on all available subflows. -+ * -+ * Initial Design & Implementation: -+ * Tobias Erbshaeusser -+ * Alexander Froemmgen -+ * -+ * Initial corrections & modifications: -+ * Christian Pinedo -+ * Igor Lopez -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ */ -+ -+#include <linux/module.h> -+#include <net/mptcp.h> -+ -+/* Struct to store the data of a single subflow */ -+struct redsched_priv { -+ /* The skb or NULL */ -+ struct sk_buff *skb; -+ /* Start/end sequence number of the skb. This number should be checked -+ * to be valid before the skb field is used -+ */ -+ u32 skb_start_seq; -+ u32 skb_end_seq; -+}; -+ -+/* Struct to store the data of the control block */ -+struct redsched_cb { -+ /* The next subflow where a skb should be sent or NULL */ -+ struct tcp_sock *next_subflow; -+}; -+ -+/* Returns the socket data from a given subflow socket */ -+static struct redsched_priv *redsched_get_priv(struct tcp_sock *tp) -+{ -+ return (struct redsched_priv *)&tp->mptcp->mptcp_sched[0]; -+} -+ -+/* Returns the control block data from a given meta socket */ -+static struct redsched_cb *redsched_get_cb(struct tcp_sock *tp) -+{ -+ return (struct redsched_cb *)&tp->mpcb->mptcp_sched[0]; -+} -+ -+static int redsched_get_active_valid_sks(struct sock *meta_sk) -+{ -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ struct mptcp_cb *mpcb = meta_tp->mpcb; -+ struct mptcp_tcp_sock *mptcp; -+ int active_valid_sks = 0; -+ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ struct sock *sk = mptcp_to_sock(mptcp); -+ -+ if (subflow_is_active((struct tcp_sock *)sk) && -+ !mptcp_is_def_unavailable(sk)) -+ active_valid_sks++; -+ } -+ -+ return active_valid_sks; -+} -+ -+static bool redsched_use_subflow(struct sock *meta_sk, -+ int active_valid_sks, -+ struct tcp_sock *tp, -+ struct sk_buff *skb) -+{ -+ if (!skb || !mptcp_is_available((struct sock *)tp, skb, false)) -+ return false; -+ -+ if (TCP_SKB_CB(skb)->path_mask != 0) -+ return subflow_is_active(tp); -+ -+ if (TCP_SKB_CB(skb)->path_mask == 0) { -+ if (active_valid_sks == -1) -+ active_valid_sks = redsched_get_active_valid_sks(meta_sk); -+ -+ if (subflow_is_backup(tp) && active_valid_sks > 0) -+ return false; -+ else -+ return true; -+ } -+ -+ return false; -+} -+ -+#define mptcp_entry_next_rcu(__mptcp) \ -+ hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \ -+ &(__mptcp)->node)), struct mptcp_tcp_sock, node) -+ -+static void redsched_update_next_subflow(struct tcp_sock *tp, -+ struct redsched_cb *red_cb) -+{ -+ struct mptcp_tcp_sock *mptcp = mptcp_entry_next_rcu(tp->mptcp); -+ -+ if (mptcp) -+ red_cb->next_subflow = mptcp->tp; -+ else -+ red_cb->next_subflow = NULL; -+} -+ -+static struct sock *red_get_available_subflow(struct sock *meta_sk, -+ struct sk_buff *skb, -+ bool zero_wnd_test) -+{ -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ struct mptcp_cb *mpcb = meta_tp->mpcb; -+ struct redsched_cb *red_cb = redsched_get_cb(meta_tp); -+ struct tcp_sock *first_tp = red_cb->next_subflow, *tp; -+ struct mptcp_tcp_sock *mptcp; -+ int found = 0; -+ -+ /* Answer data_fin on same subflow */ -+ if (meta_sk->sk_shutdown & RCV_SHUTDOWN && -+ skb && mptcp_is_data_fin(skb)) { -+ mptcp_for_each_sub(mpcb, mptcp) { -+ struct sock *sk = mptcp_to_sock(mptcp); -+ -+ if (tcp_sk(sk)->mptcp->path_index == -+ mpcb->dfin_path_index && -+
mptcp_is_available(sk, skb, zero_wnd_test)) -+ return sk; -+ } -+ } -+ -+ if (!first_tp && !hlist_empty(&mpcb->conn_list)) { -+ first_tp = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(&mpcb->conn_list)), -+ struct mptcp_tcp_sock, node)->tp; -+ } -+ tp = first_tp; -+ -+ /* still NULL (no subflow in conn_list?) */ -+ if (!first_tp) -+ return NULL; -+ -+ /* Search for a subflow to send it. -+ * -+ * We want to pick a subflow that is after 'first_tp' in the list of subflows. -+ * Thus, the first mptcp_for_each_sub()-loop tries to walk the list up -+ * to the subflow 'tp' and then checks whether any one of the remaining -+ * ones is eligible to send. -+ * The second mptcp_for_each_sub()-loop is then iterating from the -+ * beginning of the list up to 'first_tp'. -+ */ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ /* We go up to the subflow 'tp' and start from there */ -+ if (tp == mptcp->tp) -+ found = 1; -+ -+ if (!found) -+ continue; -+ tp = mptcp->tp; -+ -+ if (mptcp_is_available((struct sock *)tp, skb, -+ zero_wnd_test)) { -+ redsched_update_next_subflow(tp, red_cb); -+ return (struct sock *)tp; -+ } -+ } -+ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ tp = mptcp->tp; -+ -+ if (tp == first_tp) -+ break; -+ -+ if (mptcp_is_available((struct sock *)tp, skb, -+ zero_wnd_test)) { -+ redsched_update_next_subflow(tp, red_cb); -+ return (struct sock *)tp; -+ } -+ } -+ -+ /* No space */ -+ return NULL; -+} -+ -+/* Corrects the stored skb pointers if they are invalid */ -+static void redsched_correct_skb_pointers(struct sock *meta_sk, -+ struct redsched_priv *red_p) -+{ -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ -+ if (red_p->skb && -+ (!after(red_p->skb_start_seq, meta_tp->snd_una) || -+ after(red_p->skb_end_seq, meta_tp->snd_nxt))) -+ red_p->skb = NULL; -+} -+ -+/* Returns the next skb from the queue */ -+static struct sk_buff *redsched_next_skb_from_queue(struct sk_buff_head *queue, -+ struct sk_buff *previous, -+ struct sock *meta_sk) -+{ -+ struct sk_buff *skb; -+ -+ if (!previous) -+ return tcp_rtx_queue_head(meta_sk) ? : skb_peek(queue); -+ -+ /* sk_data->skb stores the last scheduled packet for this subflow. -+ * If sk_data->skb was scheduled but not sent (e.g., due to nagle), -+ * we have to schedule it again. -+ * -+ * For the redundant scheduler, there are two cases: -+ * 1. sk_data->skb was not sent on another subflow: -+ * we have to schedule it again to ensure that we do not -+ * skip this packet. -+ * 2. sk_data->skb was already sent on another subflow: -+ * with regard to the redundant semantic, we have to -+ * schedule it again. However, we keep it simple and ignore it, -+ * as it was already sent by another subflow. -+ * This might be changed in the future. -+ * -+ * For case 1, send_head is equal to previous, as only a single -+ * packet can be skipped. -+ */ -+ if (tcp_send_head(meta_sk) == previous) -+ return tcp_send_head(meta_sk); -+ -+ skb = skb_rb_next(previous); -+ if (skb) -+ return skb; -+ -+ return tcp_send_head(meta_sk); -+} -+
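The cursor walk used by red_get_available_subflow() above, and repeated in mptcp_red_next_segment() below, is a plain two-pass scan of a circular list: resume at a saved cursor, run to the end, then wrap to the head and stop before the cursor. A sketch with an array standing in for the subflow list:

```c
#include <stdbool.h>
#include <stdio.h>

#define N 4

static bool usable[N] = { false, false, true, false };

static int pick_from(int cursor)
{
	int i;

	/* First pass: from the cursor to the end of the list. */
	for (i = cursor; i < N; i++)
		if (usable[i])
			return i;
	/* Second pass: from the head back up to the cursor. */
	for (i = 0; i < cursor; i++)
		if (usable[i])
			return i;
	return -1;	/* nothing available */
}

int main(void)
{
	printf("picked subflow %d\n", pick_from(3));	/* wraps, picks 2 */
	return 0;
}
```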
-+static struct sk_buff *mptcp_red_next_segment(struct sock *meta_sk, -+ int *reinject, -+ struct sock **subsk, -+ unsigned int *limit) -+{ -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ struct mptcp_cb *mpcb = meta_tp->mpcb; -+ struct redsched_cb *red_cb = redsched_get_cb(meta_tp); -+ struct tcp_sock *first_tp = red_cb->next_subflow, *tp; -+ struct mptcp_tcp_sock *mptcp; -+ int active_valid_sks = -1; -+ struct sk_buff *skb; -+ int found = 0; -+ -+ /* As we set it, we have to reset it as well. */ -+ *limit = 0; -+ -+ if (skb_queue_empty(&mpcb->reinject_queue) && -+ skb_queue_empty(&meta_sk->sk_write_queue) && -+ tcp_rtx_queue_empty(meta_sk)) -+ /* Nothing to send */ -+ return NULL; -+ -+ /* First try reinjections */ -+ skb = skb_peek(&mpcb->reinject_queue); -+ if (skb) { -+ *subsk = get_available_subflow(meta_sk, skb, false); -+ if (!*subsk) -+ return NULL; -+ *reinject = 1; -+ return skb; -+ } -+ -+ /* Then try indistinctly redundant and normal skbs */ -+ -+ if (!first_tp && !hlist_empty(&mpcb->conn_list)) { -+ first_tp = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(&mpcb->conn_list)), -+ struct mptcp_tcp_sock, node)->tp; -+ } -+ -+ /* still NULL (no subflow in conn_list?) */ -+ if (!first_tp) -+ return NULL; -+ -+ tp = first_tp; -+ -+ *reinject = 0; -+ active_valid_sks = redsched_get_active_valid_sks(meta_sk); -+ -+ /* We want to pick a subflow that is after 'first_tp' in the list of subflows. -+ * Thus, the first mptcp_for_each_sub()-loop tries to walk the list up -+ * to the subflow 'tp' and then checks whether any one of the remaining -+ * ones can send a segment. -+ * The second mptcp_for_each_sub()-loop is then iterating from the -+ * beginning of the list up to 'first_tp'. -+ */ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ struct redsched_priv *red_p; -+ -+ if (tp == mptcp->tp) -+ found = 1; -+ -+ if (!found) -+ continue; -+ -+ tp = mptcp->tp; -+ -+ /* Correct the skb pointers of the current subflow */ -+ red_p = redsched_get_priv(tp); -+ redsched_correct_skb_pointers(meta_sk, red_p); -+ -+ skb = redsched_next_skb_from_queue(&meta_sk->sk_write_queue, -+ red_p->skb, meta_sk); -+ if (skb && redsched_use_subflow(meta_sk, active_valid_sks, tp, -+ skb)) { -+ red_p->skb = skb; -+ red_p->skb_start_seq = TCP_SKB_CB(skb)->seq; -+ red_p->skb_end_seq = TCP_SKB_CB(skb)->end_seq; -+ redsched_update_next_subflow(tp, red_cb); -+ *subsk = (struct sock *)tp; -+ -+ if (TCP_SKB_CB(skb)->path_mask) -+ *reinject = -1; -+ return skb; -+ } -+ } -+ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ struct redsched_priv *red_p; -+ -+ tp = mptcp->tp; -+ -+ if (tp == first_tp) -+ break; -+ -+ /* Correct the skb pointers of the current subflow */ -+ red_p = redsched_get_priv(tp); -+ redsched_correct_skb_pointers(meta_sk, red_p); -+ -+ skb = redsched_next_skb_from_queue(&meta_sk->sk_write_queue, -+ red_p->skb, meta_sk); -+ if (skb && redsched_use_subflow(meta_sk, active_valid_sks, tp, -+ skb)) { -+ red_p->skb = skb; -+ red_p->skb_start_seq = TCP_SKB_CB(skb)->seq; -+ red_p->skb_end_seq = TCP_SKB_CB(skb)->end_seq; -+ redsched_update_next_subflow(tp, red_cb); -+ *subsk = (struct sock *)tp; -+ -+ if (TCP_SKB_CB(skb)->path_mask) -+ *reinject = -1; -+ return skb; -+ } -+ } -+ -+ /* Nothing to send */ -+ return NULL; -+} -+ -+static void redsched_release(struct sock *sk) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct redsched_cb *red_cb = redsched_get_cb(tp); -+ -+ /* Check if the next subflow would be the released one.
If yes correct -+ * the pointer -+ */ -+ if (red_cb->next_subflow == tp) -+ redsched_update_next_subflow(tp, red_cb); -+} -+ -+static struct mptcp_sched_ops mptcp_sched_red = { -+ .get_subflow = red_get_available_subflow, -+ .next_segment = mptcp_red_next_segment, -+ .release = redsched_release, -+ .name = "redundant", -+ .owner = THIS_MODULE, -+}; -+ -+static int __init red_register(void) -+{ -+ BUILD_BUG_ON(sizeof(struct redsched_priv) > MPTCP_SCHED_SIZE); -+ BUILD_BUG_ON(sizeof(struct redsched_cb) > MPTCP_SCHED_DATA_SIZE); -+ -+ if (mptcp_register_scheduler(&mptcp_sched_red)) -+ return -1; -+ -+ return 0; -+} -+ -+static void red_unregister(void) -+{ -+ mptcp_unregister_scheduler(&mptcp_sched_red); -+} -+ -+module_init(red_register); -+module_exit(red_unregister); -+ -+MODULE_AUTHOR("Tobias Erbshaeusser, Alexander Froemmgen"); -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("REDUNDANT MPTCP"); -+MODULE_VERSION("0.90"); -diff --git a/net/mptcp/mptcp_rr.c b/net/mptcp/mptcp_rr.c -new file mode 100644 -index 000000000000..396e8aaf4762 ---- /dev/null -+++ b/net/mptcp/mptcp_rr.c -@@ -0,0 +1,309 @@ -+/* MPTCP Scheduler module selector. Highly inspired by tcp_cong.c */ -+ -+#include <linux/module.h> -+#include <net/mptcp.h> -+ -+static unsigned char num_segments __read_mostly = 1; -+module_param(num_segments, byte, 0644); -+MODULE_PARM_DESC(num_segments, "The number of consecutive segments that are part of a burst"); -+ -+static bool cwnd_limited __read_mostly = 1; -+module_param(cwnd_limited, bool, 0644); -+MODULE_PARM_DESC(cwnd_limited, "if set to 1, the scheduler tries to fill the congestion-window on all subflows"); -+ -+struct rrsched_priv { -+ unsigned char quota; -+}; -+ -+static struct rrsched_priv *rrsched_get_priv(const struct tcp_sock *tp) -+{ -+ return (struct rrsched_priv *)&tp->mptcp->mptcp_sched[0]; -+} -+ -+/* Is the sub-socket sk available to send the skb? */ -+static bool mptcp_rr_is_available(const struct sock *sk, const struct sk_buff *skb, -+ bool zero_wnd_test, bool cwnd_test) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ unsigned int space, in_flight; -+ -+ /* Set of states for which we are allowed to send data */ -+ if (!mptcp_sk_can_send(sk)) -+ return false; -+ -+ /* We do not send data on this subflow unless it is -+ * fully established, i.e. the 4th ack has been received. -+ */ -+ if (tp->mptcp->pre_established) -+ return false; -+ -+ if (tp->pf) -+ return false; -+ -+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) { -+ /* If SACK is disabled, and we got a loss, TCP does not exit -+ * the loss-state until something above high_seq has been acked. -+ * (see tcp_try_undo_recovery) -+ * -+ * high_seq is the snd_nxt at the moment of the RTO. As soon -+ * as we have an RTO, we won't push data on the subflow. -+ * Thus, snd_una can never go beyond high_seq. -+ */ -+ if (!tcp_is_reno(tp)) -+ return false; -+ else if (tp->snd_una != tp->high_seq) -+ return false; -+ } -+ -+ if (!tp->mptcp->fully_established) { -+ /* Make sure that we send in-order data */ -+ if (skb && tp->mptcp->second_packet && -+ tp->mptcp->last_end_data_seq != TCP_SKB_CB(skb)->seq) -+ return false; -+ } -+ -+ if (!cwnd_test) -+ goto zero_wnd_test; -+ -+ in_flight = tcp_packets_in_flight(tp); -+ /* Not even a single spot in the cwnd */ -+ if (in_flight >= tp->snd_cwnd) -+ return false; -+ -+ /* Now, check if what is queued in the subflow's send-queue -+ * already fills the cwnd.
-+ */ -+ space = (tp->snd_cwnd - in_flight) * tp->mss_cache; -+ -+ if (tp->write_seq - tp->snd_nxt > space) -+ return false; -+ -+zero_wnd_test: -+ if (zero_wnd_test && !before(tp->write_seq, tcp_wnd_end(tp))) -+ return false; -+ -+ return true; -+} -+ -+/* Are we not allowed to reinject this skb on tp? */ -+static int mptcp_rr_dont_reinject_skb(const struct tcp_sock *tp, const struct sk_buff *skb) -+{ -+ /* If the skb has already been enqueued in this sk, try to find -+ * another one. -+ */ -+ return skb && -+ /* Has the skb already been enqueued into this subsocket? */ -+ mptcp_pi_to_flag(tp->mptcp->path_index) & TCP_SKB_CB(skb)->path_mask; -+} -+ -+/* We just look for any subflow that is available */ -+static struct sock *rr_get_available_subflow(struct sock *meta_sk, -+ struct sk_buff *skb, -+ bool zero_wnd_test) -+{ -+ const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct sock *sk = NULL, *bestsk = NULL, *backupsk = NULL; -+ struct mptcp_tcp_sock *mptcp; -+ -+ /* Answer data_fin on same subflow!!! */ -+ if (meta_sk->sk_shutdown & RCV_SHUTDOWN && -+ skb && mptcp_is_data_fin(skb)) { -+ mptcp_for_each_sub(mpcb, mptcp) { -+ sk = mptcp_to_sock(mptcp); -+ if (tcp_sk(sk)->mptcp->path_index == mpcb->dfin_path_index && -+ mptcp_rr_is_available(sk, skb, zero_wnd_test, true)) -+ return sk; -+ } -+ } -+ -+ /* First, find the best subflow */ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ struct tcp_sock *tp; -+ -+ sk = mptcp_to_sock(mptcp); -+ tp = tcp_sk(sk); -+ -+ if (!mptcp_rr_is_available(sk, skb, zero_wnd_test, true)) -+ continue; -+ -+ if (mptcp_rr_dont_reinject_skb(tp, skb)) { -+ backupsk = sk; -+ continue; -+ } -+ -+ bestsk = sk; -+ } -+ -+ if (bestsk) { -+ sk = bestsk; -+ } else if (backupsk) { -+ /* It has been sent on all subflows once - let's give it a -+ * chance again by restarting its pathmask. -+ */ -+ if (skb) -+ TCP_SKB_CB(skb)->path_mask = 0; -+ sk = backupsk; -+ } -+ -+ return sk; -+} -+ -+/* Returns the next segment to be sent from the mptcp meta-queue. -+ * (chooses the reinject queue if any segment is waiting in it, otherwise, -+ * chooses the normal write queue). -+ * Sets *@reinject to 1 if the returned segment comes from the -+ * reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk, -+ * and sets it to -1 if it is a meta-level retransmission to optimize the -+ * receive-buffer. -+ */ -+static struct sk_buff *__mptcp_rr_next_segment(const struct sock *meta_sk, int *reinject) -+{ -+ const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct sk_buff *skb = NULL; -+ -+ *reinject = 0; -+ -+ /* If we are in fallback-mode, just take from the meta-send-queue */ -+ if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping) -+ return tcp_send_head(meta_sk); -+ -+ skb = skb_peek(&mpcb->reinject_queue); -+ -+ if (skb) -+ *reinject = 1; -+ else -+ skb = tcp_send_head(meta_sk); -+ return skb; -+} -+ -+static struct sk_buff *mptcp_rr_next_segment(struct sock *meta_sk, -+ int *reinject, -+ struct sock **subsk, -+ unsigned int *limit) -+{ -+ const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct sock *choose_sk = NULL; -+ struct mptcp_tcp_sock *mptcp; -+ struct sk_buff *skb = __mptcp_rr_next_segment(meta_sk, reinject); -+ unsigned char split = num_segments; -+ unsigned char iter = 0, full_subs = 0; -+ -+ /* As we set it, we have to reset it as well. 
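The segment selection that follows implements a quota-based round robin: each subflow may carry num_segments segments per round, a partially used subflow finishes its burst first, then an unused one starts a fresh burst, and once every usable subflow has exhausted its quota all quotas reset. A compact sketch of that walk (see the retry loop below), with arrays standing in for the subflow list:

```c
#include <stdio.h>

#define N		3
#define NUM_SEGMENTS	2	/* burst length, cf. the module parameter */

static unsigned char quota[N];

static int rr_pick(void)
{
	int i, full = 0;

	for (i = 0; i < N; i++) {
		if (quota[i] > 0 && quota[i] < NUM_SEGMENTS)
			return i;		/* finish the current burst */
		if (quota[i] >= NUM_SEGMENTS)
			full++;
	}
	for (i = 0; i < N; i++)
		if (quota[i] == 0)
			return i;		/* start a fresh burst */
	if (full == N) {			/* round over: reset and retry */
		for (i = 0; i < N; i++)
			quota[i] = 0;
		return rr_pick();
	}
	return -1;
}

int main(void)
{
	int k;

	for (k = 0; k < 8; k++) {
		int i = rr_pick();

		quota[i]++;
		printf("segment %d -> subflow %d\n", k, i);
	}
	return 0;
}
```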
*/ -+ *limit = 0; -+ -+ if (!skb) -+ return NULL; -+ -+ if (*reinject) { -+ *subsk = rr_get_available_subflow(meta_sk, skb, false); -+ if (!*subsk) -+ return NULL; -+ -+ return skb; -+ } -+ -+retry: -+ -+ /* First, we look for a subflow who is currently being used */ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ struct sock *sk_it = mptcp_to_sock(mptcp); -+ struct tcp_sock *tp_it = tcp_sk(sk_it); -+ struct rrsched_priv *rr_p = rrsched_get_priv(tp_it); -+ -+ if (!mptcp_rr_is_available(sk_it, skb, false, cwnd_limited)) -+ continue; -+ -+ iter++; -+ -+ /* Is this subflow currently being used? */ -+ if (rr_p->quota > 0 && rr_p->quota < num_segments) { -+ split = num_segments - rr_p->quota; -+ choose_sk = sk_it; -+ goto found; -+ } -+ -+ /* Or, it's totally unused */ -+ if (!rr_p->quota) { -+ split = num_segments; -+ choose_sk = sk_it; -+ } -+ -+ /* Or, it must then be fully used */ -+ if (rr_p->quota >= num_segments) -+ full_subs++; -+ } -+ -+ /* All considered subflows have a full quota, and we considered at -+ * least one. -+ */ -+ if (iter && iter == full_subs) { -+ /* So, we restart this round by setting quota to 0 and retry -+ * to find a subflow. -+ */ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ struct sock *sk_it = mptcp_to_sock(mptcp); -+ struct tcp_sock *tp_it = tcp_sk(sk_it); -+ struct rrsched_priv *rr_p = rrsched_get_priv(tp_it); -+ -+ if (!mptcp_rr_is_available(sk_it, skb, false, cwnd_limited)) -+ continue; -+ -+ rr_p->quota = 0; -+ } -+ -+ goto retry; -+ } -+ -+found: -+ if (choose_sk) { -+ unsigned int mss_now; -+ struct tcp_sock *choose_tp = tcp_sk(choose_sk); -+ struct rrsched_priv *rr_p = rrsched_get_priv(choose_tp); -+ -+ if (!mptcp_rr_is_available(choose_sk, skb, false, true)) -+ return NULL; -+ -+ *subsk = choose_sk; -+ mss_now = tcp_current_mss(*subsk); -+ *limit = split * mss_now; -+ -+ if (skb->len > mss_now) -+ rr_p->quota += DIV_ROUND_UP(skb->len, mss_now); -+ else -+ rr_p->quota++; -+ -+ return skb; -+ } -+ -+ return NULL; -+} -+ -+static struct mptcp_sched_ops mptcp_sched_rr = { -+ .get_subflow = rr_get_available_subflow, -+ .next_segment = mptcp_rr_next_segment, -+ .name = "roundrobin", -+ .owner = THIS_MODULE, -+}; -+ -+static int __init rr_register(void) -+{ -+ BUILD_BUG_ON(sizeof(struct rrsched_priv) > MPTCP_SCHED_SIZE); -+ -+ if (mptcp_register_scheduler(&mptcp_sched_rr)) -+ return -1; -+ -+ return 0; -+} -+ -+static void rr_unregister(void) -+{ -+ mptcp_unregister_scheduler(&mptcp_sched_rr); -+} -+ -+module_init(rr_register); -+module_exit(rr_unregister); -+ -+MODULE_AUTHOR("Christoph Paasch"); -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("ROUNDROBIN MPTCP"); -+MODULE_VERSION("0.89"); -diff --git a/net/mptcp/mptcp_sched.c b/net/mptcp/mptcp_sched.c -new file mode 100644 -index 000000000000..eed9bfb44b59 ---- /dev/null -+++ b/net/mptcp/mptcp_sched.c -@@ -0,0 +1,677 @@ -+/* MPTCP Scheduler module selector. Highly inspired by tcp_cong.c */ -+ -+#include -+#include -+#include -+#include -+ -+static DEFINE_SPINLOCK(mptcp_sched_list_lock); -+static LIST_HEAD(mptcp_sched_list); -+ -+struct defsched_priv { -+ u32 last_rbuf_opti; -+}; -+ -+static struct defsched_priv *defsched_get_priv(const struct tcp_sock *tp) -+{ -+ return (struct defsched_priv *)&tp->mptcp->mptcp_sched[0]; -+} -+ -+bool mptcp_is_def_unavailable(struct sock *sk) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ -+ /* Set of states for which we are allowed to send data */ -+ if (!mptcp_sk_can_send(sk)) -+ return true; -+ -+ /* We do not send data on this subflow unless it is -+ * fully established, i.e. 
the 4th ack has been received. -+ */ -+ if (tp->mptcp->pre_established) -+ return true; -+ -+ if (tp->pf) -+ return true; -+ -+ return false; -+} -+EXPORT_SYMBOL_GPL(mptcp_is_def_unavailable); -+ -+/* estimate number of segments currently in flight + unsent in -+ * the subflow socket. -+ */ -+static int mptcp_subflow_queued(struct sock *sk, u32 max_tso_segs) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ unsigned int queued; -+ -+ /* estimate the max number of segments in the write queue -+ * this is an overestimation, avoiding to iterate over the queue -+ * to make a better estimation. -+ * Having only one skb in the queue however might trigger tso deferral, -+ * delaying the sending of a tso segment in the hope that skb_entail -+ * will append more data to the skb soon. -+ * Therefore, in the case only one skb is in the queue, we choose to -+ * potentially underestimate, risking to schedule one skb too many onto -+ * the subflow rather than not enough. -+ */ -+ if (sk->sk_write_queue.qlen > 1) -+ queued = sk->sk_write_queue.qlen * max_tso_segs; -+ else -+ queued = sk->sk_write_queue.qlen; -+ -+ return queued + tcp_packets_in_flight(tp); -+} -+ -+static bool mptcp_is_temp_unavailable(struct sock *sk, -+ const struct sk_buff *skb, -+ bool zero_wnd_test) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ unsigned int mss_now; -+ -+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) { -+ /* If SACK is disabled, and we got a loss, TCP does not exit -+ * the loss-state until something above high_seq has been -+ * acked. (see tcp_try_undo_recovery) -+ * -+ * high_seq is the snd_nxt at the moment of the RTO. As soon -+ * as we have an RTO, we won't push data on the subflow. -+ * Thus, snd_una can never go beyond high_seq. -+ */ -+ if (!tcp_is_reno(tp)) -+ return true; -+ else if (tp->snd_una != tp->high_seq) -+ return true; -+ } -+ -+ if (!tp->mptcp->fully_established) { -+ /* Make sure that we send in-order data */ -+ if (skb && tp->mptcp->second_packet && -+ tp->mptcp->last_end_data_seq != TCP_SKB_CB(skb)->seq) -+ return true; -+ } -+ -+ mss_now = tcp_current_mss(sk); -+ -+ /* Not even a single spot in the cwnd */ -+ if (mptcp_subflow_queued(sk, tcp_tso_segs(sk, mss_now)) >= tp->snd_cwnd) -+ return true; -+ -+ if (zero_wnd_test && !before(tp->write_seq, tcp_wnd_end(tp))) -+ return true; -+ -+ /* Don't send on this subflow if we bypass the allowed send-window at -+ * the per-subflow level. Similar to tcp_snd_wnd_test, but manually -+ * calculated end_seq (because here at this point end_seq is still at -+ * the meta-level). -+ */ -+ if (skb && zero_wnd_test && -+ after(tp->write_seq + min(skb->len, mss_now), tcp_wnd_end(tp))) -+ return true; -+ -+ return false; -+} -+ -+/* Is the sub-socket sk available to send the skb? */ -+bool mptcp_is_available(struct sock *sk, const struct sk_buff *skb, -+ bool zero_wnd_test) -+{ -+ return !mptcp_is_def_unavailable(sk) && -+ !mptcp_is_temp_unavailable(sk, skb, zero_wnd_test); -+} -+EXPORT_SYMBOL_GPL(mptcp_is_available); -+ -+/* Are we not allowed to reinject this skb on tp? */ -+static int mptcp_dont_reinject_skb(const struct tcp_sock *tp, const struct sk_buff *skb) -+{ -+ /* If the skb has already been enqueued in this sk, try to find -+ * another one. -+ */ -+ return skb && -+ /* Has the skb already been enqueued into this subsocket? 
*/ -+ mptcp_pi_to_flag(tp->mptcp->path_index) & TCP_SKB_CB(skb)->path_mask; -+} -+ -+bool subflow_is_backup(const struct tcp_sock *tp) -+{ -+ return tp->mptcp->rcv_low_prio || tp->mptcp->low_prio; -+} -+EXPORT_SYMBOL_GPL(subflow_is_backup); -+ -+bool subflow_is_active(const struct tcp_sock *tp) -+{ -+ return !tp->mptcp->rcv_low_prio && !tp->mptcp->low_prio; -+} -+EXPORT_SYMBOL_GPL(subflow_is_active); -+ -+/* Generic function to iterate over used and unused subflows and to select the -+ * best one -+ */ -+static struct sock -+*get_subflow_from_selectors(struct mptcp_cb *mpcb, struct sk_buff *skb, -+ bool (*selector)(const struct tcp_sock *), -+ bool zero_wnd_test, bool *force) -+{ -+ struct sock *bestsk = NULL; -+ u32 min_srtt = 0xffffffff; -+ bool found_unused = false; -+ bool found_unused_una = false; -+ struct mptcp_tcp_sock *mptcp; -+ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ struct sock *sk = mptcp_to_sock(mptcp); -+ struct tcp_sock *tp = tcp_sk(sk); -+ bool unused = false; -+ -+ /* First, we choose only the wanted sks */ -+ if (!(*selector)(tp)) -+ continue; -+ -+ if (!mptcp_dont_reinject_skb(tp, skb)) -+ unused = true; -+ else if (found_unused) -+ /* If a unused sk was found previously, we continue - -+ * no need to check used sks anymore. -+ */ -+ continue; -+ -+ if (mptcp_is_def_unavailable(sk)) -+ continue; -+ -+ if (mptcp_is_temp_unavailable(sk, skb, zero_wnd_test)) { -+ if (unused) -+ found_unused_una = true; -+ continue; -+ } -+ -+ if (unused) { -+ if (!found_unused) { -+ /* It's the first time we encounter an unused -+ * sk - thus we reset the bestsk (which might -+ * have been set to a used sk). -+ */ -+ min_srtt = 0xffffffff; -+ bestsk = NULL; -+ } -+ found_unused = true; -+ } -+ -+ if (tp->srtt_us < min_srtt) { -+ min_srtt = tp->srtt_us; -+ bestsk = sk; -+ } -+ } -+ -+ if (bestsk) { -+ /* The force variable is used to mark the returned sk as -+ * previously used or not-used. -+ */ -+ if (found_unused) -+ *force = true; -+ else -+ *force = false; -+ } else { -+ /* The force variable is used to mark if there are temporally -+ * unavailable not-used sks. -+ */ -+ if (found_unused_una) -+ *force = true; -+ else -+ *force = false; -+ } -+ -+ return bestsk; -+} -+ -+/* This is the scheduler. This function decides on which flow to send -+ * a given MSS. If all subflows are found to be busy, NULL is returned -+ * The flow is selected based on the shortest RTT. -+ * If all paths have full cong windows, we simply return NULL. -+ * -+ * Additionally, this function is aware of the backup-subflows. -+ */ -+struct sock *get_available_subflow(struct sock *meta_sk, struct sk_buff *skb, -+ bool zero_wnd_test) -+{ -+ struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct sock *sk; -+ bool looping = false, force; -+ -+ /* Answer data_fin on same subflow!!! 
*/ -+ if (meta_sk->sk_shutdown & RCV_SHUTDOWN && -+ skb && mptcp_is_data_fin(skb)) { -+ struct mptcp_tcp_sock *mptcp; -+ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ sk = mptcp_to_sock(mptcp); -+ -+ if (tcp_sk(sk)->mptcp->path_index == mpcb->dfin_path_index && -+ mptcp_is_available(sk, skb, zero_wnd_test)) -+ return sk; -+ } -+ } -+ -+ /* Find the best subflow */ -+restart: -+ sk = get_subflow_from_selectors(mpcb, skb, &subflow_is_active, -+ zero_wnd_test, &force); -+ if (force) -+ /* one unused active sk or one NULL sk when there is at least -+ * one temporally unavailable unused active sk -+ */ -+ return sk; -+ -+ sk = get_subflow_from_selectors(mpcb, skb, &subflow_is_backup, -+ zero_wnd_test, &force); -+ if (!force && skb) { -+ /* one used backup sk or one NULL sk where there is no one -+ * temporally unavailable unused backup sk -+ * -+ * the skb passed through all the available active and backups -+ * sks, so clean the path mask -+ */ -+ TCP_SKB_CB(skb)->path_mask = 0; -+ -+ if (!looping) { -+ looping = true; -+ goto restart; -+ } -+ } -+ return sk; -+} -+EXPORT_SYMBOL_GPL(get_available_subflow); -+ -+static struct sk_buff *mptcp_rcv_buf_optimization(struct sock *sk, int penal) -+{ -+ struct sock *meta_sk; -+ const struct tcp_sock *tp = tcp_sk(sk); -+ struct mptcp_tcp_sock *mptcp; -+ struct sk_buff *skb_head; -+ struct defsched_priv *def_p = defsched_get_priv(tp); -+ -+ meta_sk = mptcp_meta_sk(sk); -+ skb_head = tcp_rtx_queue_head(meta_sk); -+ -+ if (!skb_head) -+ return NULL; -+ -+ /* If penalization is optional (coming from mptcp_next_segment() and -+ * We are not send-buffer-limited we do not penalize. The retransmission -+ * is just an optimization to fix the idle-time due to the delay before -+ * we wake up the application. -+ */ -+ if (!penal && sk_stream_memory_free(meta_sk)) -+ goto retrans; -+ -+ /* Only penalize again after an RTT has elapsed */ -+ if (tcp_jiffies32 - def_p->last_rbuf_opti < usecs_to_jiffies(tp->srtt_us >> 3)) -+ goto retrans; -+ -+ /* Half the cwnd of the slow flows */ -+ mptcp_for_each_sub(tp->mpcb, mptcp) { -+ struct tcp_sock *tp_it = mptcp->tp; -+ -+ if (tp_it != tp && -+ TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) { -+ if (tp->srtt_us < tp_it->srtt_us && inet_csk((struct sock *)tp_it)->icsk_ca_state == TCP_CA_Open) { -+ u32 prior_cwnd = tp_it->snd_cwnd; -+ -+ tp_it->snd_cwnd = max(tp_it->snd_cwnd >> 1U, 1U); -+ -+ /* If in slow start, do not reduce the ssthresh */ -+ if (prior_cwnd >= tp_it->snd_ssthresh) -+ tp_it->snd_ssthresh = max(tp_it->snd_ssthresh >> 1U, 2U); -+ -+ def_p->last_rbuf_opti = tcp_jiffies32; -+ } -+ } -+ } -+ -+retrans: -+ -+ /* Segment not yet injected into this path? Take it!!! */ -+ if (!(TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp->mptcp->path_index))) { -+ bool do_retrans = false; -+ mptcp_for_each_sub(tp->mpcb, mptcp) { -+ struct tcp_sock *tp_it = mptcp->tp; -+ -+ if (tp_it != tp && -+ TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) { -+ if (tp_it->snd_cwnd <= 4) { -+ do_retrans = true; -+ break; -+ } -+ -+ if (4 * tp->srtt_us >= tp_it->srtt_us) { -+ do_retrans = false; -+ break; -+ } else { -+ do_retrans = true; -+ } -+ } -+ } -+ -+ if (do_retrans && mptcp_is_available(sk, skb_head, false)) { -+ trace_mptcp_retransmit(sk, skb_head); -+ return skb_head; -+ } -+ } -+ return NULL; -+} -+ -+/* Returns the next segment to be sent from the mptcp meta-queue. 
-+ * (chooses the reinject queue if any segment is waiting in it, otherwise, -+ * chooses the normal write queue). -+ * Sets *@reinject to 1 if the returned segment comes from the -+ * reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk, -+ * and sets it to -1 if it is a meta-level retransmission to optimize the -+ * receive-buffer. -+ */ -+static struct sk_buff *__mptcp_next_segment(struct sock *meta_sk, int *reinject) -+{ -+ const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb; -+ struct sk_buff *skb = NULL; -+ -+ *reinject = 0; -+ -+ /* If we are in fallback-mode, just take from the meta-send-queue */ -+ if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping) -+ return tcp_send_head(meta_sk); -+ -+ skb = skb_peek(&mpcb->reinject_queue); -+ -+ if (skb) { -+ *reinject = 1; -+ } else { -+ skb = tcp_send_head(meta_sk); -+ -+ if (!skb && meta_sk->sk_socket && -+ test_bit(SOCK_NOSPACE, &meta_sk->sk_socket->flags) && -+ sk_stream_wspace(meta_sk) < sk_stream_min_wspace(meta_sk)) { -+ struct sock *subsk; -+ -+ /* meta is send buffer limited */ -+ tcp_chrono_start(meta_sk, TCP_CHRONO_SNDBUF_LIMITED); -+ -+ subsk = mpcb->sched_ops->get_subflow(meta_sk, -+ NULL, false); -+ if (!subsk) -+ return NULL; -+ -+ skb = mptcp_rcv_buf_optimization(subsk, 0); -+ if (skb) -+ *reinject = -1; -+ else -+ tcp_chrono_start(subsk, -+ TCP_CHRONO_SNDBUF_LIMITED); -+ } -+ } -+ return skb; -+} -+ -+struct sk_buff *mptcp_next_segment(struct sock *meta_sk, -+ int *reinject, -+ struct sock **subsk, -+ unsigned int *limit) -+{ -+ struct sk_buff *skb = __mptcp_next_segment(meta_sk, reinject); -+ unsigned int mss_now; -+ u32 max_len, gso_max_segs, max_segs, max_tso_segs, window; -+ struct tcp_sock *subtp; -+ int queued; -+ -+ /* As we set it, we have to reset it as well. */ -+ *limit = 0; -+ -+ if (!skb) -+ return NULL; -+ -+ *subsk = tcp_sk(meta_sk)->mpcb->sched_ops->get_subflow(meta_sk, skb, false); -+ if (!*subsk) -+ return NULL; -+ -+ subtp = tcp_sk(*subsk); -+ mss_now = tcp_current_mss(*subsk); -+ -+ if (!*reinject && unlikely(!tcp_snd_wnd_test(tcp_sk(meta_sk), skb, mss_now))) { -+ /* an active flow is selected, but segment will not be sent due -+ * to no more space in send window -+ * this means the meta is receive window limited -+ * the subflow might also be, if we have nothing to reinject -+ */ -+ tcp_chrono_start(meta_sk, TCP_CHRONO_RWND_LIMITED); -+ skb = mptcp_rcv_buf_optimization(*subsk, 1); -+ if (skb) -+ *reinject = -1; -+ else -+ return NULL; -+ } -+ -+ if (!*reinject) { -+ /* this will stop any other chronos on the meta */ -+ tcp_chrono_start(meta_sk, TCP_CHRONO_BUSY); -+ } -+ -+ /* No splitting required, as we will only send one single segment */ -+ if (skb->len <= mss_now) -+ return skb; -+ -+ max_tso_segs = tcp_tso_segs(*subsk, tcp_current_mss(*subsk)); -+ queued = mptcp_subflow_queued(*subsk, max_tso_segs); -+ -+ /* this condition should already have been established in -+ * mptcp_is_temp_unavailable when selecting available flows -+ */ -+ WARN_ONCE(subtp->snd_cwnd <= queued, "Selected subflow no cwnd room"); -+ -+ gso_max_segs = (*subsk)->sk_gso_max_segs; -+ if (!gso_max_segs) /* No gso supported on the subflow's NIC */ -+ gso_max_segs = 1; -+ -+ max_segs = min_t(unsigned int, subtp->snd_cwnd - queued, gso_max_segs); -+ if (!max_segs) -+ return NULL; -+ -+ /* if there is room for a segment, schedule up to a complete TSO -+ * segment to avoid TSO splitting. Even if it is more than allowed by -+ * the congestion window. 
-+ */ -+ max_segs = max_t(unsigned int, max_tso_segs, max_segs); -+ -+ max_len = min(mss_now * max_segs, skb->len); -+ -+ window = tcp_wnd_end(subtp) - subtp->write_seq; -+ -+ /* max_len now also respects the announced receive-window */ -+ max_len = min(max_len, window); -+ -+ *limit = max_len; -+ -+ return skb; -+} -+EXPORT_SYMBOL_GPL(mptcp_next_segment); -+ -+static void defsched_init(struct sock *sk) -+{ -+ struct defsched_priv *def_p = defsched_get_priv(tcp_sk(sk)); -+ -+ def_p->last_rbuf_opti = tcp_jiffies32; -+} -+ -+struct mptcp_sched_ops mptcp_sched_default = { -+ .get_subflow = get_available_subflow, -+ .next_segment = mptcp_next_segment, -+ .init = defsched_init, -+ .name = "default", -+ .owner = THIS_MODULE, -+}; -+ -+static struct mptcp_sched_ops *mptcp_sched_find(const char *name) -+{ -+ struct mptcp_sched_ops *e; -+ -+ list_for_each_entry_rcu(e, &mptcp_sched_list, list) { -+ if (strcmp(e->name, name) == 0) -+ return e; -+ } -+ -+ return NULL; -+} -+ -+int mptcp_register_scheduler(struct mptcp_sched_ops *sched) -+{ -+ int ret = 0; -+ -+ if (!sched->get_subflow || !sched->next_segment) -+ return -EINVAL; -+ -+ spin_lock(&mptcp_sched_list_lock); -+ if (mptcp_sched_find(sched->name)) { -+ pr_notice("%s already registered\n", sched->name); -+ ret = -EEXIST; -+ } else { -+ list_add_tail_rcu(&sched->list, &mptcp_sched_list); -+ pr_info("%s registered\n", sched->name); -+ } -+ spin_unlock(&mptcp_sched_list_lock); -+ -+ return ret; -+} -+EXPORT_SYMBOL_GPL(mptcp_register_scheduler); -+ -+void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched) -+{ -+ spin_lock(&mptcp_sched_list_lock); -+ list_del_rcu(&sched->list); -+ spin_unlock(&mptcp_sched_list_lock); -+ -+ /* Wait for outstanding readers to complete before the -+ * module gets removed entirely. -+ * -+ * A try_module_get() should fail by now as our module is -+ * in "going" state since no refs are held anymore and -+ * module_exit() handler being called. 
-+ */ -+ synchronize_rcu(); -+} -+EXPORT_SYMBOL_GPL(mptcp_unregister_scheduler); -+ -+void mptcp_get_default_scheduler(char *name) -+{ -+ struct mptcp_sched_ops *sched; -+ -+ BUG_ON(list_empty(&mptcp_sched_list)); -+ -+ rcu_read_lock(); -+ sched = list_entry(mptcp_sched_list.next, struct mptcp_sched_ops, list); -+ strncpy(name, sched->name, MPTCP_SCHED_NAME_MAX); -+ rcu_read_unlock(); -+} -+ -+int mptcp_set_default_scheduler(const char *name) -+{ -+ struct mptcp_sched_ops *sched; -+ int ret = -ENOENT; -+ -+ spin_lock(&mptcp_sched_list_lock); -+ sched = mptcp_sched_find(name); -+#ifdef CONFIG_MODULES -+ if (!sched && capable(CAP_NET_ADMIN)) { -+ spin_unlock(&mptcp_sched_list_lock); -+ -+ request_module("mptcp_%s", name); -+ spin_lock(&mptcp_sched_list_lock); -+ sched = mptcp_sched_find(name); -+ } -+#endif -+ -+ if (sched) { -+ list_move(&sched->list, &mptcp_sched_list); -+ ret = 0; -+ } else { -+ pr_info("%s is not available\n", name); -+ } -+ spin_unlock(&mptcp_sched_list_lock); -+ -+ return ret; -+} -+ -+/* Must be called with rcu lock held */ -+static struct mptcp_sched_ops *__mptcp_sched_find_autoload(const char *name) -+{ -+ struct mptcp_sched_ops *sched = mptcp_sched_find(name); -+#ifdef CONFIG_MODULES -+ if (!sched && capable(CAP_NET_ADMIN)) { -+ rcu_read_unlock(); -+ request_module("mptcp_%s", name); -+ rcu_read_lock(); -+ sched = mptcp_sched_find(name); -+ } -+#endif -+ return sched; -+} -+ -+void mptcp_init_scheduler(struct mptcp_cb *mpcb) -+{ -+ struct mptcp_sched_ops *sched; -+ struct sock *meta_sk = mpcb->meta_sk; -+ struct tcp_sock *meta_tp = tcp_sk(meta_sk); -+ -+ rcu_read_lock(); -+ /* if scheduler was set using socket option */ -+ if (meta_tp->mptcp_sched_setsockopt) { -+ sched = __mptcp_sched_find_autoload(meta_tp->mptcp_sched_name); -+ if (sched && try_module_get(sched->owner)) { -+ mpcb->sched_ops = sched; -+ goto out; -+ } -+ } -+ -+ list_for_each_entry_rcu(sched, &mptcp_sched_list, list) { -+ if (try_module_get(sched->owner)) { -+ mpcb->sched_ops = sched; -+ break; -+ } -+ } -+out: -+ rcu_read_unlock(); -+} -+ -+/* Change scheduler for socket */ -+int mptcp_set_scheduler(struct sock *sk, const char *name) -+{ -+ struct mptcp_sched_ops *sched; -+ int err = 0; -+ -+ rcu_read_lock(); -+ sched = __mptcp_sched_find_autoload(name); -+ -+ if (!sched) { -+ err = -ENOENT; -+ } else if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { -+ err = -EPERM; -+ } else { -+ strcpy(tcp_sk(sk)->mptcp_sched_name, name); -+ tcp_sk(sk)->mptcp_sched_setsockopt = 1; -+ } -+ rcu_read_unlock(); -+ -+ return err; -+} -+ -+/* Manage refcounts on socket close. 
*/ -+void mptcp_cleanup_scheduler(struct mptcp_cb *mpcb) -+{ -+ module_put(mpcb->sched_ops->owner); -+} -+ -+/* Set default value from kernel configuration at bootup */ -+static int __init mptcp_scheduler_default(void) -+{ -+ BUILD_BUG_ON(sizeof(struct defsched_priv) > MPTCP_SCHED_SIZE); -+ -+ return mptcp_set_default_scheduler(CONFIG_DEFAULT_MPTCP_SCHED); -+} -+late_initcall(mptcp_scheduler_default); -diff --git a/net/mptcp/mptcp_wvegas.c b/net/mptcp/mptcp_wvegas.c -new file mode 100644 -index 000000000000..787ddaab98a2 ---- /dev/null -+++ b/net/mptcp/mptcp_wvegas.c -@@ -0,0 +1,271 @@ -+/* -+ * MPTCP implementation - WEIGHTED VEGAS -+ * -+ * Algorithm design: -+ * Yu Cao -+ * Mingwei Xu -+ * Xiaoming Fu -+ * -+ * Implementation: -+ * Yu Cao -+ * Enhuan Dong -+ * -+ * Ported to the official MPTCP-kernel: -+ * Christoph Paasch -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+static int initial_alpha = 2; -+static int total_alpha = 10; -+static int gamma = 1; -+ -+module_param(initial_alpha, int, 0644); -+MODULE_PARM_DESC(initial_alpha, "initial alpha for all subflows"); -+module_param(total_alpha, int, 0644); -+MODULE_PARM_DESC(total_alpha, "total alpha for all subflows"); -+module_param(gamma, int, 0644); -+MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)"); -+ -+#define MPTCP_WVEGAS_SCALE 16 -+ -+/* wVegas variables */ -+struct wvegas { -+ u32 beg_snd_nxt; /* right edge during last RTT */ -+ u8 doing_wvegas_now;/* if true, do wvegas for this RTT */ -+ -+ u16 cnt_rtt; /* # of RTTs measured within last RTT */ -+ u32 sampled_rtt; /* cumulative RTTs measured within last RTT (in usec) */ -+ u32 base_rtt; /* the min of all wVegas RTT measurements seen (in usec) */ -+ -+ u64 instant_rate; /* cwnd / srtt_us, unit: pkts/us * 2^16 */ -+ u64 weight; /* the ratio of subflow's rate to the total rate, * 2^16 */ -+ int alpha; /* alpha for each subflows */ -+ -+ u32 queue_delay; /* queue delay*/ -+}; -+ -+ -+static inline u64 mptcp_wvegas_scale(u32 val, int scale) -+{ -+ return (u64) val << scale; -+} -+ -+static void wvegas_enable(const struct sock *sk) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ struct wvegas *wvegas = inet_csk_ca(sk); -+ -+ wvegas->doing_wvegas_now = 1; -+ -+ wvegas->beg_snd_nxt = tp->snd_nxt; -+ -+ wvegas->cnt_rtt = 0; -+ wvegas->sampled_rtt = 0; -+ -+ wvegas->instant_rate = 0; -+ wvegas->alpha = initial_alpha; -+ wvegas->weight = mptcp_wvegas_scale(1, MPTCP_WVEGAS_SCALE); -+ -+ wvegas->queue_delay = 0; -+} -+ -+static inline void wvegas_disable(const struct sock *sk) -+{ -+ struct wvegas *wvegas = inet_csk_ca(sk); -+ -+ wvegas->doing_wvegas_now = 0; -+} -+ -+static void mptcp_wvegas_init(struct sock *sk) -+{ -+ struct wvegas *wvegas = inet_csk_ca(sk); -+ -+ wvegas->base_rtt = 0x7fffffff; -+ wvegas_enable(sk); -+} -+ -+static inline u64 mptcp_wvegas_rate(u32 cwnd, u32 rtt_us) -+{ -+ return div_u64(mptcp_wvegas_scale(cwnd, MPTCP_WVEGAS_SCALE), rtt_us); -+} -+ -+static void mptcp_wvegas_pkts_acked(struct sock *sk, -+ const struct ack_sample *sample) -+{ -+ struct wvegas *wvegas = inet_csk_ca(sk); -+ u32 vrtt; -+ -+ if (sample->rtt_us < 0) -+ return; -+ -+ vrtt = sample->rtt_us + 1; -+ -+ if (vrtt < wvegas->base_rtt) -+ wvegas->base_rtt = vrtt; -+ -+ wvegas->sampled_rtt += vrtt; -+ 
wvegas->cnt_rtt++; -+} -+ -+static void mptcp_wvegas_state(struct sock *sk, u8 ca_state) -+{ -+ if (ca_state == TCP_CA_Open) -+ wvegas_enable(sk); -+ else -+ wvegas_disable(sk); -+} -+ -+static void mptcp_wvegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) -+{ -+ if (event == CA_EVENT_CWND_RESTART) { -+ mptcp_wvegas_init(sk); -+ } else if (event == CA_EVENT_LOSS) { -+ struct wvegas *wvegas = inet_csk_ca(sk); -+ wvegas->instant_rate = 0; -+ } -+} -+ -+static inline u32 mptcp_wvegas_ssthresh(const struct tcp_sock *tp) -+{ -+ return min(tp->snd_ssthresh, tp->snd_cwnd); -+} -+ -+static u64 mptcp_wvegas_weight(const struct mptcp_cb *mpcb, const struct sock *sk) -+{ -+ u64 total_rate = 0; -+ const struct wvegas *wvegas = inet_csk_ca(sk); -+ struct mptcp_tcp_sock *mptcp; -+ -+ if (!mpcb) -+ return wvegas->weight; -+ -+ -+ mptcp_for_each_sub(mpcb, mptcp) { -+ struct sock *sub_sk = mptcp_to_sock(mptcp); -+ struct wvegas *sub_wvegas = inet_csk_ca(sub_sk); -+ -+ /* sampled_rtt is initialized by 0 */ -+ if (mptcp_sk_can_send(sub_sk) && (sub_wvegas->sampled_rtt > 0)) -+ total_rate += sub_wvegas->instant_rate; -+ } -+ -+ if (total_rate && wvegas->instant_rate) -+ return div64_u64(mptcp_wvegas_scale(wvegas->instant_rate, MPTCP_WVEGAS_SCALE), total_rate); -+ else -+ return wvegas->weight; -+} -+ -+static void mptcp_wvegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) -+{ -+ struct tcp_sock *tp = tcp_sk(sk); -+ struct wvegas *wvegas = inet_csk_ca(sk); -+ -+ if (!wvegas->doing_wvegas_now) { -+ tcp_reno_cong_avoid(sk, ack, acked); -+ return; -+ } -+ -+ if (after(ack, wvegas->beg_snd_nxt)) { -+ wvegas->beg_snd_nxt = tp->snd_nxt; -+ -+ if (wvegas->cnt_rtt <= 2) { -+ tcp_reno_cong_avoid(sk, ack, acked); -+ } else { -+ u32 rtt, diff, q_delay; -+ u64 target_cwnd; -+ -+ rtt = wvegas->sampled_rtt / wvegas->cnt_rtt; -+ target_cwnd = div_u64(((u64)tp->snd_cwnd * wvegas->base_rtt), rtt); -+ -+ diff = div_u64((u64)tp->snd_cwnd * (rtt - wvegas->base_rtt), rtt); -+ -+ if (diff > gamma && tcp_in_slow_start(tp)) { -+ tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); -+ tp->snd_ssthresh = mptcp_wvegas_ssthresh(tp); -+ -+ } else if (tcp_in_slow_start(tp)) { -+ tcp_slow_start(tp, acked); -+ } else { -+ if (diff >= wvegas->alpha) { -+ wvegas->instant_rate = mptcp_wvegas_rate(tp->snd_cwnd, rtt); -+ wvegas->weight = mptcp_wvegas_weight(tp->mpcb, sk); -+ wvegas->alpha = max(2U, (u32)((wvegas->weight * total_alpha) >> MPTCP_WVEGAS_SCALE)); -+ } -+ if (diff > wvegas->alpha) { -+ tp->snd_cwnd--; -+ tp->snd_ssthresh = mptcp_wvegas_ssthresh(tp); -+ } else if (diff < wvegas->alpha) { -+ tp->snd_cwnd++; -+ } -+ -+ /* Try to drain link queue if needed*/ -+ q_delay = rtt - wvegas->base_rtt; -+ if ((wvegas->queue_delay == 0) || (wvegas->queue_delay > q_delay)) -+ wvegas->queue_delay = q_delay; -+ -+ if (q_delay >= 2 * wvegas->queue_delay) { -+ u32 backoff_factor = div_u64(mptcp_wvegas_scale(wvegas->base_rtt, MPTCP_WVEGAS_SCALE), 2 * rtt); -+ tp->snd_cwnd = ((u64)tp->snd_cwnd * backoff_factor) >> MPTCP_WVEGAS_SCALE; -+ wvegas->queue_delay = 0; -+ } -+ } -+ -+ if (tp->snd_cwnd < 2) -+ tp->snd_cwnd = 2; -+ else if (tp->snd_cwnd > tp->snd_cwnd_clamp) -+ tp->snd_cwnd = tp->snd_cwnd_clamp; -+ -+ tp->snd_ssthresh = tcp_current_ssthresh(sk); -+ } -+ -+ wvegas->cnt_rtt = 0; -+ wvegas->sampled_rtt = 0; -+ } -+ /* Use normal slow start */ -+ else if (tcp_in_slow_start(tp)) -+ tcp_slow_start(tp, acked); -+} -+ -+ -+static struct tcp_congestion_ops mptcp_wvegas __read_mostly = { -+ .init = mptcp_wvegas_init, -+ .ssthresh = 
tcp_reno_ssthresh, -+ .cong_avoid = mptcp_wvegas_cong_avoid, -+ .undo_cwnd = tcp_reno_undo_cwnd, -+ .pkts_acked = mptcp_wvegas_pkts_acked, -+ .set_state = mptcp_wvegas_state, -+ .cwnd_event = mptcp_wvegas_cwnd_event, -+ -+ .owner = THIS_MODULE, -+ .name = "wvegas", -+}; -+ -+static int __init mptcp_wvegas_register(void) -+{ -+ BUILD_BUG_ON(sizeof(struct wvegas) > ICSK_CA_PRIV_SIZE); -+ tcp_register_congestion_control(&mptcp_wvegas); -+ return 0; -+} -+ -+static void __exit mptcp_wvegas_unregister(void) -+{ -+ tcp_unregister_congestion_control(&mptcp_wvegas); -+} -+ -+module_init(mptcp_wvegas_register); -+module_exit(mptcp_wvegas_unregister); -+ -+MODULE_AUTHOR("Yu Cao, Enhuan Dong"); -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("MPTCP wVegas"); -+MODULE_VERSION("0.1"); -diff --git a/net/socket.c b/net/socket.c -index 94358566c9d1..a26eeeda2b4d 100644 ---- a/net/socket.c -+++ b/net/socket.c -@@ -91,6 +91,7 @@ - #include - - #include -+#include - #include - #include - -@@ -1339,6 +1340,7 @@ int __sock_create(struct net *net, int family, int type, int protocol, - int err; - struct socket *sock; - const struct net_proto_family *pf; -+ int old_protocol = protocol; - - /* - * Check protocol is in range -@@ -1359,6 +1361,9 @@ int __sock_create(struct net *net, int family, int type, int protocol, - family = PF_PACKET; - } - -+ if (old_protocol == IPPROTO_MPTCP) -+ protocol = IPPROTO_TCP; -+ - err = security_socket_create(family, type, protocol, kern); - if (err) - return err; -@@ -1408,6 +1413,10 @@ int __sock_create(struct net *net, int family, int type, int protocol, - if (err < 0) - goto out_module_put; - -+ if (sysctl_mptcp_enabled && old_protocol == IPPROTO_MPTCP && -+ type == SOCK_STREAM && (family == AF_INET || family == AF_INET6)) -+ mptcp_enable_sock(sock->sk); -+ - /* - * Now to bump the refcnt of the [loadable] module that owns this - * socket at sock_release time we decrement its refcnt. -diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h -index 63038eb23560..7150eb62db86 100644 ---- a/tools/include/uapi/linux/bpf.h -+++ b/tools/include/uapi/linux/bpf.h -@@ -3438,6 +3438,7 @@ enum { - BPF_TCP_LISTEN, - BPF_TCP_CLOSING, /* Now a valid state */ - BPF_TCP_NEW_SYN_RECV, -+ BPF_TCP_RST_WAIT, - - BPF_TCP_MAX_STATES /* Leave at the end! */ - }; \ No newline at end of file + if (dev->flags != old_flags) { ++ /* + pr_info("device %s %s promiscuous mode\n", + dev->name, + dev->flags & IFF_PROMISC ? "entered" : "left"); ++ */ + if (audit_enabled) { + current_uid_gid(&uid, &gid); + audit_log(current->audit_context, GFP_ATOMIC, +--- a/drivers/net/usb/r8152.c 2020-08-13 13:11:25.866435255 +0200 ++++ b/drivers/net/usb/r8152.c 2020-08-13 13:11:51.973994306 +0200 +@@ -2353,7 +2353,7 @@ + + if (netdev->flags & IFF_PROMISC) { + /* Unconditionally log net taps. 
*/ +- netif_notice(tp, link, netdev, "Promiscuous mode enabled\n"); ++ //netif_notice(tp, link, netdev, "Promiscuous mode enabled\n"); + ocp_data |= RCR_AM | RCR_AAP; + mc_filter[1] = 0xffffffff; + mc_filter[0] = 0xffffffff; +--- a/drivers/net/usb/pegasus.c 2020-08-13 13:14:15.519570376 +0200 ++++ b/drivers/net/usb/pegasus.c 2020-08-13 13:14:26.795380006 +0200 +@@ -1031,7 +1031,7 @@ + + if (net->flags & IFF_PROMISC) { + pegasus->eth_regs[EthCtrl2] |= RX_PROMISCUOUS; +- netif_info(pegasus, link, net, "Promiscuous mode enabled\n"); ++ //netif_info(pegasus, link, net, "Promiscuous mode enabled\n"); + } else if (!netdev_mc_empty(net) || (net->flags & IFF_ALLMULTI)) { + pegasus->eth_regs[EthCtrl0] |= RX_MULTICAST; + pegasus->eth_regs[EthCtrl2] &= ~RX_PROMISCUOUS; +--- a/drivers/net/ethernet/realtek/r8169_main.c 2020-08-13 13:15:44.478068638 +0200 ++++ b/drivers/net/ethernet/realtek/r8169_main.c 2020-08-13 13:15:59.181820450 +0200 +@@ -4313,7 +4313,7 @@ + + if (dev->flags & IFF_PROMISC) { + /* Unconditionally log net taps. */ +- netif_notice(tp, link, dev, "Promiscuous mode enabled\n"); ++ //netif_notice(tp, link, dev, "Promiscuous mode enabled\n"); + rx_mode |= AcceptAllPhys; + } else if (netdev_mc_count(dev) > MC_FILTER_LIMIT || + dev->flags & IFF_ALLMULTI || diff --git a/root/target/linux/generic/hack-5.4/700-swconfig_switch_drivers.patch b/root/target/linux/generic/hack-5.4/700-swconfig_switch_drivers.patch new file mode 100755 index 00000000..f30ad81e --- /dev/null +++ b/root/target/linux/generic/hack-5.4/700-swconfig_switch_drivers.patch @@ -0,0 +1,135 @@ +From 36e516290611e613aa92996cb4339561452695b4 Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Fri, 7 Jul 2017 17:24:23 +0200 +Subject: net: swconfig: adds openwrt switch layer + +Signed-off-by: Felix Fietkau +--- + drivers/net/phy/Kconfig | 83 +++++++++++++++++++++++++++++++++++++++++++++++ + drivers/net/phy/Makefile | 15 +++++++++ + include/uapi/linux/Kbuild | 1 + + 3 files changed, 99 insertions(+) + +--- a/drivers/net/phy/Kconfig ++++ b/drivers/net/phy/Kconfig +@@ -250,6 +250,85 @@ config LED_TRIGGER_PHY + for any speed known to the PHY. + + ++comment "Switch configuration API + drivers" ++ ++config SWCONFIG ++ tristate "Switch configuration API" ++ ---help--- ++ Switch configuration API using netlink. This allows ++ you to configure the VLAN features of certain switches. ++ ++config SWCONFIG_LEDS ++ bool "Switch LED trigger support" ++ depends on (SWCONFIG && LEDS_TRIGGERS) ++ ++config ADM6996_PHY ++ tristate "Driver for ADM6996 switches" ++ select SWCONFIG ++ ---help--- ++ Currently supports the ADM6996FC and ADM6996M switches. ++ Support for FC is very limited. 
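Aside on the layer these new Kconfig symbols gate: a swconfig driver registers a struct switch_dev (with its ops table) against the ethernet device sitting in front of the switch, and user space then drives those ops over netlink. What follows is a minimal sketch, assuming OpenWrt's linux/switch.h API (register_switch(), struct switch_dev / switch_dev_ops) that this patch's swconfig.c provides; the driver name, port counts and ops are illustrative only, not part of the patch:

#include <linux/netdevice.h>
#include <linux/switch.h>

/* report the ports belonging to a VLAN back to user space */
static int example_get_vlan_ports(struct switch_dev *dev,
				  struct switch_val *val)
{
	/* fill val->value.ports and val->len from hardware registers */
	return 0;
}

static const struct switch_dev_ops example_ops = {
	.get_vlan_ports	= example_get_vlan_ports,
};

static struct switch_dev example_switch = {
	.name		= "example-switch",
	.ports		= 6,	/* 5 front ports + 1 CPU port */
	.cpu_port	= 5,
	.vlans		= 16,
	.ops		= &example_ops,
};

/* typically called from the PHY or switch driver probe path */
static int example_attach(struct net_device *netdev)
{
	return register_switch(&example_switch, netdev);
}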
++
++config AR8216_PHY
++	tristate "Driver for Atheros AR8216 switches"
++	select ETHERNET_PACKET_MANGLE
++	select SWCONFIG
++
++config AR8216_PHY_LEDS
++	bool "Atheros AR8216 switch LED support"
++	depends on (AR8216_PHY && LEDS_CLASS)
++
++source "drivers/net/phy/b53/Kconfig"
++
++config IP17XX_PHY
++	tristate "Driver for IC+ IP17xx switches"
++	select SWCONFIG
++
++config MVSWITCH_PHY
++	tristate "Driver for Marvell 88E6060 switches"
++	select ETHERNET_PACKET_MANGLE
++
++config PSB6970_PHY
++	tristate "Lantiq XWAY Tantos (PSB6970) Ethernet switch"
++	select SWCONFIG
++	select ETHERNET_PACKET_MANGLE
++
++config RTL8306_PHY
++	tristate "Driver for Realtek RTL8306S switches"
++	select SWCONFIG
++
++config RTL8366_SMI
++	tristate "Driver for the RTL8366 SMI interface"
++	depends on GPIOLIB
++	---help---
++	  This module implements the SMI interface protocol which is used
++	  by some RTL8366 ethernet switch devices via the generic GPIO API.
++
++if RTL8366_SMI
++
++config RTL8366_SMI_DEBUG_FS
++	bool "RTL8366 SMI interface debugfs support"
++	depends on DEBUG_FS
++	default n
++
++config RTL8366S_PHY
++	tristate "Driver for the Realtek RTL8366S switch"
++	select SWCONFIG
++
++config RTL8366RB_PHY
++	tristate "Driver for the Realtek RTL8366RB switch"
++	select SWCONFIG
++
++config RTL8367_PHY
++	tristate "Driver for the Realtek RTL8367R/M switches"
++	select SWCONFIG
++
++config RTL8367B_PHY
++	tristate "Driver for the Realtek RTL8367R-VB switch"
++	select SWCONFIG
++
++endif # RTL8366_SMI
++
+ comment "MII PHY device drivers"
+ 
+ config SFP
+--- a/drivers/net/phy/Makefile
++++ b/drivers/net/phy/Makefile
+@@ -22,6 +22,20 @@ libphy-$(CONFIG_LED_TRIGGER_PHY) += phy_
+ obj-$(CONFIG_PHYLINK)	+= phylink.o
+ obj-$(CONFIG_PHYLIB)	+= libphy.o
+ 
++obj-$(CONFIG_SWCONFIG)		+= swconfig.o
++obj-$(CONFIG_ADM6996_PHY)	+= adm6996.o
++obj-$(CONFIG_AR8216_PHY)	+= ar8216.o ar8327.o
++obj-$(CONFIG_SWCONFIG_B53)	+= b53/
++obj-$(CONFIG_IP17XX_PHY)	+= ip17xx.o
++obj-$(CONFIG_MVSWITCH_PHY)	+= mvswitch.o
++obj-$(CONFIG_PSB6970_PHY)	+= psb6970.o
++obj-$(CONFIG_RTL8306_PHY)	+= rtl8306.o
++obj-$(CONFIG_RTL8366_SMI)	+= rtl8366_smi.o
++obj-$(CONFIG_RTL8366S_PHY)	+= rtl8366s.o
++obj-$(CONFIG_RTL8366RB_PHY)	+= rtl8366rb.o
++obj-$(CONFIG_RTL8367_PHY)	+= rtl8367.o
++obj-$(CONFIG_RTL8367B_PHY)	+= rtl8367b.o
++
+ obj-$(CONFIG_MDIO_ASPEED)	+= mdio-aspeed.o
+ obj-$(CONFIG_MDIO_BCM_IPROC)	+= mdio-bcm-iproc.o
+ obj-$(CONFIG_MDIO_BCM_UNIMAC)	+= mdio-bcm-unimac.o
+--- a/include/linux/platform_data/b53.h
++++ b/include/linux/platform_data/b53.h
+@@ -29,6 +29,9 @@ struct b53_platform_data {
+ 	u32 chip_id;
+ 	u16 enabled_ports;
+ 
++	/* allow to specify an ethX alias */
++	const char *alias;
++
+ 	/* only used by MMAP'd driver */
+ 	unsigned big_endian:1;
+ 	void __iomem *regs;
diff --git a/root/target/linux/generic/hack-5.4/703-add_vsc8504_support.patch b/root/target/linux/generic/hack-5.4/703-add_vsc8504_support.patch
new file mode 100755
index 00000000..afb6ca6c
--- /dev/null
+++ b/root/target/linux/generic/hack-5.4/703-add_vsc8504_support.patch
@@ -0,0 +1,57 @@
+From: Roman Kuzmitskii
+Date: Thu, 05 Nov 2020 02:00:00 +0000
+Subject: [PATCH] net: phy: vitesse: add vsc8504 support
+
+This patch adds support for the VSC8504 PHY.
+That PHY has changed owner over time:
+ vitesse -> microsemi -> microchip
+and its in-kernel driver was rewritten along the way.
+
+There is no need to upstream this patch: the VSC8504 is
+supported by newer kernels out of the box, where support
+can be enabled with CONFIG_MICROSEMI_PHY.
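A note on the 0x000ffff0 masks that accompany the new ID below: phylib compares masked IDs when binding a driver, so the revision nibble reported by the silicon is ignored. A small worked sketch (the helper is illustrative, not kernel code):

#include <linux/types.h>

/* phylib accepts a driver entry when the masked IDs agree */
static bool phy_id_matches(u32 dev_id, u32 drv_id, u32 mask)
{
	return (dev_id & mask) == (drv_id & mask);
}

/*
 * PHY_ID_VSC8504 is 0x000704c2 and the mask is 0x000ffff0, so
 * 0x000704c2 & 0x000ffff0 == 0x000704c0: any silicon revision of the
 * VSC8504 (the low nibble) binds to the entry added below.
 */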
+
+Tested-by: Johannes Kimmel
+Signed-off-by: Roman Kuzmitskii
+--- a/drivers/net/phy/vitesse.c
++++ b/drivers/net/phy/vitesse.c
+@@ -61,6 +61,7 @@
+ 
+ #define PHY_ID_VSC8234		0x000fc620
+ #define PHY_ID_VSC8244		0x000fc6c0
++#define PHY_ID_VSC8504		0x000704c2
+ #define PHY_ID_VSC8572		0x000704d0
+ #define PHY_ID_VSC8601		0x00070420
+ #define PHY_ID_VSC7385		0x00070450
+@@ -292,6 +293,7 @@ static int vsc82xx_config_intr(struct ph
+ 		err = phy_write(phydev, MII_VSC8244_IMASK,
+ 			(phydev->drv->phy_id == PHY_ID_VSC8234 ||
+ 			 phydev->drv->phy_id == PHY_ID_VSC8244 ||
++			 phydev->drv->phy_id == PHY_ID_VSC8504 ||
+ 			 phydev->drv->phy_id == PHY_ID_VSC8572 ||
+ 			 phydev->drv->phy_id == PHY_ID_VSC8601) ?
+ 			MII_VSC8244_IMASK_MASK :
+@@ -402,6 +404,15 @@ static struct phy_driver vsc82xx_driver[
+ 	.ack_interrupt	= &vsc824x_ack_interrupt,
+ 	.config_intr	= &vsc82xx_config_intr,
+ }, {
++	.phy_id		= PHY_ID_VSC8504,
++	.name		= "Vitesse VSC8504",
++	.phy_id_mask	= 0x000ffff0,
++	/* PHY_GBIT_FEATURES */
++	.config_init	= &vsc824x_config_init,
++	.config_aneg	= &vsc82x4_config_aneg,
++	.ack_interrupt	= &vsc824x_ack_interrupt,
++	.config_intr	= &vsc82xx_config_intr,
++}, {
+ 	.phy_id		= PHY_ID_VSC8572,
+ 	.name		= "Vitesse VSC8572",
+ 	.phy_id_mask	= 0x000ffff0,
+@@ -488,6 +499,7 @@ module_phy_driver(vsc82xx_driver);
+ static struct mdio_device_id __maybe_unused vitesse_tbl[] = {
+ 	{ PHY_ID_VSC8234, 0x000ffff0 },
+ 	{ PHY_ID_VSC8244, 0x000fffc0 },
++	{ PHY_ID_VSC8504, 0x000ffff0 },
+ 	{ PHY_ID_VSC8572, 0x000ffff0 },
+ 	{ PHY_ID_VSC7385, 0x000ffff0 },
+ 	{ PHY_ID_VSC7388, 0x000ffff0 },
diff --git a/root/target/linux/generic/hack-5.4/710-net-dsa-mv88e6xxx-default-VID-1.patch b/root/target/linux/generic/hack-5.4/710-net-dsa-mv88e6xxx-default-VID-1.patch
new file mode 100755
index 00000000..5dc5ac68
--- /dev/null
+++ b/root/target/linux/generic/hack-5.4/710-net-dsa-mv88e6xxx-default-VID-1.patch
@@ -0,0 +1,18 @@
+--- a/drivers/net/dsa/mv88e6xxx/chip.c
++++ b/drivers/net/dsa/mv88e6xxx/chip.c
+@@ -1930,6 +1930,7 @@ static int mv88e6xxx_port_fdb_add(struct
+ 	struct mv88e6xxx_chip *chip = ds->priv;
+ 	int err;
+ 
++	vid = vid ? : 1;
+ 	mv88e6xxx_reg_lock(chip);
+ 	err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid,
+ 					   MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC);
+@@ -1944,6 +1945,7 @@ static int mv88e6xxx_port_fdb_del(struct
+ 	struct mv88e6xxx_chip *chip = ds->priv;
+ 	int err;
+ 
++	vid = vid ?
: 1; + mv88e6xxx_reg_lock(chip); + err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid, 0); + mv88e6xxx_reg_unlock(chip); diff --git a/root/target/linux/generic/hack-5.4/711-net-dsa-mv88e6xxx-disable-ATU-violation.patch b/root/target/linux/generic/hack-5.4/711-net-dsa-mv88e6xxx-disable-ATU-violation.patch new file mode 100755 index 00000000..1da388c8 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/711-net-dsa-mv88e6xxx-disable-ATU-violation.patch @@ -0,0 +1,12 @@ +--- a/drivers/net/dsa/mv88e6xxx/chip.c ++++ b/drivers/net/dsa/mv88e6xxx/chip.c +@@ -2492,6 +2492,9 @@ static int mv88e6xxx_setup_port(struct m + if (dsa_is_cpu_port(ds, port)) + reg = 0; + ++ /* Disable ATU member violation interrupt */ ++ reg |= MV88E6XXX_PORT_ASSOC_VECTOR_IGNORE_WRONG; ++ + err = mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_ASSOC_VECTOR, + reg); + if (err) diff --git a/root/target/linux/generic/hack-5.4/721-phy_packets.patch b/root/target/linux/generic/hack-5.4/721-phy_packets.patch new file mode 100755 index 00000000..89ff8ea4 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/721-phy_packets.patch @@ -0,0 +1,176 @@ +From ffe387740bbe88dd88bbe04d6375902708003d6e Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Fri, 7 Jul 2017 17:25:00 +0200 +Subject: net: add packet mangeling patch + +Signed-off-by: Felix Fietkau +--- + include/linux/netdevice.h | 11 +++++++++++ + include/linux/skbuff.h | 14 ++++---------- + net/Kconfig | 6 ++++++ + net/core/dev.c | 18 ++++++++++++++---- + net/core/skbuff.c | 17 +++++++++++++++++ + net/ethernet/eth.c | 6 ++++++ + 6 files changed, 58 insertions(+), 14 deletions(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -1540,6 +1540,7 @@ enum netdev_priv_flags { + IFF_FAILOVER_SLAVE = 1<<28, + IFF_L3MDEV_RX_HANDLER = 1<<29, + IFF_LIVE_RENAME_OK = 1<<30, ++ IFF_NO_IP_ALIGN = 1<<31, + }; + + #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN +@@ -1572,6 +1573,7 @@ enum netdev_priv_flags { + #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE + #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER + #define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK ++#define IFF_NO_IP_ALIGN IFF_NO_IP_ALIGN + + /* Specifies the type of the struct net_device::ml_priv pointer */ + enum netdev_ml_priv_type { +@@ -1882,6 +1884,11 @@ struct net_device { + const struct tlsdev_ops *tlsdev_ops; + #endif + ++#ifdef CONFIG_ETHERNET_PACKET_MANGLE ++ void (*eth_mangle_rx)(struct net_device *dev, struct sk_buff *skb); ++ struct sk_buff *(*eth_mangle_tx)(struct net_device *dev, struct sk_buff *skb); ++#endif ++ + const struct header_ops *header_ops; + + unsigned int flags; +@@ -1964,6 +1971,10 @@ struct net_device { + struct mpls_dev __rcu *mpls_ptr; + #endif + ++#ifdef CONFIG_ETHERNET_PACKET_MANGLE ++ void *phy_ptr; /* PHY device specific data */ ++#endif ++ + /* + * Cache lines mostly used on receive path (including eth_type_trans()) + */ +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -2684,6 +2684,10 @@ static inline int pskb_trim(struct sk_bu + return (len < skb->len) ? 
__pskb_trim(skb, len) : 0; + } + ++extern struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev, ++ unsigned int length, gfp_t gfp); ++ ++ + /** + * pskb_trim_unique - remove end from a paged unique (not cloned) buffer + * @skb: buffer to alter +@@ -2815,16 +2819,6 @@ static inline struct sk_buff *dev_alloc_ + } + + +-static inline struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev, +- unsigned int length, gfp_t gfp) +-{ +- struct sk_buff *skb = __netdev_alloc_skb(dev, length + NET_IP_ALIGN, gfp); +- +- if (NET_IP_ALIGN && skb) +- skb_reserve(skb, NET_IP_ALIGN); +- return skb; +-} +- + static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, + unsigned int length) + { +--- a/net/Kconfig ++++ b/net/Kconfig +@@ -26,6 +26,12 @@ menuconfig NET + + if NET + ++config ETHERNET_PACKET_MANGLE ++ bool ++ help ++ This option can be selected by phy drivers that need to mangle ++ packets going in or out of an ethernet device. ++ + config WANT_COMPAT_NETLINK_MESSAGES + bool + help +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3221,10 +3221,20 @@ static int xmit_one(struct sk_buff *skb, + if (dev_nit_active(dev)) + dev_queue_xmit_nit(skb, dev); + +- len = skb->len; +- trace_net_dev_start_xmit(skb, dev); +- rc = netdev_start_xmit(skb, dev, txq, more); +- trace_net_dev_xmit(skb, rc, dev, len); ++#ifdef CONFIG_ETHERNET_PACKET_MANGLE ++ if (!dev->eth_mangle_tx || ++ (skb = dev->eth_mangle_tx(dev, skb)) != NULL) ++#else ++ if (1) ++#endif ++ { ++ len = skb->len; ++ trace_net_dev_start_xmit(skb, dev); ++ rc = netdev_start_xmit(skb, dev, txq, more); ++ trace_net_dev_xmit(skb, rc, dev, len); ++ } else { ++ rc = NETDEV_TX_OK; ++ } + + return rc; + } +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -60,6 +60,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -549,6 +550,22 @@ skb_fail: + } + EXPORT_SYMBOL(__napi_alloc_skb); + ++struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev, ++ unsigned int length, gfp_t gfp) ++{ ++ struct sk_buff *skb = __netdev_alloc_skb(dev, length + NET_IP_ALIGN, gfp); ++ ++#ifdef CONFIG_ETHERNET_PACKET_MANGLE ++ if (dev && (dev->priv_flags & IFF_NO_IP_ALIGN)) ++ return skb; ++#endif ++ ++ if (NET_IP_ALIGN && skb) ++ skb_reserve(skb, NET_IP_ALIGN); ++ return skb; ++} ++EXPORT_SYMBOL(__netdev_alloc_skb_ip_align); ++ + void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, + int size, unsigned int truesize) + { +--- a/net/ethernet/eth.c ++++ b/net/ethernet/eth.c +@@ -171,6 +171,12 @@ __be16 eth_type_trans(struct sk_buff *sk + const struct ethhdr *eth; + + skb->dev = dev; ++ ++#ifdef CONFIG_ETHERNET_PACKET_MANGLE ++ if (dev->eth_mangle_rx) ++ dev->eth_mangle_rx(dev, skb); ++#endif ++ + skb_reset_mac_header(skb); + + eth = (struct ethhdr *)skb->data; diff --git a/root/target/linux/generic/hack-5.4/760-net-usb-r8152-add-LED-configuration-from-OF.patch b/root/target/linux/generic/hack-5.4/760-net-usb-r8152-add-LED-configuration-from-OF.patch new file mode 100755 index 00000000..a96661c9 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/760-net-usb-r8152-add-LED-configuration-from-OF.patch @@ -0,0 +1,74 @@ +From 82985725e071f2a5735052f18e109a32aeac3a0b Mon Sep 17 00:00:00 2001 +From: David Bauer +Date: Sun, 26 Jul 2020 02:38:31 +0200 +Subject: [PATCH] net: usb: r8152: add LED configuration from OF + +This adds the ability to configure the LED configuration register using +OF. This way, the correct value for board specific LED configuration can +be determined. 
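The phy_packets patch above only adds the hooks; they are consumed by switch drivers such as ar8216, which selects ETHERNET_PACKET_MANGLE in the swconfig patch earlier. A minimal sketch of the pattern, with hypothetical driver names; the two netdev fields, their semantics, and IFF_NO_IP_ALIGN are exactly what the patch introduces:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* e.g. strip a vendor tag that the switch prepends to RX frames */
static void example_mangle_rx(struct net_device *dev, struct sk_buff *skb)
{
}

/* e.g. insert a port tag on TX; returning NULL drops the frame and
 * xmit_one() reports NETDEV_TX_OK for it
 */
static struct sk_buff *example_mangle_tx(struct net_device *dev,
					 struct sk_buff *skb)
{
	return skb;
}

static void example_hook_netdev(struct net_device *dev)
{
#ifdef CONFIG_ETHERNET_PACKET_MANGLE
	dev->eth_mangle_rx = example_mangle_rx;
	dev->eth_mangle_tx = example_mangle_tx;
#endif
	/* a tag-aware driver usually wants unpadded RX buffers as well */
	dev->priv_flags |= IFF_NO_IP_ALIGN;
}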
+ +Signed-off-by: David Bauer +--- + drivers/net/usb/r8152.c | 23 +++++++++++++++++++++++ + 1 file changed, 23 insertions(+) + +--- a/drivers/net/usb/r8152.c ++++ b/drivers/net/usb/r8152.c +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -4336,6 +4337,22 @@ static void rtl_tally_reset(struct r8152 + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RSTTALLY, ocp_data); + } + ++static int r8152_led_configuration(struct r8152 *tp) ++{ ++ u32 led_data; ++ int ret; ++ ++ ret = of_property_read_u32(tp->udev->dev.of_node, "realtek,led-data", ++ &led_data); ++ ++ if (ret) ++ return ret; ++ ++ ocp_write_word(tp, MCU_TYPE_PLA, PLA_LEDSEL, led_data); ++ ++ return 0; ++} ++ + static void r8152b_init(struct r8152 *tp) + { + u32 ocp_data; +@@ -4377,6 +4394,8 @@ static void r8152b_init(struct r8152 *tp + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL); + ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN); + ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data); ++ ++ r8152_led_configuration(tp); + } + + static void r8153_init(struct r8152 *tp) +@@ -4511,6 +4530,8 @@ static void r8153_init(struct r8152 *tp) + tp->coalesce = COALESCE_SLOW; + break; + } ++ ++ r8152_led_configuration(tp); + } + + static void r8153b_init(struct r8152 *tp) +@@ -4587,6 +4608,8 @@ static void r8153b_init(struct r8152 *tp + rtl_tally_reset(tp); + + tp->coalesce = 15000; /* 15 us */ ++ ++ r8152_led_configuration(tp); + } + + static int rtl8152_pre_reset(struct usb_interface *intf) diff --git a/root/target/linux/generic/hack-5.4/761-dt-bindings-net-add-RTL8152-binding-documentation.patch b/root/target/linux/generic/hack-5.4/761-dt-bindings-net-add-RTL8152-binding-documentation.patch new file mode 100755 index 00000000..be262b99 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/761-dt-bindings-net-add-RTL8152-binding-documentation.patch @@ -0,0 +1,54 @@ +From 3ee05f4aa64fc86af3be5bc176ba5808de9260a7 Mon Sep 17 00:00:00 2001 +From: David Bauer +Date: Sun, 26 Jul 2020 15:30:33 +0200 +Subject: [PATCH] dt-bindings: net: add RTL8152 binding documentation + +Add binding documentation for the Realtek RTL8152 / RTL8153 USB ethernet +adapters. + +Signed-off-by: David Bauer +--- + .../bindings/net/realtek,rtl8152.yaml | 36 +++++++++++++++++++ + 1 file changed, 36 insertions(+) + create mode 100644 Documentation/devicetree/bindings/net/realtek,rtl8152.yaml + +--- /dev/null ++++ b/Documentation/devicetree/bindings/net/realtek,rtl8152.yaml +@@ -0,0 +1,36 @@ ++# SPDX-License-Identifier: GPL-2.0 ++%YAML 1.2 ++--- ++$id: http://devicetree.org/schemas/net/realtek,rtl8152.yaml# ++$schema: http://devicetree.org/meta-schemas/core.yaml# ++ ++title: Realtek RTL8152/RTL8153 series USB ethernet ++ ++maintainers: ++ - David Bauer ++ ++properties: ++ compatible: ++ oneOf: ++ - items: ++ - enum: ++ - realtek,rtl8152 ++ - realtek,rtl8153 ++ ++ reg: ++ description: The device number on the USB bus ++ ++ realtek,led-data: ++ description: Value to be written to the LED configuration register. 
++ ++required: ++ - compatible ++ - reg ++ ++examples: ++ - | ++ usb-eth@2 { ++ compatible = "realtek,rtl8153"; ++ reg = <2>; ++ realtek,led-data = <0x87>; ++ }; +\ No newline at end of file diff --git a/root/target/linux/generic/hack-5.4/773-bgmac-add-srab-switch.patch b/root/target/linux/generic/hack-5.4/773-bgmac-add-srab-switch.patch new file mode 100755 index 00000000..88109ac8 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/773-bgmac-add-srab-switch.patch @@ -0,0 +1,98 @@ +From 3cb240533ab787899dc7f17aa7d6c5b4810e2e58 Mon Sep 17 00:00:00 2001 +From: Hauke Mehrtens +Date: Fri, 7 Jul 2017 17:26:01 +0200 +Subject: bcm53xx: bgmac: use srab switch driver + +use the srab switch driver on these SoCs. + +Signed-off-by: Hauke Mehrtens +--- + drivers/net/ethernet/broadcom/bgmac-bcma.c | 1 + + drivers/net/ethernet/broadcom/bgmac.c | 24 ++++++++++++++++++++++++ + drivers/net/ethernet/broadcom/bgmac.h | 4 ++++ + 3 files changed, 29 insertions(+) + +--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c ++++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c +@@ -266,6 +266,7 @@ static int bgmac_probe(struct bcma_devic + bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST; + bgmac->feature_flags |= BGMAC_FEAT_NO_RESET; + bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500; ++ bgmac->feature_flags |= BGMAC_FEAT_SRAB; + break; + default: + bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST; +--- a/drivers/net/ethernet/broadcom/bgmac.c ++++ b/drivers/net/ethernet/broadcom/bgmac.c +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1407,6 +1408,17 @@ static const struct ethtool_ops bgmac_et + .set_link_ksettings = phy_ethtool_set_link_ksettings, + }; + ++static struct b53_platform_data bgmac_b53_pdata = { ++}; ++ ++static struct platform_device bgmac_b53_dev = { ++ .name = "b53-srab-switch", ++ .id = -1, ++ .dev = { ++ .platform_data = &bgmac_b53_pdata, ++ }, ++}; ++ + /************************************************** + * MII + **************************************************/ +@@ -1538,6 +1550,14 @@ int bgmac_enet_probe(struct bgmac *bgmac + net_dev->hw_features = net_dev->features; + net_dev->vlan_features = net_dev->features; + ++ if ((bgmac->feature_flags & BGMAC_FEAT_SRAB) && !bgmac_b53_pdata.regs) { ++ bgmac_b53_pdata.regs = ioremap_nocache(0x18007000, 0x1000); ++ ++ err = platform_device_register(&bgmac_b53_dev); ++ if (!err) ++ bgmac->b53_device = &bgmac_b53_dev; ++ } ++ + err = register_netdev(bgmac->net_dev); + if (err) { + dev_err(bgmac->dev, "Cannot register net device\n"); +@@ -1560,6 +1580,10 @@ EXPORT_SYMBOL_GPL(bgmac_enet_probe); + + void bgmac_enet_remove(struct bgmac *bgmac) + { ++ if (bgmac->b53_device) ++ platform_device_unregister(&bgmac_b53_dev); ++ bgmac->b53_device = NULL; ++ + unregister_netdev(bgmac->net_dev); + phy_disconnect(bgmac->net_dev->phydev); + netif_napi_del(&bgmac->napi); +--- a/drivers/net/ethernet/broadcom/bgmac.h ++++ b/drivers/net/ethernet/broadcom/bgmac.h +@@ -427,6 +427,7 @@ + #define BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII BIT(18) + #define BGMAC_FEAT_CC7_IF_TYPE_RGMII BIT(19) + #define BGMAC_FEAT_IDM_MASK BIT(20) ++#define BGMAC_FEAT_SRAB BIT(21) + + struct bgmac_slot_info { + union { +@@ -532,6 +533,9 @@ struct bgmac { + void (*cmn_maskset32)(struct bgmac *bgmac, u16 offset, u32 mask, + u32 set); + int (*phy_connect)(struct bgmac *bgmac); ++ ++ /* platform device for associated switch */ ++ struct platform_device *b53_device; + }; + + struct bgmac *bgmac_alloc(struct device *dev); diff --git 
a/root/target/linux/generic/hack-5.4/901-debloat_sock_diag.patch b/root/target/linux/generic/hack-5.4/901-debloat_sock_diag.patch new file mode 100755 index 00000000..0abb6726 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/901-debloat_sock_diag.patch @@ -0,0 +1,145 @@ +From 3b6115d6b57a263bdc8c9b1df273bd4a7955eead Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Sat, 8 Jul 2017 08:16:31 +0200 +Subject: debloat: add some debloat patches, strip down procfs and make O_DIRECT support optional, saves ~15K after lzma on MIPS + +Signed-off-by: Felix Fietkau +--- + net/Kconfig | 3 +++ + net/core/Makefile | 3 ++- + net/core/sock.c | 2 ++ + net/ipv4/Kconfig | 1 + + net/netlink/Kconfig | 1 + + net/packet/Kconfig | 1 + + net/unix/Kconfig | 1 + + 7 files changed, 11 insertions(+), 1 deletion(-) + +--- a/net/Kconfig ++++ b/net/Kconfig +@@ -103,6 +103,9 @@ source "net/netlabel/Kconfig" + + endif # if INET + ++config SOCK_DIAG ++ bool ++ + config NETWORK_SECMARK + bool "Security Marking" + help +--- a/net/core/Makefile ++++ b/net/core/Makefile +@@ -10,9 +10,10 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core. + + obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ + neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ +- sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ ++ dev_ioctl.o tso.o sock_reuseport.o \ + fib_notifier.o xdp.o flow_offload.o + ++obj-$(CONFIG_SOCK_DIAG) += sock_diag.o + obj-y += net-sysfs.o + obj-$(CONFIG_PAGE_POOL) += page_pool.o + obj-$(CONFIG_PROC_FS) += net-procfs.o +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -140,6 +140,7 @@ + + static DEFINE_MUTEX(proto_list_mutex); + static LIST_HEAD(proto_list); ++static atomic64_t cookie_gen; + + static void sock_inuse_add(struct net *net, int val); + +@@ -539,6 +540,18 @@ discard_and_relse: + } + EXPORT_SYMBOL(__sk_receive_skb); + ++u64 sock_gen_cookie(struct sock *sk) ++{ ++ while (1) { ++ u64 res = atomic64_read(&sk->sk_cookie); ++ ++ if (res) ++ return res; ++ res = atomic64_inc_return(&cookie_gen); ++ atomic64_cmpxchg(&sk->sk_cookie, 0, res); ++ } ++} ++ + struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) + { + struct dst_entry *dst = __sk_dst_get(sk); +@@ -1760,9 +1773,11 @@ static void __sk_free(struct sock *sk) + if (likely(sk->sk_net_refcnt)) + sock_inuse_add(sock_net(sk), -1); + ++#ifdef CONFIG_SOCK_DIAG + if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk))) + sock_diag_broadcast_destroy(sk); + else ++#endif + sk_destruct(sk); + } + +--- a/net/core/sock_diag.c ++++ b/net/core/sock_diag.c +@@ -19,19 +19,6 @@ static const struct sock_diag_handler *s + static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); + static DEFINE_MUTEX(sock_diag_table_mutex); + static struct workqueue_struct *broadcast_wq; +-static atomic64_t cookie_gen; +- +-u64 sock_gen_cookie(struct sock *sk) +-{ +- while (1) { +- u64 res = atomic64_read(&sk->sk_cookie); +- +- if (res) +- return res; +- res = atomic64_inc_return(&cookie_gen); +- atomic64_cmpxchg(&sk->sk_cookie, 0, res); +- } +-} + + int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie) + { +--- a/net/ipv4/Kconfig ++++ b/net/ipv4/Kconfig +@@ -400,6 +400,7 @@ config INET_TUNNEL + + config INET_DIAG + tristate "INET: socket monitoring interface" ++ select SOCK_DIAG + default y + ---help--- + Support for INET (TCP, DCCP, etc) socket monitoring interface used by +--- a/net/netlink/Kconfig ++++ b/net/netlink/Kconfig +@@ -5,6 +5,7 @@ + + config NETLINK_DIAG + tristate "NETLINK: socket monitoring interface" ++ select SOCK_DIAG + 
default n + ---help--- + Support for NETLINK socket monitoring interface used by the ss tool. +--- a/net/packet/Kconfig ++++ b/net/packet/Kconfig +@@ -19,6 +19,7 @@ config PACKET + config PACKET_DIAG + tristate "Packet: sockets monitoring interface" + depends on PACKET ++ select SOCK_DIAG + default n + ---help--- + Support for PF_PACKET sockets monitoring interface used by the ss tool. +--- a/net/unix/Kconfig ++++ b/net/unix/Kconfig +@@ -28,6 +28,7 @@ config UNIX_SCM + config UNIX_DIAG + tristate "UNIX: socket monitoring interface" + depends on UNIX ++ select SOCK_DIAG + default n + ---help--- + Support for UNIX socket monitoring interface used by the ss tool. diff --git a/root/target/linux/generic/hack-5.4/902-debloat_proc.patch b/root/target/linux/generic/hack-5.4/902-debloat_proc.patch new file mode 100755 index 00000000..198b0376 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/902-debloat_proc.patch @@ -0,0 +1,408 @@ +From 9e3f1d0805b2d919904dd9a4ff0d956314cc3cba Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Sat, 8 Jul 2017 08:20:09 +0200 +Subject: debloat: procfs + +Signed-off-by: Felix Fietkau +--- + fs/locks.c | 2 ++ + fs/proc/Kconfig | 5 +++++ + fs/proc/consoles.c | 3 +++ + fs/proc/proc_tty.c | 11 ++++++++++- + include/net/snmp.h | 18 +++++++++++++++++- + ipc/msg.c | 3 +++ + ipc/sem.c | 2 ++ + ipc/shm.c | 2 ++ + ipc/util.c | 3 +++ + kernel/exec_domain.c | 2 ++ + kernel/irq/proc.c | 9 +++++++++ + kernel/time/timer_list.c | 2 ++ + mm/vmalloc.c | 2 ++ + mm/vmstat.c | 8 +++++--- + net/8021q/vlanproc.c | 6 ++++++ + net/core/net-procfs.c | 18 ++++++++++++------ + net/core/sock.c | 2 ++ + net/ipv4/fib_trie.c | 18 ++++++++++++------ + net/ipv4/proc.c | 3 +++ + net/ipv4/route.c | 3 +++ + 20 files changed, 105 insertions(+), 17 deletions(-) + +--- a/fs/locks.c ++++ b/fs/locks.c +@@ -2989,6 +2989,8 @@ static const struct seq_operations locks + + static int __init proc_locks_init(void) + { ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ return 0; + proc_create_seq_private("locks", 0, NULL, &locks_seq_operations, + sizeof(struct locks_iterator), NULL); + return 0; +--- a/fs/proc/Kconfig ++++ b/fs/proc/Kconfig +@@ -100,6 +100,11 @@ config PROC_CHILDREN + Say Y if you are running any user-space software which takes benefit from + this interface. For example, rkt is such a piece of software. 
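The guard this option relies on is visible in the fs/locks.c hunk above and recurs through the rest of the patch: IS_ENABLED() folds to a compile-time constant, so the early return turns each registration body into dead code when the option is set. A condensed sketch of the pattern (hypothetical names; proc_create_single() is the same helper this patch guards in the exec_domain.c hunk further down):

#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int example_proc_show(struct seq_file *m, void *v)
{
	return 0;
}

static int __init example_proc_init(void)
{
	/* folded to a constant at compile time: no runtime branch, and
	 * the proc_create_single() call below becomes dead code that
	 * the compiler discards when CONFIG_PROC_STRIPPED is set
	 */
	if (IS_ENABLED(CONFIG_PROC_STRIPPED))
		return 0;

	proc_create_single("example", 0, NULL, example_proc_show);
	return 0;
}
fs_initcall(example_proc_init);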
+ ++config PROC_STRIPPED ++ default n ++ depends on EXPERT ++ bool "Strip non-essential /proc functionality to reduce code size" ++ + config PROC_PID_ARCH_STATUS + def_bool n + depends on PROC_FS +--- a/fs/proc/consoles.c ++++ b/fs/proc/consoles.c +@@ -92,6 +92,9 @@ static const struct seq_operations conso + + static int __init proc_consoles_init(void) + { ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ return 0; ++ + proc_create_seq("consoles", 0, NULL, &consoles_op); + return 0; + } +--- a/fs/proc/proc_tty.c ++++ b/fs/proc/proc_tty.c +@@ -133,7 +133,10 @@ static const struct seq_operations tty_d + void proc_tty_register_driver(struct tty_driver *driver) + { + struct proc_dir_entry *ent; +- ++ ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ return; ++ + if (!driver->driver_name || driver->proc_entry || + !driver->ops->proc_show) + return; +@@ -150,6 +153,9 @@ void proc_tty_unregister_driver(struct t + { + struct proc_dir_entry *ent; + ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ return; ++ + ent = driver->proc_entry; + if (!ent) + return; +@@ -164,6 +170,9 @@ void proc_tty_unregister_driver(struct t + */ + void __init proc_tty_init(void) + { ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ return; ++ + if (!proc_mkdir("tty", NULL)) + return; + proc_mkdir("tty/ldisc", NULL); /* Preserved: it's userspace visible */ +--- a/include/net/snmp.h ++++ b/include/net/snmp.h +@@ -118,6 +118,21 @@ struct linux_xfrm_mib { + #define DECLARE_SNMP_STAT(type, name) \ + extern __typeof__(type) __percpu *name + ++#ifdef CONFIG_PROC_STRIPPED ++#define __SNMP_STATS_DUMMY(mib) \ ++ do { (void) mib->mibs[0]; } while(0) ++ ++#define __SNMP_INC_STATS(mib, field) __SNMP_STATS_DUMMY(mib) ++#define SNMP_INC_STATS_ATOMIC_LONG(mib, field) __SNMP_STATS_DUMMY(mib) ++#define SNMP_INC_STATS(mib, field) __SNMP_STATS_DUMMY(mib) ++#define SNMP_DEC_STATS(mib, field) __SNMP_STATS_DUMMY(mib) ++#define __SNMP_ADD_STATS(mib, field, addend) __SNMP_STATS_DUMMY(mib) ++#define SNMP_ADD_STATS(mib, field, addend) __SNMP_STATS_DUMMY(mib) ++#define SNMP_UPD_PO_STATS(mib, basefield, addend) __SNMP_STATS_DUMMY(mib) ++#define __SNMP_UPD_PO_STATS(mib, basefield, addend) __SNMP_STATS_DUMMY(mib) ++ ++#else ++ + #define __SNMP_INC_STATS(mib, field) \ + __this_cpu_inc(mib->mibs[field]) + +@@ -148,8 +163,9 @@ struct linux_xfrm_mib { + __this_cpu_add(ptr[basefield##OCTETS], addend); \ + } while (0) + ++#endif + +-#if BITS_PER_LONG==32 ++#if (BITS_PER_LONG==32) && !defined(CONFIG_PROC_STRIPPED) + + #define __SNMP_ADD_STATS64(mib, field, addend) \ + do { \ +--- a/ipc/msg.c ++++ b/ipc/msg.c +@@ -1317,6 +1317,9 @@ void __init msg_init(void) + { + msg_init_ns(&init_ipc_ns); + ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ return; ++ + ipc_init_proc_interface("sysvipc/msg", + " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", + IPC_MSG_IDS, sysvipc_msg_proc_show); +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -243,6 +243,8 @@ void sem_exit_ns(struct ipc_namespace *n + void __init sem_init(void) + { + sem_init_ns(&init_ipc_ns); ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ return; + ipc_init_proc_interface("sysvipc/sem", + " key semid perms nsems uid gid cuid cgid otime ctime\n", + IPC_SEM_IDS, sysvipc_sem_proc_show); +--- a/ipc/shm.c ++++ b/ipc/shm.c +@@ -154,6 +154,8 @@ pure_initcall(ipc_ns_init); + + void __init shm_init(void) + { ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ return; + ipc_init_proc_interface("sysvipc/shm", + #if BITS_PER_LONG <= 32 + " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", +--- 
a/ipc/util.c ++++ b/ipc/util.c +@@ -140,6 +140,9 @@ void __init ipc_init_proc_interface(cons + struct proc_dir_entry *pde; + struct ipc_proc_iface *iface; + ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ return; ++ + iface = kmalloc(sizeof(*iface), GFP_KERNEL); + if (!iface) + return; +--- a/kernel/exec_domain.c ++++ b/kernel/exec_domain.c +@@ -29,6 +29,8 @@ static int execdomains_proc_show(struct + + static int __init proc_execdomains_init(void) + { ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ return 0; + proc_create_single("execdomains", 0, NULL, execdomains_proc_show); + return 0; + } +--- a/kernel/irq/proc.c ++++ b/kernel/irq/proc.c +@@ -341,6 +341,9 @@ void register_irq_proc(unsigned int irq, + void __maybe_unused *irqp = (void *)(unsigned long) irq; + char name [MAX_NAMELEN]; + ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED) && !IS_ENABLED(CONFIG_SMP)) ++ return; ++ + if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip)) + return; + +@@ -394,6 +397,9 @@ void unregister_irq_proc(unsigned int ir + { + char name [MAX_NAMELEN]; + ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED) && !IS_ENABLED(CONFIG_SMP)) ++ return; ++ + if (!root_irq_dir || !desc->dir) + return; + #ifdef CONFIG_SMP +@@ -432,6 +438,9 @@ void init_irq_proc(void) + unsigned int irq; + struct irq_desc *desc; + ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED) && !IS_ENABLED(CONFIG_SMP)) ++ return; ++ + /* create /proc/irq */ + root_irq_dir = proc_mkdir("irq", NULL); + if (!root_irq_dir) +--- a/kernel/time/timer_list.c ++++ b/kernel/time/timer_list.c +@@ -370,6 +370,8 @@ static int __init init_timer_list_procfs + { + struct proc_dir_entry *pe; + ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ return 0; + pe = proc_create_seq_private("timer_list", 0400, NULL, &timer_list_sops, + sizeof(struct timer_list_iter), NULL); + if (!pe) +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -3564,6 +3564,8 @@ static const struct seq_operations vmall + + static int __init proc_vmalloc_init(void) + { ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ return 0; + if (IS_ENABLED(CONFIG_NUMA)) + proc_create_seq_private("vmallocinfo", 0400, NULL, + &vmalloc_op, +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -1988,10 +1988,12 @@ void __init init_mm_internals(void) + start_shepherd_timer(); + #endif + #ifdef CONFIG_PROC_FS +- proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op); +- proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op); ++ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) { ++ proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op); ++ proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op); ++ proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op); ++ } + proc_create_seq("vmstat", 0444, NULL, &vmstat_op); +- proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op); + #endif + } + +--- a/net/8021q/vlanproc.c ++++ b/net/8021q/vlanproc.c +@@ -93,6 +93,9 @@ void vlan_proc_cleanup(struct net *net) + { + struct vlan_net *vn = net_generic(net, vlan_net_id); + ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ return; ++ + if (vn->proc_vlan_conf) + remove_proc_entry(name_conf, vn->proc_vlan_dir); + +@@ -112,6 +115,9 @@ int __net_init vlan_proc_init(struct net + { + struct vlan_net *vn = net_generic(net, vlan_net_id); + ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ return 0; ++ + vn->proc_vlan_dir = proc_net_mkdir(net, name_root, net->proc_net); + if (!vn->proc_vlan_dir) + goto err; +--- a/net/core/net-procfs.c ++++ b/net/core/net-procfs.c +@@ -279,10 +279,12 @@ static int __net_init dev_proc_net_init( + if (!proc_create_net("dev", 0444, net->proc_net, &dev_seq_ops, + sizeof(struct 
seq_net_private))) + goto out; +- if (!proc_create_seq("softnet_stat", 0444, net->proc_net, ++ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) && ++ !proc_create_seq("softnet_stat", 0444, net->proc_net, + &softnet_seq_ops)) + goto out_dev; +- if (!proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops, ++ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) && ++ !proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops, + sizeof(struct seq_net_private))) + goto out_softnet; + +@@ -292,9 +294,11 @@ static int __net_init dev_proc_net_init( + out: + return rc; + out_ptype: +- remove_proc_entry("ptype", net->proc_net); ++ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ remove_proc_entry("ptype", net->proc_net); + out_softnet: +- remove_proc_entry("softnet_stat", net->proc_net); ++ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ remove_proc_entry("softnet_stat", net->proc_net); + out_dev: + remove_proc_entry("dev", net->proc_net); + goto out; +@@ -304,8 +308,10 @@ static void __net_exit dev_proc_net_exit + { + wext_proc_exit(net); + +- remove_proc_entry("ptype", net->proc_net); +- remove_proc_entry("softnet_stat", net->proc_net); ++ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) { ++ remove_proc_entry("ptype", net->proc_net); ++ remove_proc_entry("softnet_stat", net->proc_net); ++ } + remove_proc_entry("dev", net->proc_net); + } + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -3643,6 +3643,8 @@ static __net_initdata struct pernet_oper + + static int __init proto_init(void) + { ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ return 0; + return register_pernet_subsys(&proto_net_ops); + } + +--- a/net/ipv4/fib_trie.c ++++ b/net/ipv4/fib_trie.c +@@ -2848,11 +2848,13 @@ static const struct seq_operations fib_r + + int __net_init fib_proc_init(struct net *net) + { +- if (!proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops, ++ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) && ++ !proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops, + sizeof(struct fib_trie_iter))) + goto out1; + +- if (!proc_create_net_single("fib_triestat", 0444, net->proc_net, ++ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) && ++ !proc_create_net_single("fib_triestat", 0444, net->proc_net, + fib_triestat_seq_show, NULL)) + goto out2; + +@@ -2863,17 +2865,21 @@ int __net_init fib_proc_init(struct net + return 0; + + out3: +- remove_proc_entry("fib_triestat", net->proc_net); ++ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ remove_proc_entry("fib_triestat", net->proc_net); + out2: +- remove_proc_entry("fib_trie", net->proc_net); ++ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ remove_proc_entry("fib_trie", net->proc_net); + out1: + return -ENOMEM; + } + + void __net_exit fib_proc_exit(struct net *net) + { +- remove_proc_entry("fib_trie", net->proc_net); +- remove_proc_entry("fib_triestat", net->proc_net); ++ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) { ++ remove_proc_entry("fib_trie", net->proc_net); ++ remove_proc_entry("fib_triestat", net->proc_net); ++ } + remove_proc_entry("route", net->proc_net); + } + +--- a/net/ipv4/proc.c ++++ b/net/ipv4/proc.c +@@ -522,5 +522,8 @@ static __net_initdata struct pernet_oper + + int __init ip_misc_proc_init(void) + { ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ return 0; ++ + return register_pernet_subsys(&ip_proc_ops); + } +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -410,6 +410,9 @@ static struct pernet_operations ip_rt_pr + + static int __init ip_rt_proc_init(void) + { ++ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) ++ return 0; ++ + return register_pernet_subsys(&ip_rt_proc_ops); + } + diff --git 
a/root/target/linux/generic/hack-5.4/904-debloat_dma_buf.patch b/root/target/linux/generic/hack-5.4/904-debloat_dma_buf.patch new file mode 100755 index 00000000..76032d9b --- /dev/null +++ b/root/target/linux/generic/hack-5.4/904-debloat_dma_buf.patch @@ -0,0 +1,75 @@ +From e3692cb2fcd5ba1244512a0f43b8118f65f1c375 Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Sat, 8 Jul 2017 08:20:43 +0200 +Subject: debloat: dmabuf + +Signed-off-by: Felix Fietkau +--- + drivers/base/Kconfig | 2 +- + drivers/dma-buf/Makefile | 10 +++++++--- + drivers/dma-buf/dma-buf.c | 4 +++- + fs/d_path.c | 1 + + kernel/sched/core.c | 1 + + 5 files changed, 13 insertions(+), 5 deletions(-) + +--- a/drivers/base/Kconfig ++++ b/drivers/base/Kconfig +@@ -179,7 +179,7 @@ config SOC_BUS + source "drivers/base/regmap/Kconfig" + + config DMA_SHARED_BUFFER +- bool ++ tristate + default n + select IRQ_WORK + help +--- a/drivers/dma-buf/Makefile ++++ b/drivers/dma-buf/Makefile +@@ -1,9 +1,13 @@ + # SPDX-License-Identifier: GPL-2.0-only +-obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \ +- dma-resv.o seqno-fence.o +-obj-$(CONFIG_SYNC_FILE) += sync_file.o +-obj-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o +-obj-$(CONFIG_UDMABUF) += udmabuf.o ++obj-$(CONFIG_DMA_SHARED_BUFFER) := dma-shared-buffer.o ++ ++dma-buf-objs-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \ ++ dma-resv.o seqno-fence.o ++dma-buf-objs-$(CONFIG_SYNC_FILE) += sync_file.o ++dma-buf-objs-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o ++dma-buf-objs-$(CONFIG_UDMABUF) += udmabuf.o ++ ++dma-shared-buffer-objs := $(dma-buf-objs-y) + + dmabuf_selftests-y := \ + selftest.o \ +--- a/drivers/dma-buf/dma-buf.c ++++ b/drivers/dma-buf/dma-buf.c +@@ -1314,4 +1314,5 @@ static void __exit dma_buf_deinit(void) + dma_buf_uninit_debugfs(); + kern_unmount(dma_buf_mnt); + } +-__exitcall(dma_buf_deinit); ++module_exit(dma_buf_deinit); ++MODULE_LICENSE("GPL"); +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -2770,6 +2770,7 @@ int wake_up_state(struct task_struct *p, + { + return try_to_wake_up(p, state, 0); + } ++EXPORT_SYMBOL_GPL(wake_up_state); + + /* + * Perform scheduler related setup for a newly forked process p.
+--- a/fs/d_path.c ++++ b/fs/d_path.c +@@ -311,6 +311,7 @@ char *dynamic_dname(struct dentry *dentr + buffer += buflen - sz; + return memcpy(buffer, temp, sz); + } ++EXPORT_SYMBOL_GPL(dynamic_dname); + + char *simple_dname(struct dentry *dentry, char *buffer, int buflen) + { diff --git a/root/target/linux/generic/hack-5.4/910-kobject_uevent.patch b/root/target/linux/generic/hack-5.4/910-kobject_uevent.patch new file mode 100755 index 00000000..c4c41ca4 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/910-kobject_uevent.patch @@ -0,0 +1,32 @@ +From 0d37e6edc09c99e683dd91ca0e83bbc0df8477b3 Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Sun, 16 Jul 2017 16:56:10 +0200 +Subject: lib: add uevent_next_seqnum() + +Signed-off-by: Felix Fietkau +--- + include/linux/kobject.h | 5 +++++ + lib/kobject_uevent.c | 37 +++++++++++++++++++++++++++++++++++++ + 2 files changed, 42 insertions(+) + +--- a/lib/kobject_uevent.c ++++ b/lib/kobject_uevent.c +@@ -179,6 +179,18 @@ out: + return r; + } + ++u64 uevent_next_seqnum(void) ++{ ++ u64 seq; ++ ++ mutex_lock(&uevent_sock_mutex); ++ seq = ++uevent_seqnum; ++ mutex_unlock(&uevent_sock_mutex); ++ ++ return seq; ++} ++EXPORT_SYMBOL_GPL(uevent_next_seqnum); ++ + /** + * kobject_synth_uevent - send synthetic uevent with arguments + * diff --git a/root/target/linux/generic/hack-5.4/911-kobject_add_broadcast_uevent.patch b/root/target/linux/generic/hack-5.4/911-kobject_add_broadcast_uevent.patch new file mode 100755 index 00000000..6f5e50d0 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/911-kobject_add_broadcast_uevent.patch @@ -0,0 +1,76 @@ +From 0d37e6edc09c99e683dd91ca0e83bbc0df8477b3 Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Sun, 16 Jul 2017 16:56:10 +0200 +Subject: lib: add uevent_next_seqnum() + +Signed-off-by: Felix Fietkau +--- + include/linux/kobject.h | 5 +++++ + lib/kobject_uevent.c | 37 +++++++++++++++++++++++++++++++++++++ + 2 files changed, 42 insertions(+) + +--- a/include/linux/kobject.h ++++ b/include/linux/kobject.h +@@ -32,6 +32,8 @@ + #define UEVENT_NUM_ENVP 32 /* number of env pointers */ + #define UEVENT_BUFFER_SIZE 2048 /* buffer for the variables */ + ++struct sk_buff; ++ + #ifdef CONFIG_UEVENT_HELPER + /* path to the userspace helper executed on an event */ + extern char uevent_helper[]; +@@ -245,4 +247,7 @@ int kobject_synth_uevent(struct kobject + __printf(2, 3) + int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...); + ++int broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group, ++ gfp_t allocation); ++ + #endif /* _KOBJECT_H_ */ +--- a/lib/kobject_uevent.c ++++ b/lib/kobject_uevent.c +@@ -691,6 +691,43 @@ int add_uevent_var(struct kobj_uevent_en + EXPORT_SYMBOL_GPL(add_uevent_var); + + #if defined(CONFIG_NET) ++int broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group, ++ gfp_t allocation) ++{ ++ struct uevent_sock *ue_sk; ++ int err = 0; ++ ++ /* send netlink message */ ++ mutex_lock(&uevent_sock_mutex); ++ list_for_each_entry(ue_sk, &uevent_sock_list, list) { ++ struct sock *uevent_sock = ue_sk->sk; ++ struct sk_buff *skb2; ++ ++ skb2 = skb_clone(skb, allocation); ++ if (!skb2) ++ break; ++ ++ err = netlink_broadcast(uevent_sock, skb2, pid, group, ++ allocation); ++ if (err) ++ break; ++ } ++ mutex_unlock(&uevent_sock_mutex); ++ ++ kfree_skb(skb); ++ return err; ++} ++#else ++int broadcast_uevent(struct sk_buff *skb, __u32 pid, __u32 group, ++ gfp_t allocation) ++{ ++ kfree_skb(skb); ++ return 0; ++} ++#endif ++EXPORT_SYMBOL_GPL(broadcast_uevent); ++ ++#if 
defined(CONFIG_NET) + static int uevent_net_broadcast(struct sock *usk, struct sk_buff *skb, + struct netlink_ext_ack *extack) + { diff --git a/root/target/linux/generic/hack-5.4/921-always-create-console-node-in-initramfs.patch b/root/target/linux/generic/hack-5.4/921-always-create-console-node-in-initramfs.patch new file mode 100755 index 00000000..e4375790 --- /dev/null +++ b/root/target/linux/generic/hack-5.4/921-always-create-console-node-in-initramfs.patch @@ -0,0 +1,40 @@ +From 5d301596fdc72f6cb672f72eb3c66e7cddefb103 Mon Sep 17 00:00:00 2001 +From: Felix Fietkau +Date: Sat, 8 Jul 2017 08:26:02 +0200 +Subject: initramfs: always create console node + +Signed-off-by: Felix Fietkau +--- + usr/gen_initramfs_list.sh | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +--- a/usr/gen_initramfs_list.sh ++++ b/usr/gen_initramfs_list.sh +@@ -59,6 +59,18 @@ default_initramfs() { + EOF + } + ++list_openwrt_initramfs() { ++ : ++} ++ ++openwrt_initramfs() { ++ # make sure that /dev/console exists ++ cat <<-EOF >> ${output} ++ dir /dev 0755 0 0 ++ nod /dev/console 0600 0 0 c 5 1 ++ EOF ++} ++ + filetype() { + local argv1="$1" + +@@ -180,6 +192,8 @@ dir_filelist() { + if [ "$(echo "${dirlist}" | wc -l)" -gt 1 ]; then + ${dep_list}print_mtime "$1" + ++ ${dep_list}openwrt_initramfs ++ + echo "${dirlist}" | \ + while read x; do + ${dep_list}parse ${x} diff --git a/root/target/linux/generic/hack-5.4/999-stop-promiscuous-info.patch b/root/target/linux/generic/hack-5.4/999-stop-promiscuous-info.patch deleted file mode 100755 index 850613b5..00000000 --- a/root/target/linux/generic/hack-5.4/999-stop-promiscuous-info.patch +++ /dev/null @@ -1,47 +0,0 @@ ---- a/net/core/dev.c 2018-08-10 10:31:41.199494561 +0200 -+++ b/net/core/dev.c 2018-08-10 10:32:03.635272509 +0200 -@@ -6613,9 +6613,11 @@ - } - } - if (dev->flags != old_flags) { -+ /* - pr_info("device %s %s promiscuous mode\n", - dev->name, - dev->flags & IFF_PROMISC ? "entered" : "left"); -+ */ - if (audit_enabled) { - current_uid_gid(&uid, &gid); - audit_log(current->audit_context, GFP_ATOMIC, ---- a/drivers/net/usb/r8152.c 2020-08-13 13:11:25.866435255 +0200 -+++ b/drivers/net/usb/r8152.c 2020-08-13 13:11:51.973994306 +0200 -@@ -2353,7 +2353,7 @@ - - if (netdev->flags & IFF_PROMISC) { - /* Unconditionally log net taps. */ -- netif_notice(tp, link, netdev, "Promiscuous mode enabled\n"); -+ //netif_notice(tp, link, netdev, "Promiscuous mode enabled\n"); - ocp_data |= RCR_AM | RCR_AAP; - mc_filter[1] = 0xffffffff; - mc_filter[0] = 0xffffffff; ---- a/drivers/net/usb/pegasus.c 2020-08-13 13:14:15.519570376 +0200 -+++ b/drivers/net/usb/pegasus.c 2020-08-13 13:14:26.795380006 +0200 -@@ -1031,7 +1031,7 @@ - - if (net->flags & IFF_PROMISC) { - pegasus->eth_regs[EthCtrl2] |= RX_PROMISCUOUS; -- netif_info(pegasus, link, net, "Promiscuous mode enabled\n"); -+ //netif_info(pegasus, link, net, "Promiscuous mode enabled\n"); - } else if (!netdev_mc_empty(net) || (net->flags & IFF_ALLMULTI)) { - pegasus->eth_regs[EthCtrl0] |= RX_MULTICAST; - pegasus->eth_regs[EthCtrl2] &= ~RX_PROMISCUOUS; ---- a/drivers/net/ethernet/realtek/r8169_main.c 2020-08-13 13:15:44.478068638 +0200 -+++ b/drivers/net/ethernet/realtek/r8169_main.c 2020-08-13 13:15:59.181820450 +0200 -@@ -4313,7 +4313,7 @@ - - if (dev->flags & IFF_PROMISC) { - /* Unconditionally log net taps. 
*/ -- netif_notice(tp, link, dev, "Promiscuous mode enabled\n"); -+ //netif_notice(tp, link, dev, "Promiscuous mode enabled\n"); - rx_mode |= AcceptAllPhys; - } else if (netdev_mc_count(dev) > MC_FILTER_LIMIT || - dev->flags & IFF_ALLMULTI || diff --git a/root/target/linux/generic/pending-5.15/050-dtc-checks-Drop-interrupt-provider-address-cells-check.patch b/root/target/linux/generic/pending-5.15/050-dtc-checks-Drop-interrupt-provider-address-cells-check.patch new file mode 100755 index 00000000..75f63728 --- /dev/null +++ b/root/target/linux/generic/pending-5.15/050-dtc-checks-Drop-interrupt-provider-address-cells-check.patch @@ -0,0 +1,28 @@ +From d8d1a9a77863a8c7031ae82a1d461aa78eb72a7b Mon Sep 17 00:00:00 2001 +From: Rob Herring +Date: Mon, 11 Oct 2021 14:12:43 -0500 +Subject: [PATCH] checks: Drop interrupt provider '#address-cells' check + +'#address-cells' is only needed when parsing 'interrupt-map' properties, so +remove it from the common interrupt-provider test. + +Cc: Andre Przywara +Reviewed-by: David Gibson +Signed-off-by: Rob Herring +Message-Id: <20211011191245.1009682-3-robh@kernel.org> +Signed-off-by: David Gibson +--- +--- a/scripts/dtc/checks.c ++++ b/scripts/dtc/checks.c +@@ -1569,11 +1569,6 @@ static void check_interrupt_provider(str + if (!prop) + FAIL(c, dti, node, + "Missing #interrupt-cells in interrupt provider"); +- +- prop = get_property(node, "#address-cells"); +- if (!prop) +- FAIL(c, dti, node, +- "Missing #address-cells in interrupt provider"); + } + WARNING(interrupt_provider, check_interrupt_provider, NULL); + diff --git a/root/target/linux/generic/pending-5.15/120-Fix-alloc_node_mem_map-with-ARCH_PFN_OFFSET-calcu.patch b/root/target/linux/generic/pending-5.15/120-Fix-alloc_node_mem_map-with-ARCH_PFN_OFFSET-calcu.patch index bf4ef8c7..f18b4c87 100755 --- a/root/target/linux/generic/pending-5.15/120-Fix-alloc_node_mem_map-with-ARCH_PFN_OFFSET-calcu.patch +++ b/root/target/linux/generic/pending-5.15/120-Fix-alloc_node_mem_map-with-ARCH_PFN_OFFSET-calcu.patch @@ -71,7 +71,7 @@ Signed-off-by: Tobias Wolf --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -7544,7 +7544,7 @@ static void __ref alloc_node_mem_map(str +@@ -7550,7 +7550,7 @@ static void __init alloc_node_mem_map(st if (pgdat == NODE_DATA(0)) { mem_map = NODE_DATA(0)->node_mem_map; if (page_to_pfn(mem_map) != pgdat->node_start_pfn) diff --git a/root/target/linux/generic/pending-5.15/130-add-linux-spidev-compatible-si3210.patch b/root/target/linux/generic/pending-5.15/130-add-linux-spidev-compatible-si3210.patch index 986149f4..d260cf1f 100755 --- a/root/target/linux/generic/pending-5.15/130-add-linux-spidev-compatible-si3210.patch +++ b/root/target/linux/generic/pending-5.15/130-add-linux-spidev-compatible-si3210.patch @@ -8,11 +8,11 @@ Signed-off-by: Giuseppe Lippolis --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c -@@ -682,6 +682,7 @@ static const struct of_device_id spidev_ - { .compatible = "lwn,bk4" }, - { .compatible = "dh,dhcom-board" }, +@@ -696,6 +696,7 @@ static const struct of_device_id spidev_ { .compatible = "menlo,m53cpld" }, -+ { .compatible = "siliconlabs,si3210" }, { .compatible = "cisco,spi-petra" }, { .compatible = "micron,spi-authenta" }, ++ { .compatible = "siliconlabs,si3210" }, {}, + }; + MODULE_DEVICE_TABLE(of, spidev_dt_ids); diff --git a/root/target/linux/generic/pending-5.15/140-jffs2-use-.rename2-and-add-RENAME_WHITEOUT-support.patch b/root/target/linux/generic/pending-5.15/140-jffs2-use-.rename2-and-add-RENAME_WHITEOUT-support.patch new file mode 100755 index 
00000000..8f40ae3b --- /dev/null +++ b/root/target/linux/generic/pending-5.15/140-jffs2-use-.rename2-and-add-RENAME_WHITEOUT-support.patch @@ -0,0 +1,81 @@ +From: Felix Fietkau +Subject: jffs2: use .rename2 and add RENAME_WHITEOUT support + +It is required for renames on overlayfs + +Signed-off-by: Felix Fietkau +--- + +--- a/fs/jffs2/dir.c ++++ b/fs/jffs2/dir.c +@@ -614,8 +614,8 @@ static int jffs2_rmdir (struct inode *di + return ret; + } + +-static int jffs2_mknod (struct user_namespace *mnt_userns, struct inode *dir_i, +- struct dentry *dentry, umode_t mode, dev_t rdev) ++static int __jffs2_mknod (struct user_namespace *mnt_userns, struct inode *dir_i, ++ struct dentry *dentry, umode_t mode, dev_t rdev, bool whiteout) + { + struct jffs2_inode_info *f, *dir_f; + struct jffs2_sb_info *c; +@@ -754,7 +754,11 @@ static int jffs2_mknod (struct user_name + mutex_unlock(&dir_f->sem); + jffs2_complete_reservation(c); + +- d_instantiate_new(dentry, inode); ++ if (!whiteout) ++ d_instantiate_new(dentry, inode); ++ else ++ unlock_new_inode(inode); ++ + return 0; + + fail: +@@ -762,6 +766,19 @@ static int jffs2_mknod (struct user_name + return ret; + } + ++static int jffs2_mknod (struct user_namespace *mnt_userns, struct inode *dir_i, ++ struct dentry *dentry, umode_t mode, dev_t rdev) ++{ ++ return __jffs2_mknod(mnt_userns, dir_i, dentry, mode, rdev, false); ++} ++ ++static int jffs2_whiteout (struct user_namespace *mnt_userns, struct inode *old_dir, ++ struct dentry *old_dentry) ++{ ++ return __jffs2_mknod(mnt_userns, old_dir, old_dentry, S_IFCHR | WHITEOUT_MODE, ++ WHITEOUT_DEV, true); ++} ++ + static int jffs2_rename (struct user_namespace *mnt_userns, + struct inode *old_dir_i, struct dentry *old_dentry, + struct inode *new_dir_i, struct dentry *new_dentry, +@@ -773,7 +790,7 @@ static int jffs2_rename (struct user_nam + uint8_t type; + uint32_t now; + +- if (flags & ~RENAME_NOREPLACE) ++ if (flags & ~(RENAME_NOREPLACE|RENAME_WHITEOUT)) + return -EINVAL; + + /* The VFS will check for us and prevent trying to rename a +@@ -839,9 +856,14 @@ static int jffs2_rename (struct user_nam + if (d_is_dir(old_dentry) && !victim_f) + inc_nlink(new_dir_i); + +- /* Unlink the original */ +- ret = jffs2_do_unlink(c, JFFS2_INODE_INFO(old_dir_i), +- old_dentry->d_name.name, old_dentry->d_name.len, NULL, now); ++ if (flags & RENAME_WHITEOUT) ++ /* Replace with whiteout */ ++ ret = jffs2_whiteout(mnt_userns, old_dir_i, old_dentry); ++ else ++ /* Unlink the original */ ++ ret = jffs2_do_unlink(c, JFFS2_INODE_INFO(old_dir_i), ++ old_dentry->d_name.name, ++ old_dentry->d_name.len, NULL, now); + + /* We don't touch inode->i_nlink */ + diff --git a/root/target/linux/generic/pending-5.15/141-jffs2-add-RENAME_EXCHANGE-support.patch b/root/target/linux/generic/pending-5.15/141-jffs2-add-RENAME_EXCHANGE-support.patch new file mode 100755 index 00000000..f58fc791 --- /dev/null +++ b/root/target/linux/generic/pending-5.15/141-jffs2-add-RENAME_EXCHANGE-support.patch @@ -0,0 +1,73 @@ +From: Felix Fietkau +Subject: jffs2: add RENAME_EXCHANGE support + +Signed-off-by: Felix Fietkau +--- + +--- a/fs/jffs2/dir.c ++++ b/fs/jffs2/dir.c +@@ -787,18 +787,31 @@ static int jffs2_rename (struct user_nam + int ret; + struct jffs2_sb_info *c = JFFS2_SB_INFO(old_dir_i->i_sb); + struct jffs2_inode_info *victim_f = NULL; ++ struct inode *fst_inode = d_inode(old_dentry); ++ struct inode *snd_inode = d_inode(new_dentry); + uint8_t type; + uint32_t now; + +- if (flags & ~(RENAME_NOREPLACE|RENAME_WHITEOUT)) ++ if (flags & 
~(RENAME_NOREPLACE|RENAME_WHITEOUT|RENAME_EXCHANGE)) + return -EINVAL; + ++ if ((flags & RENAME_EXCHANGE) && (old_dir_i != new_dir_i)) { ++ if (S_ISDIR(fst_inode->i_mode) && !S_ISDIR(snd_inode->i_mode)) { ++ inc_nlink(new_dir_i); ++ drop_nlink(old_dir_i); ++ } ++ else if (!S_ISDIR(fst_inode->i_mode) && S_ISDIR(snd_inode->i_mode)) { ++ drop_nlink(new_dir_i); ++ inc_nlink(old_dir_i); ++ } ++ } ++ + /* The VFS will check for us and prevent trying to rename a + * file over a directory and vice versa, but if it's a directory, + * the VFS can't check whether the victim is empty. The filesystem + * needs to do that for itself. + */ +- if (d_really_is_positive(new_dentry)) { ++ if (d_really_is_positive(new_dentry) && !(flags & RENAME_EXCHANGE)) { + victim_f = JFFS2_INODE_INFO(d_inode(new_dentry)); + if (d_is_dir(new_dentry)) { + struct jffs2_full_dirent *fd; +@@ -833,7 +846,7 @@ static int jffs2_rename (struct user_nam + if (ret) + return ret; + +- if (victim_f) { ++ if (victim_f && !(flags & RENAME_EXCHANGE)) { + /* There was a victim. Kill it off nicely */ + if (d_is_dir(new_dentry)) + clear_nlink(d_inode(new_dentry)); +@@ -859,6 +872,12 @@ static int jffs2_rename (struct user_nam + if (flags & RENAME_WHITEOUT) + /* Replace with whiteout */ + ret = jffs2_whiteout(mnt_userns, old_dir_i, old_dentry); ++ else if (flags & RENAME_EXCHANGE) ++ /* Replace the original */ ++ ret = jffs2_do_link(c, JFFS2_INODE_INFO(old_dir_i), ++ d_inode(new_dentry)->i_ino, type, ++ old_dentry->d_name.name, old_dentry->d_name.len, ++ now); + else + /* Unlink the original */ + ret = jffs2_do_unlink(c, JFFS2_INODE_INFO(old_dir_i), +@@ -890,7 +909,7 @@ static int jffs2_rename (struct user_nam + return ret; + } + +- if (d_is_dir(old_dentry)) ++ if (d_is_dir(old_dentry) && !(flags & RENAME_EXCHANGE)) + drop_nlink(old_dir_i); + + new_dir_i->i_mtime = new_dir_i->i_ctime = old_dir_i->i_mtime = old_dir_i->i_ctime = ITIME(now); diff --git a/root/target/linux/generic/pending-5.15/142-jffs2-add-splice-ops.patch b/root/target/linux/generic/pending-5.15/142-jffs2-add-splice-ops.patch new file mode 100755 index 00000000..de847a1f --- /dev/null +++ b/root/target/linux/generic/pending-5.15/142-jffs2-add-splice-ops.patch @@ -0,0 +1,20 @@ +From: Felix Fietkau +Subject: jffs2: add splice ops + +Add splice_read using generic_file_splice_read. 
+Add splice_write using iter_file_splice_write + +Signed-off-by: Felix Fietkau +--- + +--- a/fs/jffs2/file.c ++++ b/fs/jffs2/file.c +@@ -53,6 +53,8 @@ const struct file_operations jffs2_file_ + .open = generic_file_open, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, ++ .splice_read = generic_file_splice_read, ++ .splice_write = iter_file_splice_write, + .unlocked_ioctl=jffs2_ioctl, + .mmap = generic_file_readonly_mmap, + .fsync = jffs2_fsync, diff --git a/root/target/linux/generic/pending-5.15/150-bridge_allow_receiption_on_disabled_port.patch b/root/target/linux/generic/pending-5.15/150-bridge_allow_receiption_on_disabled_port.patch index bf97e987..2b9570e9 100755 --- a/root/target/linux/generic/pending-5.15/150-bridge_allow_receiption_on_disabled_port.patch +++ b/root/target/linux/generic/pending-5.15/150-bridge_allow_receiption_on_disabled_port.patch @@ -15,7 +15,7 @@ Signed-off-by: Felix Fietkau --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c -@@ -192,6 +192,9 @@ static void __br_handle_local_finish(str +@@ -197,6 +197,9 @@ static void __br_handle_local_finish(str /* note: already called with rcu_read_lock */ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -25,7 +25,7 @@ Signed-off-by: Felix Fietkau __br_handle_local_finish(skb); /* return 1 to signal the okfn() was called so it's ok to use the skb */ -@@ -360,6 +363,17 @@ static rx_handler_result_t br_handle_fra +@@ -362,6 +365,17 @@ static rx_handler_result_t br_handle_fra forward: switch (p->state) { diff --git a/root/target/linux/generic/pending-5.15/201-extra_optimization.patch b/root/target/linux/generic/pending-5.15/201-extra_optimization.patch index 8ca487f6..5300a5c4 100755 --- a/root/target/linux/generic/pending-5.15/201-extra_optimization.patch +++ b/root/target/linux/generic/pending-5.15/201-extra_optimization.patch @@ -14,7 +14,7 @@ Signed-off-by: Felix Fietkau --- a/Makefile +++ b/Makefile -@@ -763,11 +763,11 @@ KBUILD_CFLAGS += $(call cc-disable-warni +@@ -752,11 +752,11 @@ KBUILD_CFLAGS += $(call cc-disable-warni KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE diff --git a/root/target/linux/generic/pending-5.15/203-kallsyms_uncompressed.patch b/root/target/linux/generic/pending-5.15/203-kallsyms_uncompressed.patch index 6e8dea3e..370fc97e 100755 --- a/root/target/linux/generic/pending-5.15/203-kallsyms_uncompressed.patch +++ b/root/target/linux/generic/pending-5.15/203-kallsyms_uncompressed.patch @@ -13,7 +13,7 @@ Signed-off-by: Felix Fietkau --- a/init/Kconfig +++ b/init/Kconfig -@@ -1410,6 +1410,17 @@ config SYSCTL_ARCH_UNALIGN_ALLOW +@@ -1438,6 +1438,17 @@ config SYSCTL_ARCH_UNALIGN_ALLOW the unaligned access emulation. 
see arch/parisc/kernel/unaligned.c for reference @@ -106,7 +106,7 @@ Signed-off-by: Felix Fietkau } --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh -@@ -273,6 +273,10 @@ kallsyms() +@@ -260,6 +260,10 @@ kallsyms() kallsymopt="${kallsymopt} --base-relative" fi diff --git a/root/target/linux/generic/pending-5.15/270-platform-mikrotik-build-bits.patch b/root/target/linux/generic/pending-5.15/270-platform-mikrotik-build-bits.patch index 31f86f4a..99f83bb2 100755 --- a/root/target/linux/generic/pending-5.15/270-platform-mikrotik-build-bits.patch +++ b/root/target/linux/generic/pending-5.15/270-platform-mikrotik-build-bits.patch @@ -16,20 +16,16 @@ Signed-off-by: Thibaut VARÈNE --- a/drivers/platform/Kconfig +++ b/drivers/platform/Kconfig -@@ -12,6 +12,8 @@ source "drivers/platform/chrome/Kconfig" - - source "drivers/platform/mellanox/Kconfig" - -+source "drivers/platform/mikrotik/Kconfig" -+ +@@ -15,3 +15,5 @@ source "drivers/platform/mellanox/Kconfi source "drivers/platform/olpc/Kconfig" source "drivers/platform/surface/Kconfig" ++ ++source "drivers/platform/mikrotik/Kconfig" --- a/drivers/platform/Makefile +++ b/drivers/platform/Makefile -@@ -9,4 +9,5 @@ obj-$(CONFIG_MIPS) += mips/ - obj-$(CONFIG_OLPC_EC) += olpc/ +@@ -10,3 +10,4 @@ obj-$(CONFIG_OLPC_EC) += olpc/ obj-$(CONFIG_GOLDFISH) += goldfish/ obj-$(CONFIG_CHROME_PLATFORMS) += chrome/ -+obj-$(CONFIG_MIKROTIK) += mikrotik/ obj-$(CONFIG_SURFACE_PLATFORMS) += surface/ ++obj-$(CONFIG_MIKROTIK) += mikrotik/ diff --git a/root/target/linux/generic/pending-5.15/300-mips_expose_boot_raw.patch b/root/target/linux/generic/pending-5.15/300-mips_expose_boot_raw.patch index c94f40d1..092f00b5 100755 --- a/root/target/linux/generic/pending-5.15/300-mips_expose_boot_raw.patch +++ b/root/target/linux/generic/pending-5.15/300-mips_expose_boot_raw.patch @@ -9,7 +9,7 @@ Acked-by: Rob Landley --- --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig -@@ -1122,9 +1122,6 @@ config FW_ARC +@@ -1100,9 +1100,6 @@ config FW_ARC config ARCH_MAY_HAVE_PC_FDC bool @@ -19,7 +19,7 @@ Acked-by: Rob Landley config CEVT_BCM1480 bool -@@ -3199,6 +3196,18 @@ choice +@@ -3178,6 +3175,18 @@ choice bool "Extend builtin kernel arguments with bootloader arguments" endchoice diff --git a/root/target/linux/generic/pending-5.15/305-mips_module_reloc.patch b/root/target/linux/generic/pending-5.15/305-mips_module_reloc.patch index 13cd2d77..bbea1f61 100755 --- a/root/target/linux/generic/pending-5.15/305-mips_module_reloc.patch +++ b/root/target/linux/generic/pending-5.15/305-mips_module_reloc.patch @@ -165,6 +165,7 @@ Signed-off-by: Felix Fietkau + page++; + } while (free); +} ++ + void *module_alloc(unsigned long size) { @@ -299,7 +300,6 @@ Signed-off-by: Felix Fietkau + me->arch.virt_plt_tbl, v); + +} -+ + static int apply_r_mips_26(struct module *me, u32 *location, u32 base, Elf_Addr v) diff --git a/root/target/linux/generic/pending-5.15/309-MIPS-Add-CPU-option-reporting-to-proc-cpuinfo.patch b/root/target/linux/generic/pending-5.15/309-MIPS-Add-CPU-option-reporting-to-proc-cpuinfo.patch index 794f027f..318c0b1b 100755 --- a/root/target/linux/generic/pending-5.15/309-MIPS-Add-CPU-option-reporting-to-proc-cpuinfo.patch +++ b/root/target/linux/generic/pending-5.15/309-MIPS-Add-CPU-option-reporting-to-proc-cpuinfo.patch @@ -17,7 +17,7 @@ Signed-off-by: Hauke Mehrtens --- a/arch/mips/kernel/proc.c +++ b/arch/mips/kernel/proc.c -@@ -138,6 +138,120 @@ static int show_cpuinfo(struct seq_file +@@ -138,6 +138,116 @@ static int show_cpuinfo(struct seq_file seq_printf(m, "micromips 
kernel\t: %s\n", (read_c0_config3() & MIPS_CONF3_ISA_OE) ? "yes" : "no"); } @@ -45,10 +45,6 @@ Signed-off-by: Hauke Mehrtens + seq_printf(m, "%s", " 3k_cache"); + if (cpu_has_4k_cache) + seq_printf(m, "%s", " 4k_cache"); -+ if (cpu_has_6k_cache) -+ seq_printf(m, "%s", " 6k_cache"); -+ if (cpu_has_8k_cache) -+ seq_printf(m, "%s", " 8k_cache"); + if (cpu_has_tx39_cache) + seq_printf(m, "%s", " tx39_cache"); + if (cpu_has_octeon_cache) diff --git a/root/target/linux/generic/pending-5.15/330-MIPS-kexec-Accept-command-line-parameters-from-users.patch b/root/target/linux/generic/pending-5.15/330-MIPS-kexec-Accept-command-line-parameters-from-users.patch index da50880d..2880d3ee 100755 --- a/root/target/linux/generic/pending-5.15/330-MIPS-kexec-Accept-command-line-parameters-from-users.patch +++ b/root/target/linux/generic/pending-5.15/330-MIPS-kexec-Accept-command-line-parameters-from-users.patch @@ -238,7 +238,7 @@ Signed-off-by: Yousong Zhou +#endif --- a/arch/mips/kernel/relocate_kernel.S +++ b/arch/mips/kernel/relocate_kernel.S -@@ -10,10 +10,12 @@ +@@ -10,10 +10,11 @@ #include #include #include @@ -248,11 +248,10 @@ Signed-off-by: Yousong Zhou -LEAF(relocate_new_kernel) +LEAF(kexec_relocate_new_kernel) -+ PTR_L a0, arg0 PTR_L a1, arg1 PTR_L a2, arg2 -@@ -98,7 +100,7 @@ done: +@@ -98,7 +99,7 @@ done: #endif /* jump to kexec_start_address */ j s1 @@ -261,7 +260,7 @@ Signed-off-by: Yousong Zhou #ifdef CONFIG_SMP /* -@@ -181,9 +183,15 @@ kexec_indirection_page: +@@ -181,9 +182,15 @@ kexec_indirection_page: PTR 0 .size kexec_indirection_page, PTRSIZE diff --git a/root/target/linux/generic/pending-5.15/342-powerpc-Enable-kernel-XZ-compression-option-on-PPC_8.patch b/root/target/linux/generic/pending-5.15/342-powerpc-Enable-kernel-XZ-compression-option-on-PPC_8.patch index de1e0fc1..cb9f2952 100755 --- a/root/target/linux/generic/pending-5.15/342-powerpc-Enable-kernel-XZ-compression-option-on-PPC_8.patch +++ b/root/target/linux/generic/pending-5.15/342-powerpc-Enable-kernel-XZ-compression-option-on-PPC_8.patch @@ -14,7 +14,7 @@ Signed-off-by: Pawel Dembicki --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig -@@ -226,7 +226,7 @@ config PPC +@@ -222,7 +222,7 @@ config PPC select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZMA if DEFAULT_UIMAGE select HAVE_KERNEL_LZO if DEFAULT_UIMAGE diff --git a/root/target/linux/generic/pending-5.15/420-mtd-redboot_space.patch b/root/target/linux/generic/pending-5.15/420-mtd-redboot_space.patch new file mode 100755 index 00000000..fee19365 --- /dev/null +++ b/root/target/linux/generic/pending-5.15/420-mtd-redboot_space.patch @@ -0,0 +1,41 @@ +From: Felix Fietkau +Subject: add patch for including unpartitioned space in the rootfs partition for redboot devices (if applicable) + +[john@phrozen.org: used by ixp and others] + +lede-commit: 394918851f84e4d00fa16eb900e7700e95091f00 +Signed-off-by: Felix Fietkau +--- + drivers/mtd/redboot.c | 19 +++++++++++++------ + 1 file changed, 13 insertions(+), 6 deletions(-) + +--- a/drivers/mtd/parsers/redboot.c ++++ b/drivers/mtd/parsers/redboot.c +@@ -277,14 +277,21 @@ nogood: + #endif + names += strlen(names) + 1; + +-#ifdef CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED + if (fl->next && fl->img->flash_base + fl->img->size + master->erasesize <= fl->next->img->flash_base) { +- i++; +- parts[i].offset = parts[i - 1].size + parts[i - 1].offset; +- parts[i].size = fl->next->img->flash_base - parts[i].offset; +- parts[i].name = nullname; +- } ++ if (!strcmp(parts[i].name, "rootfs")) { ++ parts[i].size = fl->next->img->flash_base; ++ 
parts[i].size &= ~(master->erasesize - 1); ++ parts[i].size -= parts[i].offset; ++#ifdef CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED ++ nrparts--; ++ } else { ++ i++; ++ parts[i].offset = parts[i-1].size + parts[i-1].offset; ++ parts[i].size = fl->next->img->flash_base - parts[i].offset; ++ parts[i].name = nullname; + #endif ++ } ++ } + tmp_fl = fl; + fl = fl->next; + kfree(tmp_fl); diff --git a/root/target/linux/generic/pending-5.15/435-mtd-add-routerbootpart-parser-config.patch b/root/target/linux/generic/pending-5.15/435-mtd-add-routerbootpart-parser-config.patch index b5384673..721a4d3a 100755 --- a/root/target/linux/generic/pending-5.15/435-mtd-add-routerbootpart-parser-config.patch +++ b/root/target/linux/generic/pending-5.15/435-mtd-add-routerbootpart-parser-config.patch @@ -16,10 +16,11 @@ Signed-off-by: Thibaut VARÈNE --- a/drivers/mtd/parsers/Kconfig +++ b/drivers/mtd/parsers/Kconfig -@@ -196,6 +196,15 @@ config MTD_REDBOOT_PARTS_READONLY - - endif # MTD_REDBOOT_PARTS - +@@ -202,3 +202,12 @@ config MTD_QCOMSMEM_PARTS + help + This provides support for parsing partitions from Shared Memory (SMEM) + for NAND and SPI flash on Qualcomm platforms. ++ +config MTD_ROUTERBOOT_PARTS + tristate "RouterBoot flash partition parser" + depends on MTD && OF @@ -28,15 +29,10 @@ Signed-off-by: Thibaut VARÈNE + flash, some of which are fixed and some of which are located at + variable offsets. This parser handles both cases via properly + formatted DTS. -+ - config MTD_QCOMSMEM_PARTS - tristate "Qualcomm SMEM flash partition parser" - depends on QCOM_SMEM --- a/drivers/mtd/parsers/Makefile +++ b/drivers/mtd/parsers/Makefile -@@ -13,4 +13,5 @@ obj-$(CONFIG_MTD_AFS_PARTS) += afs.o - obj-$(CONFIG_MTD_PARSER_TRX) += parser_trx.o +@@ -14,3 +14,4 @@ obj-$(CONFIG_MTD_PARSER_TRX) += parser_ obj-$(CONFIG_MTD_SHARPSL_PARTS) += sharpslpart.o obj-$(CONFIG_MTD_REDBOOT_PARTS) += redboot.o -+obj-$(CONFIG_MTD_ROUTERBOOT_PARTS) += routerbootpart.o obj-$(CONFIG_MTD_QCOMSMEM_PARTS) += qcomsmempart.o ++obj-$(CONFIG_MTD_ROUTERBOOT_PARTS) += routerbootpart.o diff --git a/root/target/linux/generic/pending-5.15/481-mtd-spi-nor-rework-broken-flash-reset-support.patch b/root/target/linux/generic/pending-5.15/481-mtd-spi-nor-rework-broken-flash-reset-support.patch new file mode 100755 index 00000000..8fb46b90 --- /dev/null +++ b/root/target/linux/generic/pending-5.15/481-mtd-spi-nor-rework-broken-flash-reset-support.patch @@ -0,0 +1,182 @@ +From ea92cbb50a78404e29de2cc3999a240615ffb1c8 Mon Sep 17 00:00:00 2001 +From: Chuanhong Guo +Date: Mon, 6 Apr 2020 17:58:48 +0800 +Subject: [PATCH] mtd: spi-nor: rework broken-flash-reset support + +Instead of resetting the flash to 3B address mode in the remove hook, +this implementation only enters 4B mode when needed, which prevents +more devices from getting stuck after an unexpected reboot. This +implementation only breaks when a kernel panic happens during a flash +operation on a 16M+ area. +*OpenWrt only*: silence the broken-flash-reset warning. We are not +dealing with vendors and it's unpleasant for users to see that +unnecessary and long WARN_ON print.
+ +Signed-off-by: Chuanhong Guo +--- + drivers/mtd/spi-nor/spi-nor.c | 52 +++++++++++++++++++++++++++++++++-- + 1 file changed, 49 insertions(+), 3 deletions(-) + +--- a/drivers/mtd/spi-nor/core.c ++++ b/drivers/mtd/spi-nor/core.c +@@ -1637,6 +1637,23 @@ destroy_erase_cmd_list: + return ret; + } + ++int spi_nor_check_set_addr_width(struct spi_nor *nor, loff_t addr) ++{ ++ u8 addr_width; ++ ++ if ((nor->flags & (SNOR_F_4B_OPCODES | SNOR_F_BROKEN_RESET)) != ++ SNOR_F_BROKEN_RESET) ++ return 0; ++ ++ addr_width = addr & 0xff000000 ? 4 : 3; ++ if (nor->addr_width == addr_width) ++ return 0; ++ ++ nor->addr_width = addr_width; ++ ++ return nor->params->set_4byte_addr_mode(nor, addr_width == 4); ++} ++ + /* + * Erase an address range on the nor chip. The address range may extend + * one or more erase sectors. Return an error if there is a problem erasing. +@@ -1664,6 +1681,10 @@ static int spi_nor_erase(struct mtd_info + if (ret) + return ret; + ++ ret = spi_nor_check_set_addr_width(nor, instr->addr + instr->len); ++ if (ret < 0) ++ return ret; ++ + /* whole-chip erase? */ + if (len == mtd->size && !(nor->flags & SNOR_F_NO_OP_CHIP_ERASE)) { + unsigned long timeout; +@@ -1723,6 +1744,7 @@ static int spi_nor_erase(struct mtd_info + ret = spi_nor_write_disable(nor); + + erase_err: ++ spi_nor_check_set_addr_width(nor, 0); + spi_nor_unlock_and_unprep(nor); + + return ret; +@@ -1915,6 +1937,10 @@ static int spi_nor_read(struct mtd_info + if (ret) + return ret; + ++ ret = spi_nor_check_set_addr_width(nor, from + len); ++ if (ret < 0) ++ return ret; ++ + while (len) { + loff_t addr = from; + +@@ -1938,6 +1964,7 @@ static int spi_nor_read(struct mtd_info + ret = 0; + + read_err: ++ spi_nor_check_set_addr_width(nor, 0); + spi_nor_unlock_and_unprep(nor); + return ret; + } +@@ -1960,6 +1987,10 @@ static int spi_nor_write(struct mtd_info + if (ret) + return ret; + ++ ret = spi_nor_check_set_addr_width(nor, to + len); ++ if (ret < 0) ++ return ret; ++ + for (i = 0; i < len; ) { + ssize_t written; + loff_t addr = to + i; +@@ -1999,6 +2030,7 @@ static int spi_nor_write(struct mtd_info + } + + write_err: ++ spi_nor_check_set_addr_width(nor, 0); + spi_nor_unlock_and_unprep(nor); + return ret; + } +@@ -2846,9 +2878,13 @@ static int spi_nor_init(struct spi_nor * + * reboots (e.g., crashes). Warn the user (or hopefully, system + * designer) that this is bad. 
+ */ +- WARN_ONCE(nor->flags & SNOR_F_BROKEN_RESET, +- "enabling reset hack; may not recover from unexpected reboots\n"); +- nor->params->set_4byte_addr_mode(nor, true); ++ if (nor->flags & SNOR_F_BROKEN_RESET) { ++ dev_warn(nor->dev, ++ "enabling reset hack; may not recover from unexpected reboots\n"); ++ nor->addr_width = 3; ++ } else { ++ nor->params->set_4byte_addr_mode(nor, true); ++ } + } + + return 0; +--- a/drivers/mtd/spi-nor/swp.c ++++ b/drivers/mtd/spi-nor/swp.c +@@ -352,7 +352,9 @@ static int spi_nor_lock(struct mtd_info + if (ret) + return ret; + ++ spi_nor_check_set_addr_width(nor, ofs + len); + ret = nor->params->locking_ops->lock(nor, ofs, len); ++ spi_nor_check_set_addr_width(nor, 0); + + spi_nor_unlock_and_unprep(nor); + return ret; +@@ -367,7 +369,9 @@ static int spi_nor_unlock(struct mtd_inf + if (ret) + return ret; + ++ spi_nor_check_set_addr_width(nor, ofs + len); + ret = nor->params->locking_ops->unlock(nor, ofs, len); ++ spi_nor_check_set_addr_width(nor, 0); + + spi_nor_unlock_and_unprep(nor); + return ret; +@@ -382,7 +386,9 @@ static int spi_nor_is_locked(struct mtd_ + if (ret) + return ret; + ++ spi_nor_check_set_addr_width(nor, ofs + len); + ret = nor->params->locking_ops->is_locked(nor, ofs, len); ++ spi_nor_check_set_addr_width(nor, 0); + + spi_nor_unlock_and_unprep(nor); + return ret; +--- a/drivers/mtd/spi-nor/core.h ++++ b/drivers/mtd/spi-nor/core.h +@@ -496,6 +496,7 @@ extern const struct attribute_group *spi + void spi_nor_spimem_setup_op(const struct spi_nor *nor, + struct spi_mem_op *op, + const enum spi_nor_protocol proto); ++int spi_nor_check_set_addr_width(struct spi_nor *nor, loff_t addr); + int spi_nor_write_enable(struct spi_nor *nor); + int spi_nor_write_disable(struct spi_nor *nor); + int spi_nor_set_4byte_addr_mode(struct spi_nor *nor, bool enable); +--- a/drivers/mtd/spi-nor/sst.c ++++ b/drivers/mtd/spi-nor/sst.c +@@ -104,6 +104,10 @@ static int sst_write(struct mtd_info *mt + if (ret) + return ret; + ++ ret = spi_nor_check_set_addr_width(nor, to + len); ++ if (ret < 0) ++ return ret; ++ + ret = spi_nor_write_enable(nor); + if (ret) + goto out; +@@ -173,6 +177,7 @@ static int sst_write(struct mtd_info *mt + } + out: + *retlen += actual; ++ spi_nor_check_set_addr_width(nor, 0); + spi_nor_unlock_and_unprep(nor); + return ret; + } diff --git a/root/target/linux/generic/pending-5.15/483-mtd-spinand-add-support-for-xtx-xt26g0xa.patch b/root/target/linux/generic/pending-5.15/483-mtd-spinand-add-support-for-xtx-xt26g0xa.patch new file mode 100755 index 00000000..4672f8c8 --- /dev/null +++ b/root/target/linux/generic/pending-5.15/483-mtd-spinand-add-support-for-xtx-xt26g0xa.patch @@ -0,0 +1,178 @@ +From a07e31adf2753cad2fd9790db5bfc047c81e8152 Mon Sep 17 00:00:00 2001 +From: Felix Matouschek +Date: Fri, 2 Jul 2021 20:31:23 +0200 +Subject: [PATCH] mtd: spinand: Add support for XTX XT26G0xA + +Add support for XTX Technology XT26G01AXXXXX, XT26G02AXXXXX and +XT26G04AXXXXX SPI NAND. + +These are 3V, 1G/2G/4Gbit serial SLC NAND flash devices with on-die ECC +(8bit strength per 512bytes). + +Tested on Teltonika RUTX10 flashed with OpenWrt.
+ +Datasheets available at +http://www.xtxtech.com/download/?AId=225 +https://datasheet.lcsc.com/szlcsc/2005251034_XTX-XT26G01AWSEGA_C558841.pdf + +Signed-off-by: Felix Matouschek +--- + drivers/mtd/nand/spi/Makefile | 2 +- + drivers/mtd/nand/spi/core.c | 1 + + drivers/mtd/nand/spi/xtx.c | 122 ++++++++++++++++++++++++++++++++++ + include/linux/mtd/spinand.h | 1 + + 4 files changed, 125 insertions(+), 1 deletion(-) + create mode 100644 drivers/mtd/nand/spi/xtx.c + +--- a/drivers/mtd/nand/spi/Makefile ++++ b/drivers/mtd/nand/spi/Makefile +@@ -1,3 +1,3 @@ + # SPDX-License-Identifier: GPL-2.0 +-spinand-objs := core.o gigadevice.o macronix.o micron.o paragon.o toshiba.o winbond.o ++spinand-objs := core.o gigadevice.o macronix.o micron.o paragon.o toshiba.o winbond.o xtx.o + obj-$(CONFIG_MTD_SPI_NAND) += spinand.o +--- a/drivers/mtd/nand/spi/core.c ++++ b/drivers/mtd/nand/spi/core.c +@@ -902,6 +902,7 @@ static const struct spinand_manufacturer + ¶gon_spinand_manufacturer, + &toshiba_spinand_manufacturer, + &winbond_spinand_manufacturer, ++ &xtx_spinand_manufacturer, + }; + + static int spinand_manufacturer_match(struct spinand_device *spinand, +--- /dev/null ++++ b/drivers/mtd/nand/spi/xtx.c +@@ -0,0 +1,122 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Author: ++ * Felix Matouschek ++ */ ++ ++#include ++#include ++#include ++ ++#define SPINAND_MFR_XTX 0x0B ++ ++#define XT26G0XA_STATUS_ECC_MASK GENMASK(5, 2) ++#define XT26G0XA_STATUS_ECC_NO_DETECTED (0 << 2) ++#define XT26G0XA_STATUS_ECC_8_CORRECTED (3 << 4) ++#define XT26G0XA_STATUS_ECC_UNCOR_ERROR (2 << 4) ++ ++static SPINAND_OP_VARIANTS(read_cache_variants, ++ SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 1, NULL, 0), ++ SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0), ++ SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 1, NULL, 0), ++ SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0), ++ SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0), ++ SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0)); ++ ++static SPINAND_OP_VARIANTS(write_cache_variants, ++ SPINAND_PROG_LOAD_X4(true, 0, NULL, 0), ++ SPINAND_PROG_LOAD(true, 0, NULL, 0)); ++ ++static SPINAND_OP_VARIANTS(update_cache_variants, ++ SPINAND_PROG_LOAD_X4(false, 0, NULL, 0), ++ SPINAND_PROG_LOAD(false, 0, NULL, 0)); ++ ++static int xt26g0xa_ooblayout_ecc(struct mtd_info *mtd, int section, ++ struct mtd_oob_region *region) ++{ ++ if (section) ++ return -ERANGE; ++ ++ region->offset = 8; ++ region->length = 40; ++ ++ return 0; ++} ++ ++static int xt26g0xa_ooblayout_free(struct mtd_info *mtd, int section, ++ struct mtd_oob_region *region) ++{ ++ if (section) ++ return -ERANGE; ++ ++ region->offset = 1; ++ region->length = 7; ++ ++ return 0; ++} ++ ++static const struct mtd_ooblayout_ops xt26g0xa_ooblayout = { ++ .ecc = xt26g0xa_ooblayout_ecc, ++ .free = xt26g0xa_ooblayout_free, ++}; ++ ++static int xt26g0xa_ecc_get_status(struct spinand_device *spinand, ++ u8 status) ++{ ++ switch (status & XT26G0XA_STATUS_ECC_MASK) { ++ case XT26G0XA_STATUS_ECC_NO_DETECTED: ++ return 0; ++ case XT26G0XA_STATUS_ECC_8_CORRECTED: ++ return 8; ++ case XT26G0XA_STATUS_ECC_UNCOR_ERROR: ++ return -EBADMSG; ++ default: /* (1 << 2) through (7 << 2) are 1-7 corrected errors */ ++ return (status & XT26G0XA_STATUS_ECC_MASK) >> 2; ++ } ++ ++ return -EINVAL; ++} ++ ++static const struct spinand_info xtx_spinand_table[] = { ++ SPINAND_INFO("XT26G01A", ++ SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0xE1), ++ NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1), ++ NAND_ECCREQ(8, 512), ++ 
SPINAND_INFO_OP_VARIANTS(&read_cache_variants, ++ &write_cache_variants, ++ &update_cache_variants), ++ SPINAND_HAS_QE_BIT, ++ SPINAND_ECCINFO(&xt26g0xa_ooblayout, ++ xt26g0xa_ecc_get_status)), ++ SPINAND_INFO("XT26G02A", ++ SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0xE2), ++ NAND_MEMORG(1, 2048, 64, 64, 2048, 40, 1, 1, 1), ++ NAND_ECCREQ(8, 512), ++ SPINAND_INFO_OP_VARIANTS(&read_cache_variants, ++ &write_cache_variants, ++ &update_cache_variants), ++ SPINAND_HAS_QE_BIT, ++ SPINAND_ECCINFO(&xt26g0xa_ooblayout, ++ xt26g0xa_ecc_get_status)), ++ SPINAND_INFO("XT26G04A", ++ SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0xE3), ++ NAND_MEMORG(1, 2048, 64, 128, 2048, 40, 1, 1, 1), ++ NAND_ECCREQ(8, 512), ++ SPINAND_INFO_OP_VARIANTS(&read_cache_variants, ++ &write_cache_variants, ++ &update_cache_variants), ++ SPINAND_HAS_QE_BIT, ++ SPINAND_ECCINFO(&xt26g0xa_ooblayout, ++ xt26g0xa_ecc_get_status)), ++}; ++ ++static const struct spinand_manufacturer_ops xtx_spinand_manuf_ops = { ++}; ++ ++const struct spinand_manufacturer xtx_spinand_manufacturer = { ++ .id = SPINAND_MFR_XTX, ++ .name = "XTX", ++ .chips = xtx_spinand_table, ++ .nchips = ARRAY_SIZE(xtx_spinand_table), ++ .ops = &xtx_spinand_manuf_ops, ++}; +--- a/include/linux/mtd/spinand.h ++++ b/include/linux/mtd/spinand.h +@@ -266,6 +266,7 @@ extern const struct spinand_manufacturer + extern const struct spinand_manufacturer paragon_spinand_manufacturer; + extern const struct spinand_manufacturer toshiba_spinand_manufacturer; + extern const struct spinand_manufacturer winbond_spinand_manufacturer; ++extern const struct spinand_manufacturer xtx_spinand_manufacturer; + + /** + * struct spinand_op_variants - SPI NAND operation variants diff --git a/root/target/linux/generic/pending-5.15/484-mtd-spi-nor-add-esmt-f25l16pa.patch b/root/target/linux/generic/pending-5.15/484-mtd-spi-nor-add-esmt-f25l16pa.patch new file mode 100755 index 00000000..b2f0b92a --- /dev/null +++ b/root/target/linux/generic/pending-5.15/484-mtd-spi-nor-add-esmt-f25l16pa.patch @@ -0,0 +1,11 @@ +--- a/drivers/mtd/spi-nor/esmt.c ++++ b/drivers/mtd/spi-nor/esmt.c +@@ -10,6 +10,8 @@ + + static const struct flash_info esmt_parts[] = { + /* ESMT */ ++ { "f25l16pa-2s", INFO(0x8c2115, 0, 64 * 1024, 32, ++ SECT_4K | SPI_NOR_HAS_LOCK) }, + { "f25l32pa", INFO(0x8c2016, 0, 64 * 1024, 64, + SECT_4K | SPI_NOR_HAS_LOCK | SPI_NOR_SWP_IS_VOLATILE) }, + { "f25l32qa", INFO(0x8c4116, 0, 64 * 1024, 64, diff --git a/root/target/linux/generic/pending-5.15/492-try-auto-mounting-ubi0-rootfs-in-init-do_mounts.c.patch b/root/target/linux/generic/pending-5.15/492-try-auto-mounting-ubi0-rootfs-in-init-do_mounts.c.patch new file mode 100755 index 00000000..c27aee8f --- /dev/null +++ b/root/target/linux/generic/pending-5.15/492-try-auto-mounting-ubi0-rootfs-in-init-do_mounts.c.patch @@ -0,0 +1,51 @@ +From: Daniel Golle +Subject: try auto-mounting ubi0:rootfs in init/do_mounts.c + +Signed-off-by: Daniel Golle +--- + init/do_mounts.c | 26 +++++++++++++++++++++++++- + 1 file changed, 25 insertions(+), 1 deletion(-) + +--- a/init/do_mounts.c ++++ b/init/do_mounts.c +@@ -447,7 +447,28 @@ retry: + out: + put_page(page); + } +- ++ ++static int __init mount_ubi_rootfs(void) ++{ ++ int flags = MS_SILENT; ++ int err, tried = 0; ++ ++ while (tried < 2) { ++ err = do_mount_root("ubi0:rootfs", "ubifs", flags, \ ++ root_mount_data); ++ switch (err) { ++ case -EACCES: ++ flags |= MS_RDONLY; ++ tried++; ++ break; ++ default: ++ return err; ++ } ++ } ++ ++ return -EINVAL; ++} ++ + #ifdef CONFIG_ROOT_NFS + + #define 
NFSROOT_TIMEOUT_MIN 5 +@@ -580,6 +601,10 @@ void __init mount_root(void) + return; + } + #endif ++#ifdef CONFIG_MTD_ROOTFS_ROOT_DEV ++ if (!mount_ubi_rootfs()) ++ return; ++#endif + if (ROOT_DEV == 0 && root_device_name && root_fs_names) { + if (mount_nodev_root() == 0) + return; diff --git a/root/target/linux/generic/pending-5.15/497-mtd-mtdconcat-add-dt-driver-for-concat-devices.patch b/root/target/linux/generic/pending-5.15/497-mtd-mtdconcat-add-dt-driver-for-concat-devices.patch index 3f236228..32168015 100755 --- a/root/target/linux/generic/pending-5.15/497-mtd-mtdconcat-add-dt-driver-for-concat-devices.patch +++ b/root/target/linux/generic/pending-5.15/497-mtd-mtdconcat-add-dt-driver-for-concat-devices.patch @@ -43,7 +43,7 @@ Signed-off-by: Bernhard Frauendienst --- a/drivers/mtd/Kconfig +++ b/drivers/mtd/Kconfig -@@ -239,4 +239,6 @@ source "drivers/mtd/ubi/Kconfig" +@@ -241,4 +241,6 @@ source "drivers/mtd/ubi/Kconfig" source "drivers/mtd/hyperbus/Kconfig" diff --git a/root/target/linux/generic/pending-5.15/530-jffs2_make_lzma_available.patch b/root/target/linux/generic/pending-5.15/530-jffs2_make_lzma_available.patch index cf2ab71d..01aba80e 100755 --- a/root/target/linux/generic/pending-5.15/530-jffs2_make_lzma_available.patch +++ b/root/target/linux/generic/pending-5.15/530-jffs2_make_lzma_available.patch @@ -1087,7 +1087,7 @@ Signed-off-by: Alexandros C. Couloumbis #define JFFS2_NODE_ACCURATE 0x2000 --- a/lib/Kconfig +++ b/lib/Kconfig -@@ -330,6 +330,12 @@ config ZSTD_DECOMPRESS +@@ -336,6 +336,12 @@ config ZSTD_DECOMPRESS source "lib/xz/Kconfig" diff --git a/root/target/linux/generic/pending-5.15/600-netfilter_conntrack_flush.patch b/root/target/linux/generic/pending-5.15/600-netfilter_conntrack_flush.patch index f4b815c8..a88e3d7d 100755 --- a/root/target/linux/generic/pending-5.15/600-netfilter_conntrack_flush.patch +++ b/root/target/linux/generic/pending-5.15/600-netfilter_conntrack_flush.patch @@ -17,7 +17,7 @@ Signed-off-by: Felix Fietkau #include #ifdef CONFIG_SYSCTL #include -@@ -459,6 +460,56 @@ static int ct_cpu_seq_show(struct seq_fi +@@ -462,6 +463,56 @@ static int ct_cpu_seq_show(struct seq_fi return 0; } @@ -74,7 +74,7 @@ Signed-off-by: Felix Fietkau static const struct seq_operations ct_cpu_seq_ops = { .start = ct_cpu_seq_start, .next = ct_cpu_seq_next, -@@ -472,8 +523,9 @@ static int nf_conntrack_standalone_init_ +@@ -475,8 +526,9 @@ static int nf_conntrack_standalone_init_ kuid_t root_uid; kgid_t root_gid; diff --git a/root/target/linux/generic/pending-5.15/613-netfilter_optional_tcp_window_check.patch b/root/target/linux/generic/pending-5.15/613-netfilter_optional_tcp_window_check.patch index 53abd9f7..249c3c8a 100755 --- a/root/target/linux/generic/pending-5.15/613-netfilter_optional_tcp_window_check.patch +++ b/root/target/linux/generic/pending-5.15/613-netfilter_optional_tcp_window_check.patch @@ -8,62 +8,42 @@ Signed-off-by: Felix Fietkau --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c -@@ -31,6 +31,9 @@ - #include - #include - -+/* Do not check the TCP window for incoming packets */ -+static int nf_ct_tcp_no_window_check __read_mostly = 1; -+ - /* FIXME: Examine ipfilter's timeouts and conntrack transitions more - closely. They're more complex. 
--RR */ - -@@ -465,6 +468,9 @@ static bool tcp_in_window(struct nf_conn +@@ -465,6 +465,9 @@ static bool tcp_in_window(struct nf_conn s32 receiver_offset; bool res, in_recv_win; -+ if (nf_ct_tcp_no_window_check) ++ if (net->ct.sysctl_no_window_check) + return true; + /* * Get the required data from the packet. */ -@@ -1151,7 +1157,7 @@ int nf_conntrack_tcp_packet(struct nf_co +@@ -1151,7 +1154,7 @@ int nf_conntrack_tcp_packet(struct nf_co IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK]) timeout = timeouts[TCP_CONNTRACK_UNACK]; - else if (ct->proto.tcp.last_win == 0 && -+ else if (!nf_ct_tcp_no_window_check && ct->proto.tcp.last_win == 0 && ++ else if (!net->ct.sysctl_no_window_check && ct->proto.tcp.last_win == 0 && timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) timeout = timeouts[TCP_CONNTRACK_RETRANS]; else --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c -@@ -25,6 +25,9 @@ - #include - #include - -+/* Do not check the TCP window for incoming packets */ -+static int nf_ct_tcp_no_window_check __read_mostly = 1; -+ - static bool enable_hooks __read_mostly; - MODULE_PARM_DESC(enable_hooks, "Always enable conntrack hooks"); - module_param(enable_hooks, bool, 0000); -@@ -665,6 +668,7 @@ enum nf_ct_sysctl_index { - NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM, +@@ -671,6 +671,7 @@ enum nf_ct_sysctl_index { + NF_SYSCTL_CT_LWTUNNEL, #endif + NF_SYSCTL_CT_PROTO_TCP_NO_WINDOW_CHECK, __NF_SYSCTL_CT_LAST_SYSCTL, }; -@@ -1011,6 +1015,13 @@ static struct ctl_table nf_ct_sysctl_tab - .proc_handler = proc_dointvec_jiffies, +@@ -1026,6 +1027,13 @@ static struct ctl_table nf_ct_sysctl_tab + .proc_handler = nf_hooks_lwtunnel_sysctl_handler, }, #endif + [NF_SYSCTL_CT_PROTO_TCP_NO_WINDOW_CHECK] = { + .procname = "nf_conntrack_tcp_no_window_check", -+ .data = &nf_ct_tcp_no_window_check, ++ .data = &init_net.ct.sysctl_no_window_check, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, @@ -71,3 +51,29 @@ Signed-off-by: Felix Fietkau {} }; +@@ -1153,6 +1161,7 @@ static int nf_conntrack_standalone_init_ + #ifdef CONFIG_NF_CONNTRACK_EVENTS + table[NF_SYSCTL_CT_EVENTS].data = &net->ct.sysctl_events; + #endif ++ table[NF_SYSCTL_CT_PROTO_TCP_NO_WINDOW_CHECK].data = &net->ct.sysctl_no_window_check; + #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + table[NF_SYSCTL_CT_TIMESTAMP].data = &net->ct.sysctl_tstamp; + #endif +@@ -1222,6 +1231,7 @@ static int nf_conntrack_pernet_init(stru + int ret; + + net->ct.sysctl_checksum = 1; ++ net->ct.sysctl_no_window_check = 1; + + ret = nf_conntrack_standalone_init_sysctl(net); + if (ret < 0) +--- a/include/net/netns/conntrack.h ++++ b/include/net/netns/conntrack.h +@@ -109,6 +109,7 @@ struct netns_ct { + u8 sysctl_auto_assign_helper; + u8 sysctl_tstamp; + u8 sysctl_checksum; ++ u8 sysctl_no_window_check; + + struct ct_pcpu __percpu *pcpu_lists; + struct ip_conntrack_stat __percpu *stat; diff --git a/root/target/linux/generic/pending-5.15/630-packet_socket_type.patch b/root/target/linux/generic/pending-5.15/630-packet_socket_type.patch index 92db60b8..7f646b5a 100755 --- a/root/target/linux/generic/pending-5.15/630-packet_socket_type.patch +++ b/root/target/linux/generic/pending-5.15/630-packet_socket_type.patch @@ -30,7 +30,7 @@ Signed-off-by: Felix Fietkau #define PACKET_FANOUT_LB 1 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c -@@ -1820,6 +1820,7 @@ static int packet_rcv_spkt(struct sk_buf +@@ -1821,6 +1821,7 @@ static int packet_rcv_spkt(struct sk_buf { struct 
sock *sk; struct sockaddr_pkt *spkt; @@ -38,7 +38,7 @@ Signed-off-by: Felix Fietkau /* * When we registered the protocol we saved the socket in the data -@@ -1827,6 +1828,7 @@ static int packet_rcv_spkt(struct sk_buf +@@ -1828,6 +1829,7 @@ static int packet_rcv_spkt(struct sk_buf */ sk = pt->af_packet_priv; @@ -46,7 +46,7 @@ Signed-off-by: Felix Fietkau /* * Yank back the headers [hope the device set this -@@ -1839,7 +1841,7 @@ static int packet_rcv_spkt(struct sk_buf +@@ -1840,7 +1842,7 @@ static int packet_rcv_spkt(struct sk_buf * so that this procedure is noop. */ @@ -55,7 +55,7 @@ Signed-off-by: Felix Fietkau goto out; if (!net_eq(dev_net(dev), sock_net(sk))) -@@ -2077,12 +2079,12 @@ static int packet_rcv(struct sk_buff *sk +@@ -2078,12 +2080,12 @@ static int packet_rcv(struct sk_buff *sk unsigned int snaplen, res; bool is_drop_n_account = false; @@ -71,7 +71,7 @@ Signed-off-by: Felix Fietkau if (!net_eq(dev_net(dev), sock_net(sk))) goto drop; -@@ -2208,12 +2210,12 @@ static int tpacket_rcv(struct sk_buff *s +@@ -2209,12 +2211,12 @@ static int tpacket_rcv(struct sk_buff *s BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h2)) != 32); BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h3)) != 48); @@ -87,7 +87,7 @@ Signed-off-by: Felix Fietkau if (!net_eq(dev_net(dev), sock_net(sk))) goto drop; -@@ -3320,6 +3322,7 @@ static int packet_create(struct net *net +@@ -3318,6 +3320,7 @@ static int packet_create(struct net *net mutex_init(&po->pg_vec_lock); po->rollover = NULL; po->prot_hook.func = packet_rcv; @@ -95,7 +95,7 @@ Signed-off-by: Felix Fietkau if (sock->type == SOCK_PACKET) po->prot_hook.func = packet_rcv_spkt; -@@ -3953,6 +3956,16 @@ packet_setsockopt(struct socket *sock, i +@@ -3951,6 +3954,16 @@ packet_setsockopt(struct socket *sock, i po->xmit = val ? packet_direct_xmit : dev_queue_xmit; return 0; } @@ -112,7 +112,7 @@ Signed-off-by: Felix Fietkau default: return -ENOPROTOOPT; } -@@ -4009,6 +4022,13 @@ static int packet_getsockopt(struct sock +@@ -4007,6 +4020,13 @@ static int packet_getsockopt(struct sock case PACKET_VNET_HDR: val = po->has_vnet_hdr; break; diff --git a/root/target/linux/generic/pending-5.15/655-increase_skb_pad.patch b/root/target/linux/generic/pending-5.15/655-increase_skb_pad.patch index c7d35f20..98d9afb6 100755 --- a/root/target/linux/generic/pending-5.15/655-increase_skb_pad.patch +++ b/root/target/linux/generic/pending-5.15/655-increase_skb_pad.patch @@ -9,7 +9,7 @@ Signed-off-by: Felix Fietkau --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h -@@ -2703,7 +2703,7 @@ static inline int pskb_network_may_pull( +@@ -2724,7 +2724,7 @@ static inline int pskb_network_may_pull( * NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8) */ #ifndef NET_SKB_PAD diff --git a/root/target/linux/generic/pending-5.15/666-Add-support-for-MAP-E-FMRs-mesh-mode.patch b/root/target/linux/generic/pending-5.15/666-Add-support-for-MAP-E-FMRs-mesh-mode.patch index 45c95c21..09efa1e7 100755 --- a/root/target/linux/generic/pending-5.15/666-Add-support-for-MAP-E-FMRs-mesh-mode.patch +++ b/root/target/linux/generic/pending-5.15/666-Add-support-for-MAP-E-FMRs-mesh-mode.patch @@ -361,7 +361,7 @@ Signed-off-by: Steven Barth memcpy(p->name, u->name, sizeof(u->name)); } -@@ -1948,6 +2118,15 @@ static int ip6_tnl_validate(struct nlatt +@@ -1949,6 +2119,15 @@ static int ip6_tnl_validate(struct nlatt return 0; } @@ -377,7 +377,7 @@ Signed-off-by: Steven Barth static void ip6_tnl_netlink_parms(struct nlattr *data[], struct __ip6_tnl_parm *parms) { -@@ -1985,6 +2164,46 @@ static void 
ip6_tnl_netlink_parms(struct +@@ -1986,6 +2165,46 @@ static void ip6_tnl_netlink_parms(struct if (data[IFLA_IPTUN_FWMARK]) parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]); @@ -424,7 +424,7 @@ Signed-off-by: Steven Barth } static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[], -@@ -2100,6 +2319,12 @@ static void ip6_tnl_dellink(struct net_d +@@ -2101,6 +2320,12 @@ static void ip6_tnl_dellink(struct net_d static size_t ip6_tnl_get_size(const struct net_device *dev) { @@ -437,7 +437,7 @@ Signed-off-by: Steven Barth return /* IFLA_IPTUN_LINK */ nla_total_size(4) + -@@ -2129,6 +2354,24 @@ static size_t ip6_tnl_get_size(const str +@@ -2130,6 +2355,24 @@ static size_t ip6_tnl_get_size(const str nla_total_size(0) + /* IFLA_IPTUN_FWMARK */ nla_total_size(4) + @@ -462,7 +462,7 @@ Signed-off-by: Steven Barth 0; } -@@ -2136,6 +2379,9 @@ static int ip6_tnl_fill_info(struct sk_b +@@ -2137,6 +2380,9 @@ static int ip6_tnl_fill_info(struct sk_b { struct ip6_tnl *tunnel = netdev_priv(dev); struct __ip6_tnl_parm *parm = &tunnel->parms; @@ -472,7 +472,7 @@ Signed-off-by: Steven Barth if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) || -@@ -2145,9 +2391,27 @@ static int ip6_tnl_fill_info(struct sk_b +@@ -2146,9 +2392,27 @@ static int ip6_tnl_fill_info(struct sk_b nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) || nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto) || @@ -501,7 +501,7 @@ Signed-off-by: Steven Barth if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) || nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) || nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) || -@@ -2187,6 +2451,7 @@ static const struct nla_policy ip6_tnl_p +@@ -2188,6 +2452,7 @@ static const struct nla_policy ip6_tnl_p [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG }, [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 }, diff --git a/root/target/linux/generic/pending-5.15/670-ipv6-allow-rejecting-with-source-address-failed-policy.patch b/root/target/linux/generic/pending-5.15/670-ipv6-allow-rejecting-with-source-address-failed-policy.patch index 9a398de6..81a9b38e 100755 --- a/root/target/linux/generic/pending-5.15/670-ipv6-allow-rejecting-with-source-address-failed-policy.patch +++ b/root/target/linux/generic/pending-5.15/670-ipv6-allow-rejecting-with-source-address-failed-policy.patch @@ -20,7 +20,7 @@ Signed-off-by: Jonas Gorski --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h -@@ -82,6 +82,7 @@ struct netns_ipv6 { +@@ -84,6 +84,7 @@ struct netns_ipv6 { unsigned int fib6_routes_require_src; #endif struct rt6_info *ip6_prohibit_entry; diff --git a/root/target/linux/generic/pending-5.15/680-NET-skip-GRO-for-foreign-MAC-addresses.patch b/root/target/linux/generic/pending-5.15/680-NET-skip-GRO-for-foreign-MAC-addresses.patch index e621bb90..2fcfce82 100755 --- a/root/target/linux/generic/pending-5.15/680-NET-skip-GRO-for-foreign-MAC-addresses.patch +++ b/root/target/linux/generic/pending-5.15/680-NET-skip-GRO-for-foreign-MAC-addresses.patch @@ -11,7 +11,7 @@ Signed-off-by: Felix Fietkau --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h -@@ -2051,6 +2051,8 @@ struct net_device { +@@ -2068,6 +2068,8 @@ struct net_device { struct netdev_hw_addr_list mc; struct netdev_hw_addr_list dev_addrs; @@ -22,17 +22,17 @@ Signed-off-by: Felix Fietkau #endif --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h -@@ 
-870,6 +870,7 @@ struct sk_buff { - #ifdef CONFIG_TLS_DEVICE - __u8 decrypted:1; +@@ -852,6 +852,7 @@ struct sk_buff { + #ifdef CONFIG_IPV6_NDISC_NODETYPE + __u8 ndisc_nodetype:2; #endif + __u8 gro_skip:1; - #ifdef CONFIG_NET_SCHED - __u16 tc_index; /* traffic control index */ + __u8 ipvs_property:1; + __u8 inner_protocol_type:1; --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -6108,6 +6108,9 @@ static enum gro_result dev_gro_receive(s +@@ -6051,6 +6051,9 @@ static enum gro_result dev_gro_receive(s int same_flow; int grow; @@ -42,7 +42,7 @@ Signed-off-by: Felix Fietkau if (netif_elide_gro(skb->dev)) goto normal; -@@ -8118,6 +8121,48 @@ static void __netdev_adjacent_dev_unlink +@@ -8065,6 +8068,48 @@ static void __netdev_adjacent_dev_unlink &upper_dev->adj_list.lower); } @@ -91,7 +91,7 @@ Signed-off-by: Felix Fietkau static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, void *upper_priv, void *upper_info, -@@ -8169,6 +8214,7 @@ static int __netdev_upper_dev_link(struc +@@ -8116,6 +8161,7 @@ static int __netdev_upper_dev_link(struc if (ret) return ret; @@ -99,7 +99,7 @@ Signed-off-by: Felix Fietkau ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); ret = notifier_to_errno(ret); -@@ -8265,6 +8311,7 @@ static void __netdev_upper_dev_unlink(st +@@ -8212,6 +8258,7 @@ static void __netdev_upper_dev_unlink(st __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); @@ -107,7 +107,7 @@ Signed-off-by: Felix Fietkau call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); -@@ -9084,6 +9131,7 @@ int dev_set_mac_address(struct net_devic +@@ -9031,6 +9078,7 @@ int dev_set_mac_address(struct net_devic if (err) return err; dev->addr_assign_type = NET_ADDR_SET; @@ -117,7 +117,7 @@ Signed-off-by: Felix Fietkau return 0; --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c -@@ -144,6 +144,18 @@ u32 eth_get_headlen(const struct net_dev +@@ -142,6 +142,18 @@ u32 eth_get_headlen(const struct net_dev } EXPORT_SYMBOL(eth_get_headlen); @@ -136,7 +136,7 @@ Signed-off-by: Felix Fietkau /** * eth_type_trans - determine the packet's protocol ID. * @skb: received socket data -@@ -175,6 +187,10 @@ __be16 eth_type_trans(struct sk_buff *sk +@@ -173,6 +185,10 @@ __be16 eth_type_trans(struct sk_buff *sk } else { skb->pkt_type = PACKET_OTHERHOST; } diff --git a/root/target/linux/generic/pending-5.15/682-of_net-add-mac-address-increment-support.patch b/root/target/linux/generic/pending-5.15/682-of_net-add-mac-address-increment-support.patch index eb295b17..43a06a23 100755 --- a/root/target/linux/generic/pending-5.15/682-of_net-add-mac-address-increment-support.patch +++ b/root/target/linux/generic/pending-5.15/682-of_net-add-mac-address-increment-support.patch @@ -1,7 +1,7 @@ -From 639dba857aa554f2a78572adc4cf3c32de9ec2e2 Mon Sep 17 00:00:00 2001 +From 844c273286f328acf0dab5fbd5d864366b4904dc Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Tue, 30 Mar 2021 18:21:14 +0200 -Subject: [PATCH 2/2] of_net: add mac-address-increment support +Subject: [PATCH] of_net: add mac-address-increment support Lots of embedded devices use the mac-address of other interface extracted from nvmem cells and increments it by one or two. Add two @@ -15,28 +15,30 @@ early has to be increased. 
Signed-off-by: Ansuel Smith --- - drivers/of/of_net.c | 59 ++++++++++++++++++++++++++++++++++----------- - 1 file changed, 45 insertions(+), 14 deletions(-) + drivers/of/of_net.c | 43 +++++++++++++++++++++++++++++++++++++++---- + 1 file changed, 39 insertions(+), 4 deletions(-) --- a/drivers/of/of_net.c +++ b/drivers/of/of_net.c -@@ -170,31 +170,56 @@ static int of_get_mac_address_mtd(struct - * If a mtd-mac-address property exists, try to fetch the MAC address from the - * specified mtd device. +@@ -119,27 +119,62 @@ static int of_get_mac_addr_nvmem(struct + * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists + * but is all zeros. * + * DT can tell the system to increment the mac-address after is extracted by + * using: + * - mac-address-increment-byte to decide what byte to increase + * (if not defined is increased the last byte) -+ * - mac-address-increment to decide how much to increase. The value will -+ * not overflow to other bytes if the increment is over 255. -+ * (example 00:01:02:03:04:ff + 1 == 00:01:02:03:04:00) ++ * - mac-address-increment to decide how much to increase. The value WILL ++ * overflow to other bytes if the increment is over 255 or the total ++ * increment will exceed 255 of the current byte. ++ * (example 00:01:02:03:04:ff + 1 == 00:01:02:03:05:00) ++ * (example 00:01:02:03:04:fe + 5 == 00:01:02:03:05:03) + * * Return: 0 on success and errno in case of error. */ int of_get_mac_address(struct device_node *np, u8 *addr) { -+ u32 inc_idx, mac_inc; ++ u32 inc_idx, mac_inc, mac_val; int ret; + /* Check first if the increment byte is present and valid. @@ -63,11 +65,6 @@ Signed-off-by: Ansuel Smith ret = of_get_mac_addr(np, "address", addr); if (!ret) - return 0; -+ goto found; - - ret = of_get_mac_address_mtd(np, addr); - if (!ret) -- return 0; + goto found; + + ret = of_get_mac_addr_nvmem(np, addr); @@ -75,8 +72,16 @@ Signed-off-by: Ansuel Smith + return ret; + +found: -+ if (!of_property_read_u32(np, "mac-address-increment", &mac_inc)) -+ addr[inc_idx] += mac_inc; ++ if (!of_property_read_u32(np, "mac-address-increment", &mac_inc)) { ++ /* Convert to a contiguous value */ ++ mac_val = (addr[3] << 16) + (addr[4] << 8) + addr[5]; ++ mac_val += mac_inc << 8 * (5-inc_idx); ++ ++ /* Apply the incremented value handling overflow case */ ++ addr[3] = (mac_val >> 16) & 0xff; ++ addr[4] = (mac_val >> 8) & 0xff; ++ addr[5] = (mac_val >> 0) & 0xff; ++ } - return of_get_mac_addr_nvmem(np, addr); + return ret; diff --git a/root/target/linux/generic/pending-5.15/683-of_net-add-mac-address-to-of-tree.patch b/root/target/linux/generic/pending-5.15/683-of_net-add-mac-address-to-of-tree.patch index 04df11f7..50bcf73c 100755 --- a/root/target/linux/generic/pending-5.15/683-of_net-add-mac-address-to-of-tree.patch +++ b/root/target/linux/generic/pending-5.15/683-of_net-add-mac-address-to-of-tree.patch @@ -1,7 +1,7 @@ --- a/drivers/of/of_net.c +++ b/drivers/of/of_net.c -@@ -141,6 +141,26 @@ static int of_get_mac_address_mtd(struct - return -EINVAL; +@@ -95,6 +95,27 @@ static int of_get_mac_addr_nvmem(struct + return 0; } +static int of_add_mac_address(struct device_node *np, u8* addr) @@ -24,12 +24,13 @@ + kfree(prop); + return -ENOMEM; +} - ++ /** * of_get_mac_address() -@@ -220,6 +240,7 @@ found: - if (!of_property_read_u32(np, "mac-address-increment", &mac_inc)) - addr[inc_idx] += mac_inc; + * @np: Caller's Device Node +@@ -175,6 +196,7 @@ found: + addr[5] = (mac_val >> 0) & 0xff; + } + of_add_mac_address(np, addr); return ret; diff --git 
a/root/target/linux/generic/pending-5.15/703-phy-add-detach-callback-to-struct-phy_driver.patch b/root/target/linux/generic/pending-5.15/703-phy-add-detach-callback-to-struct-phy_driver.patch index 016ed94a..723dbacd 100755 --- a/root/target/linux/generic/pending-5.15/703-phy-add-detach-callback-to-struct-phy_driver.patch +++ b/root/target/linux/generic/pending-5.15/703-phy-add-detach-callback-to-struct-phy_driver.patch @@ -11,7 +11,7 @@ Signed-off-by: Gabor Juhos --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c -@@ -1701,6 +1701,9 @@ void phy_detach(struct phy_device *phyde +@@ -1715,6 +1715,9 @@ void phy_detach(struct phy_device *phyde struct module *ndev_owner = NULL; struct mii_bus *bus; diff --git a/root/target/linux/generic/pending-5.15/710-bridge-add-knob-for-filtering-rx-tx-BPDU-pack.patch b/root/target/linux/generic/pending-5.15/710-bridge-add-knob-for-filtering-rx-tx-BPDU-pack.patch new file mode 100755 index 00000000..70e45d13 --- /dev/null +++ b/root/target/linux/generic/pending-5.15/710-bridge-add-knob-for-filtering-rx-tx-BPDU-pack.patch @@ -0,0 +1,174 @@ +From: Felix Fietkau +Date: Fri, 27 Aug 2021 12:22:32 +0200 +Subject: [PATCH] bridge: add knob for filtering rx/tx BPDU packets on a port + +Some devices (e.g. wireless APs) can't have devices behind them be part of +a bridge topology with redundant links, due to address limitations. +Additionally, broadcast traffic on these devices is somewhat expensive, due to +the low data rate and wakeups of clients in powersave mode. +This knob can be used to ensure that BPDU packets are never sent or forwarded +to/from these devices + +Signed-off-by: Felix Fietkau +--- + +--- a/include/linux/if_bridge.h ++++ b/include/linux/if_bridge.h +@@ -58,6 +58,7 @@ struct br_ip_list { + #define BR_MRP_LOST_CONT BIT(18) + #define BR_MRP_LOST_IN_CONT BIT(19) + #define BR_TX_FWD_OFFLOAD BIT(20) ++#define BR_BPDU_FILTER BIT(21) + + #define BR_DEFAULT_AGEING_TIME (300 * HZ) + +--- a/net/bridge/br_forward.c ++++ b/net/bridge/br_forward.c +@@ -199,6 +199,7 @@ out: + void br_flood(struct net_bridge *br, struct sk_buff *skb, + enum br_pkt_type pkt_type, bool local_rcv, bool local_orig) + { ++ const unsigned char *dest = eth_hdr(skb)->h_dest; + struct net_bridge_port *prev = NULL; + struct net_bridge_port *p; + +@@ -214,6 +215,10 @@ void br_flood(struct net_bridge *br, str + case BR_PKT_MULTICAST: + if (!(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev) + continue; ++ if ((p->flags & BR_BPDU_FILTER) && ++ unlikely(is_link_local_ether_addr(dest) && ++ dest[5] == 0)) ++ continue; + break; + case BR_PKT_BROADCAST: + if (!(p->flags & BR_BCAST_FLOOD) && skb->dev != br->dev) +--- a/net/bridge/br_input.c ++++ b/net/bridge/br_input.c +@@ -319,6 +319,8 @@ static rx_handler_result_t br_handle_fra + fwd_mask |= p->group_fwd_mask; + switch (dest[5]) { + case 0x00: /* Bridge Group Address */ ++ if (p->flags & BR_BPDU_FILTER) ++ goto drop; + /* If STP is turned off, + then must forward to keep loop detection */ + if (p->br->stp_enabled == BR_NO_STP || +--- a/net/bridge/br_sysfs_if.c ++++ b/net/bridge/br_sysfs_if.c +@@ -240,6 +240,7 @@ BRPORT_ATTR_FLAG(multicast_flood, BR_MCA + BRPORT_ATTR_FLAG(broadcast_flood, BR_BCAST_FLOOD); + BRPORT_ATTR_FLAG(neigh_suppress, BR_NEIGH_SUPPRESS); + BRPORT_ATTR_FLAG(isolated, BR_ISOLATED); ++BRPORT_ATTR_FLAG(bpdu_filter, BR_BPDU_FILTER); + + #ifdef CONFIG_BRIDGE_IGMP_SNOOPING + static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) +@@ -292,6 +293,7 @@ static const struct brport_attribute *br + 
&brport_attr_group_fwd_mask, + &brport_attr_neigh_suppress, + &brport_attr_isolated, ++ &brport_attr_bpdu_filter, + &brport_attr_backup_port, + NULL + }; +--- a/net/bridge/br_stp_bpdu.c ++++ b/net/bridge/br_stp_bpdu.c +@@ -80,7 +80,8 @@ void br_send_config_bpdu(struct net_brid + { + unsigned char buf[35]; + +- if (p->br->stp_enabled != BR_KERNEL_STP) ++ if (p->br->stp_enabled != BR_KERNEL_STP || ++ (p->flags & BR_BPDU_FILTER)) + return; + + buf[0] = 0; +@@ -127,7 +128,8 @@ void br_send_tcn_bpdu(struct net_bridge_ + { + unsigned char buf[4]; + +- if (p->br->stp_enabled != BR_KERNEL_STP) ++ if (p->br->stp_enabled != BR_KERNEL_STP || ++ (p->flags & BR_BPDU_FILTER)) + return; + + buf[0] = 0; +@@ -172,6 +174,9 @@ void br_stp_rcv(const struct stp_proto * + if (!(br->dev->flags & IFF_UP)) + goto out; + ++ if (p->flags & BR_BPDU_FILTER) ++ goto out; ++ + if (p->state == BR_STATE_DISABLED) + goto out; + +--- a/include/uapi/linux/if_link.h ++++ b/include/uapi/linux/if_link.h +@@ -536,6 +536,7 @@ enum { + IFLA_BRPORT_MRP_IN_OPEN, + IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, + IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, ++ IFLA_BRPORT_BPDU_FILTER, + __IFLA_BRPORT_MAX + }; + #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) +--- a/net/bridge/br_netlink.c ++++ b/net/bridge/br_netlink.c +@@ -184,6 +184,7 @@ static inline size_t br_port_info_size(v + + nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */ + + nla_total_size(1) /* IFLA_BRPORT_NEIGH_SUPPRESS */ + + nla_total_size(1) /* IFLA_BRPORT_ISOLATED */ ++ + nla_total_size(1) /* IFLA_BRPORT_BPDU_FILTER */ + + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */ + + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */ + + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */ +@@ -269,7 +270,8 @@ static int br_port_fill_attrs(struct sk_ + BR_MRP_LOST_CONT)) || + nla_put_u8(skb, IFLA_BRPORT_MRP_IN_OPEN, + !!(p->flags & BR_MRP_LOST_IN_CONT)) || +- nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED))) ++ nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)) || ++ nla_put_u8(skb, IFLA_BRPORT_BPDU_FILTER, !!(p->flags & BR_BPDU_FILTER))) + return -EMSGSIZE; + + timerval = br_timer_value(&p->message_age_timer); +@@ -829,6 +831,7 @@ static const struct nla_policy br_port_p + [IFLA_BRPORT_ISOLATED] = { .type = NLA_U8 }, + [IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 }, + [IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = { .type = NLA_U32 }, ++ [IFLA_BRPORT_BPDU_FILTER] = { .type = NLA_U8 }, + }; + + /* Change the state of the port and notify spanning tree */ +@@ -893,6 +896,7 @@ static int br_setport(struct net_bridge_ + br_set_port_flag(p, tb, IFLA_BRPORT_VLAN_TUNNEL, BR_VLAN_TUNNEL); + br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS, BR_NEIGH_SUPPRESS); + br_set_port_flag(p, tb, IFLA_BRPORT_ISOLATED, BR_ISOLATED); ++ br_set_port_flag(p, tb, IFLA_BRPORT_BPDU_FILTER, BR_BPDU_FILTER); + + changed_mask = old_flags ^ p->flags; + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -55,7 +55,7 @@ + #include + + #define RTNL_MAX_TYPE 50 +-#define RTNL_SLAVE_MAX_TYPE 40 ++#define RTNL_SLAVE_MAX_TYPE 41 + + struct rtnl_link { + rtnl_doit_func doit; +@@ -4685,7 +4685,9 @@ int ndo_dflt_bridge_getlink(struct sk_bu + brport_nla_put_flag(skb, flags, mask, + IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD) || + brport_nla_put_flag(skb, flags, mask, +- IFLA_BRPORT_BCAST_FLOOD, BR_BCAST_FLOOD)) { ++ IFLA_BRPORT_BCAST_FLOOD, BR_BCAST_FLOOD) || ++ brport_nla_put_flag(skb, flags, mask, ++ IFLA_BRPORT_BPDU_FILTER, BR_BPDU_FILTER)) { + 
nla_nest_cancel(skb, protinfo); + goto nla_put_failure; + } diff --git a/root/target/linux/generic/pending-5.15/760-net-dsa-mv88e6xxx-fix-vlan-setup.patch b/root/target/linux/generic/pending-5.15/760-net-dsa-mv88e6xxx-fix-vlan-setup.patch index ecc3a384..6287d202 100755 --- a/root/target/linux/generic/pending-5.15/760-net-dsa-mv88e6xxx-fix-vlan-setup.patch +++ b/root/target/linux/generic/pending-5.15/760-net-dsa-mv88e6xxx-fix-vlan-setup.patch @@ -17,11 +17,11 @@ Signed-off-by: DENG Qingfang --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c -@@ -3027,6 +3027,7 @@ static int mv88e6xxx_setup(struct dsa_sw +@@ -3189,6 +3189,7 @@ static int mv88e6xxx_setup(struct dsa_sw chip->ds = ds; ds->slave_mii_bus = mv88e6xxx_default_mdio_bus(chip); + ds->configure_vlan_while_not_filtering = true; - mv88e6xxx_reg_lock(chip); - + /* Since virtual bridges are mapped in the PVT, the number we support + * depends on the physical switch topology. We need to let DSA figure diff --git a/root/target/linux/generic/pending-5.15/768-net-dsa-mv88e6xxx-Request-assisted-learning-on-CPU-port.patch b/root/target/linux/generic/pending-5.15/768-net-dsa-mv88e6xxx-Request-assisted-learning-on-CPU-port.patch index 83171f24..6ad9b449 100755 --- a/root/target/linux/generic/pending-5.15/768-net-dsa-mv88e6xxx-Request-assisted-learning-on-CPU-port.patch +++ b/root/target/linux/generic/pending-5.15/768-net-dsa-mv88e6xxx-Request-assisted-learning-on-CPU-port.patch @@ -17,7 +17,7 @@ Signed-off-by: Tobias Waldekranz --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c -@@ -6144,6 +6144,7 @@ static int mv88e6xxx_register_switch(str +@@ -6310,6 +6310,7 @@ static int mv88e6xxx_register_switch(str ds->ops = &mv88e6xxx_switch_ops; ds->ageing_time_min = chip->info->age_time_coeff; ds->ageing_time_max = chip->info->age_time_coeff * U8_MAX; diff --git a/root/target/linux/generic/pending-5.15/780-ARM-kirkwood-add-missing-linux-if_ether.h-for-ETH_AL.patch b/root/target/linux/generic/pending-5.15/780-ARM-kirkwood-add-missing-linux-if_ether.h-for-ETH_AL.patch new file mode 100755 index 00000000..fcf7892c --- /dev/null +++ b/root/target/linux/generic/pending-5.15/780-ARM-kirkwood-add-missing-linux-if_ether.h-for-ETH_AL.patch @@ -0,0 +1,61 @@ +From patchwork Thu Aug 5 22:23:30 2021 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +X-Patchwork-Submitter: Daniel Golle +X-Patchwork-Id: 12422209 +Date: Thu, 5 Aug 2021 23:23:30 +0100 +From: Daniel Golle +To: linux-arm-kernel@lists.infradead.org, netdev@vger.kernel.org, + linux-kernel@vger.kernel.org +Cc: "David S. Miller" , Andrew Lunn , + Michael Walle +Subject: [PATCH] ARM: kirkwood: add missing for ETH_ALEN +Message-ID: +MIME-Version: 1.0 +Content-Disposition: inline +X-BeenThere: linux-arm-kernel@lists.infradead.org +X-Mailman-Version: 2.1.34 +Precedence: list +List-Id: +List-Archive: +Sender: "linux-arm-kernel" + +After commit 83216e3988cd1 ("of: net: pass the dst buffer to +of_get_mac_address()") build fails for kirkwood as ETH_ALEN is not +defined. + +arch/arm/mach-mvebu/kirkwood.c: In function 'kirkwood_dt_eth_fixup': +arch/arm/mach-mvebu/kirkwood.c:87:13: error: 'ETH_ALEN' undeclared (first use in this function); did you mean 'ESTALE'? 
+ u8 tmpmac[ETH_ALEN]; + ^~~~~~~~ + ESTALE +arch/arm/mach-mvebu/kirkwood.c:87:13: note: each undeclared identifier is reported only once for each function it appears in +arch/arm/mach-mvebu/kirkwood.c:87:6: warning: unused variable 'tmpmac' [-Wunused-variable] + u8 tmpmac[ETH_ALEN]; + ^~~~~~ +make[5]: *** [scripts/Makefile.build:262: arch/arm/mach-mvebu/kirkwood.o] Error 1 +make[5]: *** Waiting for unfinished jobs.... + +Add missing #include to fix this. + +Cc: David S. Miller +Cc: Andrew Lunn +Cc: Michael Walle +Reported-by: https://buildbot.openwrt.org/master/images/#/builders/56/builds/220/steps/44/logs/stdio +Fixes: 83216e3988cd1 ("of: net: pass the dst buffer to of_get_mac_address()") +Signed-off-by: Daniel Golle +--- + arch/arm/mach-mvebu/kirkwood.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/arm/mach-mvebu/kirkwood.c ++++ b/arch/arm/mach-mvebu/kirkwood.c +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + #include + #include diff --git a/root/target/linux/generic/pending-5.15/800-bcma-get-SoC-device-struct-copy-its-DMA-params-to-th.patch b/root/target/linux/generic/pending-5.15/800-bcma-get-SoC-device-struct-copy-its-DMA-params-to-th.patch index 8ea307ea..478a2cb2 100755 --- a/root/target/linux/generic/pending-5.15/800-bcma-get-SoC-device-struct-copy-its-DMA-params-to-th.patch +++ b/root/target/linux/generic/pending-5.15/800-bcma-get-SoC-device-struct-copy-its-DMA-params-to-th.patch @@ -54,12 +54,14 @@ Signed-off-by: Rafał Miłecki void bcma_prepare_core(struct bcma_bus *bus, struct bcma_device *core) { +- device_initialize(&core->dev); + struct device *dev = &core->dev; + - device_initialize(&core->dev); ++ device_initialize(dev); core->dev.release = bcma_release_core_dev; core->dev.bus = &bcma_bus_type; - dev_set_name(&core->dev, "bcma%d:%d", bus->num, core->core_index); +- dev_set_name(&core->dev, "bcma%d:%d", bus->num, core->core_index); ++ dev_set_name(dev, "bcma%d:%d", bus->num, core->core_index); core->dev.parent = bus->dev; - if (bus->dev) + if (bus->dev) { diff --git a/root/target/linux/generic/pending-5.15/810-pci_disable_common_quirks.patch b/root/target/linux/generic/pending-5.15/810-pci_disable_common_quirks.patch index deddd188..165ae672 100755 --- a/root/target/linux/generic/pending-5.15/810-pci_disable_common_quirks.patch +++ b/root/target/linux/generic/pending-5.15/810-pci_disable_common_quirks.patch @@ -33,7 +33,7 @@ Signed-off-by: Gabor Juhos /* * The Mellanox Tavor device gives false positive parity errors. Disable * parity error reporting. -@@ -3312,6 +3313,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_I +@@ -3351,6 +3352,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_I DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65f9, quirk_intel_mc_errata); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x65fa, quirk_intel_mc_errata); @@ -42,7 +42,7 @@ Signed-off-by: Gabor Juhos /* * Ivytown NTB BAR sizes are misreported by the hardware due to an erratum. 
* To work around this, query the size it should be configured to by the -@@ -3337,6 +3340,8 @@ static void quirk_intel_ntb(struct pci_d +@@ -3376,6 +3379,8 @@ static void quirk_intel_ntb(struct pci_d DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e08, quirk_intel_ntb); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0e0d, quirk_intel_ntb); @@ -51,7 +51,7 @@ Signed-off-by: Gabor Juhos /* * Some BIOS implementations leave the Intel GPU interrupts enabled, even * though no one is handling them (e.g., if the i915 driver is never -@@ -3375,6 +3380,8 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_IN +@@ -3414,6 +3419,8 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_IN DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0152, disable_igfx_irq); diff --git a/root/target/linux/generic/pending-5.15/811-pci_disable_usb_common_quirks.patch b/root/target/linux/generic/pending-5.15/811-pci_disable_usb_common_quirks.patch index 67406bac..9b0ce710 100755 --- a/root/target/linux/generic/pending-5.15/811-pci_disable_usb_common_quirks.patch +++ b/root/target/linux/generic/pending-5.15/811-pci_disable_usb_common_quirks.patch @@ -98,7 +98,7 @@ Signed-off-by: Felix Fietkau #endif /* __LINUX_USB_PCI_QUIRKS_H */ --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h -@@ -497,7 +497,14 @@ extern int usb_hcd_pci_probe(struct pci_ +@@ -495,7 +495,14 @@ extern int usb_hcd_pci_probe(struct pci_ extern void usb_hcd_pci_remove(struct pci_dev *dev); extern void usb_hcd_pci_shutdown(struct pci_dev *dev); diff --git a/root/target/linux/generic/pending-5.15/834-ledtrig-libata.patch b/root/target/linux/generic/pending-5.15/834-ledtrig-libata.patch index fddec74d..ee911e3c 100755 --- a/root/target/linux/generic/pending-5.15/834-ledtrig-libata.patch +++ b/root/target/linux/generic/pending-5.15/834-ledtrig-libata.patch @@ -45,7 +45,7 @@ Signed-off-by: Daniel Golle depends on ACPI --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c -@@ -650,6 +650,19 @@ u64 ata_tf_read_block(const struct ata_t +@@ -656,6 +656,19 @@ u64 ata_tf_read_block(const struct ata_t return block; } @@ -65,7 +65,7 @@ Signed-off-by: Daniel Golle /** * ata_build_rw_tf - Build ATA taskfile for given read/write request * @tf: Target ATA taskfile -@@ -4545,6 +4558,9 @@ struct ata_queued_cmd *ata_qc_new_init(s +@@ -4572,6 +4585,9 @@ struct ata_queued_cmd *ata_qc_new_init(s if (tag < 0) return NULL; } @@ -75,7 +75,7 @@ Signed-off-by: Daniel Golle qc = __ata_qc_from_tag(ap, tag); qc->tag = qc->hw_tag = tag; -@@ -5323,6 +5339,9 @@ struct ata_port *ata_port_alloc(struct a +@@ -5350,6 +5366,9 @@ struct ata_port *ata_port_alloc(struct a ap->stats.unhandled_irq = 1; ap->stats.idle_irq = 1; #endif @@ -85,7 +85,7 @@ Signed-off-by: Daniel Golle ata_sff_port_init(ap); return ap; -@@ -5358,6 +5377,12 @@ static void ata_host_release(struct kref +@@ -5385,6 +5404,12 @@ static void ata_host_release(struct kref kfree(ap->pmp_link); kfree(ap->slave_link); @@ -98,7 +98,7 @@ Signed-off-by: Daniel Golle kfree(ap); host->ports[i] = NULL; } -@@ -5764,7 +5789,23 @@ int ata_host_register(struct ata_host *h +@@ -5791,7 +5816,23 @@ int ata_host_register(struct ata_host *h host->ports[i]->print_id = atomic_inc_return(&ata_print_id); host->ports[i]->local_port_no = i + 1; } @@ -134,7 +134,7 @@ Signed-off-by: Daniel Golle /* * Define if arch has non-standard setup. 
This is a _PCI_ standard -@@ -883,6 +886,12 @@ struct ata_port { +@@ -888,6 +891,12 @@ struct ata_port { #ifdef CONFIG_ATA_ACPI struct ata_acpi_gtm __acpi_init_gtm; /* use ata_acpi_init_gtm() */ #endif diff --git a/root/target/linux/generic/pending-5.15/920-mangle_bootargs.patch b/root/target/linux/generic/pending-5.15/920-mangle_bootargs.patch index 3299151e..d3f3a1c6 100755 --- a/root/target/linux/generic/pending-5.15/920-mangle_bootargs.patch +++ b/root/target/linux/generic/pending-5.15/920-mangle_bootargs.patch @@ -13,7 +13,7 @@ Signed-off-by: Imre Kaloz --- a/init/Kconfig +++ b/init/Kconfig -@@ -1782,6 +1782,15 @@ config EMBEDDED +@@ -1805,6 +1805,15 @@ config EMBEDDED an embedded system so certain expert options are available for configuration. @@ -31,7 +31,7 @@ Signed-off-by: Imre Kaloz help --- a/init/main.c +++ b/init/main.c -@@ -604,6 +604,29 @@ static inline void setup_nr_cpu_ids(void +@@ -614,6 +614,29 @@ static inline void setup_nr_cpu_ids(void static inline void smp_prepare_cpus(unsigned int maxcpus) { } #endif @@ -61,7 +61,7 @@ Signed-off-by: Imre Kaloz /* * We need to store the untouched command line for future reference. * We also need to store the touched command line since the parameter -@@ -935,6 +958,7 @@ asmlinkage __visible void __init __no_sa +@@ -954,6 +977,7 @@ asmlinkage __visible void __init __no_sa pr_notice("%s", linux_banner); early_security_init(); setup_arch(&command_line); diff --git a/root/target/linux/generic/pending-5.15/930-qcom-qmi-helpers.patch b/root/target/linux/generic/pending-5.15/930-qcom-qmi-helpers.patch new file mode 100755 index 00000000..2f427742 --- /dev/null +++ b/root/target/linux/generic/pending-5.15/930-qcom-qmi-helpers.patch @@ -0,0 +1,11 @@ +--- a/drivers/soc/qcom/Kconfig ++++ b/drivers/soc/qcom/Kconfig +@@ -92,7 +92,7 @@ config QCOM_PDR_HELPERS + select QCOM_QMI_HELPERS + + config QCOM_QMI_HELPERS +- tristate ++ tristate "Qualcomm QMI Helpers" + depends on NET + + config QCOM_RMTFS_MEM
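
For reference, the increment-with-carry arithmetic introduced by 682-of_net-add-mac-address-increment-support.patch above can be exercised stand-alone. What follows is a minimal user-space sketch, not kernel code: the helper name mac_increment(), main() and the test vectors are invented for illustration, while the pack/shift/unpack steps mirror the hunk in that patch. In the kernel the logic runs inside of_get_mac_address() and is driven by the DT properties mac-address-increment and mac-address-increment-byte (valid byte indexes 3..5).

/*
 * Sketch only: mirrors the carry handling added by the 682-of_net patch.
 * mac_increment() and the test vectors are illustrative, not part of the
 * patch; the real logic lives in drivers/of/of_net.c.
 */
#include <stdio.h>
#include <stdint.h>

static void mac_increment(uint8_t addr[6], uint32_t mac_inc, unsigned int inc_idx)
{
	/* Pack the three low octets into a single contiguous value. */
	uint32_t mac_val = (addr[3] << 16) + (addr[4] << 8) + addr[5];

	/* Add the increment at the requested byte (inc_idx is 3..5). */
	mac_val += mac_inc << (8 * (5 - inc_idx));

	/* Unpack, letting any overflow carry into the higher octets. */
	addr[3] = (mac_val >> 16) & 0xff;
	addr[4] = (mac_val >> 8) & 0xff;
	addr[5] = (mac_val >> 0) & 0xff;
}

int main(void)
{
	/* The two examples documented in the patch comment. */
	uint8_t a[6] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0xff };
	uint8_t b[6] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0xfe };

	mac_increment(a, 1, 5);	/* expect 00:01:02:03:05:00 */
	mac_increment(b, 5, 5);	/* expect 00:01:02:03:05:03 */

	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	       a[0], a[1], a[2], a[3], a[4], a[5]);
	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	       b[0], b[1], b[2], b[3], b[4], b[5]);
	return 0;
}

Packing the low three octets into one integer lets a single addition ripple carries across byte boundaries, which the per-byte form this version of the patch replaces (addr[inc_idx] += mac_inc) could not do.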