diff --git a/CREDITS b/CREDITS index b735b8c58102a..ca75f110edb6f 100644 --- a/CREDITS +++ b/CREDITS @@ -1987,6 +1987,7 @@ D: netfilter: TCP window tracking code D: netfilter: raw table D: netfilter: iprange match D: netfilter: new logging interfaces +D: netfilter: ipset D: netfilter: various other hacks S: Tata S: Hungary diff --git a/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml index 23e92be33ac86..b12632545673b 100644 --- a/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml +++ b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml @@ -40,6 +40,9 @@ properties: description: PHY regulator + power-domains: + maxItems: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml b/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml index fc445ad5a1f1a..448e617cddc4c 100644 --- a/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml +++ b/Documentation/devicetree/bindings/net/altr,socfpga-stmmac.yaml @@ -140,6 +140,9 @@ properties: - description: offset of the control register - description: shift within the control register + power-domains: + maxItems: 1 + patternProperties: "^mdio[0-9]$": type: object diff --git a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml index 5c91716d1f21e..9c9cc3ef384da 100644 --- a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml @@ -158,6 +158,9 @@ properties: interrupt-names: const: macirq + power-domains: + maxItems: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml b/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml index 91e8cd1db67b8..c1b67cfa76d07 100644 --- a/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml +++ b/Documentation/devicetree/bindings/net/eswin,eic7700-eth.yaml @@ -83,6 +83,9 @@ properties: register - description: Offset of register controlling TX/RX clock delay + power-domains: + maxItems: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml b/Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml index 62c1da36a2b5a..e41851931b947 100644 --- a/Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml +++ b/Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml @@ -47,6 +47,9 @@ properties: interrupt-names: const: macirq + power-domains: + maxItems: 1 + required: - compatible - clocks diff --git a/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml b/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml index c4f3224bad387..c9a131b8d8304 100644 --- a/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml +++ b/Documentation/devicetree/bindings/net/loongson,ls1b-gmac.yaml @@ -66,6 +66,9 @@ properties: - mii - rgmii-id + power-domains: + maxItems: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml b/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml index 99001b940b836..49db18423dd80 100644 --- a/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml +++ b/Documentation/devicetree/bindings/net/loongson,ls1c-emac.yaml @@ -65,6 +65,9 @@ properties: - mii - rmii + power-domains: + maxItems: 1 + required: - compatible - reg diff --git 
a/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml b/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml index e5db346beca96..b240c76e7dd52 100644 --- a/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml +++ b/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml @@ -83,6 +83,9 @@ properties: description: To select RMII reference clock from external. + power-domains: + maxItems: 1 + required: - compatible - clocks diff --git a/Documentation/devicetree/bindings/net/nxp,lpc1850-dwmac.yaml b/Documentation/devicetree/bindings/net/nxp,lpc1850-dwmac.yaml index 05acd9bc76163..f61188ab0dbe3 100644 --- a/Documentation/devicetree/bindings/net/nxp,lpc1850-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/nxp,lpc1850-dwmac.yaml @@ -51,6 +51,9 @@ properties: items: - const: stmmaceth + power-domains: + maxItems: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml b/Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml index 2b8b74c5feec8..716407a750796 100644 --- a/Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml @@ -52,6 +52,9 @@ properties: - const: rx - const: ptp_ref + power-domains: + maxItems: 1 + required: - clocks - clock-names diff --git a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml index 423959cb928d9..ef520f8105773 100644 --- a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml @@ -86,6 +86,9 @@ properties: phy-names: const: serdes + power-domains: + maxItems: 1 + required: - compatible - clocks diff --git a/Documentation/devicetree/bindings/net/qcom,sa8255p-ethqos.yaml b/Documentation/devicetree/bindings/net/qcom,sa8255p-ethqos.yaml new file mode 100644 index 0000000000000..72bb764c0ca04 --- /dev/null +++ b/Documentation/devicetree/bindings/net/qcom,sa8255p-ethqos.yaml @@ -0,0 +1,98 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/qcom,sa8255p-ethqos.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Ethernet ETHQOS device (firmware managed) + +maintainers: + - Bjorn Andersson + - Konrad Dybcio + - Bartosz Golaszewski + +description: + dwmac based Qualcomm Ethernet devices which support Gigabit + Ethernet (version v2.3.0 and onwards) with clocks, interconnects, etc.
+ managed by firmware. + +allOf: + - $ref: snps,dwmac.yaml# + +properties: + compatible: + const: qcom,sa8255p-ethqos + + reg: + maxItems: 2 + + reg-names: + items: + - const: stmmaceth + - const: rgmii + + interrupts: + items: + - description: Combined signal for various interrupt events + - description: The interrupt that occurs when a HW safety error is triggered + + interrupt-names: + items: + - const: macirq + - const: sfty + + power-domains: + minItems: 3 + maxItems: 3 + + power-domain-names: + items: + - const: core + - const: mdio + - const: serdes + + iommus: + maxItems: 1 + + dma-coherent: true + +required: + - compatible + - reg-names + - power-domains + - power-domain-names + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/arm-gic.h> + + ethernet: ethernet@23040000 { + compatible = "qcom,sa8255p-ethqos"; + reg = <0x23040000 0x10000>, + <0x23056000 0x100>; + reg-names = "stmmaceth", "rgmii"; + + iommus = <&apps_smmu 0x120 0x7>; + + interrupts = , + ; + interrupt-names = "macirq", "sfty"; + + dma-coherent; + + snps,tso; + snps,pbl = <32>; + rx-fifo-depth = <16384>; + tx-fifo-depth = <16384>; + + phy-handle = <&ethernet_phy>; + phy-mode = "2500base-x"; + + snps,mtl-rx-config = <&mtl_rx_setup1>; + snps,mtl-tx-config = <&mtl_tx_setup1>; + + power-domains = <&scmi8_pd 0>, <&scmi8_pd 1>, <&scmi8_dvfs 0>; + power-domain-names = "core", "mdio", "serdes"; + }; diff --git a/Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml b/Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml index 16dd7a2631abf..ed0d10a19ca4c 100644 --- a/Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml +++ b/Documentation/devicetree/bindings/net/renesas,rzn1-gmac.yaml @@ -44,6 +44,9 @@ properties: phandle pointing to a PCS sub-node compatible with renesas,rzn1-miic.yaml# + power-domains: + maxItems: 1 + required: - compatible diff --git a/Documentation/devicetree/bindings/net/renesas,rzv2h-gbeth.yaml b/Documentation/devicetree/bindings/net/renesas,rzv2h-gbeth.yaml index bd53ab300f500..bc5054b05f6d7 100644 --- a/Documentation/devicetree/bindings/net/renesas,rzv2h-gbeth.yaml +++ b/Documentation/devicetree/bindings/net/renesas,rzv2h-gbeth.yaml @@ -123,6 +123,9 @@ properties: Documentation/devicetree/bindings/net/pcs/renesas,rzn1-miic.yaml# (Refer RZ/T2H portion in the DT-binding file) + power-domains: + maxItems: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml index d17112527dab0..ef82ff2a2884d 100644 --- a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml @@ -121,6 +121,9 @@ properties: phy-supply: description: PHY regulator + power-domains: + maxItems: 1 + required: - compatible - clocks diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index dd3c72e8363e7..312d1bbc2ad10 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -71,6 +71,7 @@ properties: - loongson,ls7a-dwmac - nxp,s32g2-dwmac - qcom,qcs404-ethqos + - qcom,sa8255p-ethqos - qcom,sa8775p-ethqos - qcom,sc8280xp-ethqos - qcom,sm8150-ethqos @@ -180,7 +181,8 @@ properties: - const: ahb power-domains: - maxItems: 1 + minItems: 1 + maxItems: 3 mac-mode: $ref: ethernet-controller.yaml#/properties/phy-connection-type @@ -643,6 +645,7 @@ allOf: - ingenic,x1830-mac - ingenic,x2000-mac - qcom,qcs404-ethqos + -
qcom,sa8255p-ethqos - qcom,sa8775p-ethqos - qcom,sc8280xp-ethqos - qcom,sm8150-ethqos diff --git a/Documentation/devicetree/bindings/net/sophgo,cv1800b-dwmac.yaml b/Documentation/devicetree/bindings/net/sophgo,cv1800b-dwmac.yaml index b89456f0ef830..e78cbf594c695 100644 --- a/Documentation/devicetree/bindings/net/sophgo,cv1800b-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/sophgo,cv1800b-dwmac.yaml @@ -49,6 +49,9 @@ properties: reset-names: const: stmmaceth + power-domains: + maxItems: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml index e8d3814db0e94..845e2c67d2003 100644 --- a/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml @@ -52,6 +52,9 @@ properties: interrupt-names: maxItems: 1 + power-domains: + maxItems: 1 + resets: maxItems: 1 diff --git a/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml b/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml index 313a153316612..8a68c6d7b5c6d 100644 --- a/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml @@ -71,6 +71,9 @@ properties: The argument one is the offset of phy mode selection, the argument two is the shift of phy mode selection. + power-domains: + maxItems: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/net/stm32-dwmac.yaml b/Documentation/devicetree/bindings/net/stm32-dwmac.yaml index 987254900d0da..29b878079ff0c 100644 --- a/Documentation/devicetree/bindings/net/stm32-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/stm32-dwmac.yaml @@ -121,6 +121,9 @@ properties: minItems: 1 maxItems: 2 + power-domains: + maxItems: 1 + required: - compatible - clocks diff --git a/Documentation/devicetree/bindings/net/tesla,fsd-ethqos.yaml b/Documentation/devicetree/bindings/net/tesla,fsd-ethqos.yaml index dd7481bb16e59..ad635529d676e 100644 --- a/Documentation/devicetree/bindings/net/tesla,fsd-ethqos.yaml +++ b/Documentation/devicetree/bindings/net/tesla,fsd-ethqos.yaml @@ -67,6 +67,9 @@ properties: - rgmii-rxid - rgmii-txid + power-domains: + maxItems: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml b/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml index b3492a9aa4eff..c859f8bb5d582 100644 --- a/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml +++ b/Documentation/devicetree/bindings/net/thead,th1520-gmac.yaml @@ -78,6 +78,9 @@ properties: items: - const: macirq + power-domains: + maxItems: 1 + required: - clocks - clock-names diff --git a/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml b/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml index f0f32e18fc855..efa39eab0256a 100644 --- a/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml @@ -48,6 +48,9 @@ properties: interrupt-names: const: macirq + power-domains: + maxItems: 1 + required: - compatible - reg diff --git a/Documentation/networking/caif/linux_caif.rst b/Documentation/networking/caif/linux_caif.rst index a0480862ab8ce..969e4a2af98f5 100644 --- a/Documentation/networking/caif/linux_caif.rst +++ b/Documentation/networking/caif/linux_caif.rst @@ -181,8 +181,8 @@ CAIF Core protocol. 
The IP Interface and CAIF socket have an instance of 'struct cflayer', just like the CAIF Core protocol stack. Net device and Socket implement the 'receive()' function defined by 'struct cflayer', just like the rest of the CAIF stack. In this way, transmit and -receive of packets is handled as by the rest of the layers: the 'dn->transmit()' -function is called in order to transmit data. +receive of packets is handled like the rest of the layers: the 'dn->transmit()' +function is called to transmit data. Configuration of Link Layer --------------------------- diff --git a/MAINTAINERS b/MAINTAINERS index e7027fba97db2..0011035d2cf37 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18030,7 +18030,6 @@ F: drivers/net/ethernet/neterion/ NETFILTER M: Pablo Neira Ayuso -M: Jozsef Kadlecsik M: Florian Westphal R: Phil Sutter L: netfilter-devel@vger.kernel.org @@ -21431,6 +21430,7 @@ L: netdev@vger.kernel.org L: linux-arm-msm@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/qcom,ethqos.yaml +F: Documentation/devicetree/bindings/net/qcom,sa8255p-ethqos.yaml F: drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c QUALCOMM FASTRPC DRIVER diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 79fa38ca954d1..652896207f2f0 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -2,7 +2,7 @@ CONFIG_WERROR=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_AUDIT=y -CONFIG_NO_HZ=y +CONFIG_HZ_PERIODIC=y CONFIG_HIGH_RES_TIMERS=y CONFIG_PREEMPT_VOLUNTARY=y CONFIG_BSD_PROCESS_ACCT=y @@ -190,19 +190,6 @@ CONFIG_HPET=y # CONFIG_HPET_MMAP is not set CONFIG_I2C_I801=y CONFIG_WATCHDOG=y -CONFIG_AGP=y -CONFIG_AGP_AMD64=y -CONFIG_AGP_INTEL=y -CONFIG_DRM=y -CONFIG_DRM_I915=y -CONFIG_DRM_VIRTIO_GPU=y -CONFIG_SOUND=y -CONFIG_SND=y -CONFIG_SND_HRTIMER=y -CONFIG_SND_SEQUENCER=y -CONFIG_SND_SEQ_DUMMY=y -CONFIG_SND_HDA_INTEL=y -CONFIG_SND_HDA_HWDEP=y CONFIG_HIDRAW=y CONFIG_HID_GYRATION=y CONFIG_HID_NTRIG=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 7d7310cdf8b0a..d9e4feeb5a3d5 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -2,7 +2,7 @@ CONFIG_WERROR=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_AUDIT=y -CONFIG_NO_HZ=y +CONFIG_HZ_PERIODIC=y CONFIG_HIGH_RES_TIMERS=y CONFIG_PREEMPT_VOLUNTARY=y CONFIG_BSD_PROCESS_ACCT=y @@ -29,6 +29,7 @@ CONFIG_KALLSYMS_ALL=y CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_SMP=y +CONFIG_CPU_MITIGATIONS=n CONFIG_HYPERVISOR_GUEST=y CONFIG_PARAVIRT=y CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y @@ -187,19 +188,6 @@ CONFIG_HPET=y # CONFIG_HPET_MMAP is not set CONFIG_I2C_I801=y CONFIG_WATCHDOG=y -CONFIG_AGP=y -CONFIG_AGP_AMD64=y -CONFIG_AGP_INTEL=y -CONFIG_DRM=y -CONFIG_DRM_I915=y -CONFIG_DRM_VIRTIO_GPU=y -CONFIG_SOUND=y -CONFIG_SND=y -CONFIG_SND_HRTIMER=y -CONFIG_SND_SEQUENCER=y -CONFIG_SND_SEQ_DUMMY=y -CONFIG_SND_HDA_INTEL=y -CONFIG_SND_HDA_HWDEP=y CONFIG_HIDRAW=y CONFIG_HID_GYRATION=y CONFIG_HID_NTRIG=y diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 8ed3883ab8eef..ded09e94d296d 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -4052,7 +4052,7 @@ static int btusb_probe(struct usb_interface *intf, return -ENODEV; } - data = devm_kzalloc(&intf->dev, sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; @@ -4075,8 +4075,10 @@ static int btusb_probe(struct usb_interface *intf, } } - if (!data->intr_ep || !data->bulk_tx_ep || !data->bulk_rx_ep) + if 
(!data->intr_ep || !data->bulk_tx_ep || !data->bulk_rx_ep) { + kfree(data); return -ENODEV; + } if (id->driver_info & BTUSB_AMP) { data->cmdreq_type = USB_TYPE_CLASS | 0x01; @@ -4131,8 +4133,10 @@ static int btusb_probe(struct usb_interface *intf, data->recv_acl = hci_recv_frame; hdev = hci_alloc_dev_priv(priv_size); - if (!hdev) + if (!hdev) { + kfree(data); return -ENOMEM; + } hdev->bus = HCI_USB; hci_set_drvdata(hdev, data); @@ -4406,6 +4410,7 @@ static int btusb_probe(struct usb_interface *intf, if (data->reset_gpio) gpiod_put(data->reset_gpio); hci_free_dev(hdev); + kfree(data); return err; } @@ -4454,6 +4459,7 @@ static void btusb_disconnect(struct usb_interface *intf) } hci_free_dev(hdev); + kfree(data); } #ifdef CONFIG_PM diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 3a1611008e48e..7fd4192302c94 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -555,7 +555,6 @@ source "drivers/clk/zynqmp/Kconfig" config CLK_KUNIT_TEST tristate "Basic Clock Framework Kunit Tests" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS select DTC help Kunit tests for the common clock framework. @@ -563,7 +562,6 @@ config CLK_KUNIT_TEST config CLK_FIXED_RATE_KUNIT_TEST tristate "Basic fixed rate clk type KUnit test" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS select DTC help KUnit tests for the basic fixed rate clk type. @@ -572,14 +570,12 @@ config CLK_GATE_KUNIT_TEST tristate "Basic gate type Kunit test" if !KUNIT_ALL_TESTS depends on KUNIT depends on !S390 - default KUNIT_ALL_TESTS help Kunit test for the basic clk gate type. config CLK_FD_KUNIT_TEST tristate "Basic fractional divider type Kunit test" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS help Kunit test for the clk-fractional-divider type. diff --git a/drivers/firewire/Kconfig b/drivers/firewire/Kconfig index a5f5e250223a1..8cddc84d4d236 100644 --- a/drivers/firewire/Kconfig +++ b/drivers/firewire/Kconfig @@ -21,7 +21,6 @@ config FIREWIRE config FIREWIRE_KUNIT_UAPI_TEST tristate "KUnit tests for layout of structure in UAPI" if !KUNIT_ALL_TESTS depends on FIREWIRE && KUNIT - default KUNIT_ALL_TESTS help This builds the KUnit tests whether structures exposed to user space have expected layout. @@ -37,7 +36,6 @@ config FIREWIRE_KUNIT_UAPI_TEST config FIREWIRE_KUNIT_DEVICE_ATTRIBUTE_TEST tristate "KUnit tests for device attributes" if !KUNIT_ALL_TESTS depends on FIREWIRE && KUNIT - default KUNIT_ALL_TESTS help This builds the KUnit tests for device attribute for node and unit. @@ -53,7 +51,6 @@ config FIREWIRE_KUNIT_DEVICE_ATTRIBUTE_TEST config FIREWIRE_KUNIT_PACKET_SERDES_TEST tristate "KUnit tests for packet serialization/deserialization" if !KUNIT_ALL_TESTS depends on FIREWIRE && KUNIT - default KUNIT_ALL_TESTS help This builds the KUnit tests for packet serialization and deserialization. @@ -69,7 +66,6 @@ config FIREWIRE_KUNIT_PACKET_SERDES_TEST config FIREWIRE_KUNIT_SELF_ID_SEQUENCE_HELPER_TEST tristate "KUnit tests for helpers of self ID sequence" if !KUNIT_ALL_TESTS depends on FIREWIRE && KUNIT - default KUNIT_ALL_TESTS help This builds the KUnit tests for helpers of self ID sequence. @@ -95,7 +91,6 @@ config FIREWIRE_OHCI config FIREWIRE_KUNIT_OHCI_SERDES_TEST tristate "KUnit tests for serialization/deserialization of data in buffers/registers" if !KUNIT_ALL_TESTS depends on FIREWIRE && KUNIT - default KUNIT_ALL_TESTS help This builds the KUnit tests to check serialization and deserialization of data in buffers and registers defined in 1394 OHCI specification. 
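Aside on the btusb hunks above: dropping devm_kzalloc() in favour of plain kzalloc() moves ownership of 'data' from the USB core to the driver, which is why the patch adds kfree(data) on every probe error path and in btusb_disconnect(). Below is a minimal sketch of the resulting pattern; the demo_* names are illustrative only, not part of the patch.

#include <linux/slab.h>
#include <linux/usb.h>

struct demo_data {
	void *priv;
};

/* Endpoint discovery elided; stands in for the btusb endpoint scan. */
static bool demo_find_endpoints(struct usb_interface *intf,
				struct demo_data *data)
{
	return true;
}

static int demo_probe(struct usb_interface *intf,
		      const struct usb_device_id *id)
{
	struct demo_data *data;

	/* Was devm_kzalloc(&intf->dev, ...), which the USB core freed on
	 * unbind; with plain kzalloc() every exit path must free manually.
	 */
	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	if (!demo_find_endpoints(intf, data)) {
		kfree(data);		/* mirrors the new error paths */
		return -ENODEV;
	}

	usb_set_intfdata(intf, data);
	return 0;
}

static void demo_disconnect(struct usb_interface *intf)
{
	struct demo_data *data = usb_get_intfdata(intf);

	kfree(data);	/* mirrors the kfree(data) added to btusb_disconnect() */
}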
diff --git a/drivers/firmware/cirrus/Kconfig b/drivers/firmware/cirrus/Kconfig index e3c2e38b746df..ad7055f7e48d7 100644 --- a/drivers/firmware/cirrus/Kconfig +++ b/drivers/firmware/cirrus/Kconfig @@ -10,7 +10,6 @@ config FW_CS_DSP_KUNIT_TEST_UTILS config FW_CS_DSP_KUNIT_TEST tristate "KUnit tests for Cirrus Logic cs_dsp" if !KUNIT_ALL_TESTS depends on KUNIT && REGMAP && FW_CS_DSP - default KUNIT_ALL_TESTS select FW_CS_DSP_KUNIT_TEST_UTILS help This builds KUnit tests for cs_dsp. diff --git a/drivers/fpga/tests/Kconfig b/drivers/fpga/tests/Kconfig index e4a64815f16d6..e8ded96191df9 100644 --- a/drivers/fpga/tests/Kconfig +++ b/drivers/fpga/tests/Kconfig @@ -1,7 +1,6 @@ config FPGA_KUNIT_TESTS tristate "KUnit test for the FPGA subsystem" if !KUNIT_ALL_TESTS depends on FPGA && FPGA_REGION && FPGA_BRIDGE && KUNIT=y - default KUNIT_ALL_TESTS help This builds unit tests for the FPGA subsystem diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig index bb8c40be32503..5ba1e7f7e7c38 100644 --- a/drivers/gpu/drm/vc4/Kconfig +++ b/drivers/gpu/drm/vc4/Kconfig @@ -44,7 +44,6 @@ config DRM_VC4_KUNIT_TEST tristate "KUnit tests for VC4" if !KUNIT_ALL_TESTS depends on DRM_VC4 && KUNIT select DRM_KUNIT_TEST_HELPERS - default KUNIT_ALL_TESTS help This builds unit tests for the VC4 DRM/KMS driver. This option is not useful for distributions or general kernels, but only for kernel diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug index 01227c77f6d70..886657fa1c2a9 100644 --- a/drivers/gpu/drm/xe/Kconfig.debug +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -76,7 +76,6 @@ config DRM_XE_DEBUG_MEM config DRM_XE_KUNIT_TEST tristate "KUnit tests for the drm xe driver" if !KUNIT_ALL_TESTS depends on DRM_XE && KUNIT && DEBUG_FS - default KUNIT_ALL_TESTS select DRM_EXPORT_FOR_TESTS if m help Choose this option to allow the driver to perform selftests under diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 920a64b66b25b..140d15a701365 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -1416,7 +1416,6 @@ config HID_KUNIT_TEST depends on KUNIT depends on HID_BATTERY_STRENGTH depends on HID_UCLOGIC - default KUNIT_ALL_TESTS help This builds unit tests for HID. This option is not useful for distributions or general kernels, but only for kernel diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index f483e0c124445..7089a982b876f 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -12,7 +12,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ multicast.o mad.o smi.o agent.o mad_rmpp.o \ nldev.o restrack.o counters.o ib_core_uverbs.o \ - trace.o lag.o + trace.o lag.o frmr_pools.o ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o diff --git a/drivers/infiniband/core/frmr_pools.c b/drivers/infiniband/core/frmr_pools.c new file mode 100644 index 0000000000000..c688529a16d8b --- /dev/null +++ b/drivers/infiniband/core/frmr_pools.c @@ -0,0 +1,557 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ */ + +#include <linux/export.h> +#include <linux/jiffies.h> +#include <linux/rbtree.h> +#include <linux/slab.h> +#include <linux/workqueue.h> + +#include "frmr_pools.h" + +#define FRMR_POOLS_DEFAULT_AGING_PERIOD_SECS 60 + +static int push_handle_to_queue_locked(struct frmr_queue *queue, u32 handle) +{ + u32 tmp = queue->ci % NUM_HANDLES_PER_PAGE; + struct frmr_handles_page *page; + + if (queue->ci >= queue->num_pages * NUM_HANDLES_PER_PAGE) { + page = kzalloc(sizeof(*page), GFP_ATOMIC); + if (!page) + return -ENOMEM; + queue->num_pages++; + list_add_tail(&page->list, &queue->pages_list); + } else { + page = list_last_entry(&queue->pages_list, + struct frmr_handles_page, list); + } + + page->handles[tmp] = handle; + queue->ci++; + return 0; +} + +static u32 pop_handle_from_queue_locked(struct frmr_queue *queue) +{ + u32 tmp = (queue->ci - 1) % NUM_HANDLES_PER_PAGE; + struct frmr_handles_page *page; + u32 handle; + + page = list_last_entry(&queue->pages_list, struct frmr_handles_page, + list); + handle = page->handles[tmp]; + queue->ci--; + + if (!tmp) { + list_del(&page->list); + queue->num_pages--; + kfree(page); + } + + return handle; +} + +static bool pop_frmr_handles_page(struct ib_frmr_pool *pool, + struct frmr_queue *queue, + struct frmr_handles_page **page, u32 *count) +{ + spin_lock(&pool->lock); + if (list_empty(&queue->pages_list)) { + spin_unlock(&pool->lock); + return false; + } + + *page = list_first_entry(&queue->pages_list, struct frmr_handles_page, + list); + list_del(&(*page)->list); + queue->num_pages--; + + /* If this is the last page, count may be less than + * NUM_HANDLES_PER_PAGE. + */ + if (queue->ci >= NUM_HANDLES_PER_PAGE) + *count = NUM_HANDLES_PER_PAGE; + else + *count = queue->ci; + + queue->ci -= *count; + spin_unlock(&pool->lock); + return true; +} + +static void destroy_all_handles_in_queue(struct ib_device *device, + struct ib_frmr_pool *pool, + struct frmr_queue *queue) +{ + struct ib_frmr_pools *pools = device->frmr_pools; + struct frmr_handles_page *page; + u32 count; + + while (pop_frmr_handles_page(pool, queue, &page, &count)) { + pools->pool_ops->destroy_frmrs(device, page->handles, count); + kfree(page); + } +} + +static bool age_pinned_pool(struct ib_device *device, struct ib_frmr_pool *pool) +{ + struct ib_frmr_pools *pools = device->frmr_pools; + u32 total, to_destroy, destroyed = 0; + bool has_work = false; + u32 *handles; + u32 handle; + + spin_lock(&pool->lock); + total = pool->queue.ci + pool->inactive_queue.ci + pool->in_use; + if (total <= pool->pinned_handles) { + spin_unlock(&pool->lock); + return false; + } + + to_destroy = total - pool->pinned_handles; + + handles = kcalloc(to_destroy, sizeof(*handles), GFP_ATOMIC); + if (!handles) { + spin_unlock(&pool->lock); + return true; + } + + /* Destroy all excess handles in the inactive queue */ + while (pool->inactive_queue.ci && destroyed < to_destroy) { + handles[destroyed++] = pop_handle_from_queue_locked( + &pool->inactive_queue); + } + + /* Move all handles from regular queue to inactive queue */ + while (pool->queue.ci) { + handle = pop_handle_from_queue_locked(&pool->queue); + push_handle_to_queue_locked(&pool->inactive_queue, + handle); + has_work = true; + } + + spin_unlock(&pool->lock); + + if (destroyed) + pools->pool_ops->destroy_frmrs(device, handles, destroyed); + kfree(handles); + return has_work; +} + +static void pool_aging_work(struct work_struct *work) +{ + struct ib_frmr_pool *pool = container_of( + to_delayed_work(work), struct ib_frmr_pool, aging_work); + struct ib_frmr_pools *pools = pool->device->frmr_pools; + bool has_work = false; + + if
(pool->pinned_handles) { + has_work = age_pinned_pool(pool->device, pool); + goto out; + } + + destroy_all_handles_in_queue(pool->device, pool, &pool->inactive_queue); + + /* Move all pages from regular queue to inactive queue */ + spin_lock(&pool->lock); + if (pool->queue.ci > 0) { + list_splice_tail_init(&pool->queue.pages_list, + &pool->inactive_queue.pages_list); + pool->inactive_queue.num_pages = pool->queue.num_pages; + pool->inactive_queue.ci = pool->queue.ci; + + pool->queue.num_pages = 0; + pool->queue.ci = 0; + has_work = true; + } + spin_unlock(&pool->lock); + +out: + /* Reschedule if there are handles to age in next aging period */ + if (has_work) + queue_delayed_work( + pools->aging_wq, &pool->aging_work, + secs_to_jiffies(READ_ONCE(pools->aging_period_sec))); +} + +static void destroy_frmr_pool(struct ib_device *device, + struct ib_frmr_pool *pool) +{ + cancel_delayed_work_sync(&pool->aging_work); + destroy_all_handles_in_queue(device, pool, &pool->queue); + destroy_all_handles_in_queue(device, pool, &pool->inactive_queue); + + rb_erase(&pool->node, &device->frmr_pools->rb_root); + kfree(pool); +} + +/* + * Initialize the FRMR pools for a device. + * + * @device: The device to initialize the FRMR pools for. + * @pool_ops: The pool operations to use. + * + * Returns 0 on success, negative error code on failure. + */ +int ib_frmr_pools_init(struct ib_device *device, + const struct ib_frmr_pool_ops *pool_ops) +{ + struct ib_frmr_pools *pools; + + pools = kzalloc(sizeof(*pools), GFP_KERNEL); + if (!pools) + return -ENOMEM; + + pools->rb_root = RB_ROOT; + rwlock_init(&pools->rb_lock); + pools->pool_ops = pool_ops; + pools->aging_wq = create_singlethread_workqueue("frmr_aging_wq"); + if (!pools->aging_wq) { + kfree(pools); + return -ENOMEM; + } + + pools->aging_period_sec = FRMR_POOLS_DEFAULT_AGING_PERIOD_SECS; + + device->frmr_pools = pools; + return 0; +} +EXPORT_SYMBOL(ib_frmr_pools_init); + +/* + * Clean up the FRMR pools for a device. + * + * @device: The device to clean up the FRMR pools for. + * + * Call cleanup only after all FRMR handles have been pushed back to the pool + * and no other FRMR operations are allowed to run in parallel. + * Ensuring this allows us to save synchronization overhead in pop and push + * operations. 
+ */ +void ib_frmr_pools_cleanup(struct ib_device *device) +{ + struct ib_frmr_pools *pools = device->frmr_pools; + struct rb_node *node = rb_first(&pools->rb_root); + struct ib_frmr_pool *pool; + + while (node) { + struct rb_node *next = rb_next(node); + + pool = rb_entry(node, struct ib_frmr_pool, node); + destroy_frmr_pool(device, pool); + node = next; + } + + destroy_workqueue(pools->aging_wq); + kfree(pools); + device->frmr_pools = NULL; +} +EXPORT_SYMBOL(ib_frmr_pools_cleanup); + +int ib_frmr_pools_set_aging_period(struct ib_device *device, u32 period_sec) +{ + struct ib_frmr_pools *pools = device->frmr_pools; + struct ib_frmr_pool *pool; + struct rb_node *node; + + if (!pools) + return -EINVAL; + + if (period_sec == 0) + return -EINVAL; + + WRITE_ONCE(pools->aging_period_sec, period_sec); + + read_lock(&pools->rb_lock); + for (node = rb_first(&pools->rb_root); node; node = rb_next(node)) { + pool = rb_entry(node, struct ib_frmr_pool, node); + mod_delayed_work(pools->aging_wq, &pool->aging_work, + secs_to_jiffies(period_sec)); + } + read_unlock(&pools->rb_lock); + + return 0; +} + +static int compare_keys(struct ib_frmr_key *key1, struct ib_frmr_key *key2) +{ + int res; + + res = key1->ats - key2->ats; + if (res) + return res; + + res = key1->access_flags - key2->access_flags; + if (res) + return res; + + res = key1->vendor_key - key2->vendor_key; + if (res) + return res; + + res = key1->kernel_vendor_key - key2->kernel_vendor_key; + if (res) + return res; + + /* + * allow using handles that support more DMA blocks, up to twice the + * requested number + */ + res = key1->num_dma_blocks - key2->num_dma_blocks; + if (res > 0 && res < key2->num_dma_blocks) + return 0; + + return res; +} + +static struct ib_frmr_pool *ib_frmr_pool_find(struct ib_frmr_pools *pools, + struct ib_frmr_key *key) +{ + struct rb_node *node = pools->rb_root.rb_node; + struct ib_frmr_pool *pool; + int cmp; + + /* The find operation is done under the read lock for performance + * reasons. The case where multiple threads fail to find the pool and + * race to create it is handled by the create_frmr_pool function. + */ + read_lock(&pools->rb_lock); + while (node) { + pool = rb_entry(node, struct ib_frmr_pool, node); + cmp = compare_keys(&pool->key, key); + if (cmp < 0) { + node = node->rb_right; + } else if (cmp > 0) { + node = node->rb_left; + } else { + read_unlock(&pools->rb_lock); + return pool; + } + } + + read_unlock(&pools->rb_lock); + + return NULL; +} + +static struct ib_frmr_pool *create_frmr_pool(struct ib_device *device, + struct ib_frmr_key *key) +{ + struct rb_node **new = &device->frmr_pools->rb_root.rb_node, + *parent = NULL; + struct ib_frmr_pools *pools = device->frmr_pools; + struct ib_frmr_pool *pool; + int cmp; + + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return ERR_PTR(-ENOMEM); + + memcpy(&pool->key, key, sizeof(*key)); + INIT_LIST_HEAD(&pool->queue.pages_list); + INIT_LIST_HEAD(&pool->inactive_queue.pages_list); + spin_lock_init(&pool->lock); + INIT_DELAYED_WORK(&pool->aging_work, pool_aging_work); + pool->device = device; + + write_lock(&pools->rb_lock); + while (*new) { + parent = *new; + cmp = compare_keys( + &rb_entry(parent, struct ib_frmr_pool, node)->key, key); + if (cmp < 0) + new = &((*new)->rb_right); + else + new = &((*new)->rb_left); + /* If a different thread has already created the pool, return + * it. The insert operation is done under the write lock so we + * are sure that the pool is not inserted twice.
+ */ + if (cmp == 0) { + write_unlock(&pools->rb_lock); + kfree(pool); + return rb_entry(parent, struct ib_frmr_pool, node); + } + } + + rb_link_node(&pool->node, parent, new); + rb_insert_color(&pool->node, &pools->rb_root); + + write_unlock(&pools->rb_lock); + + return pool; +} + +int ib_frmr_pools_set_pinned(struct ib_device *device, struct ib_frmr_key *key, + u32 pinned_handles) +{ + struct ib_frmr_pools *pools = device->frmr_pools; + struct ib_frmr_key driver_key = {}; + struct ib_frmr_pool *pool; + u32 needed_handles; + u32 current_total; + int i, ret = 0; + u32 *handles; + + if (!pools) + return -EINVAL; + + ret = ib_check_mr_access(device, key->access_flags); + if (ret) + return ret; + + if (pools->pool_ops->build_key) { + ret = pools->pool_ops->build_key(device, key, &driver_key); + if (ret) + return ret; + } else { + memcpy(&driver_key, key, sizeof(*key)); + } + + pool = ib_frmr_pool_find(pools, &driver_key); + if (!pool) { + pool = create_frmr_pool(device, &driver_key); + if (IS_ERR(pool)) + return PTR_ERR(pool); + } + + spin_lock(&pool->lock); + current_total = pool->in_use + pool->queue.ci + pool->inactive_queue.ci; + + if (current_total < pinned_handles) + needed_handles = pinned_handles - current_total; + else + needed_handles = 0; + + pool->pinned_handles = pinned_handles; + spin_unlock(&pool->lock); + + if (!needed_handles) + goto schedule_aging; + + handles = kcalloc(needed_handles, sizeof(*handles), GFP_KERNEL); + if (!handles) + return -ENOMEM; + + ret = pools->pool_ops->create_frmrs(device, key, handles, + needed_handles); + if (ret) { + kfree(handles); + return ret; + } + + spin_lock(&pool->lock); + for (i = 0; i < needed_handles; i++) { + ret = push_handle_to_queue_locked(&pool->queue, + handles[i]); + if (ret) + goto end; + } + +end: + spin_unlock(&pool->lock); + kfree(handles); + +schedule_aging: + /* Ensure aging is scheduled to adjust to new pinned handles count */ + mod_delayed_work(pools->aging_wq, &pool->aging_work, 0); + + return ret; +} + +static int get_frmr_from_pool(struct ib_device *device, + struct ib_frmr_pool *pool, struct ib_mr *mr) +{ + struct ib_frmr_pools *pools = device->frmr_pools; + u32 handle; + int err; + + spin_lock(&pool->lock); + if (pool->queue.ci == 0) { + if (pool->inactive_queue.ci > 0) { + handle = pop_handle_from_queue_locked( + &pool->inactive_queue); + } else { + spin_unlock(&pool->lock); + err = pools->pool_ops->create_frmrs(device, &pool->key, + &handle, 1); + if (err) + return err; + spin_lock(&pool->lock); + } + } else { + handle = pop_handle_from_queue_locked(&pool->queue); + } + + pool->in_use++; + if (pool->in_use > pool->max_in_use) + pool->max_in_use = pool->in_use; + + spin_unlock(&pool->lock); + + mr->frmr.pool = pool; + mr->frmr.handle = handle; + + return 0; +} + +/* + * Pop an FRMR handle from the pool. + * + * @device: The device to pop the FRMR handle from. + * @mr: The MR that receives the popped FRMR handle. + * + * Returns 0 on success, negative error code on failure. + */ +int ib_frmr_pool_pop(struct ib_device *device, struct ib_mr *mr) +{ + struct ib_frmr_pools *pools = device->frmr_pools; + struct ib_frmr_pool *pool; + + WARN_ON_ONCE(!device->frmr_pools); + pool = ib_frmr_pool_find(pools, &mr->frmr.key); + if (!pool) { + pool = create_frmr_pool(device, &mr->frmr.key); + if (IS_ERR(pool)) + return PTR_ERR(pool); + } + + return get_frmr_from_pool(device, pool, mr); +} +EXPORT_SYMBOL(ib_frmr_pool_pop); + +/* + * Push an FRMR handle back to the pool. + * + * @device: The device to push the FRMR handle to.
+ * @mr: The MR containing the FRMR handle to push back to the pool. + * + * Returns 0 on success, negative error code on failure. + */ +int ib_frmr_pool_push(struct ib_device *device, struct ib_mr *mr) +{ + struct ib_frmr_pool *pool = mr->frmr.pool; + struct ib_frmr_pools *pools = device->frmr_pools; + bool schedule_aging = false; + int ret; + + spin_lock(&pool->lock); + /* Schedule aging every time an empty pool becomes non-empty */ + if (pool->queue.ci == 0) + schedule_aging = true; + ret = push_handle_to_queue_locked(&pool->queue, mr->frmr.handle); + if (ret == 0) + pool->in_use--; + + spin_unlock(&pool->lock); + + if (ret == 0 && schedule_aging) + queue_delayed_work(pools->aging_wq, &pool->aging_work, + secs_to_jiffies(READ_ONCE(pools->aging_period_sec))); + + return ret; +} +EXPORT_SYMBOL(ib_frmr_pool_push); diff --git a/drivers/infiniband/core/frmr_pools.h b/drivers/infiniband/core/frmr_pools.h new file mode 100644 index 0000000000000..81149ff15e003 --- /dev/null +++ b/drivers/infiniband/core/frmr_pools.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + */ + +#ifndef RDMA_CORE_FRMR_POOLS_H +#define RDMA_CORE_FRMR_POOLS_H + +#include <linux/list.h> +#include <linux/mm.h> +#include <linux/rbtree.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> +#include <rdma/ib_verbs.h> + +#define NUM_HANDLES_PER_PAGE \ + ((PAGE_SIZE - sizeof(struct list_head)) / sizeof(u32)) + +struct frmr_handles_page { + struct list_head list; + u32 handles[NUM_HANDLES_PER_PAGE]; +}; + +/* FRMR queue holds a list of frmr_handles_page. + * num_pages: number of pages in the queue. + * ci: current index in the handles array across all pages. + */ +struct frmr_queue { + struct list_head pages_list; + u32 num_pages; + unsigned long ci; +}; + +struct ib_frmr_pool { + struct rb_node node; + struct ib_frmr_key key; /* Pool key */ + + /* Protect access to the queue */ + spinlock_t lock; + struct frmr_queue queue; + struct frmr_queue inactive_queue; + + struct delayed_work aging_work; + struct ib_device *device; + + u32 max_in_use; + u32 in_use; + u32 pinned_handles; +}; + +struct ib_frmr_pools { + struct rb_root rb_root; + rwlock_t rb_lock; + const struct ib_frmr_pool_ops *pool_ops; + + struct workqueue_struct *aging_wq; + u32 aging_period_sec; +}; + +int ib_frmr_pools_set_pinned(struct ib_device *device, struct ib_frmr_key *key, + u32 pinned_handles); +int ib_frmr_pools_set_aging_period(struct ib_device *device, u32 period_sec); +#endif /* RDMA_CORE_FRMR_POOLS_H */ diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 2220a2dfab240..80b0079f63ae8 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -37,11 +37,13 @@ #include #include #include +#include #include "core_priv.h" #include "cma_priv.h" #include "restrack.h" #include "uverbs.h" +#include "frmr_pools.h" /* * This determines whether a non-privileged user is allowed to specify a @@ -172,6 +174,19 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_FRMR_POOLS] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_FRMR_POOL_ENTRY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_FRMR_POOL_KEY] = { .type = NLA_NESTED }, + [RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY] = { .type =
NLA_U64 }, + [RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_FRMR_POOL_QUEUE_HANDLES] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_FRMR_POOL_MAX_IN_USE] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_FRMR_POOL_IN_USE] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY] = { .type = NLA_U64 }, }; static int put_driver_name_print_type(struct sk_buff *msg, const char *name, @@ -2637,6 +2652,270 @@ static int nldev_deldev(struct sk_buff *skb, struct nlmsghdr *nlh, return ib_del_sub_device_and_put(device); } +static int fill_frmr_pool_key(struct sk_buff *msg, struct ib_frmr_key *key) +{ + struct nlattr *key_attr; + + key_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY); + if (!key_attr) + return -EMSGSIZE; + + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS, key->ats)) + goto err; + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS, + key->access_flags)) + goto err; + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY, + key->vendor_key, RDMA_NLDEV_ATTR_PAD)) + goto err; + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS, + key->num_dma_blocks, RDMA_NLDEV_ATTR_PAD)) + goto err; + + if (key->kernel_vendor_key && + nla_put_u64_64bit(msg, + RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY, + key->kernel_vendor_key, RDMA_NLDEV_ATTR_PAD)) + goto err; + + nla_nest_end(msg, key_attr); + return 0; + +err: + return -EMSGSIZE; +} + +static int fill_frmr_pool_entry(struct sk_buff *msg, struct ib_frmr_pool *pool) +{ + if (fill_frmr_pool_key(msg, &pool->key)) + return -EMSGSIZE; + + spin_lock(&pool->lock); + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_QUEUE_HANDLES, + pool->queue.ci + pool->inactive_queue.ci)) + goto err_unlock; + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_MAX_IN_USE, + pool->max_in_use, RDMA_NLDEV_ATTR_PAD)) + goto err_unlock; + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_FRMR_POOL_IN_USE, + pool->in_use, RDMA_NLDEV_ATTR_PAD)) + goto err_unlock; + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES, + pool->pinned_handles)) + goto err_unlock; + spin_unlock(&pool->lock); + + return 0; + +err_unlock: + spin_unlock(&pool->lock); + return -EMSGSIZE; +} + +static int nldev_frmr_pools_parse_key(struct nlattr *tb[], + struct ib_frmr_key *key, + struct netlink_ext_ack *extack) +{ + if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS]) + key->ats = nla_get_u8(tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS]); + + if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS]) + key->access_flags = nla_get_u32( + tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS]); + + if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY]) + key->vendor_key = nla_get_u64( + tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY]); + + if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS]) + key->num_dma_blocks = nla_get_u64( + tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS]); + + if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY]) + return -EINVAL; + + return 0; +} + +static int nldev_frmr_pools_set_pinned(struct ib_device *device, + struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + struct nlattr *key_tb[RDMA_NLDEV_ATTR_MAX]; + struct ib_frmr_key key = { 0 }; + u32 pinned_handles = 0; + int err = 0; + + pinned_handles = + nla_get_u32(tb[RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES]); + + if (!tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY]) + return -EINVAL; + + err = nla_parse_nested(key_tb, 
RDMA_NLDEV_ATTR_MAX - 1, + tb[RDMA_NLDEV_ATTR_FRMR_POOL_KEY], nldev_policy, + extack); + if (err) + return err; + + err = nldev_frmr_pools_parse_key(key_tb, &key, extack); + if (err) + return err; + + err = ib_frmr_pools_set_pinned(device, &key, pinned_handles); + + return err; +} + +static int nldev_frmr_pools_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + struct ib_frmr_pools *pools; + int err, ret = 0, idx = 0; + struct ib_frmr_pool *pool; + struct nlattr *table_attr; + struct nlattr *entry_attr; + bool show_details = false; + struct ib_device *device; + int start = cb->args[0]; + struct rb_node *node; + struct nlmsghdr *nlh; + bool filled = false; + + err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, NL_VALIDATE_LIBERAL, NULL); + if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) + return -EINVAL; + + device = ib_device_get_by_index( + sock_net(skb->sk), nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX])); + if (!device) + return -EINVAL; + + if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]) + show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]); + + pools = device->frmr_pools; + if (!pools) { + ib_device_put(device); + return 0; + } + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NLDEV_CMD_FRMR_POOLS_GET), + 0, NLM_F_MULTI); + + if (!nlh || fill_nldev_handle(skb, device)) { + ret = -EMSGSIZE; + goto err; + } + + table_attr = nla_nest_start_noflag(skb, RDMA_NLDEV_ATTR_FRMR_POOLS); + if (!table_attr) { + ret = -EMSGSIZE; + goto err; + } + + read_lock(&pools->rb_lock); + for (node = rb_first(&pools->rb_root); node; node = rb_next(node)) { + pool = rb_entry(node, struct ib_frmr_pool, node); + if (pool->key.kernel_vendor_key && !show_details) + continue; + + if (idx < start) { + idx++; + continue; + } + + filled = true; + + entry_attr = nla_nest_start_noflag( + skb, RDMA_NLDEV_ATTR_FRMR_POOL_ENTRY); + if (!entry_attr) { + ret = -EMSGSIZE; + goto end_msg; + } + + if (fill_frmr_pool_entry(skb, pool)) { + nla_nest_cancel(skb, entry_attr); + ret = -EMSGSIZE; + goto end_msg; + } + + nla_nest_end(skb, entry_attr); + idx++; + } +end_msg: + read_unlock(&pools->rb_lock); + + nla_nest_end(skb, table_attr); + nlmsg_end(skb, nlh); + cb->args[0] = idx; + + /* + * No more entries to fill, cancel the message and + * return 0 to mark end of dumpit. 
+ */ + if (!filled) + goto err; + + ib_device_put(device); + return skb->len; + +err: + nlmsg_cancel(skb, nlh); + ib_device_put(device); + return ret; +} + +static int nldev_frmr_pools_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct ib_device *device; + struct nlattr **tb; + u32 aging_period; + int err; + + tb = kcalloc(RDMA_NLDEV_ATTR_MAX, sizeof(*tb), GFP_KERNEL); + if (!tb) + return -ENOMEM; + + err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, + extack); + if (err) + goto free_tb; + + if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX]) { + err = -EINVAL; + goto free_tb; + } + + device = ib_device_get_by_index( + sock_net(skb->sk), nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX])); + if (!device) { + err = -EINVAL; + goto free_tb; + } + + if (tb[RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD]) { + aging_period = nla_get_u32( + tb[RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD]); + err = ib_frmr_pools_set_aging_period(device, aging_period); + goto done; + } + + if (tb[RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES]) + err = nldev_frmr_pools_set_pinned(device, tb, extack); + +done: + ib_device_put(device); +free_tb: + kfree(tb); + return err; +} + static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_GET] = { .doit = nldev_get_doit, @@ -2743,6 +3022,13 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { .doit = nldev_deldev, .flags = RDMA_NL_ADMIN_PERM, }, + [RDMA_NLDEV_CMD_FRMR_POOLS_GET] = { + .dump = nldev_frmr_pools_get_dumpit, + }, + [RDMA_NLDEV_CMD_FRMR_POOLS_SET] = { + .doit = nldev_frmr_pools_set_doit, + .flags = RDMA_NL_ADMIN_PERM, + }, }; static int fill_mon_netdev_rename(struct sk_buff *msg, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 40284bbb45d6d..7b0d6e8ce8d7d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4164,6 +4164,7 @@ static const struct uapi_definition mlx5_ib_defs[] = { static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { + mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); mlx5_ib_data_direct_cleanup(dev); mlx5_ib_cleanup_multiport_master(dev); WARN_ON(!xa_empty(&dev->odp_mkeys)); @@ -4229,6 +4230,8 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) if (err) goto err_mp; + mlx5_cmd_init_async_ctx(mdev, &dev->async_ctx); + return 0; err_mp: mlx5_ib_cleanup_multiport_master(dev); @@ -4606,7 +4609,7 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) { - mlx5_mkey_cache_cleanup(dev); + mlx5r_frmr_pools_cleanup(&dev->ib_dev); mlx5r_umr_resource_cleanup(dev); mlx5r_umr_cleanup(dev); } @@ -4624,9 +4627,10 @@ static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) if (ret) return ret; - ret = mlx5_mkey_cache_init(dev); + ret = mlx5r_frmr_pools_init(&dev->ib_dev); if (ret) - mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); + mlx5_ib_warn(dev, "frmr pools init failed %d\n", ret); + return ret; } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 09d82d5f95e35..da7bd4d6df3c2 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -641,25 +641,12 @@ enum mlx5_mkey_type { /* Used for non-existent ph value */ #define MLX5_IB_NO_PH 0xff -struct mlx5r_cache_rb_key { - u8 ats:1; - u8 ph; - u16 st_index; - unsigned int access_mode; - unsigned int access_flags; - unsigned int ndescs; -}; - struct mlx5_ib_mkey { 
u32 key; enum mlx5_mkey_type type; unsigned int ndescs; struct wait_queue_head wait; refcount_t usecount; - /* Cacheable user Mkey must hold either a rb_key or a cache_ent. */ - struct mlx5r_cache_rb_key rb_key; - struct mlx5_cache_ent *cache_ent; - u8 cacheable : 1; }; #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE) @@ -784,68 +771,6 @@ struct umr_common { struct mutex init_lock; }; -#define NUM_MKEYS_PER_PAGE \ - ((PAGE_SIZE - sizeof(struct list_head)) / sizeof(u32)) - -struct mlx5_mkeys_page { - u32 mkeys[NUM_MKEYS_PER_PAGE]; - struct list_head list; -}; -static_assert(sizeof(struct mlx5_mkeys_page) == PAGE_SIZE); - -struct mlx5_mkeys_queue { - struct list_head pages_list; - u32 num_pages; - unsigned long ci; - spinlock_t lock; /* sync list ops */ -}; - -struct mlx5_cache_ent { - struct mlx5_mkeys_queue mkeys_queue; - u32 pending; - - char name[4]; - - struct rb_node node; - struct mlx5r_cache_rb_key rb_key; - - u8 is_tmp:1; - u8 disabled:1; - u8 fill_to_high_water:1; - u8 tmp_cleanup_scheduled:1; - - /* - * - limit is the low water mark for stored mkeys, 2* limit is the - * upper water mark. - */ - u32 in_use; - u32 limit; - - /* Statistics */ - u32 miss; - - struct mlx5_ib_dev *dev; - struct delayed_work dwork; -}; - -struct mlx5r_async_create_mkey { - union { - u32 in[MLX5_ST_SZ_BYTES(create_mkey_in)]; - u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; - }; - struct mlx5_async_work cb_work; - struct mlx5_cache_ent *ent; - u32 mkey; -}; - -struct mlx5_mkey_cache { - struct workqueue_struct *wq; - struct rb_root rb_root; - struct mutex rb_lock; - struct dentry *fs_root; - unsigned long last_add; -}; - struct mlx5_ib_port_resources { struct mlx5_ib_gsi_qp *gsi; struct work_struct pkey_change_work; @@ -1180,8 +1105,6 @@ struct mlx5_ib_dev { struct mlx5_ib_resources devr; atomic_t mkey_var; - struct mlx5_mkey_cache cache; - struct timer_list delay_timer; /* Prevents soft lock on massive reg MRs */ struct mutex slow_path_mutex; struct ib_odp_caps odp_caps; @@ -1438,13 +1361,8 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u32 port, void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, u64 access_flags); int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); -int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev); -void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev); -struct mlx5_cache_ent * -mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev, - struct mlx5r_cache_rb_key rb_key, - bool persistent_entry); - +int mlx5r_frmr_pools_init(struct ib_device *device); +void mlx5r_frmr_pools_cleanup(struct ib_device *device); struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int access_flags, int access_mode, int ndescs); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 325fa04cbe8ae..eec1a750ee9fa 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -31,7 +31,6 @@ * SOFTWARE. 
*/ - #include #include #include @@ -39,6 +38,7 @@ #include #include #include +#include #include #include "dm.h" #include "mlx5_ib.h" @@ -46,15 +46,15 @@ #include "data_direct.h" #include "dmah.h" -enum { - MAX_PENDING_REG_MR = 8, -}; - -#define MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS 4 #define MLX5_UMR_ALIGN 2048 -static void -create_mkey_callback(int status, struct mlx5_async_work *context); +static int mkey_max_umr_order(struct mlx5_ib_dev *dev) +{ + if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) + return MLX5_MAX_UMR_EXTENDED_SHIFT; + return MLX5_MAX_UMR_SHIFT; +} + static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, u64 iova, int access_flags, unsigned long page_size, bool populate, @@ -111,23 +111,6 @@ static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, return ret; } -static int mlx5_ib_create_mkey_cb(struct mlx5r_async_create_mkey *async_create) -{ - struct mlx5_ib_dev *dev = async_create->ent->dev; - size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - size_t outlen = MLX5_ST_SZ_BYTES(create_mkey_out); - - MLX5_SET(create_mkey_in, async_create->in, opcode, - MLX5_CMD_OP_CREATE_MKEY); - assign_mkey_variant(dev, &async_create->mkey, async_create->in); - return mlx5_cmd_exec_cb(&dev->async_ctx, async_create->in, inlen, - async_create->out, outlen, create_mkey_callback, - &async_create->cb_work); -} - -static int mkey_cache_max_order(struct mlx5_ib_dev *dev); -static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent); - static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key))); @@ -135,94 +118,6 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) return mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key); } -static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out) -{ - if (status == -ENXIO) /* core driver is not available */ - return; - - mlx5_ib_warn(dev, "async reg mr failed. 
status %d\n", status); - if (status != -EREMOTEIO) /* driver specific failure */ - return; - - /* Failed in FW, print cmd out failure details */ - mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out); -} - -static int push_mkey_locked(struct mlx5_cache_ent *ent, u32 mkey) -{ - unsigned long tmp = ent->mkeys_queue.ci % NUM_MKEYS_PER_PAGE; - struct mlx5_mkeys_page *page; - - lockdep_assert_held(&ent->mkeys_queue.lock); - if (ent->mkeys_queue.ci >= - ent->mkeys_queue.num_pages * NUM_MKEYS_PER_PAGE) { - page = kzalloc(sizeof(*page), GFP_ATOMIC); - if (!page) - return -ENOMEM; - ent->mkeys_queue.num_pages++; - list_add_tail(&page->list, &ent->mkeys_queue.pages_list); - } else { - page = list_last_entry(&ent->mkeys_queue.pages_list, - struct mlx5_mkeys_page, list); - } - - page->mkeys[tmp] = mkey; - ent->mkeys_queue.ci++; - return 0; -} - -static int pop_mkey_locked(struct mlx5_cache_ent *ent) -{ - unsigned long tmp = (ent->mkeys_queue.ci - 1) % NUM_MKEYS_PER_PAGE; - struct mlx5_mkeys_page *last_page; - u32 mkey; - - lockdep_assert_held(&ent->mkeys_queue.lock); - last_page = list_last_entry(&ent->mkeys_queue.pages_list, - struct mlx5_mkeys_page, list); - mkey = last_page->mkeys[tmp]; - last_page->mkeys[tmp] = 0; - ent->mkeys_queue.ci--; - if (ent->mkeys_queue.num_pages > 1 && !tmp) { - list_del(&last_page->list); - ent->mkeys_queue.num_pages--; - kfree(last_page); - } - return mkey; -} - -static void create_mkey_callback(int status, struct mlx5_async_work *context) -{ - struct mlx5r_async_create_mkey *mkey_out = - container_of(context, struct mlx5r_async_create_mkey, cb_work); - struct mlx5_cache_ent *ent = mkey_out->ent; - struct mlx5_ib_dev *dev = ent->dev; - unsigned long flags; - - if (status) { - create_mkey_warn(dev, status, mkey_out->out); - kfree(mkey_out); - spin_lock_irqsave(&ent->mkeys_queue.lock, flags); - ent->pending--; - WRITE_ONCE(dev->fill_delay, 1); - spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags); - mod_timer(&dev->delay_timer, jiffies + HZ); - return; - } - - mkey_out->mkey |= mlx5_idx_to_mkey( - MLX5_GET(create_mkey_out, mkey_out->out, mkey_index)); - WRITE_ONCE(dev->cache.last_add, jiffies); - - spin_lock_irqsave(&ent->mkeys_queue.lock, flags); - push_mkey_locked(ent, mkey_out->mkey); - ent->pending--; - /* If we are doing fill_to_high_water then keep going. 
*/ - queue_adjust_cache_locked(ent); - spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags); - kfree(mkey_out); -} - static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs) { int ret = 0; @@ -242,538 +137,6 @@ static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs) return ret; } -static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc) -{ - set_mkc_access_pd_addr_fields(mkc, ent->rb_key.access_flags, 0, - ent->dev->umrc.pd); - MLX5_SET(mkc, mkc, free, 1); - MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3); - MLX5_SET(mkc, mkc, access_mode_4_2, - (ent->rb_key.access_mode >> 2) & 0x7); - MLX5_SET(mkc, mkc, ma_translation_mode, !!ent->rb_key.ats); - - MLX5_SET(mkc, mkc, translations_octword_size, - get_mkc_octo_size(ent->rb_key.access_mode, - ent->rb_key.ndescs)); - MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); - - if (ent->rb_key.ph != MLX5_IB_NO_PH) { - MLX5_SET(mkc, mkc, pcie_tph_en, 1); - MLX5_SET(mkc, mkc, pcie_tph_ph, ent->rb_key.ph); - if (ent->rb_key.st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX) - MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index, - ent->rb_key.st_index); - } -} - -/* Asynchronously schedule new MRs to be populated in the cache. */ -static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) -{ - struct mlx5r_async_create_mkey *async_create; - void *mkc; - int err = 0; - int i; - - for (i = 0; i < num; i++) { - async_create = kzalloc(sizeof(struct mlx5r_async_create_mkey), - GFP_KERNEL); - if (!async_create) - return -ENOMEM; - mkc = MLX5_ADDR_OF(create_mkey_in, async_create->in, - memory_key_mkey_entry); - set_cache_mkc(ent, mkc); - async_create->ent = ent; - - spin_lock_irq(&ent->mkeys_queue.lock); - if (ent->pending >= MAX_PENDING_REG_MR) { - err = -EAGAIN; - goto free_async_create; - } - ent->pending++; - spin_unlock_irq(&ent->mkeys_queue.lock); - - err = mlx5_ib_create_mkey_cb(async_create); - if (err) { - mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err); - goto err_create_mkey; - } - } - - return 0; - -err_create_mkey: - spin_lock_irq(&ent->mkeys_queue.lock); - ent->pending--; -free_async_create: - spin_unlock_irq(&ent->mkeys_queue.lock); - kfree(async_create); - return err; -} - -/* Synchronously create a MR in the cache */ -static int create_cache_mkey(struct mlx5_cache_ent *ent, u32 *mkey) -{ - size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - void *mkc; - u32 *in; - int err; - - in = kzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; - mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - set_cache_mkc(ent, mkc); - - err = mlx5_core_create_mkey(ent->dev->mdev, mkey, in, inlen); - if (err) - goto free_in; - - WRITE_ONCE(ent->dev->cache.last_add, jiffies); -free_in: - kfree(in); - return err; -} - -static void remove_cache_mr_locked(struct mlx5_cache_ent *ent) -{ - u32 mkey; - - lockdep_assert_held(&ent->mkeys_queue.lock); - if (!ent->mkeys_queue.ci) - return; - mkey = pop_mkey_locked(ent); - spin_unlock_irq(&ent->mkeys_queue.lock); - mlx5_core_destroy_mkey(ent->dev->mdev, mkey); - spin_lock_irq(&ent->mkeys_queue.lock); -} - -static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target, - bool limit_fill) - __acquires(&ent->mkeys_queue.lock) __releases(&ent->mkeys_queue.lock) -{ - int err; - - lockdep_assert_held(&ent->mkeys_queue.lock); - - while (true) { - if (limit_fill) - target = ent->limit * 2; - if (target == ent->pending + ent->mkeys_queue.ci) - return 0; - if (target > ent->pending + 
ent->mkeys_queue.ci) { - u32 todo = target - (ent->pending + ent->mkeys_queue.ci); - - spin_unlock_irq(&ent->mkeys_queue.lock); - err = add_keys(ent, todo); - if (err == -EAGAIN) - usleep_range(3000, 5000); - spin_lock_irq(&ent->mkeys_queue.lock); - if (err) { - if (err != -EAGAIN) - return err; - } else - return 0; - } else { - remove_cache_mr_locked(ent); - } - } -} - -static ssize_t size_write(struct file *filp, const char __user *buf, - size_t count, loff_t *pos) -{ - struct mlx5_cache_ent *ent = filp->private_data; - u32 target; - int err; - - err = kstrtou32_from_user(buf, count, 0, &target); - if (err) - return err; - - /* - * Target is the new value of total_mrs the user requests, however we - * cannot free MRs that are in use. Compute the target value for stored - * mkeys. - */ - spin_lock_irq(&ent->mkeys_queue.lock); - if (target < ent->in_use) { - err = -EINVAL; - goto err_unlock; - } - target = target - ent->in_use; - if (target < ent->limit || target > ent->limit*2) { - err = -EINVAL; - goto err_unlock; - } - err = resize_available_mrs(ent, target, false); - if (err) - goto err_unlock; - spin_unlock_irq(&ent->mkeys_queue.lock); - - return count; - -err_unlock: - spin_unlock_irq(&ent->mkeys_queue.lock); - return err; -} - -static ssize_t size_read(struct file *filp, char __user *buf, size_t count, - loff_t *pos) -{ - struct mlx5_cache_ent *ent = filp->private_data; - char lbuf[20]; - int err; - - err = snprintf(lbuf, sizeof(lbuf), "%ld\n", - ent->mkeys_queue.ci + ent->in_use); - if (err < 0) - return err; - - return simple_read_from_buffer(buf, count, pos, lbuf, err); -} - -static const struct file_operations size_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .write = size_write, - .read = size_read, -}; - -static ssize_t limit_write(struct file *filp, const char __user *buf, - size_t count, loff_t *pos) -{ - struct mlx5_cache_ent *ent = filp->private_data; - u32 var; - int err; - - err = kstrtou32_from_user(buf, count, 0, &var); - if (err) - return err; - - /* - * Upon set we immediately fill the cache to high water mark implied by - * the limit. - */ - spin_lock_irq(&ent->mkeys_queue.lock); - ent->limit = var; - err = resize_available_mrs(ent, 0, true); - spin_unlock_irq(&ent->mkeys_queue.lock); - if (err) - return err; - return count; -} - -static ssize_t limit_read(struct file *filp, char __user *buf, size_t count, - loff_t *pos) -{ - struct mlx5_cache_ent *ent = filp->private_data; - char lbuf[20]; - int err; - - err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit); - if (err < 0) - return err; - - return simple_read_from_buffer(buf, count, pos, lbuf, err); -} - -static const struct file_operations limit_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .write = limit_write, - .read = limit_read, -}; - -static bool someone_adding(struct mlx5_mkey_cache *cache) -{ - struct mlx5_cache_ent *ent; - struct rb_node *node; - bool ret; - - mutex_lock(&cache->rb_lock); - for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) { - ent = rb_entry(node, struct mlx5_cache_ent, node); - spin_lock_irq(&ent->mkeys_queue.lock); - ret = ent->mkeys_queue.ci < ent->limit; - spin_unlock_irq(&ent->mkeys_queue.lock); - if (ret) { - mutex_unlock(&cache->rb_lock); - return true; - } - } - mutex_unlock(&cache->rb_lock); - return false; -} - -/* - * Check if the bucket is outside the high/low water mark and schedule an async - * update. The cache refill has hysteresis, once the low water mark is hit it is - * refilled up to the high mark. 
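* Between the marks nothing is scheduled: filling stops once
* ci + pending reaches 2 * ent->limit, and shrinking starts only once
* ci exceeds it.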
- */ -static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) -{ - lockdep_assert_held(&ent->mkeys_queue.lock); - - if (ent->disabled || READ_ONCE(ent->dev->fill_delay) || ent->is_tmp) - return; - if (ent->mkeys_queue.ci < ent->limit) { - ent->fill_to_high_water = true; - mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); - } else if (ent->fill_to_high_water && - ent->mkeys_queue.ci + ent->pending < 2 * ent->limit) { - /* - * Once we start populating due to hitting a low water mark - * continue until we pass the high water mark. - */ - mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); - } else if (ent->mkeys_queue.ci == 2 * ent->limit) { - ent->fill_to_high_water = false; - } else if (ent->mkeys_queue.ci > 2 * ent->limit) { - /* Queue deletion of excess entries */ - ent->fill_to_high_water = false; - if (ent->pending) - queue_delayed_work(ent->dev->cache.wq, &ent->dwork, - secs_to_jiffies(1)); - else - mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); - } -} - -static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent) -{ - u32 mkey; - - spin_lock_irq(&ent->mkeys_queue.lock); - while (ent->mkeys_queue.ci) { - mkey = pop_mkey_locked(ent); - spin_unlock_irq(&ent->mkeys_queue.lock); - mlx5_core_destroy_mkey(dev->mdev, mkey); - spin_lock_irq(&ent->mkeys_queue.lock); - } - ent->tmp_cleanup_scheduled = false; - spin_unlock_irq(&ent->mkeys_queue.lock); -} - -static void __cache_work_func(struct mlx5_cache_ent *ent) -{ - struct mlx5_ib_dev *dev = ent->dev; - struct mlx5_mkey_cache *cache = &dev->cache; - int err; - - spin_lock_irq(&ent->mkeys_queue.lock); - if (ent->disabled) - goto out; - - if (ent->fill_to_high_water && - ent->mkeys_queue.ci + ent->pending < 2 * ent->limit && - !READ_ONCE(dev->fill_delay)) { - spin_unlock_irq(&ent->mkeys_queue.lock); - err = add_keys(ent, 1); - spin_lock_irq(&ent->mkeys_queue.lock); - if (ent->disabled) - goto out; - if (err) { - /* - * EAGAIN only happens if there are pending MRs, so we - * will be rescheduled when storing them. The only - * failure path here is ENOMEM. - */ - if (err != -EAGAIN) { - mlx5_ib_warn( - dev, - "add keys command failed, err %d\n", - err); - queue_delayed_work(cache->wq, &ent->dwork, - secs_to_jiffies(1)); - } - } - } else if (ent->mkeys_queue.ci > 2 * ent->limit) { - bool need_delay; - - /* - * The remove_cache_mr() logic is performed as garbage - * collection task. Such task is intended to be run when no - * other active processes are running. - * - * The need_resched() will return TRUE if there are user tasks - * to be activated in near future. - * - * In such case, we don't execute remove_cache_mr() and postpone - * the garbage collection work to try to run in next cycle, in - * order to free CPU resources to other tasks. 
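* The same postponement is applied when another cache entry is still
* below its limit (someone_adding()) or when an mkey was added within
* the last 300 seconds, since both indicate the cache is still in
* active use.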
- */ - spin_unlock_irq(&ent->mkeys_queue.lock); - need_delay = need_resched() || someone_adding(cache) || - !time_after(jiffies, - READ_ONCE(cache->last_add) + 300 * HZ); - spin_lock_irq(&ent->mkeys_queue.lock); - if (ent->disabled) - goto out; - if (need_delay) { - queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); - goto out; - } - remove_cache_mr_locked(ent); - queue_adjust_cache_locked(ent); - } -out: - spin_unlock_irq(&ent->mkeys_queue.lock); -} - -static void delayed_cache_work_func(struct work_struct *work) -{ - struct mlx5_cache_ent *ent; - - ent = container_of(work, struct mlx5_cache_ent, dwork.work); - /* temp entries are never filled, only cleaned */ - if (ent->is_tmp) - clean_keys(ent->dev, ent); - else - __cache_work_func(ent); -} - -static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1, - struct mlx5r_cache_rb_key key2) -{ - int res; - - res = key1.ats - key2.ats; - if (res) - return res; - - res = key1.access_mode - key2.access_mode; - if (res) - return res; - - res = key1.access_flags - key2.access_flags; - if (res) - return res; - - res = key1.st_index - key2.st_index; - if (res) - return res; - - res = key1.ph - key2.ph; - if (res) - return res; - - /* - * keep ndescs the last in the compare table since the find function - * searches for an exact match on all properties and only closest - * match in size. - */ - return key1.ndescs - key2.ndescs; -} - -static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache, - struct mlx5_cache_ent *ent) -{ - struct rb_node **new = &cache->rb_root.rb_node, *parent = NULL; - struct mlx5_cache_ent *cur; - int cmp; - - /* Figure out where to put new node */ - while (*new) { - cur = rb_entry(*new, struct mlx5_cache_ent, node); - parent = *new; - cmp = cache_ent_key_cmp(cur->rb_key, ent->rb_key); - if (cmp > 0) - new = &((*new)->rb_left); - if (cmp < 0) - new = &((*new)->rb_right); - if (cmp == 0) - return -EEXIST; - } - - /* Add new node and rebalance tree. */ - rb_link_node(&ent->node, parent, new); - rb_insert_color(&ent->node, &cache->rb_root); - - return 0; -} - -static struct mlx5_cache_ent * -mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev, - struct mlx5r_cache_rb_key rb_key) -{ - struct rb_node *node = dev->cache.rb_root.rb_node; - struct mlx5_cache_ent *cur, *smallest = NULL; - u64 ndescs_limit; - int cmp; - - /* - * Find the smallest ent with order >= requested_order. - */ - while (node) { - cur = rb_entry(node, struct mlx5_cache_ent, node); - cmp = cache_ent_key_cmp(cur->rb_key, rb_key); - if (cmp > 0) { - smallest = cur; - node = node->rb_left; - } - if (cmp < 0) - node = node->rb_right; - if (cmp == 0) - return cur; - } - - /* - * Limit the usage of mkeys larger than twice the required size while - * also allowing the usage of smallest cache entry for small MRs. - */ - ndescs_limit = max_t(u64, rb_key.ndescs * 2, - MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS); - - return (smallest && - smallest->rb_key.access_mode == rb_key.access_mode && - smallest->rb_key.access_flags == rb_key.access_flags && - smallest->rb_key.ats == rb_key.ats && - smallest->rb_key.st_index == rb_key.st_index && - smallest->rb_key.ph == rb_key.ph && - smallest->rb_key.ndescs <= ndescs_limit) ? 
- smallest : - NULL; -} - -static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - struct mlx5_cache_ent *ent) -{ - struct mlx5_ib_mr *mr; - int err; - - mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) - return ERR_PTR(-ENOMEM); - - spin_lock_irq(&ent->mkeys_queue.lock); - ent->in_use++; - - if (!ent->mkeys_queue.ci) { - queue_adjust_cache_locked(ent); - ent->miss++; - spin_unlock_irq(&ent->mkeys_queue.lock); - err = create_cache_mkey(ent, &mr->mmkey.key); - if (err) { - spin_lock_irq(&ent->mkeys_queue.lock); - ent->in_use--; - spin_unlock_irq(&ent->mkeys_queue.lock); - kfree(mr); - return ERR_PTR(err); - } - } else { - mr->mmkey.key = pop_mkey_locked(ent); - queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->mkeys_queue.lock); - } - mr->mmkey.cache_ent = ent; - mr->mmkey.type = MLX5_MKEY_MR; - mr->mmkey.rb_key = ent->rb_key; - mr->mmkey.cacheable = true; - init_waitqueue_head(&mr->mmkey.wait); - return mr; -} - static int get_unchangeable_access_flags(struct mlx5_ib_dev *dev, int access_flags) { @@ -798,256 +161,201 @@ static int get_unchangeable_access_flags(struct mlx5_ib_dev *dev, return ret; } -struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - int access_flags, int access_mode, - int ndescs) -{ - struct mlx5r_cache_rb_key rb_key = { - .ndescs = ndescs, - .access_mode = access_mode, - .access_flags = get_unchangeable_access_flags(dev, access_flags), - .ph = MLX5_IB_NO_PH, - }; - struct mlx5_cache_ent *ent = mkey_cache_ent_from_rb_key(dev, rb_key); +#define MLX5_FRMR_POOLS_KEY_ACCESS_MODE_KSM_MASK 1ULL +#define MLX5_FRMR_POOLS_KEY_VENDOR_KEY_SUPPORTED \ + MLX5_FRMR_POOLS_KEY_ACCESS_MODE_KSM_MASK - if (!ent) - return ERR_PTR(-EOPNOTSUPP); - - return _mlx5_mr_cache_alloc(dev, ent); -} - -static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) -{ - if (!mlx5_debugfs_root || dev->is_rep) - return; +#define MLX5_FRMR_POOLS_KERNEL_KEY_PH_SHIFT 16 +#define MLX5_FRMR_POOLS_KERNEL_KEY_PH_MASK 0xFF0000 +#define MLX5_FRMR_POOLS_KERNEL_KEY_ST_INDEX_MASK 0xFFFF - debugfs_remove_recursive(dev->cache.fs_root); - dev->cache.fs_root = NULL; -} - -static void mlx5_mkey_cache_debugfs_add_ent(struct mlx5_ib_dev *dev, - struct mlx5_cache_ent *ent) +static struct mlx5_ib_mr * +_mlx5_frmr_pool_alloc(struct mlx5_ib_dev *dev, struct ib_umem *umem, + int access_flags, int access_mode, + unsigned long page_size, u16 st_index, u8 ph) { - int order = order_base_2(ent->rb_key.ndescs); - struct dentry *dir; - - if (!mlx5_debugfs_root || dev->is_rep) - return; - - if (ent->rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM) - order = MLX5_IMR_KSM_CACHE_ENTRY + 2; - - sprintf(ent->name, "%d", order); - dir = debugfs_create_dir(ent->name, dev->cache.fs_root); - debugfs_create_file("size", 0600, dir, ent, &size_fops); - debugfs_create_file("limit", 0600, dir, ent, &limit_fops); - debugfs_create_ulong("cur", 0400, dir, &ent->mkeys_queue.ci); - debugfs_create_u32("miss", 0600, dir, &ent->miss); -} + struct mlx5_ib_mr *mr; + int err; -static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev) -{ - struct dentry *dbg_root = mlx5_debugfs_get_dev_root(dev->mdev); - struct mlx5_mkey_cache *cache = &dev->cache; + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); - if (!mlx5_debugfs_root || dev->is_rep) - return; + mr->ibmr.frmr.key.ats = mlx5_umem_needs_ats(dev, umem, access_flags); + mr->ibmr.frmr.key.access_flags = + get_unchangeable_access_flags(dev, access_flags); + mr->ibmr.frmr.key.num_dma_blocks = + ib_umem_num_dma_blocks(umem, page_size); 
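/* Key packing used below, per the MLX5_FRMR_POOLS_* masks defined
 * above: vendor_key bit 0 selects KSM (1) versus MTT (0) access mode,
 * while kernel_vendor_key carries the PCIe TPH steering tag index in
 * bits 0..15 and the processing hint (PH) in bits 16..23. The XOR
 * below swaps the 0 and MLX5_IB_NO_PH encodings of PH so that a zeroed
 * kernel_vendor_key describes an mkey with no TPH state.
 */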
+ mr->ibmr.frmr.key.vendor_key = + access_mode == MLX5_MKC_ACCESS_MODE_KSM ? + MLX5_FRMR_POOLS_KEY_ACCESS_MODE_KSM_MASK : + 0; + + /* Normalize ph: swap 0 and MLX5_IB_NO_PH */ + if (ph == MLX5_IB_NO_PH || ph == 0) + ph ^= MLX5_IB_NO_PH; + + mr->ibmr.frmr.key.kernel_vendor_key = + st_index | (ph << MLX5_FRMR_POOLS_KERNEL_KEY_PH_SHIFT); + err = ib_frmr_pool_pop(&dev->ib_dev, &mr->ibmr); + if (err) { + kfree(mr); + return ERR_PTR(err); + } + mr->mmkey.key = mr->ibmr.frmr.handle; + init_waitqueue_head(&mr->mmkey.wait); - cache->fs_root = debugfs_create_dir("mr_cache", dbg_root); + return mr; } -static void delay_time_func(struct timer_list *t) +struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, + int access_flags, int access_mode, + int ndescs) { - struct mlx5_ib_dev *dev = timer_container_of(dev, t, delay_timer); - - WRITE_ONCE(dev->fill_delay, 0); -} + struct ib_frmr_key key = { + .access_flags = + get_unchangeable_access_flags(dev, access_flags), + .vendor_key = access_mode == MLX5_MKC_ACCESS_MODE_MTT ? + 0 : + MLX5_FRMR_POOLS_KEY_ACCESS_MODE_KSM_MASK, + .num_dma_blocks = ndescs, + .kernel_vendor_key = 0, /* no PH and no ST index */ + }; + struct mlx5_ib_mr *mr; + int ret; -static int mlx5r_mkeys_init(struct mlx5_cache_ent *ent) -{ - struct mlx5_mkeys_page *page; + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); - page = kzalloc(sizeof(*page), GFP_KERNEL); - if (!page) - return -ENOMEM; - INIT_LIST_HEAD(&ent->mkeys_queue.pages_list); - spin_lock_init(&ent->mkeys_queue.lock); - list_add_tail(&page->list, &ent->mkeys_queue.pages_list); - ent->mkeys_queue.num_pages++; - return 0; -} + init_waitqueue_head(&mr->mmkey.wait); -static void mlx5r_mkeys_uninit(struct mlx5_cache_ent *ent) -{ - struct mlx5_mkeys_page *page; + mr->ibmr.frmr.key = key; + ret = ib_frmr_pool_pop(&dev->ib_dev, &mr->ibmr); + if (ret) { + kfree(mr); + return ERR_PTR(ret); + } + mr->mmkey.key = mr->ibmr.frmr.handle; + mr->mmkey.type = MLX5_MKEY_MR; - WARN_ON(ent->mkeys_queue.ci || ent->mkeys_queue.num_pages > 1); - page = list_last_entry(&ent->mkeys_queue.pages_list, - struct mlx5_mkeys_page, list); - list_del(&page->list); - kfree(page); + return mr; } -struct mlx5_cache_ent * -mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev, - struct mlx5r_cache_rb_key rb_key, - bool persistent_entry) +static int mlx5r_create_mkeys(struct ib_device *device, struct ib_frmr_key *key, + u32 *handles, unsigned int count) { - struct mlx5_cache_ent *ent; - int order; - int ret; + int access_mode = + key->vendor_key & MLX5_FRMR_POOLS_KEY_ACCESS_MODE_KSM_MASK ? 
+ MLX5_MKC_ACCESS_MODE_KSM : + MLX5_MKC_ACCESS_MODE_MTT; - ent = kzalloc(sizeof(*ent), GFP_KERNEL); - if (!ent) - return ERR_PTR(-ENOMEM); + struct mlx5_ib_dev *dev = to_mdev(device); + size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + u16 st_index; + void *mkc; + u32 *in; + int err, i; + u8 ph; - ret = mlx5r_mkeys_init(ent); - if (ret) - goto mkeys_err; - ent->rb_key = rb_key; - ent->dev = dev; - ent->is_tmp = !persistent_entry; + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); + set_mkc_access_pd_addr_fields(mkc, key->access_flags, 0, dev->umrc.pd); + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); + MLX5_SET(mkc, mkc, ma_translation_mode, !!key->ats); + MLX5_SET(mkc, mkc, translations_octword_size, + get_mkc_octo_size(access_mode, key->num_dma_blocks)); + MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); - ret = mlx5_cache_ent_insert(&dev->cache, ent); - if (ret) - goto ent_insert_err; - - if (persistent_entry) { - if (rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM) - order = MLX5_IMR_KSM_CACHE_ENTRY; - else - order = order_base_2(rb_key.ndescs) - 2; - - if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && - !dev->is_rep && mlx5_core_is_pf(dev->mdev) && - mlx5r_umr_can_load_pas(dev, 0)) - ent->limit = dev->mdev->profile.mr_cache[order].limit; - else - ent->limit = 0; - - mlx5_mkey_cache_debugfs_add_ent(dev, ent); + st_index = key->kernel_vendor_key & + MLX5_FRMR_POOLS_KERNEL_KEY_ST_INDEX_MASK; + ph = key->kernel_vendor_key & MLX5_FRMR_POOLS_KERNEL_KEY_PH_MASK; + if (ph) { + /* Normalize ph: swap MLX5_IB_NO_PH for 0 */ + if (ph == MLX5_IB_NO_PH) + ph = 0; + MLX5_SET(mkc, mkc, pcie_tph_en, 1); + MLX5_SET(mkc, mkc, pcie_tph_ph, ph); + if (st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX) + MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index, + st_index); } - return ent; -ent_insert_err: - mlx5r_mkeys_uninit(ent); -mkeys_err: - kfree(ent); - return ERR_PTR(ret); -} - -static void mlx5r_destroy_cache_entries(struct mlx5_ib_dev *dev) -{ - struct rb_root *root = &dev->cache.rb_root; - struct mlx5_cache_ent *ent; - struct rb_node *node; - - mutex_lock(&dev->cache.rb_lock); - node = rb_first(root); - while (node) { - ent = rb_entry(node, struct mlx5_cache_ent, node); - node = rb_next(node); - clean_keys(dev, ent); - rb_erase(&ent->node, root); - mlx5r_mkeys_uninit(ent); - kfree(ent); + for (i = 0; i < count; i++) { + assign_mkey_variant(dev, handles + i, in); + err = mlx5_core_create_mkey(dev->mdev, handles + i, in, inlen); + if (err) + goto free_in; } - mutex_unlock(&dev->cache.rb_lock); +free_in: + kfree(in); + if (err) + for (; i > 0; i--) + mlx5_core_destroy_mkey(dev->mdev, handles[i]); + return err; } -int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) +static void mlx5r_destroy_mkeys(struct ib_device *device, u32 *handles, + unsigned int count) { - struct mlx5_mkey_cache *cache = &dev->cache; - struct rb_root *root = &dev->cache.rb_root; - struct mlx5r_cache_rb_key rb_key = { - .access_mode = MLX5_MKC_ACCESS_MODE_MTT, - .ph = MLX5_IB_NO_PH, - }; - struct mlx5_cache_ent *ent; - struct rb_node *node; - int ret; - int i; + struct mlx5_ib_dev *dev = to_mdev(device); + int i, err; - mutex_init(&dev->slow_path_mutex); - mutex_init(&dev->cache.rb_lock); - dev->cache.rb_root = RB_ROOT; - cache->wq = 
alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); - if (!cache->wq) { - mlx5_ib_warn(dev, "failed to create work queue\n"); - return -ENOMEM; + for (i = 0; i < count; i++) { + err = mlx5_core_destroy_mkey(dev->mdev, handles[i]); + if (err) + pr_warn_ratelimited( + "mlx5_ib: failed to destroy mkey %d: %d", + handles[i], err); } +} - mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); - timer_setup(&dev->delay_timer, delay_time_func, 0); - mlx5_mkey_cache_debugfs_init(dev); - mutex_lock(&cache->rb_lock); - for (i = 0; i <= mkey_cache_max_order(dev); i++) { - rb_key.ndescs = MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS << i; - ent = mlx5r_cache_create_ent_locked(dev, rb_key, true); - if (IS_ERR(ent)) { - ret = PTR_ERR(ent); - goto err; - } - } +static int mlx5r_build_frmr_key(struct ib_device *device, + const struct ib_frmr_key *in, + struct ib_frmr_key *out) +{ + struct mlx5_ib_dev *dev = to_mdev(device); - ret = mlx5_odp_init_mkey_cache(dev); - if (ret) - goto err; + /* check HW capabilities of users requested frmr key */ + if ((in->ats && !MLX5_CAP_GEN(dev->mdev, ats)) || + ilog2(in->num_dma_blocks) > mkey_max_umr_order(dev)) + return -EOPNOTSUPP; - mutex_unlock(&cache->rb_lock); - for (node = rb_first(root); node; node = rb_next(node)) { - ent = rb_entry(node, struct mlx5_cache_ent, node); - spin_lock_irq(&ent->mkeys_queue.lock); - queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->mkeys_queue.lock); - } + if (in->vendor_key & ~MLX5_FRMR_POOLS_KEY_VENDOR_KEY_SUPPORTED) + return -EOPNOTSUPP; - return 0; + out->ats = in->ats; + out->access_flags = + get_unchangeable_access_flags(dev, in->access_flags); + out->vendor_key = in->vendor_key; + out->num_dma_blocks = in->num_dma_blocks; -err: - mutex_unlock(&cache->rb_lock); - mlx5_mkey_cache_debugfs_cleanup(dev); - mlx5r_destroy_cache_entries(dev); - destroy_workqueue(cache->wq); - mlx5_ib_warn(dev, "failed to create mkey cache entry\n"); - return ret; + return 0; } -void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) -{ - struct rb_root *root = &dev->cache.rb_root; - struct mlx5_cache_ent *ent; - struct rb_node *node; - - if (!dev->cache.wq) - return; - - mutex_lock(&dev->cache.rb_lock); - for (node = rb_first(root); node; node = rb_next(node)) { - ent = rb_entry(node, struct mlx5_cache_ent, node); - spin_lock_irq(&ent->mkeys_queue.lock); - ent->disabled = true; - spin_unlock_irq(&ent->mkeys_queue.lock); - cancel_delayed_work(&ent->dwork); - } - mutex_unlock(&dev->cache.rb_lock); - - /* - * After all entries are disabled and will not reschedule on WQ, - * flush it and all async commands. - */ - flush_workqueue(dev->cache.wq); +static struct ib_frmr_pool_ops mlx5r_frmr_pool_ops = { + .create_frmrs = mlx5r_create_mkeys, + .destroy_frmrs = mlx5r_destroy_mkeys, + .build_key = mlx5r_build_frmr_key, +}; - mlx5_mkey_cache_debugfs_cleanup(dev); - mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); +int mlx5r_frmr_pools_init(struct ib_device *device) +{ + struct mlx5_ib_dev *dev = to_mdev(device); - /* At this point all entries are disabled and have no concurrent work. 
*/ - mlx5r_destroy_cache_entries(dev); + mutex_init(&dev->slow_path_mutex); + return ib_frmr_pools_init(device, &mlx5r_frmr_pool_ops); +} - destroy_workqueue(dev->cache.wq); - timer_delete_sync(&dev->delay_timer); +void mlx5r_frmr_pools_cleanup(struct ib_device *device) +{ + ib_frmr_pools_cleanup(device); } struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) @@ -1109,13 +417,6 @@ static int get_octo_len(u64 addr, u64 len, int page_shift) return (npages + 1) / 2; } -static int mkey_cache_max_order(struct mlx5_ib_dev *dev) -{ - if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) - return MKEY_CACHE_LAST_STD_ENTRY; - return MLX5_MAX_UMR_SHIFT; -} - static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, u64 length, int access_flags, u64 iova) { @@ -1144,8 +445,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, u16 st_index, u8 ph) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5r_cache_rb_key rb_key = {}; - struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; unsigned long page_size; @@ -1157,33 +456,12 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, if (WARN_ON(!page_size)) return ERR_PTR(-EINVAL); - rb_key.access_mode = access_mode; - rb_key.ndescs = ib_umem_num_dma_blocks(umem, page_size); - rb_key.ats = mlx5_umem_needs_ats(dev, umem, access_flags); - rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags); - rb_key.st_index = st_index; - rb_key.ph = ph; - ent = mkey_cache_ent_from_rb_key(dev, rb_key); - /* - * If the MR can't come from the cache then synchronously create an uncached - * one. - */ - if (!ent) { - mutex_lock(&dev->slow_path_mutex); - mr = reg_create(pd, umem, iova, access_flags, page_size, false, access_mode, - st_index, ph); - mutex_unlock(&dev->slow_path_mutex); - if (IS_ERR(mr)) - return mr; - mr->mmkey.rb_key = rb_key; - mr->mmkey.cacheable = true; - return mr; - } - - mr = _mlx5_mr_cache_alloc(dev, ent); + mr = _mlx5_frmr_pool_alloc(dev, umem, access_flags, access_mode, + page_size, st_index, ph); if (IS_ERR(mr)) return mr; + mr->mmkey.type = MLX5_MKEY_MR; mr->ibmr.pd = pd; mr->umem = umem; mr->page_shift = order_base_2(page_size); @@ -1812,18 +1090,24 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, unsigned long *page_size) { struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + u8 access_mode; - /* We only track the allocated sizes of MRs from the cache */ - if (!mr->mmkey.cache_ent) + /* We only track the allocated sizes of MRs from the frmr pools */ + if (!mr->ibmr.frmr.pool) return false; if (!mlx5r_umr_can_load_pas(dev, new_umem->length)) return false; - *page_size = mlx5_umem_mkc_find_best_pgsz( - dev, new_umem, iova, mr->mmkey.cache_ent->rb_key.access_mode); + access_mode = mr->ibmr.frmr.key.vendor_key & + MLX5_FRMR_POOLS_KEY_ACCESS_MODE_KSM_MASK ? 
+ MLX5_MKC_ACCESS_MODE_KSM : + MLX5_MKC_ACCESS_MODE_MTT; + + *page_size = + mlx5_umem_mkc_find_best_pgsz(dev, new_umem, iova, access_mode); if (WARN_ON(!*page_size)) return false; - return (mr->mmkey.cache_ent->rb_key.ndescs) >= + return (mr->ibmr.frmr.key.num_dma_blocks) >= ib_umem_num_dma_blocks(new_umem, *page_size); } @@ -1884,7 +1168,8 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, int err; if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || mr->data_direct || - mr->mmkey.rb_key.ph != MLX5_IB_NO_PH) + (mr->ibmr.frmr.key.kernel_vendor_key & + MLX5_FRMR_POOLS_KERNEL_KEY_PH_MASK) != 0) return ERR_PTR(-EOPNOTSUPP); mlx5_ib_dbg( @@ -2025,47 +1310,6 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) } } -static int cache_ent_find_and_store(struct mlx5_ib_dev *dev, - struct mlx5_ib_mr *mr) -{ - struct mlx5_mkey_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent; - int ret; - - if (mr->mmkey.cache_ent) { - spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); - goto end; - } - - mutex_lock(&cache->rb_lock); - ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key); - if (ent) { - if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) { - if (ent->disabled) { - mutex_unlock(&cache->rb_lock); - return -EOPNOTSUPP; - } - mr->mmkey.cache_ent = ent; - spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); - mutex_unlock(&cache->rb_lock); - goto end; - } - } - - ent = mlx5r_cache_create_ent_locked(dev, mr->mmkey.rb_key, false); - mutex_unlock(&cache->rb_lock); - if (IS_ERR(ent)) - return PTR_ERR(ent); - - mr->mmkey.cache_ent = ent; - spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); - -end: - ret = push_mkey_locked(mr->mmkey.cache_ent, mr->mmkey.key); - spin_unlock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); - return ret; -} - static int mlx5_ib_revoke_data_direct_mr(struct mlx5_ib_mr *mr) { struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); @@ -2131,33 +1375,12 @@ static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr) bool is_odp_dma_buf = is_dmabuf_mr(mr) && !to_ib_umem_dmabuf(mr->umem)->pinned; struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; bool is_odp = is_odp_mr(mr); - bool from_cache = !!ent; int ret; - if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) && - !cache_ent_find_and_store(dev, mr)) { - ent = mr->mmkey.cache_ent; - /* upon storing to a clean temp entry - schedule its cleanup */ - spin_lock_irq(&ent->mkeys_queue.lock); - if (from_cache) - ent->in_use--; - if (ent->is_tmp && !ent->tmp_cleanup_scheduled) { - mod_delayed_work(ent->dev->cache.wq, &ent->dwork, - secs_to_jiffies(30)); - ent->tmp_cleanup_scheduled = true; - } - spin_unlock_irq(&ent->mkeys_queue.lock); + if (mr->ibmr.frmr.pool && !mlx5_umr_revoke_mr_with_lock(mr) && + !ib_frmr_pool_push(mr->ibmr.device, &mr->ibmr)) return 0; - } - - if (ent) { - spin_lock_irq(&ent->mkeys_queue.lock); - ent->in_use--; - mr->mmkey.cache_ent = NULL; - spin_unlock_irq(&ent->mkeys_queue.lock); - } if (is_odp) mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); @@ -2241,7 +1464,7 @@ static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr) mlx5_ib_free_odp_mr(mr); } - if (!mr->mmkey.cache_ent) + if (!mr->ibmr.frmr.pool) mlx5_free_priv_descs(mr); kfree(mr); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index e71ee3d52eb04..c6c60b9ef1805 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1875,25 +1875,6 @@ mlx5_ib_odp_destroy_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) 
return err; } -int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev) -{ - struct mlx5r_cache_rb_key rb_key = { - .access_mode = MLX5_MKC_ACCESS_MODE_KSM, - .ndescs = mlx5_imr_ksm_entries, - .ph = MLX5_IB_NO_PH, - }; - struct mlx5_cache_ent *ent; - - if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) - return 0; - - ent = mlx5r_cache_create_ent_locked(dev, rb_key, true); - if (IS_ERR(ent)) - return PTR_ERR(ent); - - return 0; -} - static const struct ib_device_ops mlx5_ib_dev_odp_ops = { .advise_mr = mlx5_ib_advise_mr, }; diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h index e9361f0140e7b..7eeaf6a94c974 100644 --- a/drivers/infiniband/hw/mlx5/umr.h +++ b/drivers/infiniband/hw/mlx5/umr.h @@ -9,6 +9,7 @@ #define MLX5_MAX_UMR_SHIFT 16 #define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT) +#define MLX5_MAX_UMR_EXTENDED_SHIFT 43 #define MLX5_IB_UMR_OCTOWORD 16 #define MLX5_IB_UMR_XLT_ALIGNMENT 64 diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index e15e320db4763..cfaea6178a719 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only menuconfig CAN_DEV - tristate "CAN Device Drivers" + bool "CAN Device Drivers" default y depends on CAN help @@ -17,10 +17,7 @@ menuconfig CAN_DEV virtual ones. If you own such devices or plan to use the virtual CAN interfaces to develop applications, say Y here. - To compile as a module, choose M here: the module will be called - can-dev. - -if CAN_DEV +if CAN_DEV && CAN config CAN_VCAN tristate "Virtual Local CAN Interface (vcan)" diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile index d7bc10a6b8eae..37e2f1a2faecd 100644 --- a/drivers/net/can/Makefile +++ b/drivers/net/can/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_CAN_VCAN) += vcan.o obj-$(CONFIG_CAN_VXCAN) += vxcan.o obj-$(CONFIG_CAN_SLCAN) += slcan/ -obj-y += dev/ +obj-$(CONFIG_CAN_DEV) += dev/ obj-y += esd/ obj-y += rcar/ obj-y += rockchip/ diff --git a/drivers/net/can/dev/Makefile b/drivers/net/can/dev/Makefile index 633687d6b6c0c..64226acf0f3d4 100644 --- a/drivers/net/can/dev/Makefile +++ b/drivers/net/can/dev/Makefile @@ -1,9 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_CAN_DEV) += can-dev.o - -can-dev-y += skb.o +obj-$(CONFIG_CAN) += can-dev.o +can-dev-$(CONFIG_CAN_DEV) += skb.o can-dev-$(CONFIG_CAN_CALC_BITTIMING) += calc_bittiming.o can-dev-$(CONFIG_CAN_NETLINK) += bittiming.o can-dev-$(CONFIG_CAN_NETLINK) += dev.o diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index e29e85b67fd40..a0233e550a5ad 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -1074,7 +1074,7 @@ static int gs_can_open(struct net_device *netdev) usb_free_urb(urb); out_usb_kill_anchored_urbs: if (!parent->active_channels) { - usb_kill_anchored_urbs(&dev->tx_submitted); + usb_kill_anchored_urbs(&parent->rx_submitted); if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP) gs_usb_timestamp_stop(parent); diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index a1a177713d99d..2c4131ed7e30b 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2169,6 +2169,9 @@ static int b53_fdb_copy(int port, const struct b53_arl_entry *ent, if (!ent->is_valid) return 0; + if (is_multicast_ether_addr(ent->mac)) + return 0; + if (port != ent->port) return 0; diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.c b/drivers/net/dsa/lantiq/lantiq_gswip.c index 57dd063c07403..b094001a7c805 100644 --- 
a/drivers/net/dsa/lantiq/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip.c @@ -444,9 +444,6 @@ static void gswip_remove(struct platform_device *pdev) if (!priv) return; - /* disable the switch */ - gswip_disable_switch(priv); - dsa_unregister_switch(priv->ds); for (i = 0; i < priv->num_gphy_fw; i++) diff --git a/drivers/net/dsa/lantiq/lantiq_gswip.h b/drivers/net/dsa/lantiq/lantiq_gswip.h index 9c38e51a75e80..2e0f2afbadbbc 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip.h +++ b/drivers/net/dsa/lantiq/lantiq_gswip.h @@ -294,8 +294,6 @@ struct gswip_priv { u16 version; }; -void gswip_disable_switch(struct gswip_priv *priv); - int gswip_probe_common(struct gswip_priv *priv, u32 version); #endif /* __LANTIQ_GSWIP_H */ diff --git a/drivers/net/dsa/lantiq/lantiq_gswip_common.c b/drivers/net/dsa/lantiq/lantiq_gswip_common.c index 9da39edf8f574..e790f2ef75884 100644 --- a/drivers/net/dsa/lantiq/lantiq_gswip_common.c +++ b/drivers/net/dsa/lantiq/lantiq_gswip_common.c @@ -752,6 +752,13 @@ static int gswip_setup(struct dsa_switch *ds) return 0; } +static void gswip_teardown(struct dsa_switch *ds) +{ + struct gswip_priv *priv = ds->priv; + + regmap_clear_bits(priv->mdio, GSWIP_MDIO_GLOB, GSWIP_MDIO_GLOB_ENABLE); +} + static enum dsa_tag_protocol gswip_get_tag_protocol(struct dsa_switch *ds, int port, enum dsa_tag_protocol mp) @@ -1629,6 +1636,7 @@ static const struct phylink_mac_ops gswip_phylink_mac_ops = { static const struct dsa_switch_ops gswip_switch_ops = { .get_tag_protocol = gswip_get_tag_protocol, .setup = gswip_setup, + .teardown = gswip_teardown, .port_setup = gswip_port_setup, .port_enable = gswip_port_enable, .port_disable = gswip_port_disable, @@ -1656,12 +1664,6 @@ static const struct dsa_switch_ops gswip_switch_ops = { .port_hsr_leave = dsa_port_simple_hsr_leave, }; -void gswip_disable_switch(struct gswip_priv *priv) -{ - regmap_clear_bits(priv->mdio, GSWIP_MDIO_GLOB, GSWIP_MDIO_GLOB_ENABLE); -} -EXPORT_SYMBOL_GPL(gswip_disable_switch); - static int gswip_validate_cpu_port(struct dsa_switch *ds) { struct gswip_priv *priv = ds->priv; @@ -1718,15 +1720,14 @@ int gswip_probe_common(struct gswip_priv *priv, u32 version) err = gswip_validate_cpu_port(priv->ds); if (err) - goto disable_switch; + goto unregister_switch; dev_info(priv->dev, "probed GSWIP version %lx mod %lx\n", GSWIP_VERSION_REV(version), GSWIP_VERSION_MOD(version)); return 0; -disable_switch: - gswip_disable_switch(priv); +unregister_switch: dsa_unregister_switch(priv->ds); return err; diff --git a/drivers/net/dsa/lantiq/mxl-gsw1xx.c b/drivers/net/dsa/lantiq/mxl-gsw1xx.c index 0816c61a47f12..f8ff8a604bf53 100644 --- a/drivers/net/dsa/lantiq/mxl-gsw1xx.c +++ b/drivers/net/dsa/lantiq/mxl-gsw1xx.c @@ -11,10 +11,12 @@ #include #include +#include #include #include #include #include +#include #include #include "lantiq_gswip.h" @@ -29,6 +31,7 @@ struct gsw1xx_priv { struct regmap *clk; struct regmap *shell; struct phylink_pcs pcs; + struct delayed_work clear_raneg; phy_interface_t tbi_interface; struct gswip_priv gswip; }; @@ -145,7 +148,9 @@ static void gsw1xx_pcs_disable(struct phylink_pcs *pcs) { struct gsw1xx_priv *priv = pcs_to_gsw1xx(pcs); - /* Assert SGMII shell reset */ + cancel_delayed_work_sync(&priv->clear_raneg); + + /* Assert SGMII shell reset (will also clear RANEG bit) */ regmap_set_bits(priv->shell, GSW1XX_SHELL_RST_REQ, GSW1XX_RST_REQ_SGMII_SHELL); @@ -255,10 +260,16 @@ static int gsw1xx_pcs_reset(struct gsw1xx_priv *priv) FIELD_PREP(GSW1XX_SGMII_PHY_RX0_CFG2_FILT_CNT, 
GSW1XX_SGMII_PHY_RX0_CFG2_FILT_CNT_DEF); - /* TODO: Take care of inverted RX pair once generic property is + /* RX lane seems to be inverted internally, so bit + * GSW1XX_SGMII_PHY_RX0_CFG2_INVERT needs to be set for normal + * (ie. non-inverted) operation. + * + * TODO: Take care of inverted RX pair once generic property is * available */ + val |= GSW1XX_SGMII_PHY_RX0_CFG2_INVERT; + ret = regmap_write(priv->sgmii, GSW1XX_SGMII_PHY_RX0_CFG2, val); if (ret < 0) return ret; @@ -422,12 +433,29 @@ static int gsw1xx_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, return 0; } +static void gsw1xx_pcs_clear_raneg(struct work_struct *work) +{ + struct gsw1xx_priv *priv = + container_of(work, struct gsw1xx_priv, clear_raneg.work); + + regmap_clear_bits(priv->sgmii, GSW1XX_SGMII_TBI_ANEGCTL, + GSW1XX_SGMII_TBI_ANEGCTL_RANEG); +} + static void gsw1xx_pcs_an_restart(struct phylink_pcs *pcs) { struct gsw1xx_priv *priv = pcs_to_gsw1xx(pcs); + cancel_delayed_work_sync(&priv->clear_raneg); + regmap_set_bits(priv->sgmii, GSW1XX_SGMII_TBI_ANEGCTL, GSW1XX_SGMII_TBI_ANEGCTL_RANEG); + + /* despite being documented as self-clearing, the RANEG bit + * sometimes remains set, preventing auto-negotiation from happening. + * MaxLinear advises to manually clear the bit after 10ms. + */ + schedule_delayed_work(&priv->clear_raneg, msecs_to_jiffies(10)); } static void gsw1xx_pcs_link_up(struct phylink_pcs *pcs, @@ -630,6 +658,8 @@ static int gsw1xx_probe(struct mdio_device *mdiodev) if (ret) return ret; + INIT_DELAYED_WORK(&priv->clear_raneg, gsw1xx_pcs_clear_raneg); + ret = gswip_probe_common(&priv->gswip, version); if (ret) return ret; @@ -642,25 +672,31 @@ static int gsw1xx_probe(struct mdio_device *mdiodev) static void gsw1xx_remove(struct mdio_device *mdiodev) { struct gswip_priv *priv = dev_get_drvdata(&mdiodev->dev); + struct gsw1xx_priv *gsw1xx_priv; if (!priv) return; - gswip_disable_switch(priv); - dsa_unregister_switch(priv->ds); + + gsw1xx_priv = container_of(priv, struct gsw1xx_priv, gswip); + cancel_delayed_work_sync(&gsw1xx_priv->clear_raneg); } static void gsw1xx_shutdown(struct mdio_device *mdiodev) { struct gswip_priv *priv = dev_get_drvdata(&mdiodev->dev); + struct gsw1xx_priv *gsw1xx_priv; if (!priv) return; + dsa_switch_shutdown(priv->ds); + dev_set_drvdata(&mdiodev->dev, NULL); - gswip_disable_switch(priv); + gsw1xx_priv = container_of(priv, struct gsw1xx_priv, gswip); + cancel_delayed_work_sync(&gsw1xx_priv->clear_raneg); } static const struct gswip_hw_info gsw12x_data = { diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 75893c90a0a17..315d97036ac1d 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2924,19 +2924,26 @@ static int airoha_alloc_gdm_port(struct airoha_eth *eth, port->id = id; eth->ports[p] = port; - err = airoha_metadata_dst_alloc(port); - if (err) - return err; + return airoha_metadata_dst_alloc(port); +} - err = register_netdev(dev); - if (err) - goto free_metadata_dst; +static int airoha_register_gdm_devices(struct airoha_eth *eth) +{ + int i; - return 0; + for (i = 0; i < ARRAY_SIZE(eth->ports); i++) { + struct airoha_gdm_port *port = eth->ports[i]; + int err; -free_metadata_dst: - airoha_metadata_dst_free(port); - return err; + if (!port) + continue; + + err = register_netdev(port->dev); + if (err) + return err; + } + + return 0; } static int airoha_probe(struct platform_device *pdev) @@ -3027,6 +3034,10 @@ static int airoha_probe(struct platform_device *pdev) } 
} + err = airoha_register_gdm_devices(eth); + if (err) + goto error_napi_stop; + return 0; error_napi_stop: @@ -3040,10 +3051,12 @@ static int airoha_probe(struct platform_device *pdev) for (i = 0; i < ARRAY_SIZE(eth->ports); i++) { struct airoha_gdm_port *port = eth->ports[i]; - if (port && port->dev->reg_state == NETREG_REGISTERED) { + if (!port) + continue; + + if (port->dev->reg_state == NETREG_REGISTERED) unregister_netdev(port->dev); - airoha_metadata_dst_free(port); - } + airoha_metadata_dst_free(port); } free_netdev(eth->napi_dev); platform_set_drvdata(pdev, NULL); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index a68757e8fd22c..c63ddb12237ea 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -1928,6 +1928,7 @@ static void xgbe_set_rx_adap_mode(struct xgbe_prv_data *pdata, { if (pdata->rx_adapt_retries++ >= MAX_RX_ADAPT_RETRIES) { pdata->rx_adapt_retries = 0; + pdata->mode_set = false; return; } @@ -1974,6 +1975,7 @@ static void xgbe_rx_adaptation(struct xgbe_prv_data *pdata) */ netif_dbg(pdata, link, pdata->netdev, "Block_lock done"); pdata->rx_adapt_done = true; + pdata->rx_adapt_retries = 0; pdata->mode_set = false; return; } diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 666522d647751..ca565ace6e6ad 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -255,14 +255,14 @@ config BNXT_HWMON devices, via the hwmon sysfs interface. config BNGE - tristate "Broadcom Ethernet device support" + tristate "Broadcom ThorUltra Ethernet device support" depends on PCI select NET_DEVLINK select PAGE_POOL help - This driver supports Broadcom 50/100/200/400/800 gigabit Ethernet cards. - The module will be called bng_en. To compile this driver as a module, - choose M here. + This driver supports Broadcom ThorUltra 50/100/200/400/800 gigabit + Ethernet cards. The module will be called bng_en. To compile this + driver as a module, choose M here. 
config BCMASP tristate "Broadcom ASP 2.0 Ethernet support" diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 888f28f11406f..90df02e0039cb 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1790,6 +1790,9 @@ static int b44_nway_reset(struct net_device *dev) u32 bmcr; int r; + if (bp->flags & B44_FLAG_EXTERNAL_PHY) + return phy_ethtool_nway_reset(dev); + spin_lock_irq(&bp->lock); b44_readphy(bp, MII_BMCR, &bmcr); b44_readphy(bp, MII_BMCR, &bmcr); diff --git a/drivers/net/ethernet/broadcom/bnge/bnge.h b/drivers/net/ethernet/broadcom/bnge/bnge.h index 411744894349f..32fc16a37d02a 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge.h +++ b/drivers/net/ethernet/broadcom/bnge/bnge.h @@ -5,7 +5,7 @@ #define _BNGE_H_ #define DRV_NAME "bng_en" -#define DRV_SUMMARY "Broadcom 800G Ethernet Linux Driver" +#define DRV_SUMMARY "Broadcom ThorUltra NIC Ethernet Driver" #include #include diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_core.c b/drivers/net/ethernet/broadcom/bnge/bnge_core.c index c94e132bebc80..b4090283df0f2 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_core.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_core.c @@ -19,7 +19,7 @@ char bnge_driver_name[] = DRV_NAME; static const struct { char *name; } board_info[] = { - [BCM57708] = { "Broadcom BCM57708 50Gb/100Gb/200Gb/400Gb/800Gb Ethernet" }, + [BCM57708] = { "Broadcom BCM57708 ThorUltra 50Gb/100Gb/200Gb/400Gb/800Gb Ethernet" }, }; static const struct pci_device_id bnge_pci_tbl[] = { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 3e77a96e5a3e3..c94a391b1ba5b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -268,13 +268,11 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, case XDP_TX: rx_buf = &rxr->rx_buf_ring[cons]; mapping = rx_buf->mapping - bp->rx_dma_offset; - *event &= BNXT_TX_CMP_EVENT; if (unlikely(xdp_buff_has_frags(xdp))) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); tx_needed += sinfo->nr_frags; - *event = BNXT_AGG_EVENT; } if (tx_avail < tx_needed) { @@ -287,6 +285,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, dma_sync_single_for_device(&pdev->dev, mapping + offset, *len, bp->rx_dir); + *event &= ~BNXT_RX_EVENT; *event |= BNXT_TX_EVENT; __bnxt_xmit_xdp(bp, txr, mapping + offset, *len, NEXT_RX(rxr->rx_prod), xdp); diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index e461f5072884e..6511ecd5856bd 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -708,7 +708,6 @@ static void macb_mac_link_up(struct phylink_config *config, /* Initialize rings & buffers as clearing MACB_BIT(TE) in link down * cleared the pipeline and control registers. 
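* Full ring initialization now happens once per open in macb_open()
* (see the hunk below); link-up only calls macb_init_buffers().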
*/ - bp->macbgem_ops.mog_init_rings(bp); macb_init_buffers(bp); for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) @@ -2954,6 +2953,8 @@ static int macb_open(struct net_device *dev) goto pm_exit; } + bp->macbgem_ops.mog_init_rings(bp); + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { napi_enable(&queue->napi_rx); napi_enable(&queue->napi_tx); diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index d5e5800b84eff..53b26cece16a8 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1787,7 +1787,8 @@ int enetc_xdp_xmit(struct net_device *ndev, int num_frames, int xdp_tx_bd_cnt, i, k; int xdp_tx_frm_cnt = 0; - if (unlikely(test_bit(ENETC_TX_DOWN, &priv->flags))) + if (unlikely(test_bit(ENETC_TX_DOWN, &priv->flags) || + !netif_carrier_ok(ndev))) return -ENETDOWN; enetc_lock_mdio(); diff --git a/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c b/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c index 443983fdecd95..7fd39f8952901 100644 --- a/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c +++ b/drivers/net/ethernet/freescale/enetc/netc_blk_ctrl.c @@ -577,11 +577,17 @@ static int imx94_enetc_mdio_phyaddr_config(struct netc_blk_ctrl *priv, } addr = netc_get_phy_addr(np); - if (addr <= 0) { + if (addr < 0) { dev_err(dev, "Failed to get PHY address\n"); return addr; } + /* The default value of LaBCR[MDIO_PHYAD_PRTAD] is 0, + * so no need to set the register. + */ + if (!addr) + return 0; + if (phy_mask & BIT(addr)) { dev_err(dev, "Find same PHY address in EMDIO and ENETC node\n"); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index c685a5c0cc51a..a753265961af5 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3933,7 +3933,12 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, txq->bd.cur = bdp; /* Trigger transmission start */ - writel(0, txq->bd.reg_desc_active); + if (!(fep->quirks & FEC_QUIRK_ERR007885) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active)) + writel(0, txq->bd.reg_desc_active); return 0; } diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 52500ae8348e6..9ed1d45294278 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -815,15 +815,19 @@ static int gve_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) return err; } +static u32 gve_get_rx_ring_count(struct net_device *netdev) +{ + struct gve_priv *priv = netdev_priv(netdev); + + return priv->rx_cfg.num_queues; +} + static int gve_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, u32 *rule_locs) { struct gve_priv *priv = netdev_priv(netdev); int err = 0; switch (cmd->cmd) { - case ETHTOOL_GRXRINGS: - cmd->data = priv->rx_cfg.num_queues; - break; case ETHTOOL_GRXCLSRLCNT: if (!priv->max_flow_rules) return -EOPNOTSUPP; @@ -966,6 +970,7 @@ const struct ethtool_ops gve_ethtool_ops = { .get_channels = gve_get_channels, .set_rxnfc = gve_set_rxnfc, .get_rxnfc = gve_get_rxnfc, + .get_rx_ring_count = gve_get_rx_ring_count, .get_rxfh_indir_size = gve_get_rxfh_indir_size, .get_rxfh_key_size = gve_get_rxfh_key_size, .get_rxfh = gve_get_rxfh, diff --git a/drivers/net/ethernet/google/gve/gve_main.c 
b/drivers/net/ethernet/google/gve/gve_main.c index a5a2b18d309b8..7eb64e1e4d858 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -558,7 +558,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) block->priv = priv; err = request_irq(priv->msix_vectors[msix_idx].vector, gve_is_gqi(priv) ? gve_intr : gve_intr_dqo, - 0, block->name, block); + IRQF_NO_AUTOEN, block->name, block); if (err) { dev_err(&priv->pdev->dev, "Failed to receive msix vector %d\n", i); @@ -647,12 +647,9 @@ static int gve_setup_device_resources(struct gve_priv *priv) err = gve_alloc_counter_array(priv); if (err) goto abort_with_rss_config_cache; - err = gve_init_clock(priv); - if (err) - goto abort_with_counter; err = gve_alloc_notify_blocks(priv); if (err) - goto abort_with_clock; + goto abort_with_counter; err = gve_alloc_stats_report(priv); if (err) goto abort_with_ntfy_blocks; @@ -683,10 +680,16 @@ static int gve_setup_device_resources(struct gve_priv *priv) } } + err = gve_init_clock(priv); + if (err) { + dev_err(&priv->pdev->dev, "Failed to init clock"); + goto abort_with_ptype_lut; + } + err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); if (err) { dev_err(&priv->pdev->dev, "Failed to init RSS config"); - goto abort_with_ptype_lut; + goto abort_with_clock; } err = gve_adminq_report_stats(priv, priv->stats_report_len, @@ -698,6 +701,8 @@ static int gve_setup_device_resources(struct gve_priv *priv) gve_set_device_resources_ok(priv); return 0; +abort_with_clock: + gve_teardown_clock(priv); abort_with_ptype_lut: kvfree(priv->ptype_lut_dqo); priv->ptype_lut_dqo = NULL; @@ -705,8 +710,6 @@ static int gve_setup_device_resources(struct gve_priv *priv) gve_free_stats_report(priv); abort_with_ntfy_blocks: gve_free_notify_blocks(priv); -abort_with_clock: - gve_teardown_clock(priv); abort_with_counter: gve_free_counter_array(priv); abort_with_rss_config_cache: diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c index ace9b8698021f..b53b7fcdcdaf1 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.c +++ b/drivers/net/ethernet/google/gve/gve_utils.c @@ -112,11 +112,13 @@ void gve_add_napi(struct gve_priv *priv, int ntfy_idx, netif_napi_add_locked(priv->dev, &block->napi, gve_poll); netif_napi_set_irq_locked(&block->napi, block->irq); + enable_irq(block->irq); } void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) { struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + disable_irq(block->irq); netif_napi_del_locked(&block->napi); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index cf8abbe018402..c589baea7c775 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -10555,6 +10555,9 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, bool writen_to_tbl = false; int ret = 0; + if (vlan_id >= VLAN_N_VID) + return -EINVAL; + /* When device is resetting or reset failed, firmware is unable to * handle mailbox. Just record the vlan id, and remove it after * reset finished. 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index c7ff12a6c0764..b7d4e06a55d40 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -193,10 +193,10 @@ static int hclge_get_ring_chain_from_mbx( return -EINVAL; for (i = 0; i < ring_num; i++) { - if (req->msg.param[i].tqp_index >= vport->nic.kinfo.rss_size) { + if (req->msg.param[i].tqp_index >= vport->nic.kinfo.num_tqps) { dev_err(&hdev->pdev->dev, "tqp index(%u) is out of range(0-%u)\n", req->msg.param[i].tqp_index, - vport->nic.kinfo.rss_size - 1U); + vport->nic.kinfo.num_tqps - 1U); return -EINVAL; } } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 8fcf220a120d2..70327a73dee32 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -368,12 +368,12 @@ static int hclgevf_knic_setup(struct hclgevf_dev *hdev) new_tqps = kinfo->rss_size * num_tc; kinfo->num_tqps = min(new_tqps, hdev->num_tqps); - kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, kinfo->num_tqps, + kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, hdev->num_tqps, sizeof(struct hnae3_queue *), GFP_KERNEL); if (!kinfo->tqp) return -ENOMEM; - for (i = 0; i < kinfo->num_tqps; i++) { + for (i = 0; i < hdev->num_tqps; i++) { hdev->htqp[i].q.handle = &hdev->nic; hdev->htqp[i].q.tqp_index = i; kinfo->tqp[i] = &hdev->htqp[i].q; diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 292389aceb2d4..7f078ec9c14c5 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -4094,7 +4094,15 @@ static bool e1000_tbi_should_accept(struct e1000_adapter *adapter, u32 length, const u8 *data) { struct e1000_hw *hw = &adapter->hw; - u8 last_byte = *(data + length - 1); + u8 last_byte; + + /* Guard against OOB on data[length - 1] */ + if (unlikely(!length)) + return false; + /* Upper bound: length must not exceed rx_buffer_len */ + if (unlikely(length > adapter->rx_buffer_len)) + return false; + last_byte = *(data + length - 1); if (TBI_ACCEPT(hw, status, errors, length, last_byte)) { unsigned long irq_flags; diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index d2d03db2acec6..dcb50c2e1aa27 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -1422,4 +1422,15 @@ static inline struct i40e_veb *i40e_pf_get_main_veb(struct i40e_pf *pf) return (pf->lan_veb != I40E_NO_VEB) ? 
pf->veb[pf->lan_veb] : NULL; } +static inline u32 i40e_get_max_num_descriptors(const struct i40e_pf *pf) +{ + const struct i40e_hw *hw = &pf->hw; + + switch (hw->mac.type) { + case I40E_MAC_XL710: + return I40E_MAX_NUM_DESCRIPTORS_XL710; + default: + return I40E_MAX_NUM_DESCRIPTORS; + } +} #endif /* _I40E_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index f2c2646ea2989..9fca52f42ce43 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2013,18 +2013,6 @@ static void i40e_get_drvinfo(struct net_device *netdev, drvinfo->n_priv_flags += I40E_GL_PRIV_FLAGS_STR_LEN; } -static u32 i40e_get_max_num_descriptors(struct i40e_pf *pf) -{ - struct i40e_hw *hw = &pf->hw; - - switch (hw->mac.type) { - case I40E_MAC_XL710: - return I40E_MAX_NUM_DESCRIPTORS_XL710; - default: - return I40E_MAX_NUM_DESCRIPTORS; - } -} - static void i40e_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, struct kernel_ethtool_ringparam *kernel_ring, @@ -3637,6 +3625,7 @@ static int i40e_set_rxfh_fields(struct net_device *netdev, ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); DECLARE_BITMAP(flow_pctypes, FLOW_PCTYPES_SIZE); u64 i_set, i_setc; + u8 flow_id; bitmap_zero(flow_pctypes, FLOW_PCTYPES_SIZE); @@ -3720,20 +3709,14 @@ static int i40e_set_rxfh_fields(struct net_device *netdev, return -EINVAL; } - if (bitmap_weight(flow_pctypes, FLOW_PCTYPES_SIZE)) { - u8 flow_id; + for_each_set_bit(flow_id, flow_pctypes, FLOW_PCTYPES_SIZE) { + i_setc = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_id)) | + ((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_id)) << 32); + i_set = i40e_get_rss_hash_bits(&pf->hw, nfc, i_setc); - for_each_set_bit(flow_id, flow_pctypes, FLOW_PCTYPES_SIZE) { - i_setc = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_id)) | - ((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_id)) << 32); - i_set = i40e_get_rss_hash_bits(&pf->hw, nfc, i_setc); - - i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_id), - (u32)i_set); - i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_id), - (u32)(i_set >> 32)); - hena |= BIT_ULL(flow_id); - } + i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_id), (u32)i_set); + i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_id), (u32)(i_set >> 32)); + hena |= BIT_ULL(flow_id); } i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index d8192aa232548..d3bc3207054f9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2234,6 +2234,7 @@ static void i40e_set_rx_mode(struct net_device *netdev) vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state); } + i40e_service_event_schedule(vsi->back); } /** @@ -9029,7 +9030,6 @@ int i40e_open(struct net_device *netdev) TCP_FLAG_FIN | TCP_FLAG_CWR) >> 16); wr32(&pf->hw, I40E_GLLAN_TSOMSK_L, be32_to_cpu(TCP_FLAG_CWR) >> 16); - udp_tunnel_get_rx_info(netdev); return 0; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 8b30a3accd310..1fa877b52f618 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -656,7 +656,7 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_id, /* ring_len has to be multiple of 8 */ if 
(!IS_ALIGNED(info->ring_len, 8) || - info->ring_len > I40E_MAX_NUM_DESCRIPTORS_XL710) { + info->ring_len > i40e_get_max_num_descriptors(pf)) { ret = -EINVAL; goto error_context; } @@ -726,7 +726,7 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id, /* ring_len has to be multiple of 32 */ if (!IS_ALIGNED(info->ring_len, 32) || - info->ring_len > I40E_MAX_NUM_DESCRIPTORS_XL710) { + info->ring_len > i40e_get_max_num_descriptors(pf)) { ret = -EINVAL; goto error_param; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index c2fbe443ef853..4b0fc8f354bc9 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1726,11 +1726,11 @@ static int iavf_config_rss_reg(struct iavf_adapter *adapter) u16 i; dw = (u32 *)adapter->rss_key; - for (i = 0; i <= adapter->rss_key_size / 4; i++) + for (i = 0; i < adapter->rss_key_size / 4; i++) wr32(hw, IAVF_VFQF_HKEY(i), dw[i]); dw = (u32 *)adapter->rss_lut; - for (i = 0; i <= adapter->rss_lut_size / 4; i++) + for (i = 0; i < adapter->rss_lut_size / 4; i++) wr32(hw, IAVF_VFQF_HLUT(i), dw[i]); iavf_flush(hw); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 4bb68e7a00f5f..a91f96253db0a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -9631,9 +9631,6 @@ int ice_open_internal(struct net_device *netdev) netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n", vsi->vsi_num, vsi->vsw->sw_id); - /* Update existing tunnels information */ - udp_tunnel_get_rx_info(netdev); - return err; } diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 7a7e101afeb68..7ce4eb71a433c 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1271,7 +1271,7 @@ void idpf_mbx_task(struct work_struct *work) idpf_mb_irq_enable(adapter); else queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, - msecs_to_jiffies(300)); + usecs_to_jiffies(300)); idpf_recv_mb_msg(adapter); } diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index 44cd4b466c481..5bbe7d9294c14 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -1016,6 +1016,9 @@ static int idpf_send_get_lan_memory_regions(struct idpf_adapter *adapter) struct idpf_vc_xn_params xn_params = { .vc_op = VIRTCHNL2_OP_GET_LAN_MEMORY_REGIONS, .recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN, + .send_buf.iov_len = + sizeof(struct virtchnl2_get_lan_memory_regions) + + sizeof(struct virtchnl2_mem_region), .timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC, }; int num_regions, size; @@ -1028,6 +1031,8 @@ static int idpf_send_get_lan_memory_regions(struct idpf_adapter *adapter) return -ENOMEM; xn_params.recv_buf.iov_base = rcvd_regions; + rcvd_regions->num_memory_regions = cpu_to_le16(1); + xn_params.send_buf.iov_base = rcvd_regions; reply_sz = idpf_vc_xn_exec(adapter, &xn_params); if (reply_sz < 0) return reply_sz; diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c index b5805969404fa..2a5cebbf1ff81 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c @@ -307,7 +307,7 @@ static void octep_setup_iq_regs_cn93_pf(struct octep_device *oct, int iq_no) } 
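Note on the iavf_config_rss_reg() hunk above: the original bound "i <= adapter->rss_key_size / 4" is an off-by-one that issues one extra 32-bit register write past the end of the key (and likewise one past the LUT), which the change to "<" fixes. A minimal standalone sketch of the arithmetic, using a hypothetical reg_write() and key size in place of the real wr32() and adapter fields:

#include <stdio.h>

#define RSS_KEY_SIZE 52	/* bytes; hypothetical stand-in for adapter->rss_key_size */

static unsigned int writes;

/* Hypothetical stand-in for wr32(hw, IAVF_VFQF_HKEY(idx), dw[idx]). */
static void reg_write(unsigned int idx)
{
	(void)idx;
	writes++;
}

int main(void)
{
	unsigned int i;

	/* Buggy bound: "<=" walks one 32-bit word past the end of the key. */
	for (i = 0; i <= RSS_KEY_SIZE / 4; i++)
		reg_write(i);
	printf("with <=: %u writes\n", writes);	/* 14 */

	/* Fixed bound, as in the patch: exactly RSS_KEY_SIZE / 4 writes. */
	writes = 0;
	for (i = 0; i < RSS_KEY_SIZE / 4; i++)
		reg_write(i);
	printf("with < : %u writes\n", writes);	/* 13 */

	return 0;
}

Assuming the usual 52-byte VF RSS key (13 HKEY registers), the old loop performed 14 writes where only 13 slots hold key material; the rss_lut loop had the same pattern.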
/* Setup registers for a hardware Rx Queue */ -static void octep_setup_oq_regs_cn93_pf(struct octep_device *oct, int oq_no) +static int octep_setup_oq_regs_cn93_pf(struct octep_device *oct, int oq_no) { u64 reg_val; u64 oq_ctl = 0ULL; @@ -355,6 +355,7 @@ static void octep_setup_oq_regs_cn93_pf(struct octep_device *oct, int oq_no) reg_val = ((u64)time_threshold << 32) | CFG_GET_OQ_INTR_PKT(oct->conf); octep_write_csr64(oct, CN93_SDP_R_OUT_INT_LEVELS(oq_no), reg_val); + return 0; } /* Setup registers for a PF mailbox */ @@ -696,14 +697,26 @@ static void octep_enable_interrupts_cn93_pf(struct octep_device *oct) /* Disable all interrupts */ static void octep_disable_interrupts_cn93_pf(struct octep_device *oct) { - u64 intr_mask = 0ULL; + u64 reg_val, intr_mask = 0ULL; int srn, num_rings, i; srn = CFG_GET_PORTS_PF_SRN(oct->conf); num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); - for (i = 0; i < num_rings; i++) - intr_mask |= (0x1ULL << (srn + i)); + for (i = 0; i < num_rings; i++) { + intr_mask |= (BIT_ULL(srn + i)); + reg_val = octep_read_csr64(oct, + CN93_SDP_R_IN_INT_LEVELS(srn + i)); + reg_val &= (~CN93_INT_ENA_BIT); + octep_write_csr64(oct, + CN93_SDP_R_IN_INT_LEVELS(srn + i), reg_val); + + reg_val = octep_read_csr64(oct, + CN93_SDP_R_OUT_INT_LEVELS(srn + i)); + reg_val &= (~CN93_INT_ENA_BIT); + octep_write_csr64(oct, + CN93_SDP_R_OUT_INT_LEVELS(srn + i), reg_val); + } octep_write_csr64(oct, CN93_SDP_EPF_IRERR_RINT_ENA_W1C, intr_mask); octep_write_csr64(oct, CN93_SDP_EPF_ORERR_RINT_ENA_W1C, intr_mask); diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c b/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c index 5de0b5ecbc5fd..1ca8da3f7dc72 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "octep_config.h" #include "octep_main.h" @@ -327,11 +328,13 @@ static void octep_setup_iq_regs_cnxk_pf(struct octep_device *oct, int iq_no) } /* Setup registers for a hardware Rx Queue */ -static void octep_setup_oq_regs_cnxk_pf(struct octep_device *oct, int oq_no) +static int octep_setup_oq_regs_cnxk_pf(struct octep_device *oct, int oq_no) { u64 reg_val; u64 oq_ctl = 0ULL; + u64 reg_ba_val; u32 time_threshold = 0; + unsigned long t_out_jiffies; struct octep_oq *oq = oct->oq[oq_no]; oq_no += CFG_GET_PORTS_PF_SRN(oct->conf); @@ -343,6 +346,33 @@ static void octep_setup_oq_regs_cnxk_pf(struct octep_device *oct, int oq_no) reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no)); } while (!(reg_val & CNXK_R_OUT_CTL_IDLE)); } + octep_write_csr64(oct, CNXK_SDP_R_OUT_WMARK(oq_no), oq->max_count); + /* Wait for WMARK to get applied */ + usleep_range(10, 15); + + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_BADDR(oq_no), + oq->desc_ring_dma); + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_RSIZE(oq_no), + oq->max_count); + reg_ba_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_SLIST_BADDR(oq_no)); + + if (reg_ba_val != oq->desc_ring_dma) { + t_out_jiffies = jiffies + 10 * HZ; + do { + if (reg_ba_val == ULLONG_MAX) + return -EFAULT; + octep_write_csr64(oct, + CNXK_SDP_R_OUT_SLIST_BADDR(oq_no), + oq->desc_ring_dma); + octep_write_csr64(oct, + CNXK_SDP_R_OUT_SLIST_RSIZE(oq_no), + oq->max_count); + reg_ba_val = + octep_read_csr64(oct, + CNXK_SDP_R_OUT_SLIST_BADDR(oq_no)); + } while ((reg_ba_val != oq->desc_ring_dma) && + time_before(jiffies, t_out_jiffies)); + } reg_val &= ~(CNXK_R_OUT_CTL_IMODE); reg_val &= ~(CNXK_R_OUT_CTL_ROR_P); @@ -356,10 +386,6 @@ 
static void octep_setup_oq_regs_cnxk_pf(struct octep_device *oct, int oq_no) reg_val |= (CNXK_R_OUT_CTL_ES_P); octep_write_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no), reg_val); - octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_BADDR(oq_no), - oq->desc_ring_dma); - octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_RSIZE(oq_no), - oq->max_count); oq_ctl = octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no)); @@ -385,6 +411,7 @@ static void octep_setup_oq_regs_cnxk_pf(struct octep_device *oct, int oq_no) reg_val &= ~0xFFFFFFFFULL; reg_val |= CFG_GET_OQ_WMARK(oct->conf); octep_write_csr64(oct, CNXK_SDP_R_OUT_WMARK(oq_no), reg_val); + return 0; } /* Setup registers for a PF mailbox */ @@ -720,14 +747,26 @@ static void octep_enable_interrupts_cnxk_pf(struct octep_device *oct) /* Disable all interrupts */ static void octep_disable_interrupts_cnxk_pf(struct octep_device *oct) { - u64 intr_mask = 0ULL; + u64 reg_val, intr_mask = 0ULL; int srn, num_rings, i; srn = CFG_GET_PORTS_PF_SRN(oct->conf); num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); - for (i = 0; i < num_rings; i++) - intr_mask |= (0x1ULL << (srn + i)); + for (i = 0; i < num_rings; i++) { + intr_mask |= BIT_ULL(srn + i); + reg_val = octep_read_csr64(oct, + CNXK_SDP_R_IN_INT_LEVELS(srn + i)); + reg_val &= (~CNXK_INT_ENA_BIT); + octep_write_csr64(oct, + CNXK_SDP_R_IN_INT_LEVELS(srn + i), reg_val); + + reg_val = octep_read_csr64(oct, + CNXK_SDP_R_OUT_INT_LEVELS(srn + i)); + reg_val &= (~CNXK_INT_ENA_BIT); + octep_write_csr64(oct, + CNXK_SDP_R_OUT_INT_LEVELS(srn + i), reg_val); + } octep_write_csr64(oct, CNXK_SDP_EPF_IRERR_RINT_ENA_W1C, intr_mask); octep_write_csr64(oct, CNXK_SDP_EPF_ORERR_RINT_ENA_W1C, intr_mask); diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index bcea3fc26a8c7..ccfb248d09148 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -555,28 +555,43 @@ static void octep_clean_irqs(struct octep_device *oct) } /** - * octep_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue. + * octep_update_pkt() - Update IQ/OQ IN/OUT_CNT registers. * * @iq: Octeon Tx queue data structure. * @oq: Octeon Rx queue data structure. */ -static void octep_enable_ioq_irq(struct octep_iq *iq, struct octep_oq *oq) +static void octep_update_pkt(struct octep_iq *iq, struct octep_oq *oq) { - u32 pkts_pend = oq->pkts_pending; + u32 pkts_pend = READ_ONCE(oq->pkts_pending); + u32 last_pkt_count = READ_ONCE(oq->last_pkt_count); + u32 pkts_processed = READ_ONCE(iq->pkts_processed); + u32 pkt_in_done = READ_ONCE(iq->pkt_in_done); netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no); - if (iq->pkts_processed) { - writel(iq->pkts_processed, iq->inst_cnt_reg); - iq->pkt_in_done -= iq->pkts_processed; - iq->pkts_processed = 0; + if (pkts_processed) { + writel(pkts_processed, iq->inst_cnt_reg); + readl(iq->inst_cnt_reg); + WRITE_ONCE(iq->pkt_in_done, (pkt_in_done - pkts_processed)); + WRITE_ONCE(iq->pkts_processed, 0); } - if (oq->last_pkt_count - pkts_pend) { - writel(oq->last_pkt_count - pkts_pend, oq->pkts_sent_reg); - oq->last_pkt_count = pkts_pend; + if (last_pkt_count - pkts_pend) { + writel(last_pkt_count - pkts_pend, oq->pkts_sent_reg); + readl(oq->pkts_sent_reg); + WRITE_ONCE(oq->last_pkt_count, pkts_pend); } /* Flush the previous writes before writing to RESEND bit */ - wmb(); + smp_wmb(); +} + +/** + * octep_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue.
+ * + * @iq: Octeon Tx queue data structure. + * @oq: Octeon Rx queue data structure. + */ +static void octep_enable_ioq_irq(struct octep_iq *iq, struct octep_oq *oq) +{ writeq(1UL << OCTEP_OQ_INTR_RESEND_BIT, oq->pkts_sent_reg); writeq(1UL << OCTEP_IQ_INTR_RESEND_BIT, iq->inst_cnt_reg); } @@ -602,7 +617,8 @@ static int octep_napi_poll(struct napi_struct *napi, int budget) if (tx_pending || rx_done >= budget) return budget; - napi_complete(napi); + octep_update_pkt(ioq_vector->iq, ioq_vector->oq); + napi_complete_done(napi, rx_done); octep_enable_ioq_irq(ioq_vector->iq, ioq_vector->oq); return rx_done; } diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h index 81ac4267811c8..35d0ff289a70d 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h @@ -77,7 +77,7 @@ struct octep_pci_win_regs { struct octep_hw_ops { void (*setup_iq_regs)(struct octep_device *oct, int q); - void (*setup_oq_regs)(struct octep_device *oct, int q); + int (*setup_oq_regs)(struct octep_device *oct, int q); void (*setup_mbox_regs)(struct octep_device *oct, int mbox); irqreturn_t (*mbox_intr_handler)(void *ioq_vector); diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h index ca473502d7a02..42cb199bd0855 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h @@ -386,5 +386,6 @@ #define CN93_PEM_BAR4_INDEX 7 #define CN93_PEM_BAR4_INDEX_SIZE 0x400000ULL #define CN93_PEM_BAR4_INDEX_OFFSET (CN93_PEM_BAR4_INDEX * CN93_PEM_BAR4_INDEX_SIZE) +#define CN93_INT_ENA_BIT (BIT_ULL(62)) #endif /* _OCTEP_REGS_CN9K_PF_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h index e637d7c8224d4..9eaadded9c503 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h @@ -412,5 +412,6 @@ #define CNXK_PEM_BAR4_INDEX 7 #define CNXK_PEM_BAR4_INDEX_SIZE 0x400000ULL #define CNXK_PEM_BAR4_INDEX_OFFSET (CNXK_PEM_BAR4_INDEX * CNXK_PEM_BAR4_INDEX_SIZE) +#define CNXK_INT_ENA_BIT (BIT_ULL(62)) #endif /* _OCTEP_REGS_CNXK_PF_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c index 82b6b19e76b47..6f94ccafb6519 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c @@ -170,7 +170,9 @@ static int octep_setup_oq(struct octep_device *oct, int q_no) goto oq_fill_buff_err; octep_oq_reset_indices(oq); - oct->hw_ops.setup_oq_regs(oct, q_no); + if (oct->hw_ops.setup_oq_regs(oct, q_no)) + goto oq_fill_buff_err; + oct->num_oqs++; return 0; @@ -318,10 +320,16 @@ static int octep_oq_check_hw_for_pkts(struct octep_device *oct, struct octep_oq *oq) { u32 pkt_count, new_pkts; + u32 last_pkt_count, pkts_pending; pkt_count = readl(oq->pkts_sent_reg); - new_pkts = pkt_count - oq->last_pkt_count; + last_pkt_count = READ_ONCE(oq->last_pkt_count); + new_pkts = pkt_count - last_pkt_count; + if (pkt_count < last_pkt_count) { + dev_err(oq->dev, "OQ-%u pkt_count(%u) < oq->last_pkt_count(%u)\n", + oq->q_no, pkt_count, last_pkt_count); + } /* Clear the hardware packets counter register if the rx queue is * being processed continuously within a single interrupt and * reached half its max
value. @@ -332,8 +340,9 @@ static int octep_oq_check_hw_for_pkts(struct octep_device *oct, pkt_count = readl(oq->pkts_sent_reg); new_pkts += pkt_count; } - oq->last_pkt_count = pkt_count; - oq->pkts_pending += new_pkts; + WRITE_ONCE(oq->last_pkt_count, pkt_count); + pkts_pending = READ_ONCE(oq->pkts_pending); + WRITE_ONCE(oq->pkts_pending, (pkts_pending + new_pkts)); return new_pkts; } @@ -408,7 +417,7 @@ static int __octep_oq_process_rx(struct octep_device *oct, u16 rx_ol_flags; u32 read_idx; - read_idx = oq->host_read_idx; + read_idx = READ_ONCE(oq->host_read_idx); rx_bytes = 0; desc_used = 0; for (pkt = 0; pkt < pkts_to_process; pkt++) { @@ -493,7 +502,7 @@ static int __octep_oq_process_rx(struct octep_device *oct, napi_gro_receive(oq->napi, skb); } - oq->host_read_idx = read_idx; + WRITE_ONCE(oq->host_read_idx, read_idx); oq->refill_count += desc_used; oq->stats->packets += pkt; oq->stats->bytes += rx_bytes; @@ -516,22 +525,26 @@ int octep_oq_process_rx(struct octep_oq *oq, int budget) { u32 pkts_available, pkts_processed, total_pkts_processed; struct octep_device *oct = oq->octep_dev; + u32 pkts_pending; pkts_available = 0; pkts_processed = 0; total_pkts_processed = 0; while (total_pkts_processed < budget) { /* update pending count only when current one exhausted */ - if (oq->pkts_pending == 0) + pkts_pending = READ_ONCE(oq->pkts_pending); + if (pkts_pending == 0) octep_oq_check_hw_for_pkts(oct, oq); + pkts_pending = READ_ONCE(oq->pkts_pending); pkts_available = min(budget - total_pkts_processed, - oq->pkts_pending); + pkts_pending); if (!pkts_available) break; pkts_processed = __octep_oq_process_rx(oct, oq, pkts_available); - oq->pkts_pending -= pkts_processed; + pkts_pending = READ_ONCE(oq->pkts_pending); + WRITE_ONCE(oq->pkts_pending, (pkts_pending - pkts_processed)); total_pkts_processed += pkts_processed; } diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c index 88937fce75f14..4c769b27c2789 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c @@ -196,7 +196,7 @@ static void octep_vf_setup_iq_regs_cn93(struct octep_vf_device *oct, int iq_no) } /* Setup registers for a hardware Rx Queue */ -static void octep_vf_setup_oq_regs_cn93(struct octep_vf_device *oct, int oq_no) +static int octep_vf_setup_oq_regs_cn93(struct octep_vf_device *oct, int oq_no) { struct octep_vf_oq *oq = oct->oq[oq_no]; u32 time_threshold = 0; @@ -239,6 +239,7 @@ static void octep_vf_setup_oq_regs_cn93(struct octep_vf_device *oct, int oq_no) time_threshold = CFG_GET_OQ_INTR_TIME(oct->conf); reg_val = ((u64)time_threshold << 32) | CFG_GET_OQ_INTR_PKT(oct->conf); octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_INT_LEVELS(oq_no), reg_val); + return 0; } /* Setup registers for a VF mailbox */ diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c index 1f79dfad42c62..3967d0e0de829 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c @@ -199,12 +199,14 @@ static void octep_vf_setup_iq_regs_cnxk(struct octep_vf_device *oct, int iq_no) } /* Setup registers for a hardware Rx Queue */ -static void octep_vf_setup_oq_regs_cnxk(struct octep_vf_device *oct, int oq_no) +static int octep_vf_setup_oq_regs_cnxk(struct octep_vf_device *oct, int oq_no) { struct octep_vf_oq *oq = oct->oq[oq_no]; u32 time_threshold = 0; 
u64 oq_ctl = ULL(0); + u64 reg_ba_val; u64 reg_val; + unsigned long t_out_jiffies; reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(oq_no)); @@ -214,6 +216,35 @@ static void octep_vf_setup_oq_regs_cnxk(struct octep_vf_device *oct, int oq_no) reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(oq_no)); } while (!(reg_val & CNXK_VF_R_OUT_CTL_IDLE)); } + octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_WMARK(oq_no), + oq->max_count); + /* Wait for WMARK to get applied */ + usleep_range(10, 15); + + octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_BADDR(oq_no), + oq->desc_ring_dma); + octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_RSIZE(oq_no), + oq->max_count); + reg_ba_val = octep_vf_read_csr64(oct, + CNXK_VF_SDP_R_OUT_SLIST_BADDR(oq_no)); + if (reg_ba_val != oq->desc_ring_dma) { + t_out_jiffies = jiffies + 10 * HZ; + do { + if (reg_ba_val == ULLONG_MAX) + return -EFAULT; + octep_vf_write_csr64(oct, + CNXK_VF_SDP_R_OUT_SLIST_BADDR + (oq_no), oq->desc_ring_dma); + octep_vf_write_csr64(oct, + CNXK_VF_SDP_R_OUT_SLIST_RSIZE + (oq_no), oq->max_count); + reg_ba_val = + octep_vf_read_csr64(oct, + CNXK_VF_SDP_R_OUT_SLIST_BADDR + (oq_no)); + } while ((reg_ba_val != oq->desc_ring_dma) && + time_before(jiffies, t_out_jiffies)); + } reg_val &= ~(CNXK_VF_R_OUT_CTL_IMODE); reg_val &= ~(CNXK_VF_R_OUT_CTL_ROR_P); @@ -227,8 +258,6 @@ static void octep_vf_setup_oq_regs_cnxk(struct octep_vf_device *oct, int oq_no) reg_val |= (CNXK_VF_R_OUT_CTL_ES_P); octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(oq_no), reg_val); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_BADDR(oq_no), oq->desc_ring_dma); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_RSIZE(oq_no), oq->max_count); oq_ctl = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(oq_no)); /* Clear the ISIZE and BSIZE (22-0) */ @@ -250,6 +279,7 @@ static void octep_vf_setup_oq_regs_cnxk(struct octep_vf_device *oct, int oq_no) reg_val &= ~GENMASK_ULL(31, 0); reg_val |= CFG_GET_OQ_WMARK(oct->conf); octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_WMARK(oq_no), reg_val); + return 0; } /* Setup registers for a VF mailbox */ diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c index 420c3f4cf7417..27a5fc38bccb6 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c @@ -286,28 +286,45 @@ static void octep_vf_clean_irqs(struct octep_vf_device *oct) } /** - * octep_vf_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue. + * octep_vf_update_pkt() - Update IQ/OQ IN/OUT_CNT registers. * * @iq: Octeon Tx queue data structure. * @oq: Octeon Rx queue data structure. 
*/ -static void octep_vf_enable_ioq_irq(struct octep_vf_iq *iq, struct octep_vf_oq *oq) + +static void octep_vf_update_pkt(struct octep_vf_iq *iq, struct octep_vf_oq *oq) { - u32 pkts_pend = oq->pkts_pending; + u32 pkts_pend = READ_ONCE(oq->pkts_pending); + u32 last_pkt_count = READ_ONCE(oq->last_pkt_count); + u32 pkts_processed = READ_ONCE(iq->pkts_processed); + u32 pkt_in_done = READ_ONCE(iq->pkt_in_done); netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no); - if (iq->pkts_processed) { - writel(iq->pkts_processed, iq->inst_cnt_reg); - iq->pkt_in_done -= iq->pkts_processed; - iq->pkts_processed = 0; + if (pkts_processed) { + writel(pkts_processed, iq->inst_cnt_reg); + readl(iq->inst_cnt_reg); + WRITE_ONCE(iq->pkt_in_done, (pkt_in_done - pkts_processed)); + WRITE_ONCE(iq->pkts_processed, 0); } - if (oq->last_pkt_count - pkts_pend) { - writel(oq->last_pkt_count - pkts_pend, oq->pkts_sent_reg); - oq->last_pkt_count = pkts_pend; + if (last_pkt_count - pkts_pend) { + writel(last_pkt_count - pkts_pend, oq->pkts_sent_reg); + readl(oq->pkts_sent_reg); + WRITE_ONCE(oq->last_pkt_count, pkts_pend); } /* Flush the previous writes before writing to RESEND bit */ smp_wmb(); +} + +/** + * octep_vf_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue. + * + * @iq: Octeon Tx queue data structure. + * @oq: Octeon Rx queue data structure. + */ +static void octep_vf_enable_ioq_irq(struct octep_vf_iq *iq, + struct octep_vf_oq *oq) +{ writeq(1UL << OCTEP_VF_OQ_INTR_RESEND_BIT, oq->pkts_sent_reg); writeq(1UL << OCTEP_VF_IQ_INTR_RESEND_BIT, iq->inst_cnt_reg); } @@ -333,6 +350,7 @@ static int octep_vf_napi_poll(struct napi_struct *napi, int budget) if (tx_pending || rx_done >= budget) return budget; + octep_vf_update_pkt(ioq_vector->iq, ioq_vector->oq); if (likely(napi_complete_done(napi, rx_done))) octep_vf_enable_ioq_irq(ioq_vector->iq, ioq_vector->oq); diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h index b9f13506f4620..c74cd2369e90d 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h @@ -55,7 +55,7 @@ struct octep_vf_mmio { struct octep_vf_hw_ops { void (*setup_iq_regs)(struct octep_vf_device *oct, int q); - void (*setup_oq_regs)(struct octep_vf_device *oct, int q); + int (*setup_oq_regs)(struct octep_vf_device *oct, int q); void (*setup_mbox_regs)(struct octep_vf_device *oct, int mbox); irqreturn_t (*non_ioq_intr_handler)(void *ioq_vector); diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c index d70c8be3cfc40..08b11c232f698 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c @@ -171,7 +171,9 @@ static int octep_vf_setup_oq(struct octep_vf_device *oct, int q_no) goto oq_fill_buff_err; octep_vf_oq_reset_indices(oq); - oct->hw_ops.setup_oq_regs(oct, q_no); + if (oct->hw_ops.setup_oq_regs(oct, q_no)) + goto oq_fill_buff_err; + oct->num_oqs++; return 0; @@ -319,9 +321,16 @@ static int octep_vf_oq_check_hw_for_pkts(struct octep_vf_device *oct, struct octep_vf_oq *oq) { u32 pkt_count, new_pkts; + u32 last_pkt_count, pkts_pending; pkt_count = readl(oq->pkts_sent_reg); - new_pkts = pkt_count - oq->last_pkt_count; + last_pkt_count = READ_ONCE(oq->last_pkt_count); + new_pkts = pkt_count - last_pkt_count; + + if (pkt_count < last_pkt_count) { + dev_err(oq->dev, "OQ-%u pkt_count(%u) < 
oq->last_pkt_count(%u)\n", + oq->q_no, pkt_count, last_pkt_count); + } /* Clear the hardware packets counter register if the rx queue is * being processed continuously within a single interrupt and @@ -333,8 +342,9 @@ static int octep_vf_oq_check_hw_for_pkts(struct octep_vf_device *oct, struct octep_vf_oq *oq) pkt_count = readl(oq->pkts_sent_reg); new_pkts += pkt_count; } - oq->last_pkt_count = pkt_count; - oq->pkts_pending += new_pkts; + WRITE_ONCE(oq->last_pkt_count, pkt_count); + pkts_pending = READ_ONCE(oq->pkts_pending); + WRITE_ONCE(oq->pkts_pending, (pkts_pending + new_pkts)); return new_pkts; } @@ -363,7 +373,7 @@ static int __octep_vf_oq_process_rx(struct octep_vf_device *oct, struct sk_buff *skb; u32 read_idx; - read_idx = oq->host_read_idx; + read_idx = READ_ONCE(oq->host_read_idx); rx_bytes = 0; desc_used = 0; for (pkt = 0; pkt < pkts_to_process; pkt++) { @@ -457,7 +467,7 @@ static int __octep_vf_oq_process_rx(struct octep_vf_device *oct, napi_gro_receive(oq->napi, skb); } - oq->host_read_idx = read_idx; + WRITE_ONCE(oq->host_read_idx, read_idx); oq->refill_count += desc_used; oq->stats->packets += pkt; oq->stats->bytes += rx_bytes; @@ -480,22 +490,26 @@ int octep_vf_oq_process_rx(struct octep_vf_oq *oq, int budget) { u32 pkts_available, pkts_processed, total_pkts_processed; struct octep_vf_device *oct = oq->octep_vf_dev; + u32 pkts_pending; pkts_available = 0; pkts_processed = 0; total_pkts_processed = 0; while (total_pkts_processed < budget) { /* update pending count only when current one exhausted */ - if (oq->pkts_pending == 0) + pkts_pending = READ_ONCE(oq->pkts_pending); + if (pkts_pending == 0) octep_vf_oq_check_hw_for_pkts(oct, oq); + pkts_pending = READ_ONCE(oq->pkts_pending); pkts_available = min(budget - total_pkts_processed, - oq->pkts_pending); + pkts_pending); if (!pkts_available) break; pkts_processed = __octep_vf_oq_process_rx(oct, oq, pkts_available); - oq->pkts_pending -= pkts_processed; + pkts_pending = READ_ONCE(oq->pkts_pending); + WRITE_ONCE(oq->pkts_pending, (pkts_pending - pkts_processed)); total_pkts_processed += pkts_processed; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index b90e23dc49de9..b6449f0a9e7dd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -418,6 +418,14 @@ static int otx2_set_ringparam(struct net_device *netdev, */ if (rx_count < pfvf->hw.rq_skid) rx_count = pfvf->hw.rq_skid; + + if (ring->rx_pending < 16) { + netdev_err(netdev, + "rx ring size %u invalid, min is 16\n", + ring->rx_pending); + return -EINVAL; + } + rx_count = Q_COUNT(Q_SIZE(rx_count, 3)); /* Due pipelining impact minimum 2000 unused SQ CQE's diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 887adf4807d16..ea77fbd98396a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -197,6 +197,11 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, struct pci_dev *pdev = dev->pdev; int ret = 0; + if (mlx5_fw_reset_in_progress(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Can't reload during firmware reset"); + return -EBUSY; + } + if (mlx5_dev_is_lightweight(dev)) { if (action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 
7bcf822a89f9f..6b4ec457ce227 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -33,6 +33,7 @@ #include "lib/eq.h" #include "fw_tracer.h" #include "fw_tracer_tracepoint.h" +#include static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer) { @@ -358,6 +359,47 @@ static const char *VAL_PARM = "%llx"; static const char *REPLACE_64_VAL_PARM = "%x%x"; static const char *PARAM_CHAR = "%"; +static bool mlx5_is_valid_spec(const char *str) +{ + /* Parse format specifiers to find the actual type. + * Structure: %[flags][width][.precision][length]type + * Skip flags, width, precision & length. + */ + while (isdigit(*str) || *str == '#' || *str == '.' || *str == 'l') + str++; + + /* Check if it's a valid integer/hex specifier or %%: + * Valid formats: %x, %d, %i, %u, etc. + */ + if (*str != 'x' && *str != 'X' && *str != 'd' && *str != 'i' && + *str != 'u' && *str != 'c' && *str != '%') + return false; + + return true; +} + +static bool mlx5_tracer_validate_params(const char *str) +{ + const char *substr = str; + + if (!str) + return false; + + substr = strstr(substr, PARAM_CHAR); + while (substr) { + if (!mlx5_is_valid_spec(substr + 1)) + return false; + + if (*(substr + 1) == '%') + substr = strstr(substr + 2, PARAM_CHAR); + else + substr = strstr(substr + 1, PARAM_CHAR); + + } + + return true; +} + static int mlx5_tracer_message_hash(u32 message_id) { return jhash_1word(message_id, 0) & (MESSAGE_HASH_SIZE - 1); @@ -419,6 +461,10 @@ static int mlx5_tracer_get_num_of_params(char *str) char *substr, *pstr = str; int num_of_params = 0; + /* Validate that all parameters are valid before processing */ + if (!mlx5_tracer_validate_params(str)) + return -EINVAL; + /* replace %llx with %x%x */ substr = strstr(pstr, VAL_PARM); while (substr) { @@ -427,11 +473,15 @@ static int mlx5_tracer_get_num_of_params(char *str) substr = strstr(pstr, VAL_PARM); } - /* count all the % characters */ + /* count all the % characters, but skip %% (escaped percent) */ substr = strstr(str, PARAM_CHAR); while (substr) { - num_of_params += 1; - str = substr + 1; + if (*(substr + 1) != '%') { + num_of_params += 1; + str = substr + 1; + } else { + str = substr + 2; + } substr = strstr(str, PARAM_CHAR); } @@ -570,14 +620,17 @@ void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt, { char tmp[512]; - snprintf(tmp, sizeof(tmp), str_frmt->string, - str_frmt->params[0], - str_frmt->params[1], - str_frmt->params[2], - str_frmt->params[3], - str_frmt->params[4], - str_frmt->params[5], - str_frmt->params[6]); + if (str_frmt->invalid_string) + snprintf(tmp, sizeof(tmp), "BAD_FORMAT: %s", str_frmt->string); + else + snprintf(tmp, sizeof(tmp), str_frmt->string, + str_frmt->params[0], + str_frmt->params[1], + str_frmt->params[2], + str_frmt->params[3], + str_frmt->params[4], + str_frmt->params[5], + str_frmt->params[6]); trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost, str_frmt->event_id, tmp); @@ -609,6 +662,13 @@ static int mlx5_tracer_handle_raw_string(struct mlx5_fw_tracer *tracer, return 0; } +static void mlx5_tracer_handle_bad_format_string(struct mlx5_fw_tracer *tracer, + struct tracer_string_format *cur_string) +{ + cur_string->invalid_string = true; + list_add_tail(&cur_string->list, &tracer->ready_strings_list); +} + static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer, struct tracer_event *tracer_event) { @@ -619,12 +679,18 @@ static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer, if 
(!cur_string) return mlx5_tracer_handle_raw_string(tracer, tracer_event); - cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string); - cur_string->last_param_num = 0; cur_string->event_id = tracer_event->event_id; cur_string->tmsn = tracer_event->string_event.tmsn; cur_string->timestamp = tracer_event->string_event.timestamp; cur_string->lost = tracer_event->lost_event; + cur_string->last_param_num = 0; + cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string); + if (cur_string->num_of_params < 0) { + pr_debug("%s Invalid format string parameters\n", + __func__); + mlx5_tracer_handle_bad_format_string(tracer, cur_string); + return 0; + } if (cur_string->num_of_params == 0) /* trace with no params */ list_add_tail(&cur_string->list, &tracer->ready_strings_list); } else { @@ -634,6 +700,11 @@ static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer, __func__, tracer_event->string_event.tmsn); return mlx5_tracer_handle_raw_string(tracer, tracer_event); } + if (cur_string->num_of_params < 0) { + pr_debug("%s string parameter of invalid string, dumping\n", + __func__); + return 0; + } cur_string->last_param_num += 1; if (cur_string->last_param_num > TRACER_MAX_PARAMS) { pr_debug("%s Number of params exceeds the max (%d)\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h index 5c548bb74f07b..30d0bcba88479 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h @@ -125,6 +125,7 @@ struct tracer_string_format { struct list_head list; u32 timestamp; bool lost; + bool invalid_string; }; enum mlx5_fw_tracer_ownership_state { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 811178d8976cf..262dc032e276a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -69,7 +69,7 @@ struct page_pool; #define MLX5E_METADATA_ETHER_TYPE (0x8CE4) #define MLX5E_METADATA_ETHER_LEN 8 -#define MLX5E_ETH_HARD_MTU (ETH_HLEN + PSP_ENCAP_HLEN + PSP_TRL_SIZE + VLAN_HLEN + ETH_FCS_LEN) +#define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) #define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu)) #define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 35d9530037a65..a8fb4bec369cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -342,9 +342,8 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, rt_dst_entry = &rt->dst; break; case AF_INET6: - rt_dst_entry = ipv6_stub->ipv6_dst_lookup_flow( - dev_net(netdev), NULL, &fl6, NULL); - if (IS_ERR(rt_dst_entry)) + if (!IS_ENABLED(CONFIG_IPV6) || + ip6_dst_lookup(dev_net(netdev), NULL, &rt_dst_entry, &fl6)) goto neigh; break; default: @@ -359,6 +358,9 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, neigh_ha_snapshot(addr, n, netdev); ether_addr_copy(dst, addr); + if (attrs->dir == XFRM_DEV_OFFLOAD_OUT && + is_zero_ether_addr(addr)) + neigh_event_send(n, NULL); dst_release(rt_dst_entry); neigh_release(n); return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 
6168f08144148..07fc4d2c8fadd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -6825,7 +6825,6 @@ static void _mlx5e_remove(struct auxiliary_device *adev) * is already unregistered before changing to NIC profile. */ if (priv->netdev->reg_state == NETREG_REGISTERED) { - mlx5e_psp_unregister(priv); unregister_netdev(priv->netdev); _mlx5e_suspend(adev, false); } else { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 14884b9ea7f39..a01ee656a1e7f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -939,7 +939,11 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) sq->dma_fifo_cc = dma_fifo_cc; sq->cc = sqcc; - netdev_tx_completed_queue(sq->txq, npkts, nbytes); + /* Do not update BQL for TXQs that got replaced by new active ones, as + * netdev_tx_reset_queue() is called for them in mlx5e_activate_txqsq(). + */ + if (sq == sq->priv->txq2sq[sq->txq_ix]) + netdev_tx_completed_queue(sq->txq, npkts, nbytes); } #ifdef CONFIG_MLX5_CORE_IPOIB diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 8de6c7f6c2944..ea94a727633f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -52,6 +52,7 @@ #include "devlink.h" #include "lag/lag.h" #include "en/tc/post_meter.h" +#include "fw_reset.h" /* There are two match-all miss flows, one for unicast dst mac and * one for multicast. @@ -3991,6 +3992,11 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (IS_ERR(esw)) return PTR_ERR(esw); + if (mlx5_fw_reset_in_progress(esw->dev)) { + NL_SET_ERR_MSG_MOD(extack, "Can't change eswitch mode during firmware reset"); + return -EBUSY; + } + if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 2bceb42c98cc2..ae10665c53f32 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -15,6 +15,7 @@ enum { MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, + MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, }; struct mlx5_fw_reset { @@ -128,6 +129,16 @@ int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_ty return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL, NULL); } +bool mlx5_fw_reset_in_progress(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + if (!fw_reset) + return false; + + return test_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags); +} + static int mlx5_fw_reset_get_reset_method(struct mlx5_core_dev *dev, u8 *reset_method) { @@ -243,6 +254,8 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); devl_unlock(devlink); } + + clear_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags); } static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) @@ -462,27 +475,48 @@ static void mlx5_sync_reset_request_event(struct work_struct *work) struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, reset_request_work); struct mlx5_core_dev *dev = fw_reset->dev; + bool nack_request = false; + struct devlink 
*devlink; int err; err = mlx5_fw_reset_get_reset_method(dev, &fw_reset->reset_method); - if (err) + if (err) { + nack_request = true; mlx5_core_warn(dev, "Failed reading MFRL, err %d\n", err); + } else if (!mlx5_is_reset_now_capable(dev, fw_reset->reset_method) || + test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, + &fw_reset->reset_flags)) { + nack_request = true; + } - if (err || test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags) || - !mlx5_is_reset_now_capable(dev, fw_reset->reset_method)) { + devlink = priv_to_devlink(dev); + /* For external resets, try to acquire devl_lock. Skip if devlink reset is + * pending (lock already held) + */ + if (nack_request || + (!test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, + &fw_reset->reset_flags) && + !devl_trylock(devlink))) { err = mlx5_fw_reset_set_reset_sync_nack(dev); mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s", err ? "Failed" : "Sent"); return; } + if (mlx5_sync_reset_set_reset_requested(dev)) - return; + goto unlock; + + set_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags); err = mlx5_fw_reset_set_reset_sync_ack(dev); if (err) mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err); else mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n"); + +unlock: + if (!test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) + devl_unlock(devlink); } static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev, u16 dev_id) @@ -722,6 +756,8 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work) if (mlx5_sync_reset_clear_reset_requested(dev, true)) return; + + clear_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags); mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n"); } @@ -758,6 +794,7 @@ static void mlx5_sync_reset_timeout_work(struct work_struct *work) if (mlx5_sync_reset_clear_reset_requested(dev, true)) return; + clear_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags); mlx5_core_warn(dev, "PCI Sync FW Update Reset Timeout.\n"); } @@ -844,7 +881,8 @@ void mlx5_drain_fw_reset(struct mlx5_core_dev *dev) cancel_work_sync(&fw_reset->reset_reload_work); cancel_work_sync(&fw_reset->reset_now_work); cancel_work_sync(&fw_reset->reset_abort_work); - cancel_delayed_work(&fw_reset->reset_timeout_work); + if (test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) + mlx5_sync_reset_clear_reset_requested(dev, true); } static const struct devlink_param mlx5_fw_reset_devlink_params[] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h index d5b28525c960d..2d96b2adc1cdf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h @@ -10,6 +10,7 @@ int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_ty int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, struct netlink_ext_ack *extack); int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev); +bool mlx5_fw_reset_in_progress(struct mlx5_core_dev *dev); int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev); void mlx5_sync_reset_unload_flow(struct mlx5_core_dev *dev, bool locked); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 1ac933cd8f02b..a459a30f36cae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -1413,6 
+1413,7 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) static void mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev *dev) { mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp); + dev->priv.hca_devcom_comp = NULL; } static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index aad52d3a90e68..2d86af8f0d9b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -67,12 +67,19 @@ static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev) static int enable_mpesw(struct mlx5_lag *ldev) { - int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_core_dev *dev0; int err; + int idx; int i; - if (idx < 0 || ldev->mode != MLX5_LAG_MODE_NONE) + if (ldev->mode == MLX5_LAG_MODE_MPESW) + return 0; + + if (ldev->mode != MLX5_LAG_MODE_NONE) + return -EINVAL; + + idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + if (idx < 0) return -EINVAL; dev0 = ldev->pf[idx].dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 1ab569ce3fcfd..4f1e2bd970179 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -110,74 +110,9 @@ static struct mlx5_profile profile[] = { }, [2] = { - .mask = MLX5_PROF_MASK_QP_SIZE | - MLX5_PROF_MASK_MR_CACHE, + .mask = MLX5_PROF_MASK_QP_SIZE, .log_max_qp = LOG_MAX_SUPPORTED_QPS, .num_cmd_caches = MLX5_NUM_COMMAND_CACHES, - .mr_cache[0] = { - .size = 500, - .limit = 250 - }, - .mr_cache[1] = { - .size = 500, - .limit = 250 - }, - .mr_cache[2] = { - .size = 500, - .limit = 250 - }, - .mr_cache[3] = { - .size = 500, - .limit = 250 - }, - .mr_cache[4] = { - .size = 500, - .limit = 250 - }, - .mr_cache[5] = { - .size = 500, - .limit = 250 - }, - .mr_cache[6] = { - .size = 500, - .limit = 250 - }, - .mr_cache[7] = { - .size = 500, - .limit = 250 - }, - .mr_cache[8] = { - .size = 500, - .limit = 250 - }, - .mr_cache[9] = { - .size = 500, - .limit = 250 - }, - .mr_cache[10] = { - .size = 500, - .limit = 250 - }, - .mr_cache[11] = { - .size = 500, - .limit = 250 - }, - .mr_cache[12] = { - .size = 64, - .limit = 32 - }, - .mr_cache[13] = { - .size = 32, - .limit = 16 - }, - .mr_cache[14] = { - .size = 16, - .limit = 8 - }, - .mr_cache[15] = { - .size = 8, - .limit = 4 - }, }, [3] = { .mask = MLX5_PROF_MASK_QP_SIZE, @@ -2231,6 +2166,7 @@ static void shutdown(struct pci_dev *pdev) mlx5_core_info(dev, "Shutdown was called\n"); set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); + mlx5_drain_fw_reset(dev); mlx5_drain_health_wq(dev); err = mlx5_try_fast_unload(dev); if (err) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c index 5afe6b155ef0d..81935f87bfcd7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -440,7 +440,9 @@ int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table, rhashtable_remove_fast(&mr_table->route_ht, &mr_orig_route->ht_node, mlxsw_sp_mr_route_ht_params); + mutex_lock(&mr_table->route_list_lock); list_del(&mr_orig_route->node); + mutex_unlock(&mr_table->route_list_lock); mlxsw_sp_mr_route_destroy(mr_table, mr_orig_route); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 
a2033837182e8..2d0e89bd2fb9c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2265,6 +2265,7 @@ mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n, if (!neigh_entry) return NULL; + neigh_hold(n); neigh_entry->key.n = n; neigh_entry->rif = rif; INIT_LIST_HEAD(&neigh_entry->nexthop_list); @@ -2274,6 +2275,7 @@ mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n, static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry) { + neigh_release(neigh_entry->key.n); kfree(neigh_entry); } @@ -2858,6 +2860,11 @@ static int mlxsw_sp_router_schedule_work(struct net *net, if (!net_work) return NOTIFY_BAD; + /* Take a reference to ensure the neighbour won't be destructed until + * we drop the reference in the work item. + */ + neigh_clone(n); + INIT_WORK(&net_work->work, cb); net_work->mlxsw_sp = router->mlxsw_sp; net_work->n = n; @@ -2881,11 +2888,6 @@ static int mlxsw_sp_router_schedule_neigh_work(struct mlxsw_sp_router *router, struct net *net; net = neigh_parms_net(n->parms); - - /* Take a reference to ensure the neighbour won't be destructed until we - * drop the reference in delayed work. - */ - neigh_clone(n); return mlxsw_sp_router_schedule_work(net, router, n, mlxsw_sp_router_neigh_event_work); } @@ -4320,6 +4322,8 @@ mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp, if (err) goto err_neigh_entry_insert; + neigh_release(old_n); + read_lock_bh(&n->lock); nud_state = n->nud_state; dead = n->dead; @@ -4328,14 +4332,10 @@ mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp, list_for_each_entry(nh, &neigh_entry->nexthop_list, neigh_list_node) { - neigh_release(old_n); - neigh_clone(n); __mlxsw_sp_nexthop_neigh_update(nh, !entry_connected); mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); } - neigh_release(n); - return 0; err_neigh_entry_insert: @@ -4428,6 +4428,11 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, } } + /* Release the reference taken by neigh_lookup() / neigh_create() since + * neigh_entry already holds one. 
+ */ + neigh_release(n); + /* If that is the first nexthop connected to that neigh, add to * nexthop_neighs_list */ @@ -4454,11 +4459,9 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; - struct neighbour *n; if (!neigh_entry) return; - n = neigh_entry->key.n; __mlxsw_sp_nexthop_neigh_update(nh, true); list_del(&nh->neigh_list_node); @@ -4472,8 +4475,6 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list)) mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); - - neigh_release(n); } static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index 9240673c7533d..4a44d5261f009 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -218,6 +218,9 @@ static void fbnic_service_task(struct work_struct *work) fbnic_fw_check_heartbeat(fbd); + if (!netif_carrier_ok(netdev)) + netif_carrier_on(fbd->netdev); + fbnic_health_check(fbd); fbnic_bmc_rpc_check(fbd); diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index efb4e412ec7e4..0055c231acf6d 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -481,7 +481,7 @@ static void mana_serv_reset(struct pci_dev *pdev) /* Perform PCI rescan on device if we failed on HWC */ dev_err(&pdev->dev, "MANA service: resume failed, rescanning\n"); mana_serv_rescan(pdev); - goto out; + return; } if (ret) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 08bee56aea35f..c345d9b17c892 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -2307,14 +2307,16 @@ static void ocelot_set_aggr_pgids(struct ocelot *ocelot) /* Now, set PGIDs for each active LAG */ for (lag = 0; lag < ocelot->num_phys_ports; lag++) { - struct net_device *bond = ocelot->ports[lag]->bond; + struct ocelot_port *ocelot_port = ocelot->ports[lag]; int num_active_ports = 0; + struct net_device *bond; unsigned long bond_mask; u8 aggr_idx[16]; - if (!bond || (visited & BIT(lag))) + if (!ocelot_port || !ocelot_port->bond || (visited & BIT(lag))) continue; + bond = ocelot_port->bond; bond_mask = ocelot_get_bond_mask(ocelot, bond); for_each_set_bit(port, &bond_mask, ocelot->num_phys_ports) { diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 405e91eb3141f..755083852eef2 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2655,9 +2655,6 @@ static void rtl_wol_enable_rx(struct rtl8169_private *tp) static void rtl_prepare_power_down(struct rtl8169_private *tp) { - if (tp->dash_enabled) - return; - if (tp->mac_version == RTL_GIGA_MAC_VER_32 || tp->mac_version == RTL_GIGA_MAC_VER_33) rtl_ephy_write(tp, 0x19, 0xff64); @@ -4812,7 +4809,7 @@ static void rtl8169_down(struct rtl8169_private *tp) rtl_disable_exit_l1(tp); rtl_prepare_power_down(tp); - if (tp->dash_type != RTL_DASH_NONE) + if (tp->dash_type != RTL_DASH_NONE && !tp->saved_wolopts) rtl8168_driver_stop(tp); } diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 9d1a83a5fa7e5..d16c178d10344 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ 
b/drivers/net/ethernet/smsc/smc91x.c @@ -516,15 +516,7 @@ static inline void smc_rcv(struct net_device *dev) * any other concurrent access and C would always interrupt B. But life * isn't that easy in a SMP world... */ -#define smc_special_trylock(lock, flags) \ -({ \ - int __ret; \ - local_irq_save(flags); \ - __ret = spin_trylock(lock); \ - if (!__ret) \ - local_irq_restore(flags); \ - __ret; \ -}) +#define smc_special_trylock(lock, flags) spin_trylock_irqsave(lock, flags) #define smc_special_lock(lock, flags) spin_lock_irqsave(lock, flags) #define smc_special_unlock(lock, flags) spin_unlock_irqrestore(lock, flags) #else diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 907fe2e927f01..1310312e3e099 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -135,7 +135,7 @@ config DWMAC_MESON config DWMAC_QCOM_ETHQOS tristate "Qualcomm ETHQOS support" default ARCH_QCOM - depends on OF && (ARCH_QCOM || COMPILE_TEST) + depends on ARCH_QCOM || COMPILE_TEST help Support for the Qualcomm ETHQOS core. diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 0826a7bd32ff5..54f8ef3cfd7d5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -1,12 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2018-19, Linaro Limited +#include #include -#include #include #include #include #include +#include +#include #include "stmmac.h" #include "stmmac_platform.h" @@ -81,6 +83,13 @@ #define SGMII_10M_RX_CLK_DVDR 0x31 +enum ethqos_pd_selector { + ETHQOS_PD_CORE = 0, + ETHQOS_PD_MDIO, + ETHQOS_PD_SERDES, + ETHQOS_NUM_PDS, +}; + struct ethqos_emac_por { unsigned int offset; unsigned int value; @@ -91,23 +100,46 @@ struct ethqos_emac_driver_data { unsigned int num_por; bool rgmii_config_loopback_en; bool has_emac_ge_3; - const char *link_clk_name; u32 dma_addr_width; struct dwmac4_addrs dwmac4_addrs; bool needs_sgmii_loopback; }; +struct ethqos_emac_pm_data { + const char *link_clk_name; + bool use_domains; + struct dev_pm_domain_attach_data pd; + unsigned int clk_ptp_rate; +}; + +struct ethqos_emac_match_data { + const struct ethqos_emac_driver_data *drv_data; + const struct ethqos_emac_pm_data *pm_data; +}; + +struct ethqos_emac_pm_ctx { + struct clk *link_clk; + unsigned int link_clk_rate; + struct phy *serdes_phy; +}; + +struct ethqos_emac_pd_ctx { + struct dev_pm_domain_list *pd_list; +}; + struct qcom_ethqos { struct platform_device *pdev; void __iomem *rgmii_base; void __iomem *mac_base; int (*configure_func)(struct qcom_ethqos *ethqos, int speed); - unsigned int link_clk_rate; - struct clk *link_clk; - struct phy *serdes_phy; - int serdes_speed; + union { + struct ethqos_emac_pm_ctx pm; + struct ethqos_emac_pd_ctx pd; + }; phy_interface_t phy_mode; + int serdes_speed; + int (*set_serdes_speed)(struct qcom_ethqos *ethqos); const struct ethqos_emac_por *por; unsigned int num_por; @@ -185,9 +217,9 @@ ethqos_update_link_clk(struct qcom_ethqos *ethqos, int speed) rate = rgmii_clock(speed); if (rate > 0) - ethqos->link_clk_rate = rate * 2; + ethqos->pm.link_clk_rate = rate * 2; - clk_set_rate(ethqos->link_clk, ethqos->link_clk_rate); + clk_set_rate(ethqos->pm.link_clk, ethqos->pm.link_clk_rate); } static void @@ -225,6 +257,10 @@ static const struct ethqos_emac_driver_data emac_v2_3_0_data = { .has_emac_ge_3 = false, }; +static const 
struct ethqos_emac_match_data emac_qcs404_data = { + .drv_data = &emac_v2_3_0_data, +}; + static const struct ethqos_emac_por emac_v2_1_0_por[] = { { .offset = RGMII_IO_MACRO_CONFIG, .value = 0x40C01343 }, { .offset = SDCC_HC_REG_DLL_CONFIG, .value = 0x2004642C }, @@ -241,6 +277,10 @@ static const struct ethqos_emac_driver_data emac_v2_1_0_data = { .has_emac_ge_3 = false, }; +static const struct ethqos_emac_match_data emac_sm8150_data = { + .drv_data = &emac_v2_1_0_data, +}; + static const struct ethqos_emac_por emac_v3_0_0_por[] = { { .offset = RGMII_IO_MACRO_CONFIG, .value = 0x40c01343 }, { .offset = SDCC_HC_REG_DLL_CONFIG, .value = 0x2004642c }, @@ -273,6 +313,10 @@ static const struct ethqos_emac_driver_data emac_v3_0_0_data = { }, }; +static const struct ethqos_emac_match_data emac_sc8280xp_data = { + .drv_data = &emac_v3_0_0_data, +}; + static const struct ethqos_emac_por emac_v4_0_0_por[] = { { .offset = RGMII_IO_MACRO_CONFIG, .value = 0x40c01343 }, { .offset = SDCC_HC_REG_DLL_CONFIG, .value = 0x2004642c }, @@ -287,7 +331,6 @@ static const struct ethqos_emac_driver_data emac_v4_0_0_data = { .num_por = ARRAY_SIZE(emac_v4_0_0_por), .rgmii_config_loopback_en = false, .has_emac_ge_3 = true, - .link_clk_name = "phyaux", .needs_sgmii_loopback = true, .dma_addr_width = 36, .dwmac4_addrs = { @@ -308,6 +351,34 @@ }, }; +static const struct ethqos_emac_pm_data emac_sa8775p_pm_data = { + .link_clk_name = "phyaux", +}; + +static const struct ethqos_emac_match_data emac_sa8775p_data = { + .drv_data = &emac_v4_0_0_data, + .pm_data = &emac_sa8775p_pm_data, +}; + +static const char * const emac_sa8255p_pd_names[] = { + "core", "mdio", "serdes" +}; + +static const struct ethqos_emac_pm_data emac_sa8255p_pm_data = { + .pd = { + .pd_flags = PD_FLAG_NO_DEV_LINK, + .pd_names = emac_sa8255p_pd_names, + .num_pd_names = ETHQOS_NUM_PDS, + }, + .use_domains = true, + .clk_ptp_rate = 230400000, +}; + +static const struct ethqos_emac_match_data emac_sa8255p_data = { + .drv_data = &emac_v4_0_0_data, + .pm_data = &emac_sa8255p_pm_data, +}; + static int ethqos_dll_configure(struct qcom_ethqos *ethqos) { struct device *dev = &ethqos->pdev->dev; @@ -374,6 +445,28 @@ return 0; } +static int qcom_ethqos_domain_on(struct qcom_ethqos *ethqos, + enum ethqos_pd_selector sel) +{ + struct device *dev = ethqos->pd.pd_list->pd_devs[sel]; + int ret; + + ret = pm_runtime_resume_and_get(dev); + if (ret < 0) + dev_err(&ethqos->pdev->dev, + "Failed to enable the power domain for %s\n", + dev_name(dev)); + return ret; +} + +static void qcom_ethqos_domain_off(struct qcom_ethqos *ethqos, + enum ethqos_pd_selector sel) +{ + struct device *dev = ethqos->pd.pd_list->pd_devs[sel]; + + pm_runtime_put_sync(dev); +} + static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed) { struct device *dev = &ethqos->pdev->dev; @@ -585,11 +678,23 @@ static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos, int speed) return 0; } +static int ethqos_set_serdes_speed_phy(struct qcom_ethqos *ethqos) +{ + return phy_set_speed(ethqos->pm.serdes_phy, ethqos->serdes_speed); +} + +static int ethqos_set_serdes_speed_pd(struct qcom_ethqos *ethqos) +{ + struct device *dev = ethqos->pd.pd_list->pd_devs[ETHQOS_PD_SERDES]; + + return dev_pm_opp_set_level(dev, ethqos->serdes_speed); +} + static void ethqos_set_serdes_speed(struct qcom_ethqos *ethqos, int speed) { if (ethqos->serdes_speed != speed) { -
phy_set_speed(ethqos->serdes_phy, speed); ethqos->serdes_speed = speed; + ethqos->set_serdes_speed(ethqos); } } @@ -655,23 +760,45 @@ static int qcom_ethqos_serdes_powerup(struct net_device *ndev, void *priv) struct qcom_ethqos *ethqos = priv; int ret; - ret = phy_init(ethqos->serdes_phy); + ret = phy_init(ethqos->pm.serdes_phy); if (ret) return ret; - ret = phy_power_on(ethqos->serdes_phy); + ret = phy_power_on(ethqos->pm.serdes_phy); if (ret) return ret; - return phy_set_speed(ethqos->serdes_phy, ethqos->serdes_speed); + return phy_set_speed(ethqos->pm.serdes_phy, ethqos->serdes_speed); } static void qcom_ethqos_serdes_powerdown(struct net_device *ndev, void *priv) { struct qcom_ethqos *ethqos = priv; - phy_power_off(ethqos->serdes_phy); - phy_exit(ethqos->serdes_phy); + phy_power_off(ethqos->pm.serdes_phy); + phy_exit(ethqos->pm.serdes_phy); +} + +static int qcom_ethqos_pd_serdes_powerup(struct net_device *ndev, void *priv) +{ + struct qcom_ethqos *ethqos = priv; + struct device *dev = ethqos->pd.pd_list->pd_devs[ETHQOS_PD_SERDES]; + int ret; + + ret = qcom_ethqos_domain_on(ethqos, ETHQOS_PD_SERDES); + if (ret < 0) + return ret; + + return dev_pm_opp_set_level(dev, ethqos->serdes_speed); +} + +static void qcom_ethqos_pd_serdes_powerdown(struct net_device *ndev, void *priv) +{ + struct qcom_ethqos *ethqos = priv; + struct device *dev = ethqos->pd.pd_list->pd_devs[ETHQOS_PD_SERDES]; + + dev_pm_opp_set_level(dev, 0); + qcom_ethqos_domain_off(ethqos, ETHQOS_PD_SERDES); } static int ethqos_clks_config(void *priv, bool enabled) @@ -680,7 +807,7 @@ static int ethqos_clks_config(void *priv, bool enabled) int ret = 0; if (enabled) { - ret = clk_prepare_enable(ethqos->link_clk); + ret = clk_prepare_enable(ethqos->pm.link_clk); if (ret) { dev_err(&ethqos->pdev->dev, "link_clk enable failed\n"); return ret; @@ -693,7 +820,7 @@ static int ethqos_clks_config(void *priv, bool enabled) */ ethqos_set_func_clk_en(ethqos); } else { - clk_disable_unprepare(ethqos->link_clk); + clk_disable_unprepare(ethqos->pm.link_clk); } return ret; @@ -704,6 +831,68 @@ static void ethqos_clks_disable(void *data) { ethqos_clks_config(data, false); } +static void ethqos_disable_serdes(void *data) +{ + struct qcom_ethqos *ethqos = data; + + qcom_ethqos_domain_off(ethqos, ETHQOS_PD_SERDES); +} + +static int ethqos_pd_clks_config(void *priv, bool enabled) +{ + struct qcom_ethqos *ethqos = priv; + int ret = 0; + + if (enabled) { + ret = qcom_ethqos_domain_on(ethqos, ETHQOS_PD_MDIO); + if (ret < 0) { + dev_err(&ethqos->pdev->dev, + "Failed to enable the MDIO power domain\n"); + return ret; + } + + ethqos_set_func_clk_en(ethqos); + } else { + qcom_ethqos_domain_off(ethqos, ETHQOS_PD_MDIO); + } + + return ret; +} + +static int qcom_ethqos_pd_init(struct device *dev, void *priv) +{ + struct qcom_ethqos *ethqos = priv; + int ret; + + /* + * Enable functional clock to prevent DMA reset after timeout due + * to no PHY clock being enabled after the hardware block has been + * power cycled. The actual configuration will be adjusted once + * ethqos_fix_mac_speed() is called. 
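+ * This mirrors the non-domain path, where ethqos_clks_config() likewise + * enables the functional clock right after the link clock.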
+ */ + ethqos_set_func_clk_en(ethqos); + + ret = qcom_ethqos_domain_on(ethqos, ETHQOS_PD_CORE); + if (ret) + return ret; + + ret = qcom_ethqos_domain_on(ethqos, ETHQOS_PD_MDIO); + if (ret) { + qcom_ethqos_domain_off(ethqos, ETHQOS_PD_CORE); + return ret; + } + + return 0; +} + +static void qcom_ethqos_pd_exit(struct device *dev, void *data) +{ + struct qcom_ethqos *ethqos = data; + + qcom_ethqos_domain_off(ethqos, ETHQOS_PD_MDIO); + qcom_ethqos_domain_off(ethqos, ETHQOS_PD_CORE); +} + static void ethqos_ptp_clk_freq_config(struct stmmac_priv *priv) { struct plat_stmmacenet_data *plat_dat = priv->plat; @@ -723,12 +912,14 @@ static void ethqos_ptp_clk_freq_config(struct stmmac_priv *priv) static int qcom_ethqos_probe(struct platform_device *pdev) { - struct device_node *np = pdev->dev.of_node; - const struct ethqos_emac_driver_data *data; + const struct ethqos_emac_driver_data *drv_data; + const struct ethqos_emac_pm_data *pm_data; + const struct ethqos_emac_match_data *data; struct plat_stmmacenet_data *plat_dat; struct stmmac_resources stmmac_res; struct device *dev = &pdev->dev; struct qcom_ethqos *ethqos; + const char *clk_name; int ret, i; ret = stmmac_get_platform_resources(pdev, &stmmac_res); @@ -742,8 +933,6 @@ static int qcom_ethqos_probe(struct platform_device *pdev) "dt configuration failed\n"); } - plat_dat->clks_config = ethqos_clks_config; - ethqos = devm_kzalloc(dev, sizeof(*ethqos), GFP_KERNEL); if (!ethqos) return -ENOMEM; @@ -774,33 +963,78 @@ static int qcom_ethqos_probe(struct platform_device *pdev) ethqos->mac_base = stmmac_res.addr; - data = of_device_get_match_data(dev); - ethqos->por = data->por; - ethqos->num_por = data->num_por; - ethqos->rgmii_config_loopback_en = data->rgmii_config_loopback_en; - ethqos->has_emac_ge_3 = data->has_emac_ge_3; - ethqos->needs_sgmii_loopback = data->needs_sgmii_loopback; + data = device_get_match_data(dev); + drv_data = data->drv_data; + pm_data = data->pm_data; + clk_name = pm_data && pm_data->link_clk_name ? 
+ pm_data->link_clk_name : "rgmii"; + + ethqos->por = drv_data->por; + ethqos->num_por = drv_data->num_por; + ethqos->rgmii_config_loopback_en = drv_data->rgmii_config_loopback_en; + ethqos->has_emac_ge_3 = drv_data->has_emac_ge_3; + ethqos->needs_sgmii_loopback = drv_data->needs_sgmii_loopback; + ethqos->serdes_speed = SPEED_1000; - ethqos->link_clk = devm_clk_get(dev, data->link_clk_name ?: "rgmii"); - if (IS_ERR(ethqos->link_clk)) - return dev_err_probe(dev, PTR_ERR(ethqos->link_clk), - "Failed to get link_clk\n"); + if (pm_data && pm_data->use_domains) { + ethqos->set_serdes_speed = ethqos_set_serdes_speed_pd; - ret = ethqos_clks_config(ethqos, true); - if (ret) - return ret; + ret = devm_pm_domain_attach_list(dev, &pm_data->pd, + &ethqos->pd.pd_list); + if (ret < 0) + return dev_err_probe(dev, ret, "Failed to attach power domains\n"); - ret = devm_add_action_or_reset(dev, ethqos_clks_disable, ethqos); - if (ret) - return ret; + plat_dat->clks_config = ethqos_pd_clks_config; + plat_dat->serdes_powerup = qcom_ethqos_pd_serdes_powerup; + plat_dat->serdes_powerdown = qcom_ethqos_pd_serdes_powerdown; + plat_dat->exit = qcom_ethqos_pd_exit; + plat_dat->init = qcom_ethqos_pd_init; + plat_dat->clk_ptp_rate = pm_data->clk_ptp_rate; + + ret = qcom_ethqos_pd_init(dev, ethqos); + if (ret) + return ret; - ethqos->serdes_phy = devm_phy_optional_get(dev, "serdes"); - if (IS_ERR(ethqos->serdes_phy)) - return dev_err_probe(dev, PTR_ERR(ethqos->serdes_phy), - "Failed to get serdes phy\n"); + ret = qcom_ethqos_domain_on(ethqos, ETHQOS_PD_SERDES); + if (ret) + return dev_err_probe(dev, ret, + "Failed to enable the serdes power domain\n"); + + ret = devm_add_action_or_reset(dev, ethqos_disable_serdes, ethqos); + if (ret) + return ret; + } else { + ethqos->set_serdes_speed = ethqos_set_serdes_speed_phy; + + ethqos->pm.link_clk = devm_clk_get(dev, clk_name); + if (IS_ERR(ethqos->pm.link_clk)) + return dev_err_probe(dev, PTR_ERR(ethqos->pm.link_clk), + "Failed to get link_clk\n"); + + ret = ethqos_clks_config(ethqos, true); + if (ret) + return ret; + + ret = devm_add_action_or_reset(dev, ethqos_clks_disable, ethqos); + if (ret) + return ret; + + ethqos->pm.serdes_phy = devm_phy_optional_get(dev, "serdes"); + if (IS_ERR(ethqos->pm.serdes_phy)) + return dev_err_probe(dev, PTR_ERR(ethqos->pm.serdes_phy), - "Failed to get serdes phy\n"); + + ethqos_update_link_clk(ethqos, SPEED_1000); + + plat_dat->clks_config = ethqos_clks_config; + plat_dat->ptp_clk_freq_config = ethqos_ptp_clk_freq_config; + + if (ethqos->pm.serdes_phy) { + plat_dat->serdes_powerup = qcom_ethqos_serdes_powerup; + plat_dat->serdes_powerdown = qcom_ethqos_serdes_powerdown; + } + } - ethqos->serdes_speed = SPEED_1000; - ethqos_update_link_clk(ethqos, SPEED_1000); ethqos_set_func_clk_en(ethqos); plat_dat->bsp_priv = ethqos; @@ -809,19 +1043,14 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat->ptp_clk_freq_config = ethqos_ptp_clk_freq_config; plat_dat->core_type = DWMAC_CORE_GMAC4; if (ethqos->has_emac_ge_3) - plat_dat->dwmac4_addrs = &data->dwmac4_addrs; + plat_dat->dwmac4_addrs = &drv_data->dwmac4_addrs; plat_dat->pmt = 1; - if (of_property_read_bool(np, "snps,tso")) + if (device_property_present(dev, "snps,tso")) plat_dat->flags |= STMMAC_FLAG_TSO_EN; - if (of_device_is_compatible(np, "qcom,qcs404-ethqos")) + if (device_is_compatible(dev, "qcom,qcs404-ethqos")) plat_dat->flags |= STMMAC_FLAG_RX_CLK_RUNS_IN_LPI; - if (data->dma_addr_width) - plat_dat->host_dma_width = data->dma_addr_width; - - if (ethqos->serdes_phy) { - 
plat_dat->serdes_powerup = qcom_ethqos_serdes_powerup; - plat_dat->serdes_powerdown = qcom_ethqos_serdes_powerdown; - } + if (drv_data->dma_addr_width) + plat_dat->host_dma_width = drv_data->dma_addr_width; /* Enable TSO on queue0 and enable TBS on rest of the queues */ for (i = 1; i < plat_dat->tx_queues_to_use; i++) @@ -831,10 +1060,11 @@ static int qcom_ethqos_probe(struct platform_device *pdev) } static const struct of_device_id qcom_ethqos_match[] = { - { .compatible = "qcom,qcs404-ethqos", .data = &emac_v2_3_0_data}, - { .compatible = "qcom,sa8775p-ethqos", .data = &emac_v4_0_0_data}, - { .compatible = "qcom,sc8280xp-ethqos", .data = &emac_v3_0_0_data}, - { .compatible = "qcom,sm8150-ethqos", .data = &emac_v2_1_0_data}, + { .compatible = "qcom,qcs404-ethqos", .data = &emac_qcs404_data}, + { .compatible = "qcom,sa8255p-ethqos", .data = &emac_sa8255p_data}, + { .compatible = "qcom,sa8775p-ethqos", .data = &emac_sa8775p_data}, + { .compatible = "qcom,sc8280xp-ethqos", .data = &emac_sc8280xp_data}, + { .compatible = "qcom,sm8150-ethqos", .data = &emac_sm8150_data}, { } }; MODULE_DEVICE_TABLE(of, qcom_ethqos_match); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index b155e71aac51f..c1e26965d9b5d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -914,20 +914,11 @@ static int stmmac_set_per_queue_coalesce(struct net_device *dev, u32 queue, return __stmmac_set_coalesce(dev, ec, queue); } -static int stmmac_get_rxnfc(struct net_device *dev, - struct ethtool_rxnfc *rxnfc, u32 *rule_locs) +static u32 stmmac_get_rx_ring_count(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - switch (rxnfc->cmd) { - case ETHTOOL_GRXRINGS: - rxnfc->data = priv->plat->rx_queues_to_use; - break; - default: - return -EOPNOTSUPP; - } - - return 0; + return priv->plat->rx_queues_to_use; } static u32 stmmac_get_rxfh_key_size(struct net_device *dev) @@ -1121,7 +1112,7 @@ static const struct ethtool_ops stmmac_ethtool_ops = { .get_eee = stmmac_ethtool_op_get_eee, .set_eee = stmmac_ethtool_op_set_eee, .get_sset_count = stmmac_get_sset_count, - .get_rxnfc = stmmac_get_rxnfc, + .get_rx_ring_count = stmmac_get_rx_ring_count, .get_rxfh_key_size = stmmac_get_rxfh_key_size, .get_rxfh_indir_size = stmmac_get_rxfh_indir_size, .get_rxfh = stmmac_get_rxfh, diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index a54d71155263c..fe5b2926d8ab0 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -209,6 +209,7 @@ config TI_ICSSG_PRUETH_SR1 depends on PRU_REMOTEPROC depends on NET_SWITCHDEV depends on ARCH_K3 && OF && TI_K3_UDMA_GLUE_LAYER + depends on PTP_1588_CLOCK_OPTIONAL help Support dual Gigabit Ethernet ports over the ICSSG PRU Subsystem. This subsystem is available on the AM65 SR1.0 platform. 
@@ -234,7 +235,7 @@ config TI_PRUETH depends on PRU_REMOTEPROC depends on NET_SWITCHDEV select TI_ICSS_IEP - imply PTP_1588_CLOCK + depends on PTP_1588_CLOCK_OPTIONAL help Some TI SoCs has Programmable Realtime Unit (PRU) cores which can support Single or Dual Ethernet ports with the help of firmware code diff --git a/drivers/net/ethernet/wangxun/Kconfig b/drivers/net/ethernet/wangxun/Kconfig index d138dea7d208d..ec278f99d2955 100644 --- a/drivers/net/ethernet/wangxun/Kconfig +++ b/drivers/net/ethernet/wangxun/Kconfig @@ -21,6 +21,7 @@ config LIBWX depends on PTP_1588_CLOCK_OPTIONAL select PAGE_POOL select DIMLIB + select PHYLINK help Common library for Wangxun(R) Ethernet drivers. @@ -29,7 +30,6 @@ config NGBE depends on PCI depends on PTP_1588_CLOCK_OPTIONAL select LIBWX - select PHYLINK help This driver supports Wangxun(R) GbE PCI Express family of adapters. @@ -48,7 +48,6 @@ config TXGBE depends on PTP_1588_CLOCK_OPTIONAL select MARVELL_10G_PHY select REGMAP - select PHYLINK select HWMON if TXGBE=y select SFP select GPIOLIB @@ -71,7 +70,6 @@ config TXGBEVF depends on PCI_MSI depends on PTP_1588_CLOCK_OPTIONAL select LIBWX - select PHYLINK help This driver supports virtual functions for SP1000A, WX1820AL, WX5XXX, WX5XXXAL. diff --git a/drivers/net/fjes/fjes_hw.c b/drivers/net/fjes/fjes_hw.c index b9b5554ea8620..5ad2673f213d6 100644 --- a/drivers/net/fjes/fjes_hw.c +++ b/drivers/net/fjes/fjes_hw.c @@ -334,7 +334,7 @@ int fjes_hw_init(struct fjes_hw *hw) ret = fjes_hw_reset(hw); if (ret) - return ret; + goto err_iounmap; fjes_hw_set_irqmask(hw, REG_ICTL_MASK_ALL, true); @@ -347,8 +347,10 @@ int fjes_hw_init(struct fjes_hw *hw) hw->max_epid = fjes_hw_get_max_epid(hw); hw->my_epid = fjes_hw_get_my_epid(hw); - if ((hw->max_epid == 0) || (hw->my_epid >= hw->max_epid)) - return -ENXIO; + if ((hw->max_epid == 0) || (hw->my_epid >= hw->max_epid)) { + ret = -ENXIO; + goto err_iounmap; + } ret = fjes_hw_setup(hw); @@ -356,6 +358,10 @@ int fjes_hw_init(struct fjes_hw *hw) hw->hw_info.trace_size = FJES_DEBUG_BUFFER_SIZE; return ret; + +err_iounmap: + fjes_hw_iounmap(hw); + return ret; } void fjes_hw_exit(struct fjes_hw *hw) diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 50de3ee204dbc..80f84fc87008b 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -69,7 +69,6 @@ struct ipvl_dev { DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE); netdev_features_t sfeatures; u32 msg_enable; - spinlock_t addrs_lock; }; struct ipvl_addr { @@ -90,6 +89,7 @@ struct ipvl_port { struct net_device *dev; possible_net_t pnet; struct hlist_head hlhead[IPVLAN_HASH_SIZE]; + spinlock_t addrs_lock; /* guards hash-table and addrs */ struct list_head ipvlans; u16 mode; u16 flags; diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index dea411e132dba..22cb5ee7a2310 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -109,14 +109,14 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, { struct ipvl_addr *addr, *ret = NULL; - rcu_read_lock(); - list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) { + assert_spin_locked(&ipvlan->port->addrs_lock); + + list_for_each_entry(addr, &ipvlan->addrs, anode) { if (addr_equal(is_v6, addr, iaddr)) { ret = addr; break; } } - rcu_read_unlock(); return ret; } @@ -125,14 +125,14 @@ bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6) struct ipvl_dev *ipvlan; bool ret = false; - rcu_read_lock(); - list_for_each_entry_rcu(ipvlan, &port->ipvlans, 
pnode) { + assert_spin_locked(&port->addrs_lock); + + list_for_each_entry(ipvlan, &port->ipvlans, pnode) { if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) { ret = true; break; } } - rcu_read_unlock(); return ret; } @@ -737,6 +737,9 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb, struct ethhdr *eth = eth_hdr(skb); rx_handler_result_t ret = RX_HANDLER_PASS; + if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) + return RX_HANDLER_PASS; + if (is_multicast_ether_addr(eth->h_dest)) { if (ipvlan_external_frame(skb, port)) { struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 660f3db117664..b0b4f747f162e 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -75,6 +75,7 @@ static int ipvlan_port_create(struct net_device *dev) for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++) INIT_HLIST_HEAD(&port->hlhead[idx]); + spin_lock_init(&port->addrs_lock); skb_queue_head_init(&port->backlog); INIT_WORK(&port->wq, ipvlan_process_multicast); ida_init(&port->ida); @@ -181,18 +182,18 @@ static void ipvlan_uninit(struct net_device *dev) static int ipvlan_open(struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); + struct ipvl_port *port = ipvlan->port; struct ipvl_addr *addr; - if (ipvlan->port->mode == IPVLAN_MODE_L3 || - ipvlan->port->mode == IPVLAN_MODE_L3S) + if (port->mode == IPVLAN_MODE_L3 || port->mode == IPVLAN_MODE_L3S) dev->flags |= IFF_NOARP; else dev->flags &= ~IFF_NOARP; - rcu_read_lock(); - list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) + spin_lock_bh(&port->addrs_lock); + list_for_each_entry(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_add(ipvlan, addr); - rcu_read_unlock(); + spin_unlock_bh(&port->addrs_lock); return 0; } @@ -206,10 +207,10 @@ static int ipvlan_stop(struct net_device *dev) dev_uc_unsync(phy_dev, dev); dev_mc_unsync(phy_dev, dev); - rcu_read_lock(); - list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) + spin_lock_bh(&ipvlan->port->addrs_lock); + list_for_each_entry(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_del(addr); - rcu_read_unlock(); + spin_unlock_bh(&ipvlan->port->addrs_lock); return 0; } @@ -579,7 +580,6 @@ int ipvlan_link_new(struct net_device *dev, struct rtnl_newlink_params *params, if (!tb[IFLA_MTU]) ipvlan_adjust_mtu(ipvlan, phy_dev); INIT_LIST_HEAD(&ipvlan->addrs); - spin_lock_init(&ipvlan->addrs_lock); /* TODO Probably put random address here to be presented to the * world but keep using the physical-dev address for the outgoing @@ -657,13 +657,13 @@ void ipvlan_link_delete(struct net_device *dev, struct list_head *head) struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_addr *addr, *next; - spin_lock_bh(&ipvlan->addrs_lock); + spin_lock_bh(&ipvlan->port->addrs_lock); list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { ipvlan_ht_addr_del(addr); list_del_rcu(&addr->anode); kfree_rcu(addr, rcu); } - spin_unlock_bh(&ipvlan->addrs_lock); + spin_unlock_bh(&ipvlan->port->addrs_lock); ida_free(&ipvlan->port->ida, dev->dev_id); list_del_rcu(&ipvlan->pnode); @@ -817,6 +817,8 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) { struct ipvl_addr *addr; + assert_spin_locked(&ipvlan->port->addrs_lock); + addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC); if (!addr) return -ENOMEM; @@ -847,16 +849,16 @@ static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) { struct ipvl_addr *addr; - spin_lock_bh(&ipvlan->addrs_lock); + 
spin_lock_bh(&ipvlan->port->addrs_lock); addr = ipvlan_find_addr(ipvlan, iaddr, is_v6); if (!addr) { - spin_unlock_bh(&ipvlan->addrs_lock); + spin_unlock_bh(&ipvlan->port->addrs_lock); return; } ipvlan_ht_addr_del(addr); list_del_rcu(&addr->anode); - spin_unlock_bh(&ipvlan->addrs_lock); + spin_unlock_bh(&ipvlan->port->addrs_lock); kfree_rcu(addr, rcu); } @@ -878,14 +880,14 @@ static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) { int ret = -EINVAL; - spin_lock_bh(&ipvlan->addrs_lock); + spin_lock_bh(&ipvlan->port->addrs_lock); if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) netif_err(ipvlan, ifup, ipvlan->dev, "Failed to add IPv6=%pI6c addr for %s intf\n", ip6_addr, ipvlan->dev->name); else ret = ipvlan_add_addr(ipvlan, ip6_addr, true); - spin_unlock_bh(&ipvlan->addrs_lock); + spin_unlock_bh(&ipvlan->port->addrs_lock); return ret; } @@ -924,21 +926,24 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused, struct in6_validator_info *i6vi = (struct in6_validator_info *)ptr; struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev; struct ipvl_dev *ipvlan = netdev_priv(dev); + int ret = NOTIFY_OK; if (!ipvlan_is_valid_dev(dev)) return NOTIFY_DONE; switch (event) { case NETDEV_UP: + spin_lock_bh(&ipvlan->port->addrs_lock); if (ipvlan_addr_busy(ipvlan->port, &i6vi->i6vi_addr, true)) { NL_SET_ERR_MSG(i6vi->extack, "Address already assigned to an ipvlan device"); - return notifier_from_errno(-EADDRINUSE); + ret = notifier_from_errno(-EADDRINUSE); } + spin_unlock_bh(&ipvlan->port->addrs_lock); break; } - return NOTIFY_OK; + return ret; } #endif @@ -946,14 +951,14 @@ static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) { int ret = -EINVAL; - spin_lock_bh(&ipvlan->addrs_lock); + spin_lock_bh(&ipvlan->port->addrs_lock); if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) netif_err(ipvlan, ifup, ipvlan->dev, "Failed to add IPv4=%pI4 on %s intf.\n", ip4_addr, ipvlan->dev->name); else ret = ipvlan_add_addr(ipvlan, ip4_addr, false); - spin_unlock_bh(&ipvlan->addrs_lock); + spin_unlock_bh(&ipvlan->port->addrs_lock); return ret; } @@ -995,21 +1000,24 @@ static int ipvlan_addr4_validator_event(struct notifier_block *unused, struct in_validator_info *ivi = (struct in_validator_info *)ptr; struct net_device *dev = (struct net_device *)ivi->ivi_dev->dev; struct ipvl_dev *ipvlan = netdev_priv(dev); + int ret = NOTIFY_OK; if (!ipvlan_is_valid_dev(dev)) return NOTIFY_DONE; switch (event) { case NETDEV_UP: + spin_lock_bh(&ipvlan->port->addrs_lock); if (ipvlan_addr_busy(ipvlan->port, &ivi->ivi_addr, false)) { NL_SET_ERR_MSG(ivi->extack, "Address already assigned to an ipvlan device"); - return notifier_from_errno(-EADDRINUSE); + ret = notifier_from_errno(-EADDRINUSE); } + spin_unlock_bh(&ipvlan->port->addrs_lock); break; } - return NOTIFY_OK; + return ret; } static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = { diff --git a/drivers/net/mdio/mdio-aspeed.c b/drivers/net/mdio/mdio-aspeed.c index e55be6dc9ae70..d6b9004c61dc1 100644 --- a/drivers/net/mdio/mdio-aspeed.c +++ b/drivers/net/mdio/mdio-aspeed.c @@ -63,6 +63,13 @@ static int aspeed_mdio_op(struct mii_bus *bus, u8 st, u8 op, u8 phyad, u8 regad, iowrite32(ctrl, ctx->base + ASPEED_MDIO_CTRL); + /* Workaround for read-after-write issue. + * The controller may return stale data if a read follows immediately + * after a write. A dummy read forces the hardware to update its + * internal state, ensuring that the next real read returns correct data. 
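+ * The value returned by the dummy read is intentionally discarded; the + * access only needs to reach the controller before the FIRE bit is + * polled below.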
+ */ + ioread32(ctx->base + ASPEED_MDIO_CTRL); + return readl_poll_timeout(ctx->base + ASPEED_MDIO_CTRL, ctrl, !(ctrl & ASPEED_MDIO_CTRL_FIRE), ASPEED_MDIO_INTERVAL_US, diff --git a/drivers/net/mdio/mdio-realtek-rtl9300.c b/drivers/net/mdio/mdio-realtek-rtl9300.c index 33694c3ff9a71..405a07075dd11 100644 --- a/drivers/net/mdio/mdio-realtek-rtl9300.c +++ b/drivers/net/mdio/mdio-realtek-rtl9300.c @@ -354,7 +354,6 @@ static int rtl9300_mdiobus_probe_one(struct device *dev, struct rtl9300_mdio_pri struct fwnode_handle *node) { struct rtl9300_mdio_chan *chan; - struct fwnode_handle *child; struct mii_bus *bus; u32 mdio_bus; int err; @@ -371,7 +370,7 @@ static int rtl9300_mdiobus_probe_one(struct device *dev, struct rtl9300_mdio_pri * compatible = "ethernet-phy-ieee802.3-c45". This does mean we can't * support both c45 and c22 on the same MDIO bus. */ - fwnode_for_each_child_node(node, child) + fwnode_for_each_child_node_scoped(node, child) if (fwnode_device_is_compatible(child, "ethernet-phy-ieee802.3-c45")) priv->smi_bus_is_c45[mdio_bus] = true; @@ -409,7 +408,6 @@ static int rtl9300_mdiobus_map_ports(struct device *dev) { struct rtl9300_mdio_priv *priv = dev_get_drvdata(dev); struct device *parent = dev->parent; - struct fwnode_handle *port; int err; struct fwnode_handle *ports __free(fwnode_handle) = @@ -418,7 +416,7 @@ static int rtl9300_mdiobus_map_ports(struct device *dev) return dev_err_probe(dev, -EINVAL, "%pfwP missing ethernet-ports\n", dev_fwnode(parent)); - fwnode_for_each_child_node(ports, port) { + fwnode_for_each_child_node_scoped(ports, port) { struct device_node *mdio_dn; u32 addr; u32 bus; diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 9cb4dfc242f5f..248b401bcaa42 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -1709,53 +1709,74 @@ static void send_ext_msg_udp(struct netconsole_target *nt, const char *msg, sysdata_len); } -static void write_ext_msg(struct console *con, const char *msg, - unsigned int len) +static void send_msg_udp(struct netconsole_target *nt, const char *msg, + unsigned int len) { - struct netconsole_target *nt; - unsigned long flags; - - if ((oops_only && !oops_in_progress) || list_empty(&target_list)) - return; + const char *tmp = msg; + int frag, left = len; - spin_lock_irqsave(&target_list_lock, flags); - list_for_each_entry(nt, &target_list, list) - if (nt->extended && nt->enabled && netif_running(nt->np.dev)) - send_ext_msg_udp(nt, msg, len); - spin_unlock_irqrestore(&target_list_lock, flags); + while (left > 0) { + frag = min(left, MAX_PRINT_CHUNK); + send_udp(nt, tmp, frag); + tmp += frag; + left -= frag; + } } -static void write_msg(struct console *con, const char *msg, unsigned int len) +/** + * netconsole_write - Generic function to send a msg to all targets + * @wctxt: nbcon write context + * @extended: "true" for extended console mode + * + * Given a nbcon write context, send the message to the netconsole + * targets + */ +static void netconsole_write(struct nbcon_write_context *wctxt, + bool extended) { - int frag, left; - unsigned long flags; struct netconsole_target *nt; - const char *tmp; if (oops_only && !oops_in_progress) return; - /* Avoid taking lock and disabling interrupts unnecessarily */ - if (list_empty(&target_list)) - return; - spin_lock_irqsave(&target_list_lock, flags); list_for_each_entry(nt, &target_list, list) { - if (!nt->extended && nt->enabled && netif_running(nt->np.dev)) { - /* - * We nest this inside the for-each-target loop above - * so that we're able to get as much 
logging out to - * at least one target if we die inside here, instead - * of unnecessarily keeping all targets in lock-step. - */ - tmp = msg; - for (left = len; left;) { - frag = min(left, MAX_PRINT_CHUNK); - send_udp(nt, tmp, frag); - tmp += frag; - left -= frag; - } - } + if (nt->extended != extended || !nt->enabled || + !netif_running(nt->np.dev)) + continue; + + if (!nbcon_enter_unsafe(wctxt)) + continue; + + if (extended) + send_ext_msg_udp(nt, wctxt->outbuf, wctxt->len); + else + send_msg_udp(nt, wctxt->outbuf, wctxt->len); + + nbcon_exit_unsafe(wctxt); } +} + +static void netconsole_write_ext(struct console *con __always_unused, + struct nbcon_write_context *wctxt) +{ + netconsole_write(wctxt, true); +} + +static void netconsole_write_basic(struct console *con __always_unused, + struct nbcon_write_context *wctxt) +{ + netconsole_write(wctxt, false); +} + +static void netconsole_device_lock(struct console *con __always_unused, + unsigned long *flags) +{ + spin_lock_irqsave(&target_list_lock, *flags); +} + +static void netconsole_device_unlock(struct console *con __always_unused, + unsigned long flags) +{ spin_unlock_irqrestore(&target_list_lock, flags); } @@ -1918,15 +1939,21 @@ static void free_param_target(struct netconsole_target *nt) } static struct console netconsole_ext = { - .name = "netcon_ext", - .flags = CON_ENABLED | CON_EXTENDED, - .write = write_ext_msg, + .name = "netcon_ext", + .flags = CON_ENABLED | CON_EXTENDED | CON_NBCON | CON_NBCON_ATOMIC_UNSAFE, + .write_thread = netconsole_write_ext, + .write_atomic = netconsole_write_ext, + .device_lock = netconsole_device_lock, + .device_unlock = netconsole_device_unlock, }; static struct console netconsole = { - .name = "netcon", - .flags = CON_ENABLED, - .write = write_msg, + .name = "netcon", + .flags = CON_ENABLED | CON_NBCON | CON_NBCON_ATOMIC_UNSAFE, + .write_thread = netconsole_write_basic, + .write_atomic = netconsole_write_basic, + .device_lock = netconsole_device_lock, + .device_unlock = netconsole_device_unlock, }; static int __init init_netconsole(void) diff --git a/drivers/net/phy/ax88796b_rust.rs b/drivers/net/phy/ax88796b_rust.rs index bc73ebccc2aac..2d24628a4e581 100644 --- a/drivers/net/phy/ax88796b_rust.rs +++ b/drivers/net/phy/ax88796b_rust.rs @@ -5,7 +5,6 @@ //! //! 
C version of this driver: [`drivers/net/phy/ax88796b.c`](./ax88796b.c) use kernel::{ - c_str, net::phy::{self, reg::C22, DeviceId, Driver}, prelude::*, uapi, @@ -41,7 +40,7 @@ struct PhyAX88772A; #[vtable] impl Driver for PhyAX88772A { const FLAGS: u32 = phy::flags::IS_INTERNAL; - const NAME: &'static CStr = c_str!("Asix Electronics AX88772A"); + const NAME: &'static CStr = c"Asix Electronics AX88772A"; const PHY_DEVICE_ID: DeviceId = DeviceId::new_with_exact_mask(0x003b1861); // AX88772A is not working properly with some old switches (NETGEAR EN 108TP): @@ -105,7 +104,7 @@ struct PhyAX88772C; #[vtable] impl Driver for PhyAX88772C { const FLAGS: u32 = phy::flags::IS_INTERNAL; - const NAME: &'static CStr = c_str!("Asix Electronics AX88772C"); + const NAME: &'static CStr = c"Asix Electronics AX88772C"; const PHY_DEVICE_ID: DeviceId = DeviceId::new_with_exact_mask(0x003b1881); fn suspend(dev: &mut phy::Device) -> Result { @@ -125,7 +124,7 @@ struct PhyAX88796B; #[vtable] impl Driver for PhyAX88796B { - const NAME: &'static CStr = c_str!("Asix Electronics AX88796B"); + const NAME: &'static CStr = c"Asix Electronics AX88796B"; const PHY_DEVICE_ID: DeviceId = DeviceId::new_with_model_mask(0x003b1841); fn soft_reset(dev: &mut phy::Device) -> Result { diff --git a/drivers/net/phy/marvell-88q2xxx.c b/drivers/net/phy/marvell-88q2xxx.c index f3d83b04c9535..201dee1a16985 100644 --- a/drivers/net/phy/marvell-88q2xxx.c +++ b/drivers/net/phy/marvell-88q2xxx.c @@ -698,7 +698,7 @@ static int mv88q2xxx_hwmon_write(struct device *dev, switch (attr) { case hwmon_temp_max: - clamp_val(val, -75000, 180000); + val = clamp(val, -75000, 180000); val = (val / 1000) + 75; val = FIELD_PREP(MDIO_MMD_PCS_MV_TEMP_SENSOR3_INT_THRESH_MASK, val); diff --git a/drivers/net/phy/mediatek/mtk-ge-soc.c b/drivers/net/phy/mediatek/mtk-ge-soc.c index cd09fbf92ef23..2c4bbc236202b 100644 --- a/drivers/net/phy/mediatek/mtk-ge-soc.c +++ b/drivers/net/phy/mediatek/mtk-ge-soc.c @@ -1167,9 +1167,9 @@ static int mt798x_phy_calibration(struct phy_device *phydev) } buf = (u32 *)nvmem_cell_read(cell, &len); + nvmem_cell_put(cell); if (IS_ERR(buf)) return PTR_ERR(buf); - nvmem_cell_put(cell); if (!buf[0] || !buf[1] || !buf[2] || !buf[3] || len < 4 * sizeof(u32)) { phydev_err(phydev, "invalid efuse data\n"); diff --git a/drivers/net/phy/mxl-86110.c b/drivers/net/phy/mxl-86110.c index e5d137a37a1d4..42a5fe3f115f4 100644 --- a/drivers/net/phy/mxl-86110.c +++ b/drivers/net/phy/mxl-86110.c @@ -938,6 +938,9 @@ static struct phy_driver mxl_phy_drvs[] = { PHY_ID_MATCH_EXACT(PHY_ID_MXL86110), .name = "MXL86110 Gigabit Ethernet", .config_init = mxl86110_config_init, + .suspend = genphy_suspend, + .resume = genphy_resume, + .soft_reset = genphy_soft_reset, .get_wol = mxl86110_get_wol, .set_wol = mxl86110_set_wol, .led_brightness_set = mxl86110_led_brightness_set, diff --git a/drivers/net/phy/qt2025.rs b/drivers/net/phy/qt2025.rs index aaaead6512a01..470d89a0ac006 100644 --- a/drivers/net/phy/qt2025.rs +++ b/drivers/net/phy/qt2025.rs @@ -9,7 +9,6 @@ //! //! The QT2025 PHY integrates an Intel 8051 micro-controller. 
-use kernel::c_str; use kernel::error::code; use kernel::firmware::Firmware; use kernel::io::poll::read_poll_timeout; @@ -38,7 +37,7 @@ struct PhyQT2025; #[vtable] impl Driver for PhyQT2025 { - const NAME: &'static CStr = c_str!("QT2025 10Gpbs SFP+"); + const NAME: &'static CStr = c"QT2025 10Gpbs SFP+"; const PHY_DEVICE_ID: phy::DeviceId = phy::DeviceId::new_with_exact_mask(0x0043a400); fn probe(dev: &mut phy::Device) -> Result<()> { @@ -71,7 +70,7 @@ impl Driver for PhyQT2025 { // The micro-controller will start running from the boot ROM. dev.write(C45::new(Mmd::PCS, 0xe854), 0x00c0)?; - let fw = Firmware::request(c_str!("qt2025-2.0.3.3.fw"), dev.as_ref())?; + let fw = Firmware::request(c"qt2025-2.0.3.3.fw", dev.as_ref())?; if fw.data().len() > SZ_16K + SZ_8K { return Err(code::EFBIG); } diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index 67ecf3d4af2b1..6ff0385201a57 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -691,10 +691,6 @@ static int rtl8211f_config_aldps(struct phy_device *phydev) static int rtl8211f_config_phy_eee(struct phy_device *phydev) { - /* RTL8211FVD has no PHYCR2 register */ - if (phydev->drv->phy_id == RTL_8211FVD_PHYID) - return 0; - /* Disable PHY-mode EEE so LPI is passed to the MAC */ return phy_modify_paged(phydev, RTL8211F_PHYCR_PAGE, RTL8211F_PHYCR2, RTL8211F_PHYCR2_PHY_EEE_ENABLE, 0); diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 0401fa6b24d25..6166e91963644 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -497,6 +497,8 @@ static const struct sfp_quirk sfp_quirks[] = { SFP_QUIRK("ALCATELLUCENT", "3FE46541AA", sfp_quirk_2500basex, sfp_fixup_nokia), + SFP_QUIRK_F("BIDB", "X-ONU-SFPP", sfp_fixup_potron), + // FLYPRO SFP-10GT-CS-30M uses Rollball protocol to talk to the PHY. SFP_QUIRK_F("FLYPRO", "SFP-10GT-CS-30M", sfp_fixup_rollball), diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index 4d5c9ae8f2219..c08a5c1bd6e4d 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -878,7 +878,7 @@ static void __team_queue_override_enabled_check(struct team *team) static void team_queue_override_port_prio_changed(struct team *team, struct team_port *port) { - if (!port->queue_id || team_port_enabled(port)) + if (!port->queue_id || !team_port_enabled(port)) return; __team_queue_override_port_del(team, port); __team_queue_override_port_add(team, port); diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 7fd763917ae2c..6ab3486072cb0 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -335,6 +335,11 @@ int asix_read_phy_addr(struct usbnet *dev, bool internal) offset = (internal ? 1 : 0); ret = buf[offset]; + if (ret >= PHY_MAX_ADDR) { + netdev_err(dev->net, "invalid PHY address: %d\n", ret); + return -ENODEV; + } + netdev_dbg(dev->net, "%s PHY address 0x%x\n", internal ? 
"internal" : "external", ret); diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c index f613e4bc68c85..758a423a459b8 100644 --- a/drivers/net/usb/ax88172a.c +++ b/drivers/net/usb/ax88172a.c @@ -210,11 +210,7 @@ static int ax88172a_bind(struct usbnet *dev, struct usb_interface *intf) ret = asix_read_phy_addr(dev, priv->use_embdphy); if (ret < 0) goto free; - if (ret >= PHY_MAX_ADDR) { - netdev_err(dev->net, "Invalid PHY address %#x\n", ret); - ret = -ENODEV; - goto free; - } + priv->phy_addr = ret; ax88172a_reset_phy(dev, priv->use_embdphy); diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 81ca64debc5b9..7a70207e7364e 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -168,6 +168,8 @@ static int update_eth_regs_async(pegasus_t *pegasus) netif_device_detach(pegasus->net); netif_err(pegasus, drv, pegasus->net, "%s returned %d\n", __func__, ret); + kfree(req); + usb_free_urb(async_urb); } return ret; } diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 278e6cb6f4d99..e40b0669d9f4b 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -211,6 +211,8 @@ static int async_set_registers(rtl8150_t *dev, u16 indx, u16 size, u16 reg) if (res == -ENODEV) netif_device_detach(dev->netdev); dev_err(&dev->udev->dev, "%s failed with %d\n", __func__, res); + kfree(req); + usb_free_urb(async_urb); } return res; } diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index 091bc2aca7e8e..820c4c5069792 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -52,7 +52,7 @@ static int sr_read_reg(struct usbnet *dev, u8 reg, u8 *value) static int sr_write_reg(struct usbnet *dev, u8 reg, u8 value) { - return usbnet_write_cmd(dev, SR_WR_REGS, SR_REQ_WR_REG, + return usbnet_write_cmd(dev, SR_WR_REG, SR_REQ_WR_REG, value, reg, NULL, 0); } @@ -65,7 +65,7 @@ static void sr_write_async(struct usbnet *dev, u8 reg, u16 length, static void sr_write_reg_async(struct usbnet *dev, u8 reg, u8 value) { - usbnet_write_cmd_async(dev, SR_WR_REGS, SR_REQ_WR_REG, + usbnet_write_cmd_async(dev, SR_WR_REG, SR_REQ_WR_REG, value, reg, NULL, 0); } @@ -539,6 +539,11 @@ static const struct usb_device_id products[] = { USB_DEVICE(0x0fe6, 0x9700), /* SR9700 device */ .driver_info = (unsigned long)&sr9700_driver_info, }, + { + /* SR9700 with virtual driver CD-ROM - interface 0 is the CD-ROM device */ + USB_DEVICE_INTERFACE_NUMBER(0x0fe6, 0x9702, 1), + .driver_info = (unsigned long)&sr9700_driver_info, + }, {}, /* END */ }; diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 1d9faa70ba3b7..36742e64cff75 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -831,7 +831,6 @@ int usbnet_stop(struct net_device *net) clear_bit(EVENT_DEV_OPEN, &dev->flags); netif_stop_queue(net); - netdev_reset_queue(net); netif_info(dev, ifdown, dev->net, "stop stats: rx/tx %lu/%lu, errs %lu/%lu\n", @@ -875,6 +874,8 @@ int usbnet_stop(struct net_device *net) timer_delete_sync(&dev->delay); cancel_work_sync(&dev->kevent); + netdev_reset_queue(net); + if (!pm) usb_autopm_put_interface(dev->intf); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index 3391f07b01de3..f8fc6f30fbe5f 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -1597,7 +1597,7 @@ static void _iwl_op_mode_stop(struct iwl_drv *drv) */ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context) { - 
unsigned int min_core, max_core, loaded_core; + int min_core, max_core, loaded_core; struct iwl_drv *drv = context; struct iwl_fw *fw = &drv->fw; const struct iwl_ucode_header *ucode; @@ -1676,7 +1676,7 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context) if (loaded_core < min_core || loaded_core > max_core) { IWL_ERR(drv, "Driver unable to support your firmware API. " - "Driver supports FW core %u..%u, firmware is %u.\n", + "Driver supports FW core %d..%d, firmware is %d.\n", min_core, max_core, loaded_core); goto try_again; } diff --git a/drivers/net/wireless/intel/iwlwifi/mld/ptp.c b/drivers/net/wireless/intel/iwlwifi/mld/ptp.c index ffeb37a7f830e..231920425c066 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/ptp.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/ptp.c @@ -121,6 +121,12 @@ static int iwl_mld_ptp_gettime(struct ptp_clock_info *ptp, return 0; } +static int iwl_mld_ptp_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + return -EOPNOTSUPP; +} + static int iwl_mld_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { struct iwl_mld *mld = container_of(ptp, struct iwl_mld, @@ -279,6 +285,7 @@ void iwl_mld_ptp_init(struct iwl_mld *mld) mld->ptp_data.ptp_clock_info.owner = THIS_MODULE; mld->ptp_data.ptp_clock_info.gettime64 = iwl_mld_ptp_gettime; + mld->ptp_data.ptp_clock_info.settime64 = iwl_mld_ptp_settime; mld->ptp_data.ptp_clock_info.max_adj = 0x7fffffff; mld->ptp_data.ptp_clock_info.adjtime = iwl_mld_ptp_adjtime; mld->ptp_data.ptp_clock_info.adjfine = iwl_mld_ptp_adjfine; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ptp.c b/drivers/net/wireless/intel/iwlwifi/mvm/ptp.c index 06a4c9f74797a..ad156b82eaa94 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ptp.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ptp.c @@ -220,6 +220,12 @@ static int iwl_mvm_ptp_gettime(struct ptp_clock_info *ptp, return 0; } +static int iwl_mvm_ptp_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + return -EOPNOTSUPP; +} + static int iwl_mvm_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { struct iwl_mvm *mvm = container_of(ptp, struct iwl_mvm, @@ -281,6 +287,7 @@ void iwl_mvm_ptp_init(struct iwl_mvm *mvm) mvm->ptp_data.ptp_clock_info.adjfine = iwl_mvm_ptp_adjfine; mvm->ptp_data.ptp_clock_info.adjtime = iwl_mvm_ptp_adjtime; mvm->ptp_data.ptp_clock_info.gettime64 = iwl_mvm_ptp_gettime; + mvm->ptp_data.ptp_clock_info.settime64 = iwl_mvm_ptp_settime; mvm->ptp_data.scaled_freq = SCALE_FACTOR; /* Give a short 'friendly name' to identify the PHC clock */ diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c index aa702ba7c9f54..d6c35e8d02a58 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c @@ -511,7 +511,8 @@ void rtl92cu_tx_fill_desc(struct ieee80211_hw *hw, if (sta) { sta_entry = (struct rtl_sta_info *)sta->drv_priv; tid = ieee80211_get_tid(hdr); - agg_state = sta_entry->tids[tid].agg.agg_state; + if (tid < MAX_TID_COUNT) + agg_state = sta_entry->tids[tid].agg.agg_state; ampdu_density = sta->deflink.ht_cap.ampdu_density; } diff --git a/drivers/net/wireless/realtek/rtw88/sdio.c b/drivers/net/wireless/realtek/rtw88/sdio.c index 99d7c629eac6f..e35de52d8eb43 100644 --- a/drivers/net/wireless/realtek/rtw88/sdio.c +++ b/drivers/net/wireless/realtek/rtw88/sdio.c @@ -144,8 +144,10 @@ static u32 rtw_sdio_to_io_address(struct rtw_dev *rtwdev, u32 addr, static bool 
rtw_sdio_use_direct_io(struct rtw_dev *rtwdev, u32 addr) { + bool might_indirect_under_power_off = rtwdev->chip->id == RTW_CHIP_TYPE_8822C; + if (!test_bit(RTW_FLAG_POWERON, rtwdev->flags) && - !rtw_sdio_is_bus_addr(addr)) + !rtw_sdio_is_bus_addr(addr) && might_indirect_under_power_off) return false; return !rtw_sdio_is_sdio30_supported(rtwdev) || diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c index 009202c627d25..3b5126ffc81a1 100644 --- a/drivers/net/wireless/realtek/rtw88/usb.c +++ b/drivers/net/wireless/realtek/rtw88/usb.c @@ -965,8 +965,7 @@ static int rtw_usb_init_rx(struct rtw_dev *rtwdev) struct sk_buff *rx_skb; int i; - rtwusb->rxwq = alloc_workqueue("rtw88_usb: rx wq", WQ_BH | WQ_UNBOUND, - 0); + rtwusb->rxwq = alloc_workqueue("rtw88_usb: rx wq", WQ_BH, 0); if (!rtwusb->rxwq) { rtw_err(rtwdev, "failed to create RX work queue\n"); return -ENOMEM; diff --git a/drivers/net/wireless/ti/wlcore/tx.c b/drivers/net/wireless/ti/wlcore/tx.c index f76087be2f758..6241866d39df6 100644 --- a/drivers/net/wireless/ti/wlcore/tx.c +++ b/drivers/net/wireless/ti/wlcore/tx.c @@ -207,6 +207,11 @@ static int wl1271_tx_allocate(struct wl1271 *wl, struct wl12xx_vif *wlvif, total_blocks = wlcore_hw_calc_tx_blocks(wl, total_len, spare_blocks); if (total_blocks <= wl->tx_blocks_available) { + if (skb_headroom(skb) < (total_len - skb->len) && + pskb_expand_head(skb, (total_len - skb->len), 0, GFP_ATOMIC)) { + wl1271_free_tx_id(wl, id); + return -EAGAIN; + } desc = skb_push(skb, total_len - skb->len); wlcore_hw_set_tx_desc_blocks(wl, desc, total_blocks, diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index ffd7367ce1194..018a80674f06e 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c @@ -406,7 +406,7 @@ static int pn533_acr122_poweron_rdr(struct pn533_usb_phy *phy) if (rc || (transferred != sizeof(cmd))) { nfc_err(&phy->udev->dev, "Reader power on cmd error %d\n", rc); - return rc; + return rc ?: -EINVAL; } rc = usb_submit_urb(phy->in_urb, GFP_KERNEL); diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index c61cf9edac48b..06f71011fb041 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -195,6 +195,8 @@ static long ptp_clock_getcaps(struct ptp_clock *ptp, void __user *arg) if (caps.adjust_phase) caps.max_phase_adj = ptp->info->getmaxphase(ptp->info); + memset(caps.rsv, 0, sizeof(caps.rsv)); + return copy_to_user(arg, &caps, sizeof(caps)) ? -EFAULT : 0; } diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 01873c2a34ad2..5bad43e85ee1a 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -77,7 +77,6 @@ config EXT4_DEBUG config EXT4_KUNIT_TESTS tristate "KUnit tests for ext4" if !KUNIT_ALL_TESTS depends on EXT4_FS && KUNIT - default KUNIT_ALL_TESTS help This builds the ext4 KUnit tests. 
diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig index 25fae1c83725b..79c8502ba9115 100644 --- a/fs/fat/Kconfig +++ b/fs/fat/Kconfig @@ -121,7 +121,6 @@ config FAT_DEFAULT_UTF8 config FAT_KUNIT_TEST tristate "Unit Tests for FAT filesystems" if !KUNIT_ALL_TESTS depends on KUNIT && FAT_FS - default KUNIT_ALL_TESTS help This builds the FAT KUnit tests diff --git a/fs/file_attr.c b/fs/file_attr.c index 4c4916632f11d..13cdb31a3e947 100644 --- a/fs/file_attr.c +++ b/fs/file_attr.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -298,6 +299,7 @@ int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, err = inode->i_op->fileattr_set(idmap, dentry, fa); if (err) goto out; + fsnotify_xattr(dentry); } out: diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index d27ff5e5f165b..71bd44e5ab6da 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -270,8 +270,15 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, /* * Include parent/name in notification either if some notification * groups require parent info or the parent is interested in this event. + * The parent interest in ACCESS/MODIFY events does not apply to special + * files, where read/write are not on the filesystem of the parent and + * events can provide an undesirable side-channel for information + * exfiltration. */ - parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS; + parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS && + !(data_type == FSNOTIFY_EVENT_PATH && + d_is_special(dentry) && + (mask & (FS_ACCESS | FS_MODIFY))); if (parent_needed || parent_interested) { /* When notifying parent, child should be passed as data */ WARN_ON_ONCE(inode != fsnotify_data_inode(data, data_type)); diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index e9534258d1efd..75d372ceb6553 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -145,6 +145,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 57 -#define CIFS_VERSION "2.57" +#define SMB3_PRODUCT_BUILD 58 +#define CIFS_VERSION "2.58" #endif /* _CIFSFS_H */ diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index eeb4011cb217d..fdd84369e7b8b 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -12,7 +12,7 @@ #include #include #include "../common/smbfsctl.h" -#include "../common/smb2pdu.h" +#include "../common/smb1pdu.h" #define CIFS_PROT 0 #define POSIX_PROT (CIFS_PROT+1) diff --git a/fs/smb/common/smb1pdu.h b/fs/smb/common/smb1pdu.h new file mode 100644 index 0000000000000..df6d4e11ae929 --- /dev/null +++ b/fs/smb/common/smb1pdu.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ +/* + * + * Copyright (C) International Business Machines Corp., 2002,2009 + * 2018 Samsung Electronics Co., Ltd. 
+ * Author(s): Steve French + * Namjae Jeon + * + */ + +#ifndef _COMMON_SMB1_PDU_H +#define _COMMON_SMB1_PDU_H + +#define SMB1_PROTO_NUMBER cpu_to_le32(0x424d53ff) + +/* + * See MS-CIFS 2.2.3.1 + * MS-SMB 2.2.3.1 + */ +struct smb_hdr { + __u8 Protocol[4]; + __u8 Command; + union { + struct { + __u8 ErrorClass; + __u8 Reserved; + __le16 Error; + } __packed DosError; + __le32 CifsError; + } __packed Status; + __u8 Flags; + __le16 Flags2; /* note: le */ + __le16 PidHigh; + union { + struct { + __le32 SequenceNumber; /* le */ + __u32 Reserved; /* zero */ + } __packed Sequence; + __u8 SecuritySignature[8]; /* le */ + } __packed Signature; + __u8 pad[2]; + __u16 Tid; + __le16 Pid; + __u16 Uid; + __le16 Mid; + __u8 WordCount; +} __packed; + +/* See MS-CIFS 2.2.4.52.1 */ +typedef struct smb_negotiate_req { + struct smb_hdr hdr; /* wct = 0 */ + __le16 ByteCount; + unsigned char DialectsArray[]; +} __packed SMB_NEGOTIATE_REQ; + +#endif /* _COMMON_SMB1_PDU_H */ diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 3c8d8a4e74393..f5ebbe31384ae 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -1293,6 +1293,7 @@ struct create_durable_handle_reconnect_v2 { struct create_context_hdr ccontext; __u8 Name[8]; struct durable_reconnect_context_v2 dcontext; + __u8 Pad[4]; } __packed; /* See MS-SMB2 2.2.14.2.12 */ @@ -1985,39 +1986,6 @@ struct smb2_lease_ack { __le64 LeaseDuration; } __packed; -/* - * See MS-CIFS 2.2.3.1 - * MS-SMB 2.2.3.1 - */ -struct smb_hdr { - __u8 Protocol[4]; - __u8 Command; - union { - struct { - __u8 ErrorClass; - __u8 Reserved; - __le16 Error; - } __packed DosError; - __le32 CifsError; - } __packed Status; - __u8 Flags; - __le16 Flags2; /* note: le */ - __le16 PidHigh; - union { - struct { - __le32 SequenceNumber; /* le */ - __u32 Reserved; /* zero */ - } __packed Sequence; - __u8 SecuritySignature[8]; /* le */ - } __packed Signature; - __u8 pad[2]; - __u16 Tid; - __le16 Pid; - __u16 Uid; - __le16 Mid; - __u8 WordCount; -} __packed; - #define OP_BREAK_STRUCT_SIZE_20 24 #define OP_BREAK_STRUCT_SIZE_21 36 @@ -2122,11 +2090,4 @@ struct smb_hdr { #define SET_MINIMUM_RIGHTS (FILE_READ_EA | FILE_READ_ATTRIBUTES \ | READ_CONTROL | SYNCHRONIZE) -/* See MS-CIFS 2.2.4.52.1 */ -typedef struct smb_negotiate_req { - struct smb_hdr hdr; /* wct = 0 */ - __le16 ByteCount; - unsigned char DialectsArray[]; -} __packed SMB_NEGOTIATE_REQ; - #endif /* _COMMON_SMB2PDU_H */ diff --git a/fs/smb/common/smbglob.h b/fs/smb/common/smbglob.h index 9562845a56175..4e33d91cdc9db 100644 --- a/fs/smb/common/smbglob.h +++ b/fs/smb/common/smbglob.h @@ -11,8 +11,6 @@ #ifndef _COMMON_SMB_GLOB_H #define _COMMON_SMB_GLOB_H -#define SMB1_PROTO_NUMBER cpu_to_le32(0x424d53ff) - struct smb_version_values { char *version_string; __u16 protocol_id; diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h index 067b45048c732..95bf1465387b9 100644 --- a/fs/smb/server/smb_common.h +++ b/fs/smb/server/smb_common.h @@ -10,6 +10,7 @@ #include "glob.h" #include "../common/smbglob.h" +#include "../common/smb1pdu.h" #include "../common/smb2pdu.h" #include "../common/fscc.h" #include "smb2pdu.h" diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1c54aa6f74fbc..e13a206408030 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -705,23 +705,12 @@ struct mlx5_st; enum { MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0, - MLX5_PROF_MASK_MR_CACHE = (u64)1 << 1, -}; - -enum { - MKEY_CACHE_LAST_STD_ENTRY = 20, - MLX5_IMR_KSM_CACHE_ENTRY, - MAX_MKEY_CACHE_ENTRIES 
}; struct mlx5_profile { u64 mask; u8 log_max_qp; u8 num_cmd_caches; - struct { - int size; - int limit; - } mr_cache[MAX_MKEY_CACHE_ENTRIES]; }; struct mlx5_hca_cap { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5870a9e514a51..660f05868654a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5323,7 +5323,8 @@ netdev_features_t netdev_increment_features(netdev_features_t all, static inline netdev_features_t netdev_add_tso_features(netdev_features_t features, netdev_features_t mask) { - return netdev_increment_features(features, NETIF_F_ALL_TSO, mask); + return (features & NETIF_F_ALL_TSO) ? features : + netdev_increment_features(features, NETIF_F_ALL_TSO, mask); } int __netdev_update_features(struct net_device *dev); diff --git a/include/net/dsa.h b/include/net/dsa.h index cced1a8667578..6b2b5ed64ea4c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -302,6 +302,7 @@ struct dsa_port { struct devlink_port devlink_port; struct phylink *pl; struct phylink_config pl_config; + netdevice_tracker conduit_tracker; struct dsa_lag *lag; struct net_device *hsr_dev; diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 0eccd9c3a883f..365925c9d2628 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -123,27 +123,15 @@ void inet_frags_fini(struct inet_frags *); int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net); -static inline void fqdir_pre_exit(struct fqdir *fqdir) -{ - /* Prevent creation of new frags. - * Pairs with READ_ONCE() in inet_frag_find(). - */ - WRITE_ONCE(fqdir->high_thresh, 0); - - /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire() - * and ip6frag_expire_frag_queue(). - */ - WRITE_ONCE(fqdir->dead, true); -} +void fqdir_pre_exit(struct fqdir *fqdir); void fqdir_exit(struct fqdir *fqdir); void inet_frag_kill(struct inet_frag_queue *q, int *refs); void inet_frag_destroy(struct inet_frag_queue *q); struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key); -/* Free all skbs in the queue; return the sum of their truesizes. */ -unsigned int inet_frag_rbtree_purge(struct rb_root *root, - enum skb_drop_reason reason); +void inet_frag_queue_flush(struct inet_frag_queue *q, + enum skb_drop_reason reason); static inline void inet_frag_putn(struct inet_frag_queue *q, int refs) { diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 38ef66826939e..41d9fc6965f9a 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -69,9 +69,6 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) int refs = 1; rcu_read_lock(); - /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */ - if (READ_ONCE(fq->q.fqdir->dead)) - goto out_rcu_unlock; spin_lock(&fq->q.lock); if (fq->q.flags & INET_FRAG_COMPLETE) @@ -80,6 +77,12 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) fq->q.flags |= INET_FRAG_DROP; inet_frag_kill(&fq->q, &refs); + /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). 
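+ * The fqdir is being torn down: drop the queued fragments right away + * instead of running the normal expiry path for a dying namespace.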
*/ + if (READ_ONCE(fq->q.fqdir->dead)) { + inet_frag_queue_flush(&fq->q, 0); + goto out; + } + dev = dev_get_by_index_rcu(net, fq->iif); if (!dev) goto out; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index fab7dc73f738c..0e266c2d0e7f0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1091,6 +1091,29 @@ struct nft_rule_blob { __attribute__((aligned(__alignof__(struct nft_rule_dp)))); }; +enum nft_chain_types { + NFT_CHAIN_T_DEFAULT = 0, + NFT_CHAIN_T_ROUTE, + NFT_CHAIN_T_NAT, + NFT_CHAIN_T_MAX +}; + +/** + * struct nft_chain_validate_state - validation state + * + * If a chain is encountered again during table validation it is + * possible to avoid revalidation provided the calling context is + * compatible. This structure stores relevant calling context of + * previous validations. + * + * @hook_mask: the hook numbers and locations the chain is linked to + * @depth: the deepest call chain level the chain is linked to + */ +struct nft_chain_validate_state { + u8 hook_mask[NFT_CHAIN_T_MAX]; + u8 depth; +}; + /** * struct nft_chain - nf_tables chain * @@ -1109,6 +1132,7 @@ struct nft_rule_blob { * @udlen: user data length * @udata: user data in the chain * @blob_next: rule blob pointer to the next in the chain + * @vstate: validation state */ struct nft_chain { struct nft_rule_blob __rcu *blob_gen_0; @@ -1128,9 +1152,10 @@ struct nft_chain { /* Only used during control plane commit phase: */ struct nft_rule_blob *blob_next; + struct nft_chain_validate_state vstate; }; -int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); +int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain); int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, struct nft_elem_priv *elem_priv); @@ -1138,13 +1163,6 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set); int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); -enum nft_chain_types { - NFT_CHAIN_T_DEFAULT = 0, - NFT_CHAIN_T_ROUTE, - NFT_CHAIN_T_NAT, - NFT_CHAIN_T_MAX -}; - /** * struct nft_chain_type - nf_tables chain type info * diff --git a/include/rdma/frmr_pools.h b/include/rdma/frmr_pools.h new file mode 100644 index 0000000000000..333ce31fc762e --- /dev/null +++ b/include/rdma/frmr_pools.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + */ + +#ifndef FRMR_POOLS_H +#define FRMR_POOLS_H + +#include +#include + +struct ib_device; +struct ib_mr; + +struct ib_frmr_key { + u64 vendor_key; + /* A pool with non-zero kernel_vendor_key is a kernel-only pool. 
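+ * Kernel-only pools are thus keyed separately, so FRMRs cached for + * in-kernel consumers never match a user-space key.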
*/ + u64 kernel_vendor_key; + size_t num_dma_blocks; + int access_flags; + u8 ats:1; +}; + +struct ib_frmr_pool_ops { + int (*create_frmrs)(struct ib_device *device, struct ib_frmr_key *key, + u32 *handles, u32 count); + void (*destroy_frmrs)(struct ib_device *device, u32 *handles, + u32 count); + int (*build_key)(struct ib_device *device, const struct ib_frmr_key *in, + struct ib_frmr_key *out); +}; + +int ib_frmr_pools_init(struct ib_device *device, + const struct ib_frmr_pool_ops *pool_ops); +void ib_frmr_pools_cleanup(struct ib_device *device); +int ib_frmr_pool_pop(struct ib_device *device, struct ib_mr *mr); +int ib_frmr_pool_push(struct ib_device *device, struct ib_mr *mr); + +#endif /* FRMR_POOLS_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6aad66bc5dd74..62c6f1b86f1f3 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -43,6 +43,7 @@ #include #include #include +#include #define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN @@ -1887,6 +1888,11 @@ struct ib_mr { struct ib_dm *dm; struct ib_sig_attrs *sig_attrs; /* only for IB_MR_TYPE_INTEGRITY MRs */ struct ib_dmah *dmah; + struct { + struct ib_frmr_pool *pool; + struct ib_frmr_key key; + u32 handle; + } frmr; /* * Implementation details of the RDMA core, don't use in drivers: */ @@ -2880,6 +2886,8 @@ struct ib_device { struct list_head subdev_list; enum rdma_nl_name_assign_type name_assign_type; + + struct ib_frmr_pools *frmr_pools; }; static inline void *rdma_zalloc_obj(struct ib_device *dev, size_t size, diff --git a/include/uapi/linux/energy_model.h b/include/uapi/linux/energy_model.h index 4ec4c0eabbbbc..0bcad967854ff 100644 --- a/include/uapi/linux/energy_model.h +++ b/include/uapi/linux/energy_model.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/em.yaml */ /* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _UAPI_LINUX_ENERGY_MODEL_H #define _UAPI_LINUX_ENERGY_MODEL_H diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 04eea6d1d0a9b..72a5d030154e1 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -40,6 +40,7 @@ #define MPTCP_PM_ADDR_FLAG_FULLMESH _BITUL(3) #define MPTCP_PM_ADDR_FLAG_IMPLICIT _BITUL(4) #define MPTCP_PM_ADDR_FLAG_LAMINAR _BITUL(5) +#define MPTCP_PM_ADDR_FLAGS_MASK GENMASK(5, 0) struct mptcp_info { __u8 mptcpi_subflows; diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index f41f0228fcd0e..aac9782ddc09a 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -308,6 +308,10 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_MONITOR, + RDMA_NLDEV_CMD_FRMR_POOLS_GET, /* can dump */ + + RDMA_NLDEV_CMD_FRMR_POOLS_SET, + RDMA_NLDEV_NUM_OPS }; @@ -582,6 +586,24 @@ enum rdma_nldev_attr { RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, /* u8 */ RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED, /* u8 */ + + /* + * FRMR Pools attributes + */ + RDMA_NLDEV_ATTR_FRMR_POOLS, /* nested table */ + RDMA_NLDEV_ATTR_FRMR_POOL_ENTRY, /* nested table */ + RDMA_NLDEV_ATTR_FRMR_POOL_KEY, /* nested table */ + RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ATS, /* u8 */ + RDMA_NLDEV_ATTR_FRMR_POOL_KEY_ACCESS_FLAGS, /* u32 */ + RDMA_NLDEV_ATTR_FRMR_POOL_KEY_VENDOR_KEY, /* u64 */ + RDMA_NLDEV_ATTR_FRMR_POOL_KEY_NUM_DMA_BLOCKS, /* u64 */ + RDMA_NLDEV_ATTR_FRMR_POOL_QUEUE_HANDLES, /* u32 */ + RDMA_NLDEV_ATTR_FRMR_POOL_MAX_IN_USE, /* u64 */ + RDMA_NLDEV_ATTR_FRMR_POOL_IN_USE, /* u64 */ + RDMA_NLDEV_ATTR_FRMR_POOLS_AGING_PERIOD, /* u32 */ + 
RDMA_NLDEV_ATTR_FRMR_POOL_PINNED_HANDLES, /* u32 */ + RDMA_NLDEV_ATTR_FRMR_POOL_KEY_KERNEL_VENDOR_KEY, /* u64 */ + /* * Always the end */ diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config index 9f6ab7dabf672..de2c0ede59c45 100644 --- a/kernel/configs/debug.config +++ b/kernel/configs/debug.config @@ -58,7 +58,7 @@ CONFIG_DEBUG_NET=y CONFIG_PAGE_EXTENSION=y CONFIG_PAGE_OWNER=y CONFIG_DEBUG_KMEMLEAK=y -CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN=y +CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN=n CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1 CONFIG_DEBUG_OBJECTS_FREE=y diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 1b4254d19a73e..c49dee35fc520 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -144,7 +144,6 @@ config IRQ_KUNIT_TEST bool "KUnit tests for IRQ management APIs" if !KUNIT_ALL_TESTS depends on KUNIT=y depends on SPARSE_IRQ - default KUNIT_ALL_TESTS select IRQ_DOMAIN imply SMP help diff --git a/kernel/power/em_netlink_autogen.c b/kernel/power/em_netlink_autogen.c index a7a09ab1d1c21..ceb3b2bb6ebe0 100644 --- a/kernel/power/em_netlink_autogen.c +++ b/kernel/power/em_netlink_autogen.c @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/em.yaml */ /* YNL-GEN kernel source */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #include #include diff --git a/kernel/power/em_netlink_autogen.h b/kernel/power/em_netlink_autogen.h index 78ce609641f11..140ab548103ce 100644 --- a/kernel/power/em_netlink_autogen.h +++ b/kernel/power/em_netlink_autogen.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/em.yaml */ /* YNL-GEN kernel header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _LINUX_EM_GEN_H #define _LINUX_EM_GEN_H diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e575956ef9b5a..6f2148df14d96 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -10507,7 +10507,7 @@ static int __remove_instance(struct trace_array *tr) /* Disable all the flags that were enabled coming in */ for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) { - if ((1 << i) & ZEROED_TRACE_FLAGS) + if ((1ULL << i) & ZEROED_TRACE_FLAGS) set_tracer_flag(tr, 1ULL << i, 0); } diff --git a/lib/crypto/tests/Kconfig b/lib/crypto/tests/Kconfig index 61d435c450bb4..be06bd634b3f8 100644 --- a/lib/crypto/tests/Kconfig +++ b/lib/crypto/tests/Kconfig @@ -3,7 +3,6 @@ config CRYPTO_LIB_BLAKE2B_KUNIT_TEST tristate "KUnit tests for BLAKE2b" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE select CRYPTO_LIB_BLAKE2B help @@ -12,7 +11,6 @@ config CRYPTO_LIB_BLAKE2B_KUNIT_TEST config CRYPTO_LIB_BLAKE2S_KUNIT_TEST tristate "KUnit tests for BLAKE2s" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE # No need to select CRYPTO_LIB_BLAKE2S here, as that option doesn't # exist; the BLAKE2s code is always built-in for the /dev/random driver. 
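A note on the trace.c hunk earlier in this patch: it promotes the flag test to a 64-bit shift because once the tracer flag mask is wider than 32 bits, a plain 1 << i is an int shift and is undefined (in practice, wrong) for i >= 31, so high flag bits would never match. A minimal standalone sketch of the pattern — plain C, not kernel code, with a made-up mask value:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical 64-bit flag mask; stands in for ZEROED_TRACE_FLAGS. */
#define MOCK_ZEROED_FLAGS (((uint64_t)1 << 40) | 1)

int main(void)
{
	int i;

	for (i = 0; i < 64; i++) {
		/* (uint64_t)1 << i mirrors the patched (1ULL << i); a bare
		 * 1 << i would shift a 32-bit int and miss bit 40 entirely.
		 */
		if (((uint64_t)1 << i) & MOCK_ZEROED_FLAGS)
			printf("flag bit %d is set\n", i);
	}
	return 0;
}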
@@ -22,7 +20,6 @@ config CRYPTO_LIB_BLAKE2S_KUNIT_TEST config CRYPTO_LIB_CURVE25519_KUNIT_TEST tristate "KUnit tests for Curve25519" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE select CRYPTO_LIB_CURVE25519 help @@ -31,7 +28,6 @@ config CRYPTO_LIB_CURVE25519_KUNIT_TEST config CRYPTO_LIB_MD5_KUNIT_TEST tristate "KUnit tests for MD5" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE select CRYPTO_LIB_MD5 help @@ -41,7 +37,6 @@ config CRYPTO_LIB_MD5_KUNIT_TEST config CRYPTO_LIB_POLY1305_KUNIT_TEST tristate "KUnit tests for Poly1305" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE select CRYPTO_LIB_POLY1305 help @@ -50,7 +45,6 @@ config CRYPTO_LIB_POLY1305_KUNIT_TEST config CRYPTO_LIB_POLYVAL_KUNIT_TEST tristate "KUnit tests for POLYVAL" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE select CRYPTO_LIB_POLYVAL help @@ -59,7 +53,6 @@ config CRYPTO_LIB_POLYVAL_KUNIT_TEST config CRYPTO_LIB_SHA1_KUNIT_TEST tristate "KUnit tests for SHA-1" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE select CRYPTO_LIB_SHA1 help @@ -71,7 +64,6 @@ config CRYPTO_LIB_SHA1_KUNIT_TEST config CRYPTO_LIB_SHA256_KUNIT_TEST tristate "KUnit tests for SHA-224 and SHA-256" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE select CRYPTO_LIB_SHA256 help @@ -83,7 +75,6 @@ config CRYPTO_LIB_SHA256_KUNIT_TEST config CRYPTO_LIB_SHA512_KUNIT_TEST tristate "KUnit tests for SHA-384 and SHA-512" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE select CRYPTO_LIB_SHA512 help @@ -93,7 +84,6 @@ config CRYPTO_LIB_SHA512_KUNIT_TEST config CRYPTO_LIB_SHA3_KUNIT_TEST tristate "KUnit tests for SHA-3" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE select CRYPTO_LIB_SHA3 help diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index c11cdef42b6f6..5be9b8c919490 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -849,6 +849,12 @@ static u32 get_supported_settings(struct hci_dev *hdev) if (cis_peripheral_capable(hdev)) settings |= MGMT_SETTING_CIS_PERIPHERAL; + if (bis_capable(hdev)) + settings |= MGMT_SETTING_ISO_BROADCASTER; + + if (sync_recv_capable(hdev)) + settings |= MGMT_SETTING_ISO_SYNC_RECEIVER; + if (ll_privacy_capable(hdev)) settings |= MGMT_SETTING_LL_PRIVACY; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 7280c4e9305f3..b9b2981c48414 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -247,6 +247,7 @@ struct net_bridge_vlan { * struct net_bridge_vlan_group * * @vlan_hash: VLAN entry rhashtable + * @tunnel_hash: Hash table to map from tunnel key ID (e.g. VXLAN VNI) to VLAN * @vlan_list: sorted VLAN entry list * @num_vlans: number of total VLAN entries * @pvid: PVID VLAN id diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c index 6651a8dc62e04..d4d63586053ad 100644 --- a/net/caif/cffrml.c +++ b/net/caif/cffrml.c @@ -92,8 +92,15 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt) len = le16_to_cpu(tmp); /* Subtract for FCS on length if FCS is not used. 
*/ - if (!this->dofcs) + if (!this->dofcs) { + if (len < 2) { + ++cffrml_rcv_error; + pr_err("Invalid frame length (%d)\n", len); + cfpkt_destroy(pkt); + return -EPROTO; + } len -= 2; + } if (cfpkt_setlen(pkt, len) < 0) { ++cffrml_rcv_error; diff --git a/net/can/Kconfig b/net/can/Kconfig index e4ccf731a24ce..af64a6f764588 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -5,7 +5,6 @@ menuconfig CAN tristate "CAN bus subsystem support" - select CAN_DEV help Controller Area Network (CAN) is a slow (up to 1Mbit/s) serial communications protocol. Development of the CAN bus started in diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 6272326dd614a..ff9c4fd7b4337 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -482,6 +482,12 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr_unsized *uaddr, in goto out_release_sock; } + if (ndev->reg_state != NETREG_REGISTERED) { + dev_put(ndev); + ret = -ENODEV; + goto out_release_sock; + } + can_ml = can_get_ml_priv(ndev); if (!can_ml) { dev_put(ndev); diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index fbf5c8001c9df..613a911dda100 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1567,6 +1567,8 @@ int j1939_session_activate(struct j1939_session *session) if (active) { j1939_session_put(active); ret = -EAGAIN; + } else if (priv->ndev->reg_state != NETREG_REGISTERED) { + ret = -ENODEV; } else { WARN_ON_ONCE(session->state != J1939_SESSION_NEW); list_add_tail(&session->active_session_list_entry, diff --git a/net/core/dev.c b/net/core/dev.c index 9094c0fb8c689..36dc5199037ed 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4241,9 +4241,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, int count = 0; llist_for_each_entry_safe(skb, next, ll_list, ll_node) { - prefetch(next); - prefetch(&next->priority); - skb_mark_not_on_list(skb); + if (next) { + prefetch(next); + prefetch(&next->priority); + skb_mark_not_on_list(skb); + } rc = dev_qdisc_enqueue(skb, q, &to_free, txq); count++; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a00808f7be6a1..419bda42560ad 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -622,7 +622,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node, return obj; } -/* Allocate a new skbuff. We do this ourselves so we can fill in a few +/* Allocate a new skbuff. We do this ourselves so we can fill in a few * 'private' fields and also do memory statistics to find all the * [BEEP] leaks. * @@ -5157,11 +5157,14 @@ void __init skb_init(void) NULL); skbuff_cache_size = kmem_cache_size(net_hotdata.skbuff_cache); - net_hotdata.skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", - sizeof(struct sk_buff_fclones), - 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL); + net_hotdata.skbuff_fclone_cache = kmem_cache_create_usercopy("skbuff_fclone_cache", + sizeof(struct sk_buff_fclones), + 0, + SLAB_HWCACHE_ALIGN | SLAB_PANIC, + offsetof(struct sk_buff, cb), + sizeof(struct sk_buff) + sizeof_field(struct sk_buff, cb), + NULL); + /* usercopy should only access first SKB_SMALL_HEAD_HEADROOM bytes. * struct skb_shared_info is located at the end of skb->head, * and should not be copied to/from user. 
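The skbuff_fclone_cache hunk above switches to kmem_cache_create_usercopy() so hardened usercopy will accept accesses to the cb[] area of both buffers in a clone pair. The whitelisted window starts at the first buffer's cb offset and is sized to run through the end of the second buffer's cb[]. A hedged sketch of that arithmetic with a mock layout (mock_skb is an invented stand-in for struct sk_buff, not the real structure):

#include <stddef.h>
#include <stdio.h>

struct mock_skb {
	void *head;
	char cb[48];		/* control buffer exposed to usercopy */
	unsigned int len;
};

struct mock_fclones {		/* stands in for struct sk_buff_fclones */
	struct mock_skb skb1;
	struct mock_skb skb2;	/* the clone immediately follows */
};

int main(void)
{
	/* Same shape as the patch: useroffset = offsetof(sk_buff, cb),
	 * usersize = sizeof(sk_buff) + sizeof_field(sk_buff, cb), i.e.
	 * one contiguous window covering skb1.cb through skb2.cb.
	 */
	size_t useroffset = offsetof(struct mock_skb, cb);
	size_t usersize = sizeof(struct mock_skb) +
			  sizeof(((struct mock_skb *)0)->cb);

	printf("usercopy window: offset %zu, size %zu\n",
	       useroffset, usersize);
	printf("end of skb2.cb: %zu\n",
	       offsetof(struct mock_fclones, skb2) +
	       offsetof(struct mock_skb, cb) +
	       sizeof(((struct mock_skb *)0)->cb));
	return 0;
}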
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index a20efabe778fc..99ede37698ac5 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -367,16 +367,10 @@ static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst) struct net_device *dsa_tree_find_first_conduit(struct dsa_switch_tree *dst) { - struct device_node *ethernet; - struct net_device *conduit; struct dsa_port *cpu_dp; cpu_dp = dsa_tree_find_first_cpu(dst); - ethernet = of_parse_phandle(cpu_dp->dn, "ethernet", 0); - conduit = of_find_net_device_by_node(ethernet); - of_node_put(ethernet); - - return conduit; + return cpu_dp->conduit; } /* Assign the default CPU port (the first one in the tree) to all ports of the @@ -1253,14 +1247,25 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) if (ethernet) { struct net_device *conduit; const char *user_protocol; + int err; + rtnl_lock(); conduit = of_find_net_device_by_node(ethernet); of_node_put(ethernet); - if (!conduit) + if (!conduit) { + rtnl_unlock(); return -EPROBE_DEFER; + } + + netdev_hold(conduit, &dp->conduit_tracker, GFP_KERNEL); + put_device(&conduit->dev); + rtnl_unlock(); user_protocol = of_get_property(dn, "dsa-tag-protocol", NULL); - return dsa_port_parse_cpu(dp, conduit, user_protocol); + err = dsa_port_parse_cpu(dp, conduit, user_protocol); + if (err) + netdev_put(conduit, &dp->conduit_tracker); + return err; } if (link) @@ -1393,37 +1398,30 @@ static struct device *dev_find_class(struct device *parent, char *class) return device_find_child(parent, class, dev_is_class); } -static struct net_device *dsa_dev_to_net_device(struct device *dev) -{ - struct device *d; - - d = dev_find_class(dev, "net"); - if (d != NULL) { - struct net_device *nd; - - nd = to_net_dev(d); - dev_hold(nd); - put_device(d); - - return nd; - } - - return NULL; -} - static int dsa_port_parse(struct dsa_port *dp, const char *name, struct device *dev) { if (!strcmp(name, "cpu")) { struct net_device *conduit; + struct device *d; + int err; - conduit = dsa_dev_to_net_device(dev); - if (!conduit) + rtnl_lock(); + d = dev_find_class(dev, "net"); + if (!d) { + rtnl_unlock(); return -EPROBE_DEFER; + } - dev_put(conduit); + conduit = to_net_dev(d); + netdev_hold(conduit, &dp->conduit_tracker, GFP_KERNEL); + put_device(d); + rtnl_unlock(); - return dsa_port_parse_cpu(dp, conduit, NULL); + err = dsa_port_parse_cpu(dp, conduit, NULL); + if (err) + netdev_put(conduit, &dp->conduit_tracker); + return err; } if (!strcmp(name, "dsa")) @@ -1491,6 +1489,9 @@ static void dsa_switch_release_ports(struct dsa_switch *ds) struct dsa_vlan *v, *n; dsa_switch_for_each_port_safe(dp, next, ds) { + if (dsa_port_is_cpu(dp) && dp->conduit) + netdev_put(dp->conduit, &dp->conduit_tracker); + /* These are either entries that upper layers lost track of * (probably due to bugs), or installed through interfaces * where one does not necessarily have to remove them, like @@ -1635,8 +1636,10 @@ void dsa_switch_shutdown(struct dsa_switch *ds) /* Disconnect from further netdevice notifiers on the conduit, * since netdev_uses_dsa() will now return false. 
*/ - dsa_switch_for_each_cpu_port(dp, ds) + dsa_switch_for_each_cpu_port(dp, ds) { dp->conduit->dsa_ptr = NULL; + netdev_put(dp->conduit, &dp->conduit_tracker); + } rtnl_unlock(); out: diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index fa83ddade4f81..9431e305b2333 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2383,7 +2383,10 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) return -ENOMEM; WARN_ON_ONCE(!ret); - gstrings.len = ret; + if (gstrings.len && gstrings.len != ret) + gstrings.len = 0; + else + gstrings.len = ret; if (gstrings.len) { data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN)); @@ -2509,10 +2512,13 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) if (copy_from_user(&stats, useraddr, sizeof(stats))) return -EFAULT; - stats.n_stats = n_stats; + if (stats.n_stats && stats.n_stats != n_stats) + stats.n_stats = 0; + else + stats.n_stats = n_stats; - if (n_stats) { - data = vzalloc(array_size(n_stats, sizeof(u64))); + if (stats.n_stats) { + data = vzalloc(array_size(stats.n_stats, sizeof(u64))); if (!data) return -ENOMEM; ops->get_ethtool_stats(dev, &stats, data); @@ -2524,7 +2530,9 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) if (copy_to_user(useraddr, &stats, sizeof(stats))) goto out; useraddr += sizeof(stats); - if (n_stats && copy_to_user(useraddr, data, array_size(n_stats, sizeof(u64)))) + if (stats.n_stats && + copy_to_user(useraddr, data, + array_size(stats.n_stats, sizeof(u64)))) goto out; ret = 0; @@ -2560,6 +2568,10 @@ static int ethtool_get_phy_stats_phydev(struct phy_device *phydev, return -EOPNOTSUPP; n_stats = phy_ops->get_sset_count(phydev); + if (stats->n_stats && stats->n_stats != n_stats) { + stats->n_stats = 0; + return 0; + } ret = ethtool_vzalloc_stats_array(n_stats, data); if (ret) @@ -2580,6 +2592,10 @@ static int ethtool_get_phy_stats_ethtool(struct net_device *dev, return -EOPNOTSUPP; n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS); + if (stats->n_stats && stats->n_stats != n_stats) { + stats->n_stats = 0; + return 0; + } ret = ethtool_vzalloc_stats_array(n_stats, data); if (ret) @@ -2616,7 +2632,9 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) } useraddr += sizeof(stats); - if (copy_to_user(useraddr, data, array_size(stats.n_stats, sizeof(u64)))) + if (stats.n_stats && + copy_to_user(useraddr, data, + array_size(stats.n_stats, sizeof(u64)))) ret = -EFAULT; out: diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c index 1d33a4675a483..b989456fc4c5f 100644 --- a/net/handshake/netlink.c +++ b/net/handshake/netlink.c @@ -126,7 +126,8 @@ int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info) } out_complete: - handshake_complete(req, -EIO, NULL); + if (req) + handshake_complete(req, -EIO, NULL); out_status: trace_handshake_cmd_accept_err(net, req, NULL, err); return err; diff --git a/net/handshake/request.c b/net/handshake/request.c index 274d2c89b6b20..6b7e3e0bf3996 100644 --- a/net/handshake/request.c +++ b/net/handshake/request.c @@ -276,6 +276,8 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, out_unlock: spin_unlock(&hn->hn_lock); out_err: + /* Restore original destructor so socket teardown still runs on failure */ + req->hr_sk->sk_destruct = req->hr_odestruct; trace_handshake_submit_err(net, req, req->hr_sk, ret); handshake_req_destroy(req); return ret; @@ -324,7 +326,11 @@ bool handshake_req_cancel(struct sock *sk) hn = 
handshake_pernet(net); if (hn && remove_pending(hn, req)) { - /* Request hadn't been accepted */ + /* Request hadn't been accepted - mark cancelled */ + if (test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) { + trace_handshake_cancel_busy(net, req, sk); + return false; + } goto out_true; } if (test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) { diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 339f0d2202129..aefc9b6936ba0 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -205,6 +205,8 @@ struct sk_buff *prp_get_untagged_frame(struct hsr_frame_info *frame, __pskb_copy(frame->skb_prp, skb_headroom(frame->skb_prp), GFP_ATOMIC); + if (!frame->skb_std) + return NULL; } else { /* Unexpected */ WARN_ONCE(1, "%s:%d: Unexpected frame received (port_src %s)\n", diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a5f3c8459758f..0caf38e44c738 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -2167,8 +2167,8 @@ void fib_select_multipath(struct fib_result *res, int hash, { struct fib_info *fi = res->fi; struct net *net = fi->fib_net; - bool found = false; bool use_neigh; + int score = -1; __be32 saddr; if (unlikely(res->fi->nh)) { @@ -2180,7 +2180,7 @@ void fib_select_multipath(struct fib_result *res, int hash, saddr = fl4 ? fl4->saddr : 0; change_nexthops(fi) { - int nh_upper_bound; + int nh_upper_bound, nh_score = 0; /* Nexthops without a carrier are assigned an upper bound of * minus one when "ignore_routes_with_linkdown" is set. @@ -2190,24 +2190,18 @@ void fib_select_multipath(struct fib_result *res, int hash, (use_neigh && !fib_good_nh(nexthop_nh))) continue; - if (!found) { + if (saddr && nexthop_nh->nh_saddr == saddr) + nh_score += 2; + if (hash <= nh_upper_bound) + nh_score++; + if (score < nh_score) { res->nh_sel = nhsel; res->nhc = &nexthop_nh->nh_common; - found = !saddr || nexthop_nh->nh_saddr == saddr; + if (nh_score == 3 || (!saddr && nh_score == 1)) + return; + score = nh_score; } - if (hash > nh_upper_bound) - continue; - - if (!saddr || nexthop_nh->nh_saddr == saddr) { - res->nh_sel = nhsel; - res->nhc = &nexthop_nh->nh_common; - return; - } - - if (found) - return; - } endfor_nexthops(fi); } #endif diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 59a6f0a9638f9..7e2c17fec3fc4 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2053,10 +2053,11 @@ int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all) continue; } - /* Do not flush error routes if network namespace is - * not being dismantled + /* When not flushing the entire table, skip error + * routes that are not marked for deletion. */ - if (!flush_all && fib_props[fa->fa_type].error) { + if (!flush_all && fib_props[fa->fa_type].error && + !(fi->fib_flags & RTNH_F_DEAD)) { slen = fa->fa_slen; continue; } diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 025895eb6ec59..001ee5c4d962e 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -218,6 +218,41 @@ static int __init inet_frag_wq_init(void) pure_initcall(inet_frag_wq_init); +void fqdir_pre_exit(struct fqdir *fqdir) +{ + struct inet_frag_queue *fq; + struct rhashtable_iter hti; + + /* Prevent creation of new frags. + * Pairs with READ_ONCE() in inet_frag_find(). + */ + WRITE_ONCE(fqdir->high_thresh, 0); + + /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire() + * and ip6frag_expire_frag_queue(). 
+ */ + WRITE_ONCE(fqdir->dead, true); + + rhashtable_walk_enter(&fqdir->rhashtable, &hti); + rhashtable_walk_start(&hti); + + while ((fq = rhashtable_walk_next(&hti))) { + if (IS_ERR(fq)) { + if (PTR_ERR(fq) != -EAGAIN) + break; + continue; + } + spin_lock_bh(&fq->lock); + if (!(fq->flags & INET_FRAG_COMPLETE)) + inet_frag_queue_flush(fq, 0); + spin_unlock_bh(&fq->lock); + } + + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); +} +EXPORT_SYMBOL(fqdir_pre_exit); + void fqdir_exit(struct fqdir *fqdir) { INIT_WORK(&fqdir->destroy_work, fqdir_work_fn); @@ -263,8 +298,8 @@ static void inet_frag_destroy_rcu(struct rcu_head *head) kmem_cache_free(f->frags_cachep, q); } -unsigned int inet_frag_rbtree_purge(struct rb_root *root, - enum skb_drop_reason reason) +static unsigned int +inet_frag_rbtree_purge(struct rb_root *root, enum skb_drop_reason reason) { struct rb_node *p = rb_first(root); unsigned int sum = 0; @@ -284,7 +319,17 @@ unsigned int inet_frag_rbtree_purge(struct rb_root *root, } return sum; } -EXPORT_SYMBOL(inet_frag_rbtree_purge); + +void inet_frag_queue_flush(struct inet_frag_queue *q, + enum skb_drop_reason reason) +{ + unsigned int sum; + + reason = reason ?: SKB_DROP_REASON_FRAG_REASM_TIMEOUT; + sum = inet_frag_rbtree_purge(&q->rb_fragments, reason); + sub_frag_mem_limit(q->fqdir, sum); +} +EXPORT_SYMBOL(inet_frag_queue_flush); void inet_frag_destroy(struct inet_frag_queue *q) { @@ -327,7 +372,9 @@ static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir, timer_setup(&q->timer, f->frag_expire, 0); spin_lock_init(&q->lock); - /* One reference for the timer, one for the hash table. */ + /* One reference for the timer, one for the hash table. + * We never take any extra references, only decrement this field. + */ refcount_set(&q->refcnt, 2); return q; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index f7012479713ba..56b0f738d2f27 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -134,11 +134,6 @@ static void ip_expire(struct timer_list *t) net = qp->q.fqdir->net; rcu_read_lock(); - - /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */ - if (READ_ONCE(qp->q.fqdir->dead)) - goto out_rcu_unlock; - spin_lock(&qp->q.lock); if (qp->q.flags & INET_FRAG_COMPLETE) @@ -146,6 +141,13 @@ static void ip_expire(struct timer_list *t) qp->q.flags |= INET_FRAG_DROP; inet_frag_kill(&qp->q, &refs); + + /* Paired with WRITE_ONCE() in fqdir_pre_exit(). 
*/ + if (READ_ONCE(qp->q.fqdir->dead)) { + inet_frag_queue_flush(&qp->q, 0); + goto out; + } + __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); @@ -240,16 +242,10 @@ static int ip_frag_too_far(struct ipq *qp) static int ip_frag_reinit(struct ipq *qp) { - unsigned int sum_truesize = 0; - - if (!mod_timer(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) { - refcount_inc(&qp->q.refcnt); + if (!mod_timer_pending(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) return -ETIMEDOUT; - } - sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments, - SKB_DROP_REASON_FRAG_TOO_FAR); - sub_frag_mem_limit(qp->q.fqdir, sum_truesize); + inet_frag_queue_flush(&qp->q, SKB_DROP_REASON_FRAG_TOO_FAR); qp->q.flags = 0; qp->q.len = 0; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 761a53c6a89a6..8178c44a3cdd4 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -330,6 +330,10 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, if (!tun_dst) return PACKET_REJECT; + /* MUST set options_len before referencing options */ + info = &tun_dst->u.tun_info; + info->options_len = sizeof(*md); + /* skb can be uncloned in __iptunnel_pull_header, so * old pkt_md is no longer valid and we need to reset * it @@ -344,10 +348,8 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE); - info = &tun_dst->u.tun_info; __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags); - info->options_len = sizeof(*md); } skb_reset_mac_header(skb); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index fdda18b1abda0..162a384a15bbe 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -104,7 +104,8 @@ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct tcphdr))) return ERR_PTR(-EINVAL); - if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) { + if ((skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) && skb_has_frag_list(skb) && + (skb->protocol == skb_shinfo(skb)->frag_list->protocol)) { struct tcphdr *th = tcp_hdr(skb); if (skb_pagelen(skb) - th->doff * 4 == skb_shinfo(skb)->gso_size) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 19d0b5b09ffae..704fb32d10d75 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -512,7 +512,8 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, return NULL; } - if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) { + if ((skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) && skb_has_frag_list(gso_skb) && + (gso_skb->protocol == skb_shinfo(gso_skb)->frag_list->protocol)) { /* Detect modified geometry and pass those to skb_segment. */ if (skb_pagelen(gso_skb) - sizeof(*uh) == skb_shinfo(gso_skb)->gso_size) return __udp_gso_segment_list(gso_skb, features, is_ipv6); diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index df1986973430c..21f6ed126253a 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -1342,7 +1342,8 @@ static int calipso_skbuff_setattr(struct sk_buff *skb, /* At this point new_end aligns to 4n, so (new_end & 4) pads to 8n */ pad = ((new_end & 4) + (end & 7)) & 7; len_delta = new_end - (int)end + pad; - ret_val = skb_cow(skb, skb_headroom(skb) + len_delta); + ret_val = skb_cow(skb, + skb_headroom(skb) + (len_delta > 0 ? 
len_delta : 0)); if (ret_val < 0) return ret_val; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index c82a75510c0e2..d19d86ed43766 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -535,6 +535,10 @@ static int ip6erspan_rcv(struct sk_buff *skb, if (!tun_dst) return PACKET_REJECT; + /* MUST set options_len before referencing options */ + info = &tun_dst->u.tun_info; + info->options_len = sizeof(*md); + /* skb can be uncloned in __iptunnel_pull_header, so * old pkt_md is no longer valid and we need to reset * it @@ -543,7 +547,6 @@ static int ip6erspan_rcv(struct sk_buff *skb, skb_network_header_len(skb); pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len + sizeof(*ershdr)); - info = &tun_dst->u.tun_info; md = ip_tunnel_info_opts(info); md->version = ver; md2 = &md->u.md2; @@ -551,7 +554,6 @@ static int ip6erspan_rcv(struct sk_buff *skb, ERSPAN_V2_MDSIZE); __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags); - info->options_len = sizeof(*md); ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); @@ -1366,9 +1368,16 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, { struct ip6_tnl *t = netdev_priv(dev); struct ipv6hdr *ipv6h; + int needed; __be16 *p; - ipv6h = skb_push(skb, t->hlen + sizeof(*ipv6h)); + needed = t->hlen + sizeof(*ipv6h); + if (skb_headroom(skb) < needed && + pskb_expand_head(skb, HH_DATA_ALIGN(needed - skb_headroom(skb)), + 0, GFP_ATOMIC)) + return -needed; + + ipv6h = skb_push(skb, needed); ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb, t->fl.u.ip6.flowlabel, true, &t->fl.u.ip6)); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index aee6a10b112aa..9e7afda7cba2b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1470,7 +1470,18 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net, p = this_cpu_ptr(res->nh->rt6i_pcpu); prev = cmpxchg(p, NULL, pcpu_rt); - BUG_ON(prev); + if (unlikely(prev)) { + /* + * Another task on this CPU already installed a pcpu_rt. + * This can happen on PREEMPT_RT where preemption is possible. + * Free our allocation and return the existing one. + */ + BUG_ON(!IS_ENABLED(CONFIG_PREEMPT_RT)); + + dst_dev_put(&pcpu_rt->dst); + dst_release(&pcpu_rt->dst); + return prev; + } if (res->f6i->fib6_destroying) { struct fib6_info *from; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b51c2c8584ae0..c81091a5cc3a3 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1345,7 +1345,6 @@ ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, size = sizeof(*new) + new_head_len + new_tail_len; - /* new or old multiple BSSID elements? 
*/ if (params->mbssid_ies) { mbssid = params->mbssid_ies; size += struct_size(new->mbssid_ies, elem, mbssid->cnt); @@ -1355,15 +1354,6 @@ ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, } size += ieee80211_get_mbssid_beacon_len(mbssid, rnr, mbssid->cnt); - } else if (old && old->mbssid_ies) { - mbssid = old->mbssid_ies; - size += struct_size(new->mbssid_ies, elem, mbssid->cnt); - if (old && old->rnr_ies) { - rnr = old->rnr_ies; - size += struct_size(new->rnr_ies, elem, rnr->cnt); - } - size += ieee80211_get_mbssid_beacon_len(mbssid, rnr, - mbssid->cnt); } new = kzalloc(size, GFP_KERNEL); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 4f04d95c19d49..7b0aa24c1f97c 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1251,7 +1251,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local, if (!creator_sdata) { struct ieee80211_sub_if_data *other; - list_for_each_entry(other, &local->mon_list, list) { + list_for_each_entry_rcu(other, &local->mon_list, u.mntr.list) { if (!other->vif.bss_conf.mu_mimo_owner) continue; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e56ad4b9330f2..ad53dedd929c2 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1126,7 +1126,10 @@ ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, while (!ieee80211_chandef_usable(sdata, &chanreq->oper, IEEE80211_CHAN_DISABLED)) { - if (WARN_ON(chanreq->oper.width == NL80211_CHAN_WIDTH_20_NOHT)) { + if (chanreq->oper.width == NL80211_CHAN_WIDTH_20_NOHT) { + link_id_info(sdata, link_id, + "unusable channel (%d MHz) for connection\n", + chanreq->oper.chan->center_freq); ret = -EINVAL; goto free; } diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c index a5d4358f122ae..ebb4f4d88c237 100644 --- a/net/mac80211/ocb.c +++ b/net/mac80211/ocb.c @@ -47,6 +47,9 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; int band; + if (!ifocb->joined) + return; + /* XXX: Consider removing the least recently used entry and * allow new one to be added. 
*/ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6a1899512d078..e0ccd97498536 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3511,6 +3511,11 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) rx->skb->len < IEEE80211_MIN_ACTION_SIZE) return RX_DROP_U_RUNT_ACTION; + /* Drop non-broadcast Beacon frames */ + if (ieee80211_is_beacon(mgmt->frame_control) && + !is_broadcast_ether_addr(mgmt->da)) + return RX_DROP; + if (rx->sdata->vif.type == NL80211_IFTYPE_AP && ieee80211_is_beacon(mgmt->frame_control) && !(rx->flags & IEEE80211_RX_BEACON_REPORTED)) { diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 9e4631fade90c..a1222c1b62b3c 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -328,8 +328,14 @@ static int mac802154_set_header_security(struct ieee802154_sub_if_data *sdata, mac802154_llsec_get_params(&sdata->sec, ¶ms); - if (!params.enabled && cb->secen_override && cb->secen) - return -EINVAL; + if (!cb->secen_override) { + if (!params.enabled) + return 0; + } else { + if (cb->secen && !params.enabled) + return -EINVAL; + } + if (!params.enabled || (cb->secen_override && !cb->secen) || !params.out_level) @@ -366,7 +372,7 @@ static int ieee802154_header_create(struct sk_buff *skb, if (!daddr) return -EINVAL; - memset(&hdr.fc, 0, sizeof(hdr.fc)); + memset(&hdr, 0, sizeof(hdr)); hdr.fc.type = cb->type; hdr.fc.security_enabled = cb->secen; hdr.fc.ack_request = cb->ackreq; @@ -432,7 +438,7 @@ static int mac802154_header_create(struct sk_buff *skb, if (!daddr) return -EINVAL; - memset(&hdr.fc, 0, sizeof(hdr.fc)); + memset(&hdr, 0, sizeof(hdr)); hdr.fc.type = IEEE802154_FC_TYPE_DATA; hdr.fc.ack_request = wpan_dev->ackreq; hdr.seq = atomic_inc_return(&dev->ieee802154_ptr->dsn) & 0xFF; diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig index 20328920f6ed1..be71fc9b46381 100644 --- a/net/mptcp/Kconfig +++ b/net/mptcp/Kconfig @@ -4,7 +4,7 @@ config MPTCP depends on INET select SKB_EXTENSIONS select CRYPTO_LIB_SHA256 - select CRYPTO + select CRYPTO_LIB_UTILS help Multipath TCP (MPTCP) connections send and receive data over multiple subflows in order to utilize multiple network paths. Each subflow diff --git a/net/mptcp/options.c b/net/mptcp/options.c index f24ae7d40e883..43df4293f58bf 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -408,6 +408,16 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, */ subflow->snd_isn = TCP_SKB_CB(skb)->end_seq; if (subflow->request_mptcp) { + if (unlikely(subflow_simultaneous_connect(sk))) { + WARN_ON_ONCE(!mptcp_try_fallback(sk, MPTCP_MIB_SIMULTCONNFALLBACK)); + + /* Ensure mptcp_finish_connect() will not process the + * MPC handshake. 
+ */ + subflow->request_mptcp = 0; + return false; + } + opts->suboptions = OPTION_MPTCP_MPC_SYN; opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk)); opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk)); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index d5b383870f799..7aa42de9c47b5 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -119,7 +119,8 @@ int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, } if (tb[MPTCP_PM_ADDR_ATTR_FLAGS]) - entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]); + entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]) & + MPTCP_PM_ADDR_FLAGS_MASK; if (tb[MPTCP_PM_ADDR_ATTR_PORT]) entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT])); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e212c1374bd04..f505b780f7139 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1623,7 +1623,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) struct mptcp_sendmsg_info info = { .flags = flags, }; - bool do_check_data_fin = false; + bool copied = false; int push_count = 1; while (mptcp_send_head(sk) && (push_count > 0)) { @@ -1665,7 +1665,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) push_count--; continue; } - do_check_data_fin = true; + copied = true; } } } @@ -1674,11 +1674,14 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) if (ssk) mptcp_push_release(ssk, &info); - /* ensure the rtx timer is running */ - if (!mptcp_rtx_timer_pending(sk)) - mptcp_reset_rtx_timer(sk); - if (do_check_data_fin) + /* Avoid scheduling the rtx timer if no data has been pushed; the timer + * will be updated on positive acks by __mptcp_cleanup_una(). + */ + if (copied) { + if (!mptcp_rtx_timer_pending(sk)) + mptcp_reset_rtx_timer(sk); mptcp_check_send_data_fin(sk); + } } static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first) @@ -2464,10 +2467,10 @@ bool __mptcp_retransmit_pending_data(struct sock *sk) */ static void __mptcp_subflow_disconnect(struct sock *ssk, struct mptcp_subflow_context *subflow, - unsigned int flags) + bool fastclosing) { if (((1 << ssk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || - subflow->send_fastclose) { + fastclosing) { /* The MPTCP code never waits on the subflow sockets, TCP-level * disconnect should never fail */ @@ -2535,7 +2538,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk); if (!dispose_it) { - __mptcp_subflow_disconnect(ssk, subflow, flags); + __mptcp_subflow_disconnect(ssk, subflow, msk->fastclosing); release_sock(ssk); goto out; @@ -2766,10 +2769,13 @@ static void __mptcp_retrans(struct sock *sk) /* * make the whole retrans decision, xmit, disallow - * fallback atomic + * fallback atomic, note that we can't retrans even + * when an infinite fallback is in progress, i.e. new + * subflows are disallowed.
*/ spin_lock_bh(&msk->fallback_lock); - if (__mptcp_check_fallback(msk)) { + if (__mptcp_check_fallback(msk) || + !msk->allow_subflows) { spin_unlock_bh(&msk->fallback_lock); release_sock(ssk); goto clear_scheduled; } @@ -2878,6 +2884,7 @@ static void mptcp_do_fastclose(struct sock *sk) mptcp_set_state(sk, TCP_CLOSE); mptcp_backlog_purge(sk); + msk->fastclosing = 1; /* Explicitly send the fastclose reset as needed */ if (__mptcp_check_fallback(msk)) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 9c0d17876b22f..66e9735007912 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -320,7 +320,8 @@ struct mptcp_sock { fastopening:1, in_accept_queue:1, free_first:1, - rcvspace_init:1; + rcvspace_init:1, + fastclosing:1; u32 notsent_lowat; int keepalive_cnt; int keepalive_idle; @@ -1337,10 +1338,8 @@ static inline bool subflow_simultaneous_connect(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - return (1 << sk->sk_state) & - (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING) && - is_active_ssk(subflow) && - !subflow->conn_finished; + /* Note that the sk state implies !subflow->conn_finished. */ + return sk->sk_state == TCP_SYN_RECV && is_active_ssk(subflow); } #ifdef CONFIG_SYN_COOKIES diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 86ce58ae533df..96d54cb2cd93f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1878,12 +1878,6 @@ static void subflow_state_change(struct sock *sk) __subflow_state_change(sk); - if (subflow_simultaneous_connect(sk)) { - WARN_ON_ONCE(!mptcp_try_fallback(sk, MPTCP_MIB_SIMULTCONNFALLBACK)); - subflow->conn_finished = 1; - mptcp_propagate_state(parent, sk, subflow, NULL); - } - /* as recvmsg() does not acquire the subflow socket for ssk selection * a fin packet carrying a DSS can be unnoticed if we don't trigger * the data available machinery here.
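For the subflow_simultaneous_connect() rewrite just above: the fallback for simultaneous opens now happens while building SYN options (the mptcp_syn_options() hunk earlier in this series) rather than in subflow_state_change(), and the predicate collapses to a TCP-state test. A rough standalone model of the simplified condition; the names and the state constant are illustrative stand-ins, not the real mptcp types:

#include <stdio.h>

enum { MOCK_TCP_SYN_RECV = 3 };	/* stand-in value, not the kernel's */

struct mock_subflow {
	int sk_state;		/* models ssk->sk_state */
	int active;		/* models is_active_ssk(subflow) */
};

/* On an active subflow, SYN_RECV already implies the handshake has not
 * finished, which is why the old !conn_finished clause could be dropped.
 */
static int mock_simultaneous_connect(const struct mock_subflow *sf)
{
	return sf->sk_state == MOCK_TCP_SYN_RECV && sf->active;
}

int main(void)
{
	struct mock_subflow sf = { .sk_state = MOCK_TCP_SYN_RECV, .active = 1 };

	printf("simultaneous connect: %d\n", mock_simultaneous_connect(&sf));
	return 0;
}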
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 3162ce3c26404..64c697212578a 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -408,6 +408,9 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, return -1; err_unreach: + if (!skb->dev) + skb->dev = skb_dst(skb)->dev; + dst_link_failure(skb); return -1; } diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index f1be4dd5cf85f..3654f1e8976c9 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -172,14 +172,14 @@ static int __nf_conncount_add(struct net *net, struct nf_conn *found_ct; unsigned int collect = 0; bool refcounted = false; + int err = 0; if (!get_ct_or_tuple_from_skb(net, skb, l3num, &ct, &tuple, &zone, &refcounted)) return -ENOENT; if (ct && nf_ct_is_confirmed(ct)) { - if (refcounted) - nf_ct_put(ct); - return -EEXIST; + err = -EEXIST; + goto out_put; } if ((u32)jiffies == list->last_gc) @@ -231,12 +231,16 @@ static int __nf_conncount_add(struct net *net, } add_new_node: - if (WARN_ON_ONCE(list->count > INT_MAX)) - return -EOVERFLOW; + if (WARN_ON_ONCE(list->count > INT_MAX)) { + err = -EOVERFLOW; + goto out_put; + } conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); - if (conn == NULL) - return -ENOMEM; + if (conn == NULL) { + err = -ENOMEM; + goto out_put; + } conn->tuple = tuple; conn->zone = *zone; @@ -249,7 +253,7 @@ static int __nf_conncount_add(struct net *net, out_put: if (refcounted) nf_ct_put(ct); - return 0; + return err; } int nf_conncount_add_skb(struct net *net, @@ -456,11 +460,10 @@ insert_tree(struct net *net, rb_link_node_rcu(&rbconn->node, parent, rbnode); rb_insert_color(&rbconn->node, root); - - if (refcounted) - nf_ct_put(ct); } out_unlock: + if (refcounted) + nf_ct_put(ct); spin_unlock_bh(&nf_conncount_locks[hash]); return count; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0b95f226f2111..d1f8eb725d422 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2487,6 +2487,7 @@ void nf_conntrack_cleanup_net(struct net *net) void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) { struct nf_ct_iter_data iter_data = {}; + unsigned long start = jiffies; struct net *net; int busy; @@ -2507,6 +2508,8 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) busy = 1; } if (busy) { + DEBUG_NET_WARN_ONCE(time_after(jiffies, start + 60 * HZ), + "conntrack cleanup blocked for 60s"); schedule(); goto i_see_dead_people; } diff --git a/net/netfilter/nf_flow_table_path.c b/net/netfilter/nf_flow_table_path.c index f0984cf69a09b..eb24fe2715dcd 100644 --- a/net/netfilter/nf_flow_table_path.c +++ b/net/netfilter/nf_flow_table_path.c @@ -250,6 +250,9 @@ static void nft_dev_forward_path(const struct nft_pktinfo *pkt, if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0) nft_dev_path_info(&stack, &info, ha, &ft->data); + if (info.outdev) + route->tuple[dir].out.ifindex = info.outdev->ifindex; + if (!info.indev || !nft_flowtable_find_dev(info.indev, ft)) return; @@ -269,7 +272,6 @@ static void nft_dev_forward_path(const struct nft_pktinfo *pkt, route->tuple[!dir].in.num_encaps = info.num_encaps; route->tuple[!dir].in.ingress_vlans = info.ingress_vlans; - route->tuple[dir].out.ifindex = info.outdev->ifindex; if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) { memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN); diff --git 
a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 78a61dac4ade8..e6b24586d2fed 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -294,25 +294,13 @@ nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple, ct = nf_ct_tuplehash_to_ctrack(thash); - /* NB: IP_CT_DIR_ORIGINAL should be impossible because - * nf_nat_used_tuple() handles origin collisions. - * - * Handle remote chance other CPU confirmed its ct right after. - */ - if (thash->tuple.dst.dir != IP_CT_DIR_REPLY) - goto out; - /* clashing connection subject to NAT? Retry with new tuple. */ if (READ_ONCE(ct->status) & uses_nat) goto out; if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - &ignored_ct->tuplehash[IP_CT_DIR_REPLY].tuple) && - nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, - &ignored_ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) { + &ignored_ct->tuplehash[IP_CT_DIR_REPLY].tuple)) taken = false; - goto out; - } out: nf_ct_put(ct); return taken; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f3de2f9bbebf1..618af6e90773f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -123,6 +123,29 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s table->validate_state = new_validate_state; } + +static bool nft_chain_vstate_valid(const struct nft_ctx *ctx, + const struct nft_chain *chain) +{ + const struct nft_base_chain *base_chain; + enum nft_chain_types type; + u8 hooknum; + + if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain))) + return false; + + base_chain = nft_base_chain(ctx->chain); + hooknum = base_chain->ops.hooknum; + type = base_chain->type->type; + + /* chain is already validated for this call depth */ + if (chain->vstate.depth >= ctx->level && + chain->vstate.hook_mask[type] & BIT(hooknum)) + return true; + + return false; +} + static void nf_tables_trans_destroy_work(struct work_struct *w); static void nft_trans_gc_work(struct work_struct *work); @@ -4079,6 +4102,29 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r nf_tables_rule_destroy(ctx, rule); } +static void nft_chain_vstate_update(const struct nft_ctx *ctx, struct nft_chain *chain) +{ + const struct nft_base_chain *base_chain; + enum nft_chain_types type; + u8 hooknum; + + /* ctx->chain must hold the calling base chain. */ + if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain))) { + memset(&chain->vstate, 0, sizeof(chain->vstate)); + return; + } + + base_chain = nft_base_chain(ctx->chain); + hooknum = base_chain->ops.hooknum; + type = base_chain->type->type; + + BUILD_BUG_ON(BIT(NF_INET_NUMHOOKS) > U8_MAX); + + chain->vstate.hook_mask[type] |= BIT(hooknum); + if (chain->vstate.depth < ctx->level) + chain->vstate.depth = ctx->level; +} + /** nft_chain_validate - loop detection and hook validation * * @ctx: context containing call depth and base chain @@ -4088,15 +4134,25 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r * and set lookups until either the jump limit is hit or all reachable * chains have been validated. */ -int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) +int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain) { struct nft_expr *expr, *last; struct nft_rule *rule; int err; + BUILD_BUG_ON(NFT_JUMP_STACK_SIZE > 255); if (ctx->level == NFT_JUMP_STACK_SIZE) return -EMLINK; + if (ctx->level > 0) { + /* jumps to base chains are not allowed. 
*/ + if (nft_is_base_chain(chain)) + return -ELOOP; + + if (nft_chain_vstate_valid(ctx, chain)) + return 0; + } + list_for_each_entry(rule, &chain->rules, list) { if (fatal_signal_pending(current)) return -EINTR; @@ -4115,8 +4171,11 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) if (err < 0) return err; } + + cond_resched(); } + nft_chain_vstate_update(ctx, chain); return 0; } EXPORT_SYMBOL_GPL(nft_chain_validate); @@ -4128,7 +4187,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) .net = net, .family = table->family, }; - int err; + int err = 0; list_for_each_entry(chain, &table->chains, list) { if (!nft_is_base_chain(chain)) @@ -4137,12 +4196,14 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) ctx.chain = chain; err = nft_chain_validate(&ctx, chain); if (err < 0) - return err; - - cond_resched(); + goto err; } - return 0; +err: + list_for_each_entry(chain, &table->chains, list) + memset(&chain->vstate, 0, sizeof(chain->vstate)); + + return err; } int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, @@ -11676,21 +11737,10 @@ static int nft_validate_register_store(const struct nft_ctx *ctx, enum nft_data_types type, unsigned int len) { - int err; - switch (reg) { case NFT_REG_VERDICT: if (type != NFT_DATA_VERDICT) return -EINVAL; - - if (data != NULL && - (data->verdict.code == NFT_GOTO || - data->verdict.code == NFT_JUMP)) { - err = nft_chain_validate(ctx, data->verdict.chain); - if (err < 0) - return err; - } - break; default: if (type != NFT_DATA_VALUE) diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c index 5e531394a724b..2b3cbceb0b52d 100644 --- a/net/netrom/nr_out.c +++ b/net/netrom/nr_out.c @@ -43,8 +43,10 @@ void nr_output(struct sock *sk, struct sk_buff *skb) frontlen = skb_headroom(skb); while (skb->len > 0) { - if ((skbn = sock_alloc_send_skb(sk, frontlen + NR_MAX_PACKET_SIZE, 0, &err)) == NULL) + if ((skbn = sock_alloc_send_skb(sk, frontlen + NR_MAX_PACKET_SIZE, 0, &err)) == NULL) { + kfree_skb(skb); return; + } skb_reserve(skbn, frontlen); diff --git a/net/nfc/core.c b/net/nfc/core.c index ae1c842f9c642..82f023f377541 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -1154,6 +1154,7 @@ EXPORT_SYMBOL(nfc_register_device); void nfc_unregister_device(struct nfc_dev *dev) { int rc; + struct rfkill *rfk = NULL; pr_debug("dev_name=%s\n", dev_name(&dev->dev)); @@ -1164,13 +1165,17 @@ void nfc_unregister_device(struct nfc_dev *dev) device_lock(&dev->dev); if (dev->rfkill) { - rfkill_unregister(dev->rfkill); - rfkill_destroy(dev->rfkill); + rfk = dev->rfkill; dev->rfkill = NULL; } dev->shutting_down = true; device_unlock(&dev->dev); + if (rfk) { + rfkill_unregister(rfk); + rfkill_destroy(rfk); + } + if (dev->ops->check_presence) { timer_delete_sync(&dev->check_pres_timer); cancel_work_sync(&dev->check_pres_work); diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index beeb3b4d28cab..f6c1d79f91e4e 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -1089,6 +1089,7 @@ static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local, if (sk->sk_state == LLCP_CLOSED) { release_sock(sk); nfc_llcp_sock_put(llcp_sock); + return; } /* Pass the payload upstream */ @@ -1177,11 +1178,6 @@ static void nfc_llcp_recv_disc(struct nfc_llcp_local *local, nfc_llcp_socket_purge(llcp_sock); - if (sk->sk_state == LLCP_CLOSED) { - release_sock(sk); - nfc_llcp_sock_put(llcp_sock); - } - if (sk->sk_state == LLCP_CONNECTED) { nfc_put_device(local->dev); sk->sk_state = 
LLCP_CLOSED; diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 418b84e2b2605..0190332cf4546 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -58,7 +58,7 @@ static int nci_core_conn_credits_ntf_packet(struct nci_dev *ndev, struct nci_conn_info *conn_info; int i; - if (skb->len < sizeof(struct nci_core_conn_credit_ntf)) + if (skb->len < offsetofend(struct nci_core_conn_credit_ntf, num_entries)) return -EINVAL; ntf = (struct nci_core_conn_credit_ntf *)skb->data; @@ -68,6 +68,10 @@ static int nci_core_conn_credits_ntf_packet(struct nci_dev *ndev, if (ntf->num_entries > NCI_MAX_NUM_CONN) ntf->num_entries = NCI_MAX_NUM_CONN; + if (skb->len < offsetofend(struct nci_core_conn_credit_ntf, num_entries) + + ntf->num_entries * sizeof(struct conn_credit_entry)) + return -EINVAL; + /* update the credits */ for (i = 0; i < ntf->num_entries; i++) { ntf->conn_entries[i].conn_id = @@ -364,7 +368,7 @@ static int nci_rf_discover_ntf_packet(struct nci_dev *ndev, const __u8 *data; bool add_target = true; - if (skb->len < sizeof(struct nci_rf_discover_ntf)) + if (skb->len < offsetofend(struct nci_rf_discover_ntf, rf_tech_specific_params_len)) return -EINVAL; data = skb->data; @@ -380,6 +384,10 @@ static int nci_rf_discover_ntf_packet(struct nci_dev *ndev, pr_debug("rf_tech_specific_params_len %d\n", ntf.rf_tech_specific_params_len); + if (skb->len < (data - skb->data) + + ntf.rf_tech_specific_params_len + sizeof(ntf.ntf_type)) + return -EINVAL; + if (ntf.rf_tech_specific_params_len > 0) { switch (ntf.rf_tech_and_mode) { case NCI_NFC_A_PASSIVE_POLL_MODE: @@ -596,7 +604,7 @@ static int nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, const __u8 *data; int err = NCI_STATUS_OK; - if (skb->len < sizeof(struct nci_rf_intf_activated_ntf)) + if (skb->len < offsetofend(struct nci_rf_intf_activated_ntf, rf_tech_specific_params_len)) return -EINVAL; data = skb->data; @@ -628,6 +636,9 @@ static int nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, if (ntf.rf_interface == NCI_RF_INTERFACE_NFCEE_DIRECT) goto listen; + if (skb->len < (data - skb->data) + ntf.rf_tech_specific_params_len) + return -EINVAL; + if (ntf.rf_tech_specific_params_len > 0) { switch (ntf.activation_rf_tech_and_mode) { case NCI_NFC_A_PASSIVE_POLL_MODE: @@ -668,6 +679,11 @@ static int nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, } } + if (skb->len < (data - skb->data) + sizeof(ntf.data_exch_rf_tech_and_mode) + + sizeof(ntf.data_exch_tx_bit_rate) + sizeof (ntf.data_exch_rx_bit_rate) + + sizeof(ntf.activation_params_len)) + return -EINVAL; + ntf.data_exch_rf_tech_and_mode = *data++; ntf.data_exch_tx_bit_rate = *data++; ntf.data_exch_rx_bit_rate = *data++; @@ -679,6 +695,9 @@ static int nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, pr_debug("data_exch_rx_bit_rate 0x%x\n", ntf.data_exch_rx_bit_rate); pr_debug("activation_params_len %d\n", ntf.activation_params_len); + if (skb->len < (data - skb->data) + ntf.activation_params_len) + return -EINVAL; + if (ntf.activation_params_len > 0) { switch (ntf.rf_interface) { case NCI_RF_INTERFACE_ISO_DEP: diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index a18e2c503da61..9ae138ee91dde 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -159,6 +159,11 @@ static int nfc_genl_dump_targets(struct sk_buff *skb, cb->args[0] = i; + if (rc < 0 || i >= dev->n_targets) { + nfc_put_device(dev); + cb->args[1] = 0; + } + return skb->len; } diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 1cb4f97335d87..2d536901309ea 100644 --- 
a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2802,13 +2802,20 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, return err; } -static bool validate_push_nsh(const struct nlattr *attr, bool log) +static bool validate_push_nsh(const struct nlattr *a, bool log) { + struct nlattr *nsh_key = nla_data(a); struct sw_flow_match match; struct sw_flow_key key; + /* There must be one and only one NSH header. */ + if (!nla_ok(nsh_key, nla_len(a)) || + nla_total_size(nla_len(nsh_key)) != nla_len(a) || + nla_type(nsh_key) != OVS_KEY_ATTR_NSH) + return false; + ovs_match_init(&match, &key, true, NULL); - return !nsh_key_put_from_nlattr(attr, &match, false, true, log); + return !nsh_key_put_from_nlattr(nsh_key, &match, false, true, log); } /* Return false if there are any non-masked bits set. @@ -3389,7 +3396,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return -EINVAL; } mac_proto = MAC_PROTO_NONE; - if (!validate_push_nsh(nla_data(a), log)) + if (!validate_push_nsh(a, log)) return -EINVAL; break; diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 91a11067e4588..6574f9bcdc026 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -160,10 +160,19 @@ void ovs_netdev_detach_dev(struct vport *vport) static void netdev_destroy(struct vport *vport) { - rtnl_lock(); - if (netif_is_ovs_port(vport->dev)) - ovs_netdev_detach_dev(vport); - rtnl_unlock(); + /* When called from ovs_dp_notify_wq() after a dp_device_event(), the + * port has already been detached, so we can avoid taking the RTNL by + * checking this first. + */ + if (netif_is_ovs_port(vport->dev)) { + rtnl_lock(); + /* Check again while holding the lock to ensure we don't race + * with the netdev notifier and detach twice. + */ + if (netif_is_ovs_port(vport->dev)) + ovs_netdev_detach_dev(vport); + rtnl_unlock(); + } call_rcu(&vport->rcu, vport_netdev_free); } diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index fd67494f2815e..c0f5a515a8ce5 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -205,7 +205,7 @@ static void rose_kill_by_device(struct net_device *dev) spin_unlock_bh(&rose_list_lock); for (i = 0; i < cnt; i++) { - sk = array[cnt]; + sk = array[i]; rose = rose_sk(sk); lock_sock(sk); spin_lock_bh(&rose_list_lock); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index f27b583def78e..91c96cc625bd6 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -281,6 +281,15 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, want_ingress = tcf_mirred_act_wants_ingress(m_eaction); + if (dev == skb->dev && want_ingress == at_ingress) { + pr_notice_once("tc mirred: Loop (%s:%s --> %s:%s)\n", + netdev_name(skb->dev), + at_ingress ? "ingress" : "egress", + netdev_name(dev), + want_ingress ?
"ingress" : "egress"); + goto err_cant_do; + } + /* All mirred/redirected skbs should clear previous ct info */ nf_reset_ct(skb_to_send); if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */ diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 82635dd2cfa59..306e046276d46 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -652,7 +652,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); for (i = nbands; i < oldbands; i++) { - if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) + if (cl_is_active(&q->classes[i])) list_del_init(&q->classes[i].alist); qdisc_purge_queue(q->classes[i].qdisc); } @@ -664,6 +664,10 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, q->classes[i].deficit = quanta[i]; } } + for (i = q->nstrict; i < nstrict; i++) { + if (cl_is_active(&q->classes[i])) + list_del_init(&q->classes[i].alist); + } WRITE_ONCE(q->nstrict, nstrict); memcpy(q->prio2band, priomap, sizeof(priomap)); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 32a5f33040461..1a2b498ada839 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -1055,6 +1055,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, q->loss_model = CLG_RANDOM; } + ret = check_netem_in_tree(sch, qopt->duplicate, extack); + if (ret) + goto unlock; + q->duplicate = qopt->duplicate; + if (delay_dist) swap(q->delay_dist, delay_dist); if (slot_dist) @@ -1068,12 +1073,6 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, q->counter = 0; q->loss = qopt->loss; - ret = check_netem_in_tree(sch, qopt->duplicate, extack); - if (ret) - goto unlock; - - q->duplicate = qopt->duplicate; - /* for compatibility with earlier versions. * if gap is set, need to assume 100% probability */ diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 069b7e45d8bda..531cb0690007a 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -492,6 +492,8 @@ static void sctp_v6_copy_ip_options(struct sock *sk, struct sock *newsk) struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct ipv6_txoptions *opt; + inet_sk(newsk)->inet_opt = NULL; + newnp = inet6_sk(newsk); rcu_read_lock(); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d808096f5ab17..2493a5b1fa3ca 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4863,8 +4863,6 @@ static struct sock *sctp_clone_sock(struct sock *sk, newsp->pf->to_sk_daddr(&asoc->peer.primary_addr, newsk); newinet->inet_dport = htons(asoc->peer.port); - - newsp->pf->copy_ip_options(sk, newsk); atomic_set(&newinet->inet_id, get_random_u16()); inet_set_bit(MC_LOOP, newsk); @@ -4874,17 +4872,20 @@ static struct sock *sctp_clone_sock(struct sock *sk, #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) { - struct ipv6_pinfo *newnp = inet6_sk(newsk); + struct ipv6_pinfo *newnp; newinet->pinet6 = &((struct sctp6_sock *)newsk)->inet6; newinet->ipv6_fl_list = NULL; + newnp = inet6_sk(newsk); memcpy(newnp, inet6_sk(sk), sizeof(struct ipv6_pinfo)); newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; } #endif + newsp->pf->copy_ip_options(sk, newsk); + newsp->do_auto_asconf = 0; skb_queue_head_init(&newsp->pd_lobby); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index f97f77b041d97..b0f4405fb714c 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -3524,6 +3524,9 @@ static int __init smc_init(void) goto out_pernet_subsys_stat; smc_clc_init(); + INIT_HLIST_HEAD(&smc_v4_hashinfo.ht); + INIT_HLIST_HEAD(&smc_v6_hashinfo.ht); + rc = smc_nl_init(); if (rc) goto out_ism; @@ -3581,8 
+3584,6 @@ static int __init smc_init(void) pr_err("%s: sock_register fails with %d\n", __func__, rc); goto out_proto6; } - INIT_HLIST_HEAD(&smc_v4_hashinfo.ht); - INIT_HLIST_HEAD(&smc_v6_hashinfo.ht); rc = smc_ib_register_client(); if (rc) { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 55cdebfa0da02..72dc5d5bcac89 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3086,12 +3086,16 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, mutex_unlock(&u->iolock); if (msg) { + bool do_cmsg; + scm_recv_unix(sock, msg, &scm, flags); - if (READ_ONCE(u->recvmsg_inq) || msg->msg_get_inq) { + do_cmsg = READ_ONCE(u->recvmsg_inq); + if ((do_cmsg || msg->msg_get_inq) && (copied ?: err) >= 0) { msg->msg_inq = READ_ONCE(u->inq_len); - put_cmsg(msg, SOL_SOCKET, SCM_INQ, - sizeof(msg->msg_inq), &msg->msg_inq); + if (do_cmsg) + put_cmsg(msg, SOL_SOCKET, SCM_INQ, + sizeof(msg->msg_inq), &msg->msg_inq); } } else { scm_destroy(&scm); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 78323d43e63ed..25f65817faab9 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -199,7 +199,7 @@ static void unix_free_vertices(struct scm_fp_list *fpl) } } -static DEFINE_SPINLOCK(unix_gc_lock); +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(unix_gc_lock); void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver) { diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 3a028ff287fbb..4e629ca305bcc 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -910,7 +910,7 @@ void __cfg80211_connect_result(struct net_device *dev, ssid_len = min(ssid->datalen, IEEE80211_MAX_SSID_LEN); memcpy(wdev->u.client.ssid, ssid->data, ssid_len); - wdev->u.client.ssid_len = ssid->datalen; + wdev->u.client.ssid_len = ssid_len; break; } rcu_read_unlock(); diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs index bf6272d87a7b4..3ca99db5cccf2 100644 --- a/rust/kernel/net/phy.rs +++ b/rust/kernel/net/phy.rs @@ -777,7 +777,6 @@ impl DeviceMask { /// /// ``` /// # mod module_phy_driver_sample { -/// use kernel::c_str; /// use kernel::net::phy::{self, DeviceId}; /// use kernel::prelude::*; /// @@ -796,7 +795,7 @@ impl DeviceMask { /// /// #[vtable] /// impl phy::Driver for PhySample { -/// const NAME: &'static CStr = c_str!("PhySample"); +/// const NAME: &'static CStr = c"PhySample"; /// const PHY_DEVICE_ID: phy::DeviceId = phy::DeviceId::new_with_exact_mask(0x00000001); /// } /// # } @@ -805,7 +804,6 @@ impl DeviceMask { /// This expands to the following code: /// /// ```ignore -/// use kernel::c_str; /// use kernel::net::phy::{self, DeviceId}; /// use kernel::prelude::*; /// @@ -825,7 +823,7 @@ impl DeviceMask { /// /// #[vtable] /// impl phy::Driver for PhySample { -/// const NAME: &'static CStr = c_str!("PhySample"); +/// const NAME: &'static CStr = c"PhySample"; /// const PHY_DEVICE_ID: phy::DeviceId = phy::DeviceId::new_with_exact_mask(0x00000001); /// } /// diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 061791e619074..1a3a60a493fd8 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -788,7 +788,6 @@ config SND_SOC_CS_AMP_LIB config SND_SOC_CS_AMP_LIB_TEST tristate "KUnit test for Cirrus Logic cs-amp-lib" if !KUNIT_ALL_TESTS depends on SND_SOC_CS_AMP_LIB && KUNIT - default KUNIT_ALL_TESTS help This builds KUnit tests for the Cirrus Logic common amplifier library. 
diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index 865fd2e8519ed..08205f9fc5257 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -13,6 +13,7 @@ UAPI_PATH:=../../../../include/uapi/ # need the explicit -D matching what's in /usr, to avoid multiple definitions. get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2) +get_hdr_inc2=-D$(1) -D$(2) -include $(UAPI_PATH)/linux/$(3) CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h) CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h) @@ -48,3 +49,4 @@ CFLAGS_tc:= $(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \ $(call get_hdr_inc,_TC_SKBEDIT_H,tc_act/tc_skbedit.h) \ $(call get_hdr_inc,_TC_TUNNEL_KEY_H,tc_act/tc_tunnel_key.h) CFLAGS_tcp_metrics:=$(call get_hdr_inc,_LINUX_TCP_METRICS_H,tcp_metrics.h) +CFLAGS_wireguard:=$(call get_hdr_inc2,_LINUX_WIREGUARD_H,_WG_UAPI_WIREGUARD_H,wireguard.h) diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index 422e186cf3cf1..2f093048d985c 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -36,22 +36,6 @@ CONFIG_MAC80211=y CONFIG_WLAN_VENDOR_INTEL=y CONFIG_IWLWIFI=y -CONFIG_DAMON=y -CONFIG_DAMON_VADDR=y -CONFIG_DAMON_PADDR=y - CONFIG_REGMAP_BUILD=y -CONFIG_AUDIT=y - CONFIG_PRIME_NUMBERS=y - -CONFIG_SECURITY=y -CONFIG_SECURITY_APPARMOR=y -CONFIG_SECURITY_LANDLOCK=y - -CONFIG_SOUND=y -CONFIG_SND=y -CONFIG_SND_SOC=y -CONFIG_SND_SOC_TOPOLOGY_BUILD=y -CONFIG_SND_SOC_CS35L56_I2C=y diff --git a/tools/testing/selftests/drivers/net/psp.py b/tools/testing/selftests/drivers/net/psp.py index 06559ef49b9a5..52523bdad2407 100755 --- a/tools/testing/selftests/drivers/net/psp.py +++ b/tools/testing/selftests/drivers/net/psp.py @@ -573,8 +573,9 @@ def psp_ip_ver_test_builder(name, test_func, psp_ver, ipver): """Build test cases for each combo of PSP version and IP version""" def test_case(cfg): cfg.require_ipver(ipver) - test_case.__name__ = f"{name}_v{psp_ver}_ip{ipver}" test_func(cfg, psp_ver, ipver) + + test_case.__name__ = f"{name}_v{psp_ver}_ip{ipver}" return test_case @@ -582,8 +583,9 @@ def ipver_test_builder(name, test_func, ipver): """Build test cases for each IP version""" def test_case(cfg): cfg.require_ipver(ipver) - test_case.__name__ = f"{name}_ip{ipver}" test_func(cfg, ipver) + + test_case.__name__ = f"{name}_ip{ipver}" return test_case diff --git a/tools/testing/selftests/drivers/net/settings b/tools/testing/selftests/drivers/net/settings new file mode 100644 index 0000000000000..a953c96aa16e1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/settings @@ -0,0 +1 @@ +timeout=180 diff --git a/tools/testing/selftests/kselftest/prefix.pl b/tools/testing/selftests/kselftest/prefix.pl index 12a7f4ca2684d..b8e4d697e3b3a 100755 --- a/tools/testing/selftests/kselftest/prefix.pl +++ b/tools/testing/selftests/kselftest/prefix.pl @@ -4,12 +4,15 @@ # to have unbuffering forced with "stdbuf -i0 -o0 -e0 $cmd". 
use strict; use IO::Handle; +use Time::HiRes qw( time ); binmode STDIN; binmode STDOUT; STDOUT->autoflush(1); +my $start_time = time(); +my $prev_time = $start_time; my $needed = 1; while (1) { my $char; @@ -17,6 +20,11 @@ exit 0 if ($bytes == 0); if ($needed) { print "# "; + if ($ENV{kselftest_profile}) { + my $now = time(); + printf("%.2f [+%.2f] ", $now - $start_time, $now - $prev_time); + $prev_time = $now; + } $needed = 0; } print $char; diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index 3a62039fa6217..dfae04819075d 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -95,7 +95,7 @@ run_one() fi field=$(echo "$line" | cut -d= -f1) value=$(echo "$line" | cut -d= -f2-) - eval "kselftest_$field"="$value" + eval "export kselftest_$field"="$value" done < "$settings" fi diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 3cd677b720728..4c0375e28bbee 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,9 @@ -CFLAGS += $(KHDR_INCLUDES) -Wall -Wflex-array-member-not-at-end +top_srcdir := ../../../../.. +include $(top_srcdir)/scripts/Makefile.compiler + +cc-option = $(call __cc-option, $(CC),,$(1),$(2)) + +CFLAGS += $(KHDR_INCLUDES) -Wall $(call cc-option,-Wflex-array-member-not-at-end) TEST_GEN_PROGS := \ diag_uid \ diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh index ec2d6ceb1f08d..acf6b0617373a 100755 --- a/tools/testing/selftests/net/fib-onlink-tests.sh +++ b/tools/testing/selftests/net/fib-onlink-tests.sh @@ -207,7 +207,7 @@ setup() ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad done - ip -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64} + ip -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64} metric 9999 ip -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64} set +e diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 2b0a90581e2f1..21026b6676670 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -800,6 +800,14 @@ ipv6_fcnal() set +e check_nexthop "dev veth1" "" log_test $? 0 "Nexthops removed on admin down" + + # error routes should be deleted when their nexthop is deleted + run_cmd "$IP li set dev veth1 up" + run_cmd "$IP -6 nexthop add id 58 dev veth1" + run_cmd "$IP ro add blackhole 2001:db8:101::1/128 nhid 58" + run_cmd "$IP nexthop del id 58" + check_route6 "2001:db8:101::1" "" + log_test $? 0 "Error route removed on nexthop deletion" } ipv6_grp_refs() @@ -1459,6 +1467,13 @@ ipv4_fcnal() run_cmd "$IP ro del 172.16.102.0/24" log_test $? 0 "Delete route when not specifying nexthop attributes" + + # error routes should be deleted when their nexthop is deleted + run_cmd "$IP nexthop add id 23 dev veth1" + run_cmd "$IP ro add blackhole 172.16.102.100/32 nhid 23" + run_cmd "$IP nexthop del id 23" + check_route "172.16.102.100" "" + log_test $? 
0 "Error route removed on nexthop deletion" } ipv4_grp_fcnal() diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index a88f797c549a7..c5694cc4ddd26 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -12,7 +12,7 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \ ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \ ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \ ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance \ - fib6_ra_to_static" + ipv4_mpath_balance_preferred fib6_ra_to_static" VERBOSE=0 PAUSE_ON_FAIL=no @@ -2751,6 +2751,73 @@ ipv4_mpath_balance_test() forwarding_cleanup } +get_route_dev_src() +{ + local pfx="$1" + local src="$2" + local out + + if out=$($IP -j route get "$pfx" from "$src" | jq -re ".[0].dev"); then + echo "$out" + fi +} + +ipv4_mpath_preferred() +{ + local src_ip=$1 + local pref_dev=$2 + local dev routes + local route0=0 + local route1=0 + local pref_route=0 + num_routes=254 + + for i in $(seq 1 $num_routes) ; do + dev=$(get_route_dev_src 172.16.105.$i $src_ip) + if [ "$dev" = "$pref_dev" ]; then + pref_route=$((pref_route+1)) + elif [ "$dev" = "veth1" ]; then + route0=$((route0+1)) + elif [ "$dev" = "veth3" ]; then + route1=$((route1+1)) + fi + done + + routes=$((route0+route1)) + + [ "$VERBOSE" = "1" ] && echo "multipath: routes seen: ($route0,$route1,$pref_route)" + + if [ x"$pref_dev" = x"" ]; then + [[ $routes -ge $num_routes ]] && [[ $route0 -gt 0 ]] && [[ $route1 -gt 0 ]] + else + [[ $pref_route -ge $num_routes ]] + fi + +} + +ipv4_mpath_balance_preferred_test() +{ + echo + echo "IPv4 multipath load balance preferred route" + + forwarding_setup + + $IP route add 172.16.105.0/24 \ + nexthop via 172.16.101.2 \ + nexthop via 172.16.103.2 + + ipv4_mpath_preferred 172.16.101.1 veth1 + log_test $? 0 "IPv4 multipath loadbalance from veth1" + + ipv4_mpath_preferred 172.16.103.1 veth3 + log_test $? 0 "IPv4 multipath loadbalance from veth3" + + ipv4_mpath_preferred 198.51.100.1 + log_test $? 
0 "IPv4 multipath loadbalance from dummy" + + forwarding_cleanup +} + ipv6_mpath_balance_test() { echo @@ -2861,6 +2928,7 @@ do ipv6_mpath_list) ipv6_mpath_list_test;; ipv4_mpath_balance) ipv4_mpath_balance_test;; ipv6_mpath_balance) ipv6_mpath_balance_test;; + ipv4_mpath_balance_preferred) ipv4_mpath_balance_preferred_test;; fib6_ra_to_static) fib6_ra_to_static;; help) echo "Test names: $TESTS"; exit 0;; diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index ce64518aaa111..75a6c3d3c1da3 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -29,6 +29,7 @@ CONFIG_NET_ACT_VLAN=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_FLOWER=m CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_CLS_U32=m CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_META=m CONFIG_NETFILTER=y diff --git a/tools/testing/selftests/net/forwarding/settings b/tools/testing/selftests/net/forwarding/settings index e7b9417537fbc..ff3de4936a009 100644 --- a/tools/testing/selftests/net/forwarding/settings +++ b/tools/testing/selftests/net/forwarding/settings @@ -1 +1,2 @@ -timeout=0 +timeout=10800 +profile=1 diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index ea89e558672db..5823d94b19e9b 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -223,19 +223,32 @@ mirred_egress_to_ingress_tcp_test() ip_proto icmp \ action drop - ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 & - local rpid=$! - ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1 - wait -n $rpid - cmp -s $mirred_e2i_tf1 $mirred_e2i_tf2 - check_err $? "server output check failed" + echo P2 $MZ $h1 -c 10 -p 64 -a $h1mac -b $h1mac -A 192.0.2.1 -B 192.0.2.1 \ -t icmp "ping,id=42,seq=5" -q + echo P2.1 tc_check_packets "dev $h1 egress" 101 10 check_err $? "didn't mirred redirect ICMP" + echo P2.2 tc_check_packets "dev $h1 ingress" 102 10 check_err $? "didn't drop mirred ICMP" + echo P2.3 + + echo P1 + + ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 & + local rpid=$! + sleep 0.2 + echo P1.1 + ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1 + echo P1.2 + sleep 0.2 + wait -n $rpid + cmp -s $mirred_e2i_tf1 $mirred_e2i_tf2 + check_err $? 
"server output check failed" + + echo P3 tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower @@ -359,4 +372,5 @@ else tests_run fi +dmesg exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh index 6a570d256e07b..2cf4c6d9245ba 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh @@ -138,13 +138,18 @@ install_capture() defer tc qdisc del dev "$dev" clsact tc filter add dev "$dev" ingress proto ip pref 104 \ - flower skip_hw ip_proto udp dst_port "$VXPORT" \ - action pass + u32 match ip protocol 0x11 0xff \ + match u16 "$VXPORT" 0xffff at 0x16 \ + match u16 0x0800 0xffff at 0x30 \ + action pass defer tc filter del dev "$dev" ingress proto ip pref 104 tc filter add dev "$dev" ingress proto ipv6 pref 106 \ - flower skip_hw ip_proto udp dst_port "$VXPORT" \ - action pass + u32 match ip6 protocol 0x11 0xff \ + match u16 "$VXPORT" 0xffff at 0x2a \ + match u16 0x86dd 0xffff at 0x44 \ + match u8 0x11 0xff at 0x4c \ + action pass defer tc filter del dev "$dev" ingress proto ipv6 pref 106 } @@ -248,13 +253,6 @@ vx_create() } export -f vx_create -vx_wait() -{ - # Wait for all the ARP, IGMP etc. noise to settle down so that the - # tunnel is clear for measurements. - sleep 10 -} - vx10_create() { vx_create vx10 10 id 1000 "$@" @@ -267,18 +265,6 @@ vx20_create() } export -f vx20_create -vx10_create_wait() -{ - vx10_create "$@" - vx_wait -} - -vx20_create_wait() -{ - vx20_create "$@" - vx_wait -} - ns_init_common() { local ns=$1; shift @@ -554,7 +540,7 @@ ipv4_nomcroute() # Install a misleading (S,G) rule to attempt to trick the system into # pushing the packets elsewhere. adf_install_broken_sg - vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$swp2" + vx10_create local 192.0.2.100 group "$GROUP4" dev "$swp2" do_test 4 10 0 "IPv4 nomcroute" } @@ -562,7 +548,7 @@ ipv6_nomcroute() { # Like for IPv4, install a misleading (S,G). 
adf_install_broken_sg - vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$swp2" + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$swp2" do_test 6 10 0 "IPv6 nomcroute" } @@ -581,35 +567,35 @@ ipv6_nomcroute_rx() ipv4_mcroute() { adf_install_sg - vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute do_test 4 10 10 "IPv4 mcroute" } ipv6_mcroute() { adf_install_sg - vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute do_test 6 10 10 "IPv6 mcroute" } ipv4_mcroute_rx() { adf_install_sg - vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute ipv4_do_test_rx 0 "IPv4 mcroute ping" } ipv6_mcroute_rx() { adf_install_sg - vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute ipv6_do_test_rx 0 "IPv6 mcroute ping" } ipv4_mcroute_changelink() { adf_install_sg - vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" ip link set dev vx10 type vxlan mcroute sleep 1 do_test 4 10 10 "IPv4 mcroute changelink" @@ -618,7 +604,7 @@ ipv4_mcroute_changelink() ipv6_mcroute_changelink() { adf_install_sg - vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute ip link set dev vx20 type vxlan mcroute sleep 1 do_test 6 10 10 "IPv6 mcroute changelink" @@ -627,47 +613,47 @@ ipv6_mcroute_changelink() ipv4_mcroute_starg() { adf_install_starg - vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute do_test 4 10 10 "IPv4 mcroute (*,G)" } ipv6_mcroute_starg() { adf_install_starg - vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute do_test 6 10 10 "IPv6 mcroute (*,G)" } ipv4_mcroute_starg_rx() { adf_install_starg - vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute ipv4_do_test_rx 0 "IPv4 mcroute (*,G) ping" } ipv6_mcroute_starg_rx() { adf_install_starg - vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute ipv6_do_test_rx 0 "IPv6 mcroute (*,G) ping" } ipv4_mcroute_noroute() { - vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute do_test 4 0 0 "IPv4 mcroute, no route" } ipv6_mcroute_noroute() { - vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute do_test 6 0 0 "IPv6 mcroute, no route" } ipv4_mcroute_fdb() { adf_install_sg - vx10_create_wait local 192.0.2.100 dev "$IPMR" mcroute + vx10_create local 192.0.2.100 dev "$IPMR" mcroute bridge fdb add dev vx10 \ 00:00:00:00:00:00 self static dst "$GROUP4" via "$IPMR" do_test 4 10 10 "IPv4 mcroute FDB" @@ -676,7 +662,7 @@ ipv4_mcroute_fdb() ipv6_mcroute_fdb() { adf_install_sg - vx20_create_wait local 2001:db8:4::1 dev "$IPMR" mcroute + vx20_create local 2001:db8:4::1 dev "$IPMR" mcroute bridge -6 fdb add dev vx20 \ 00:00:00:00:00:00 self static dst 
"$GROUP6" via "$IPMR" do_test 6 10 10 "IPv6 mcroute FDB" @@ -686,7 +672,7 @@ ipv6_mcroute_fdb() ipv4_mcroute_fdb_oif0() { adf_install_sg - vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute bridge fdb del dev vx10 00:00:00:00:00:00 bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" do_test 4 10 10 "IPv4 mcroute oif=0" @@ -703,7 +689,7 @@ ipv6_mcroute_fdb_oif0() defer ip -6 route del table local multicast "$GROUP6/128" dev "$IPMR" adf_install_sg - vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute bridge -6 fdb del dev vx20 00:00:00:00:00:00 bridge -6 fdb add dev vx20 00:00:00:00:00:00 self static dst "$GROUP6" do_test 6 10 10 "IPv6 mcroute oif=0" @@ -716,7 +702,7 @@ ipv4_mcroute_fdb_oif0_sep() adf_install_sg_sep adf_ip_addr_add lo 192.0.2.120/28 - vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + vx10_create local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute bridge fdb del dev vx10 00:00:00:00:00:00 bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" do_test 4 10 10 "IPv4 mcroute TX!=RX oif=0" @@ -727,7 +713,7 @@ ipv4_mcroute_fdb_oif0_sep_rx() adf_install_sg_sep_rx lo adf_ip_addr_add lo 192.0.2.120/28 - vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + vx10_create local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute bridge fdb del dev vx10 00:00:00:00:00:00 bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX oif=0 ping" @@ -738,7 +724,7 @@ ipv4_mcroute_fdb_sep_rx() adf_install_sg_sep_rx lo adf_ip_addr_add lo 192.0.2.120/28 - vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + vx10_create local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute bridge fdb del dev vx10 00:00:00:00:00:00 bridge fdb add \ dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" via lo @@ -750,7 +736,7 @@ ipv6_mcroute_fdb_sep_rx() adf_install_sg_sep_rx "X$IPMR" adf_ip_addr_add "X$IPMR" 2001:db8:5::1/64 - vx20_create_wait local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute + vx20_create local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute bridge -6 fdb del dev vx20 00:00:00:00:00:00 bridge -6 fdb add dev vx20 00:00:00:00:00:00 \ self static dst "$GROUP6" via "X$IPMR" diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index f448bafb3f208..0ec131b339bc4 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -280,7 +280,8 @@ tc_rule_stats_get() local selector=${1:-.packets}; shift tc -j -s filter show dev $dev $dir pref $pref \ - | jq ".[1].options.actions[].stats$selector" + | jq ".[] | select(.options.actions) | + .options.actions[].stats$selector" } tc_rule_handle_stats_get() diff --git a/tools/testing/selftests/net/lib/ksft.h b/tools/testing/selftests/net/lib/ksft.h index 17dc34a612c64..03912902a6d30 100644 --- a/tools/testing/selftests/net/lib/ksft.h +++ b/tools/testing/selftests/net/lib/ksft.h @@ -24,7 +24,8 @@ static inline void ksft_ready(void) fd = STDOUT_FILENO; } - write(fd, msg, sizeof(msg)); + if (write(fd, msg, sizeof(msg)) < 0) + perror("write()"); if (fd != STDOUT_FILENO) close(fd); } @@ -48,7 +49,8 @@ static inline void ksft_wait(void) fd = STDIN_FILENO; } - read(fd, &byte, sizeof(byte)); + if (read(fd, &byte, sizeof(byte)) < 0) + perror("read()"); if (fd != STDIN_FILENO) close(fd); } diff --git 
a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index ec6a875881919..123d9d7a0278c 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -192,6 +192,10 @@ check "show_endpoints" \ flush_endpoint check "show_endpoints" "" "flush addrs" +add_endpoint 10.0.1.1 flags unknown +check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" "ignore unknown flags" +flush_endpoint + set_limits 9 1 2>/dev/null check "get_limits" "${default_limits}" "rcv addrs above hard limit" diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 65b374232ff5a..99eecccbf0c87 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -24,6 +24,8 @@ #define IPPROTO_MPTCP 262 #endif +#define MPTCP_PM_ADDR_FLAG_UNKNOWN _BITUL(7) + static void syntax(char *argv[]) { fprintf(stderr, "%s add|ann|rem|csf|dsf|get|set|del|flush|dump|events|listen|accept []\n", argv[0]); @@ -836,6 +838,8 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) flags |= MPTCP_PM_ADDR_FLAG_BACKUP; else if (!strcmp(tok, "fullmesh")) flags |= MPTCP_PM_ADDR_FLAG_FULLMESH; + else if (!strcmp(tok, "unknown")) + flags |= MPTCP_PM_ADDR_FLAG_UNKNOWN; else error(1, errno, "unknown flag %s", argv[arg]); @@ -1048,6 +1052,13 @@ static void print_addr(struct rtattr *attrs, int len) printf(","); } + if (flags & MPTCP_PM_ADDR_FLAG_UNKNOWN) { + printf("unknown"); + flags &= ~MPTCP_PM_ADDR_FLAG_UNKNOWN; + if (flags) + printf(","); + } + /* bump unknown flags, if any */ if (flags) printf("0x%x", flags); diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh index 7fc6c5dbd5516..84b8eb12143ae 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_clash.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh @@ -116,7 +116,7 @@ run_one_clash_test() # not a failure: clash resolution logic did not trigger. # With right timing, xmit completed sequentially and # no parallel insertion occurs. 
- return $ksft_skip + return $ksft_xfail } run_clash_test() @@ -133,12 +133,12 @@ run_clash_test() if [ $rv -eq 0 ];then echo "PASS: clash resolution test for $daddr:$dport on attempt $i" return 0 - elif [ $rv -eq $ksft_skip ]; then + elif [ $rv -eq $ksft_xfail ]; then softerr=1 fi done - [ $softerr -eq 1 ] && echo "SKIP: clash resolution for $daddr:$dport did not trigger" + [ $softerr -eq 1 ] && echo "XFAIL: clash resolution for $daddr:$dport did not trigger" } ip link add veth0 netns "$nsclient1" type veth peer name veth0 netns "$nsrouter" @@ -167,8 +167,7 @@ load_simple_ruleset "$nsclient2" run_clash_test "$nsclient2" "$nsclient2" 127.0.0.1 9001 if [ $clash_resolution_active -eq 0 ];then - [ "$ret" -eq 0 ] && ret=$ksft_skip - echo "SKIP: Clash resolution did not trigger" + [ "$ret" -eq 0 ] && ret=$ksft_xfail fi exit $ret diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c index 507930cee8cb6..462d628cc3bdb 100644 --- a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c +++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c @@ -33,9 +33,14 @@ static void die(const char *e) exit(111); } -static void die_port(uint16_t got, uint16_t want) +static void die_port(const struct sockaddr_in *sin, uint16_t want) { - fprintf(stderr, "Port number changed, wanted %d got %d\n", want, ntohs(got)); + uint16_t got = ntohs(sin->sin_port); + char str[INET_ADDRSTRLEN]; + + inet_ntop(AF_INET, &sin->sin_addr, str, sizeof(str)); + + fprintf(stderr, "Port number changed, wanted %d got %d from %s\n", want, got, str); exit(1); } @@ -100,7 +105,7 @@ int main(int argc, char *argv[]) die("child recvfrom"); if (peer.sin_port != htons(PORT)) - die_port(peer.sin_port, PORT); + die_port(&peer, PORT); } else { if (sendto(s2, buf, LEN, 0, (struct sockaddr *)&sa1, sizeof(sa1)) != LEN) continue; @@ -109,7 +114,7 @@ int main(int argc, char *argv[]) die("parent recvfrom"); if (peer.sin_port != htons((PORT + 1))) - die_port(peer.sin_port, PORT + 1); + die_port(&peer, PORT + 1); } } diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh index a24c896347a88..dc7e9d6da0624 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh @@ -45,6 +45,8 @@ if ip netns exec "$ns0" ./conntrack_reverse_clash; then echo "PASS: No SNAT performed for null bindings" else echo "ERROR: SNAT performed without any matching snat rule" + ip netns exec "$ns0" conntrack -L + ip netns exec "$ns0" conntrack -S exit 1 fi diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt index 3442cd29bc932..cdb3910af95b4 100644 --- a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt +++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt @@ -26,7 +26,7 @@ +0.01 > R 643160523:643160523(0) win 0 -+0.01 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT` ++0.1 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT` // Must go through. 
+0.01 > S 0:0(0) win 65535 diff --git a/tools/testing/selftests/net/settings b/tools/testing/selftests/net/settings index ed8418e8217a0..a38764182822e 100644 --- a/tools/testing/selftests/net/settings +++ b/tools/testing/selftests/net/settings @@ -1 +1,2 @@ timeout=3600 +profile=1 diff --git a/tools/testing/selftests/net/tap.c b/tools/testing/selftests/net/tap.c index 9ec1c9b50e772..a0c9418132c82 100644 --- a/tools/testing/selftests/net/tap.c +++ b/tools/testing/selftests/net/tap.c @@ -56,18 +56,12 @@ static void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr) static struct rtattr *rtattr_add_str(struct nlmsghdr *nh, unsigned short type, const char *s) { - struct rtattr *rta = rtattr_add(nh, type, strlen(s)); + unsigned int strsz = strlen(s) + 1; + struct rtattr *rta; - memcpy(RTA_DATA(rta), s, strlen(s)); - return rta; -} - -static struct rtattr *rtattr_add_strsz(struct nlmsghdr *nh, unsigned short type, - const char *s) -{ - struct rtattr *rta = rtattr_add(nh, type, strlen(s) + 1); + rta = rtattr_add(nh, type, strsz); - strcpy(RTA_DATA(rta), s); + memcpy(RTA_DATA(rta), s, strsz); return rta; } @@ -119,7 +113,7 @@ static int dev_create(const char *dev, const char *link_type, link_info = rtattr_begin(&req.nh, IFLA_LINKINFO); - rtattr_add_strsz(&req.nh, IFLA_INFO_KIND, link_type); + rtattr_add_str(&req.nh, IFLA_INFO_KIND, link_type); if (fill_info_data) { info_data = rtattr_begin(&req.nh, IFLA_INFO_DATA); diff --git a/tools/testing/selftests/net/tfo.c b/tools/testing/selftests/net/tfo.c index eb3cac5e583c9..8d82140f0f767 100644 --- a/tools/testing/selftests/net/tfo.c +++ b/tools/testing/selftests/net/tfo.c @@ -81,7 +81,8 @@ static void run_server(void) if (getsockopt(connfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &opt, &len) < 0) error(1, errno, "getsockopt(SO_INCOMING_NAPI_ID)"); - read(connfd, buf, 64); + if (read(connfd, buf, 64) < 0) + perror("read()"); fprintf(outfile, "%d\n", opt); fclose(outfile); diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index a3ef4b57eb5f8..a4d16a460fbe4 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -2786,10 +2786,10 @@ TEST_F(tls_err, epoll_partial_rec) TEST_F(tls_err, poll_partial_rec_async) { struct pollfd pfd = { }; + char token = '\0'; ssize_t rec_len; char rec[256]; char buf[128]; - char token; int p[2]; int ret; diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index bcc14688661dc..4b4bbc2ce5c9b 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -62,6 +62,7 @@ static int do_ipv4 = 1; static int do_ipv6 = 1; static int cfg_payload_len = 10; static int cfg_poll_timeout = 100; +static bool cfg_wake_every_msec = true; static int cfg_delay_snd; static int cfg_delay_ack; static int cfg_delay_tolerance_usec = 500; @@ -286,6 +287,16 @@ static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr) daddr ? 
inet_ntop(family, daddr, da, sizeof(da)) : "unknown"); } +static int64_t get_time_now_us64(void) +{ + static struct timespec ts; + + if (clock_gettime(CLOCK_REALTIME, &ts)) + error(1, errno, "clock_gettime"); + + return timespec_to_us64(&ts); +} + static void __epoll(int epfd) { struct epoll_event events; @@ -300,11 +311,19 @@ static void __epoll(int epfd) static void __poll(int fd) { struct pollfd pollfd; + int64_t end_of_wait; + int timeout; int ret; - memset(&pollfd, 0, sizeof(pollfd)); - pollfd.fd = fd; - ret = poll(&pollfd, 1, cfg_poll_timeout); + timeout = cfg_wake_every_msec ? 1 : cfg_poll_timeout; + end_of_wait = get_time_now_us64() + cfg_poll_timeout * 1000; + + do { + memset(&pollfd, 0, sizeof(pollfd)); + pollfd.fd = fd; + ret = poll(&pollfd, 1, timeout); + } while (!ret && get_time_now_us64() < end_of_wait); + if (ret != 1) error(1, errno, "poll"); } @@ -707,6 +726,7 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -P: use PF_PACKET\n" " -r: use raw\n" " -R: use raw (IP_HDRINCL)\n" + " -s: single sleep until timeout (from -S), by default wake every 1msec\n" " -S N: usec to sleep before reading error queue\n" " -t N: tolerance (usec) for timestamp validation\n" " -u: use udp\n" @@ -723,7 +743,7 @@ static void parse_opt(int argc, char **argv) int c; while ((c = getopt(argc, argv, - "46bc:CeEFhIl:LnNo:p:PrRS:t:uv:V:x")) != -1) { + "46bc:CeEFhIl:LnNo:p:PrRsS:t:uv:V:x")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -787,6 +807,9 @@ static void parse_opt(int argc, char **argv) cfg_proto = SOCK_RAW; cfg_ipproto = IPPROTO_RAW; break; + case 's': /* sleep 'till timeout */ + cfg_wake_every_msec = false; + break; case 'S': cfg_sleep_usec = strtoul(optarg, NULL, 10); break; @@ -825,6 +848,8 @@ static void parse_opt(int argc, char **argv) error(1, 0, "cannot ask for pktinfo over pf_packet"); if (cfg_busy_poll && cfg_use_epoll) error(1, 0, "pass epoll or busy_poll, not both"); + if (cfg_wake_every_msec && cfg_use_epoll) + error(1, 0, "periodic wake not implemented for epoll, use -s"); if (cfg_proto == SOCK_STREAM && cfg_use_cmsg_opt_id) error(1, 0, "TCP sockets don't support SCM_TS_OPT_ID"); diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json index b73bd255ea36f..da156feabcbff 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json @@ -1052,5 +1052,51 @@ "$TC qdisc del dev $DEV1 ingress_block 21 clsact", "$TC actions flush action mirred" ] + }, + { + "id": "7eba", + "name": "Redirect multiport: dummy egress -> dummy egress (Loop)", + "category": [ + "filter", + "mirred" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root drr", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 10 matchall action mirred egress redirect dev $DUMMY index 1" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC -j -s actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "egress", + "index": 1, + "stats": { + "packets": 1, + "overlimits": 1 + }, + "not_in_hw": true + } + ] + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] } ] diff --git 
a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 47de27fd4f90f..6a39640aa2a86 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -1033,5 +1033,83 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "6e4f", + "name": "Try to delete ets drr class' qdisc while still keeping it in the active list", + "category": [ + "qdisc", + "ets", + "tbf" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: ets bands 2 strict 1", + "$TC qdisc add dev $DUMMY parent 1:2 handle 20: tbf rate 8bit burst 100b latency 1s", + "$TC filter add dev $DUMMY parent 1: basic classid 1:2", + "ping -c2 -W0.01 -s 56 -I $DUMMY 10.10.11.11 || true", + "$TC qdisc change dev $DUMMY root handle 1: ets bands 2 strict 2", + "$TC qdisc change dev $DUMMY root handle 1: ets bands 1 strict 1" + ], + "cmdUnderTest": "ping -c1 -W0.01 -s 56 -I $DUMMY 10.10.11.11", + "expExitCode": "1", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY root", + "matchJSON": [ + { + "kind": "ets", + "handle": "1:", + "bytes": 196, + "packets": 2 + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1:" + ] + }, + { + "id": "0b8f", + "name": "Try to add ets class to the active list twice", + "category": [ + "qdisc", + "ets", + "tbf" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: ets bands 2 strict 1", + "$TC qdisc add dev $DUMMY parent 1:2 handle 20: tbf rate 8bit burst 100b latency 1s", + "$TC filter add dev $DUMMY parent 1: basic classid 1:2", + "ping -c2 -W0.01 -s 56 -I $DUMMY 10.10.11.11 || true", + "$TC qdisc change dev $DUMMY root handle 1: ets bands 2 strict 2", + "$TC qdisc change dev $DUMMY root handle 1: ets bands 2 strict 1" + ], + "cmdUnderTest": "ping -c1 -W0.01 -s 56 -I $DUMMY 10.10.11.11", + "expExitCode": "1", + "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY root", + "matchJSON": [ + { + "kind": "ets", + "handle": "1:", + "bytes": 98, + "packets": 1 + } + ], + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1:" + ] } ]